100-overlayfs.patch 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231
  1. --- a/Documentation/filesystems/Locking
  2. +++ b/Documentation/filesystems/Locking
  3. @@ -65,6 +65,7 @@ prototypes:
  4. struct file *, unsigned open_flag,
  5. umode_t create_mode, int *opened);
  6. int (*tmpfile) (struct inode *, struct dentry *, umode_t);
  7. + int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
  8. locking rules:
  9. all may block
  10. @@ -93,6 +94,7 @@ fiemap: no
  11. update_time: no
  12. atomic_open: yes
  13. tmpfile: no
  14. +dentry_open: no
  15. Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
  16. victim.
  17. --- /dev/null
  18. +++ b/Documentation/filesystems/overlayfs.txt
  19. @@ -0,0 +1,199 @@
  20. +Written by: Neil Brown <neilb@suse.de>
  21. +
  22. +Overlay Filesystem
  23. +==================
  24. +
  25. +This document describes a prototype for a new approach to providing
  26. +overlay-filesystem functionality in Linux (sometimes referred to as
  27. +union-filesystems). An overlay-filesystem tries to present a
  28. +filesystem which is the result of overlaying one filesystem on top
  29. +of the other.
  30. +
  31. +The result will inevitably fail to look exactly like a normal
  32. +filesystem for various technical reasons. The expectation is that
  33. +many use cases will be able to ignore these differences.
  34. +
  35. +This approach is 'hybrid' because the objects that appear in the
  36. +filesystem do not all appear to belong to that filesystem. In many
  37. +cases an object accessed in the union will be indistinguishable
  38. +from accessing the corresponding object from the original filesystem.
  39. +This is most obvious from the 'st_dev' field returned by stat(2).
  40. +
  41. +While directories will report an st_dev from the overlay-filesystem,
  42. +all non-directory objects will report an st_dev from the lower or
  43. +upper filesystem that is providing the object. Similarly st_ino will
  44. +only be unique when combined with st_dev, and both of these can change
  45. +over the lifetime of a non-directory object. Many applications and
  46. +tools ignore these values and will not be affected.
  47. +
  48. +Upper and Lower
  49. +---------------
  50. +
  51. +An overlay filesystem combines two filesystems - an 'upper' filesystem
  52. +and a 'lower' filesystem. When a name exists in both filesystems, the
  53. +object in the 'upper' filesystem is visible while the object in the
  54. +'lower' filesystem is either hidden or, in the case of directories,
  55. +merged with the 'upper' object.
  56. +
  57. +It would be more correct to refer to an upper and lower 'directory
  58. +tree' rather than 'filesystem' as it is quite possible for both
  59. +directory trees to be in the same filesystem and there is no
  60. +requirement that the root of a filesystem be given for either upper or
  61. +lower.
  62. +
  63. +The lower filesystem can be any filesystem supported by Linux and does
  64. +not need to be writable. The lower filesystem can even be another
  65. +overlayfs. The upper filesystem will normally be writable and if it
  66. +is it must support the creation of trusted.* extended attributes, and
  67. +must provide valid d_type in readdir responses, at least for symbolic
  68. +links - so NFS is not suitable.
  69. +
  70. +A read-only overlay of two read-only filesystems may use any
  71. +filesystem type.
  72. +
  73. +Directories
  74. +-----------
  75. +
  76. +Overlaying mainly involves directories. If a given name appears in both
  77. +upper and lower filesystems and refers to a non-directory in either,
  78. +then the lower object is hidden - the name refers only to the upper
  79. +object.
  80. +
  81. +Where both upper and lower objects are directories, a merged directory
  82. +is formed.
  83. +
  84. +At mount time, the two directories given as mount options are combined
  85. +into a merged directory:
  86. +
  87. + mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper /overlay
  88. +
  89. +Then whenever a lookup is requested in such a merged directory, the
  90. +lookup is performed in each actual directory and the combined result
  91. +is cached in the dentry belonging to the overlay filesystem. If both
  92. +actual lookups find directories, both are stored and a merged
  93. +directory is created, otherwise only one is stored: the upper if it
  94. +exists, else the lower.
  95. +
  96. +Only the lists of names from directories are merged. Other content
  97. +such as metadata and extended attributes are reported for the upper
  98. +directory only. These attributes of the lower directory are hidden.
  99. +
  100. +whiteouts and opaque directories
  101. +--------------------------------
  102. +
  103. +In order to support rm and rmdir without changing the lower
  104. +filesystem, an overlay filesystem needs to record in the upper filesystem
  105. +that files have been removed. This is done using whiteouts and opaque
  106. +directories (non-directories are always opaque).
  107. +
  108. +The overlay filesystem uses extended attributes with a
  109. +"trusted.overlay." prefix to record these details.
  110. +
  111. +A whiteout is created as a symbolic link with target
  112. +"(overlay-whiteout)" and with xattr "trusted.overlay.whiteout" set to "y".
  113. +When a whiteout is found in the upper level of a merged directory, any
  114. +matching name in the lower level is ignored, and the whiteout itself
  115. +is also hidden.
  116. +
  117. +A directory is made opaque by setting the xattr "trusted.overlay.opaque"
  118. +to "y". Where the upper filesystem contains an opaque directory, any
  119. +directory in the lower filesystem with the same name is ignored.
  120. +
  121. +readdir
  122. +-------
  123. +
  124. +When a 'readdir' request is made on a merged directory, the upper and
  125. +lower directories are each read and the name lists merged in the
  126. +obvious way (upper is read first, then lower - entries that already
  127. +exist are not re-added). This merged name list is cached in the
  128. +'struct file' and so remains as long as the file is kept open. If the
  129. +directory is opened and read by two processes at the same time, they
  130. +will each have separate caches. A seekdir to the start of the
  131. +directory (offset 0) followed by a readdir will cause the cache to be
  132. +discarded and rebuilt.
  133. +
  134. +This means that changes to the merged directory do not appear while a
  135. +directory is being read. This is unlikely to be noticed by many
  136. +programs.
  137. +
  138. +Seek offsets are assigned sequentially when the directories are read.
  139. +Thus if a process were to
  140. + - read part of a directory
  141. + - remember an offset, and close the directory
  142. + - re-open the directory some time later
  143. + - seek to the remembered offset
  144. +
  145. +there may be little correlation between the old and new locations in
  146. +the list of filenames, particularly if anything has changed in the
  147. +directory.
  148. +
  149. +Readdir on directories that are not merged is simply handled by the
  150. +underlying directory (upper or lower).
  151. +
  152. +
  153. +Non-directories
  154. +---------------
  155. +
  156. +Objects that are not directories (files, symlinks, device-special
  157. +files etc.) are presented either from the upper or lower filesystem as
  158. +appropriate. When a file in the lower filesystem is accessed in a way
  159. +that requires write-access, such as opening for write access, changing
  160. +some metadata etc., the file is first copied from the lower filesystem
  161. +to the upper filesystem (copy_up). Note that creating a hard-link
  162. +also requires copy_up, though of course creation of a symlink does
  163. +not.
  164. +
  165. +The copy_up may turn out to be unnecessary, for example if the file is
  166. +opened for read-write but the data is not modified.
  167. +
  168. +The copy_up process first makes sure that the containing directory
  169. +exists in the upper filesystem - creating it and any parents as
  170. +necessary. It then creates the object with the same metadata (owner,
  171. +mode, mtime, symlink-target etc.) and then if the object is a file, the
  172. +data is copied from the lower to the upper filesystem. Finally any
  173. +extended attributes are copied up.
  174. +
  175. +Once the copy_up is complete, the overlay filesystem simply
  176. +provides direct access to the newly created file in the upper
  177. +filesystem - future operations on the file are barely noticed by the
  178. +overlay filesystem (though an operation on the name of the file such as
  179. +rename or unlink will of course be noticed and handled).
  180. +
  181. +
  182. +Non-standard behavior
  183. +---------------------
  184. +
  185. +The copy_up operation essentially creates a new, identical file and
  186. +moves it over to the old name. The new file may be on a different
  187. +filesystem, so both st_dev and st_ino of the file may change.
  188. +
  189. +Any open files referring to this inode will access the old data and
  190. +metadata. Similarly any file locks obtained before copy_up will not
  191. +apply to the copied up file.
  192. +
  193. +On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2) and
  194. +fsetxattr(2) will fail with EROFS.
  195. +
  196. +If a file with multiple hard links is copied up, then this will
  197. +"break" the link. Changes will not be propagated to other names
  198. +referring to the same inode.
  199. +
  200. +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory
  201. +object in overlayfs will not contain valid absolute paths, only
  202. +relative paths leading up to the filesystem's root. This will be
  203. +fixed in the future.
  204. +
  205. +Some operations are not atomic, for example a crash during copy_up or
  206. +rename will leave the filesystem in an inconsistent state. This will
  207. +be addressed in the future.
  208. +
  209. +Changes to underlying filesystems
  210. +---------------------------------
  211. +
  212. +Offline changes, when the overlay is not mounted, are allowed to either
  213. +the upper or the lower trees.
  214. +
  215. +Changes to the underlying filesystems while part of a mounted overlay
  216. +filesystem are not allowed. If the underlying filesystem is changed,
  217. +the behavior of the overlay is undefined, though it will not result in
  218. +a crash or deadlock.
  219. --- a/Documentation/filesystems/vfs.txt
  220. +++ b/Documentation/filesystems/vfs.txt
  221. @@ -362,6 +362,7 @@ struct inode_operations {
  222. int (*atomic_open)(struct inode *, struct dentry *, struct file *,
  223. unsigned open_flag, umode_t create_mode, int *opened);
  224. int (*tmpfile) (struct inode *, struct dentry *, umode_t);
  225. + int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
  226. };
  227. Again, all methods are called without any locks being held, unless
  228. @@ -681,6 +682,12 @@ struct address_space_operations {
  229. but instead uses bmap to find out where the blocks in the file
  230. are and uses those addresses directly.
  231. + dentry_open: this is an alternative to f_op->open(), the difference is that
  232. + this method may open a file not necessarily originating from the same
  233. + filesystem as the one i_op->dentry_open() was called on. It may be
  234. + useful for stacking filesystems which want to allow native I/O directly
  235. + on underlying files.
  236. +
  237. invalidatepage: If a page has PagePrivate set, then invalidatepage
  238. will be called when part or all of the page is to be removed
  239. --- a/MAINTAINERS
  240. +++ b/MAINTAINERS
  241. @@ -6345,6 +6345,13 @@ F: drivers/scsi/osd/
  242. F: include/scsi/osd_*
  243. F: fs/exofs/
  244. +OVERLAYFS FILESYSTEM
  245. +M: Miklos Szeredi <miklos@szeredi.hu>
  246. +L: linux-fsdevel@vger.kernel.org
  247. +S: Supported
  248. +F: fs/overlayfs/*
  249. +F: Documentation/filesystems/overlayfs.txt
  250. +
  251. P54 WIRELESS DRIVER
  252. M: Christian Lamparter <chunkeey@googlemail.com>
  253. L: linux-wireless@vger.kernel.org
  254. --- a/fs/Kconfig
  255. +++ b/fs/Kconfig
  256. @@ -67,6 +67,7 @@ source "fs/quota/Kconfig"
  257. source "fs/autofs4/Kconfig"
  258. source "fs/fuse/Kconfig"
  259. +source "fs/overlayfs/Kconfig"
  260. config GENERIC_ACL
  261. bool
  262. --- a/fs/Makefile
  263. +++ b/fs/Makefile
  264. @@ -105,6 +105,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/
  265. obj-$(CONFIG_AUTOFS4_FS) += autofs4/
  266. obj-$(CONFIG_ADFS_FS) += adfs/
  267. obj-$(CONFIG_FUSE_FS) += fuse/
  268. +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
  269. obj-$(CONFIG_UDF_FS) += udf/
  270. obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
  271. obj-$(CONFIG_OMFS_FS) += omfs/
  272. --- a/fs/ecryptfs/main.c
  273. +++ b/fs/ecryptfs/main.c
  274. @@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(str
  275. s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
  276. s->s_blocksize = path.dentry->d_sb->s_blocksize;
  277. s->s_magic = ECRYPTFS_SUPER_MAGIC;
  278. + s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
  279. +
  280. + rc = -EINVAL;
  281. + if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
  282. + pr_err("eCryptfs: maximum fs stacking depth exceeded\n");
  283. + goto out_free;
  284. + }
  285. inode = ecryptfs_get_inode(path.dentry->d_inode, s);
  286. rc = PTR_ERR(inode);
  287. --- a/fs/internal.h
  288. +++ b/fs/internal.h
  289. @@ -42,7 +42,6 @@ extern void __init chrdev_init(void);
  290. /*
  291. * namei.c
  292. */
  293. -extern int __inode_permission(struct inode *, int);
  294. extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
  295. extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
  296. const char *, unsigned int, struct path *);
  297. @@ -135,12 +134,6 @@ extern ssize_t __kernel_write(struct fil
  298. extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
  299. /*
  300. - * splice.c
  301. - */
  302. -extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
  303. - loff_t *opos, size_t len, unsigned int flags);
  304. -
  305. -/*
  306. * pipe.c
  307. */
  308. extern const struct file_operations pipefifo_fops;
  309. --- a/fs/namei.c
  310. +++ b/fs/namei.c
  311. @@ -402,6 +402,7 @@ int __inode_permission(struct inode *ino
  312. return security_inode_permission(inode, mask);
  313. }
  314. +EXPORT_SYMBOL(__inode_permission);
  315. /**
  316. * sb_permission - Check superblock-level permissions
  317. @@ -3027,9 +3028,12 @@ finish_open_created:
  318. error = may_open(&nd->path, acc_mode, open_flag);
  319. if (error)
  320. goto out;
  321. - file->f_path.mnt = nd->path.mnt;
  322. - error = finish_open(file, nd->path.dentry, NULL, opened);
  323. - if (error) {
  324. +
  325. + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
  326. + error = vfs_open(&nd->path, file, current_cred());
  327. + if (!error) {
  328. + *opened |= FILE_OPENED;
  329. + } else {
  330. if (error == -EOPENSTALE)
  331. goto stale_open;
  332. goto out;
  333. --- a/fs/namespace.c
  334. +++ b/fs/namespace.c
  335. @@ -1495,6 +1495,33 @@ void drop_collected_mounts(struct vfsmou
  336. namespace_unlock();
  337. }
  338. +/**
  339. + * clone_private_mount - create a private clone of a path
  340. + *
  341. + * This creates a new vfsmount, which will be the clone of @path. The new mount
  342. + * will not be attached anywhere in the namespace and will be private (i.e.
  343. + * changes to the originating mount won't be propagated into this).
  344. + *
  345. + * Returns the new vfsmount or an ERR_PTR() on failure. Release with mntput().
  346. + */
  347. +struct vfsmount *clone_private_mount(struct path *path)
  348. +{
  349. + struct mount *old_mnt = real_mount(path->mnt);
  350. + struct mount *new_mnt;
  351. +
  352. + if (IS_MNT_UNBINDABLE(old_mnt)) /* unbindable mounts are refused */
  353. + return ERR_PTR(-EINVAL);
  354. +
  355. + down_read(&namespace_sem); /* held only around clone_mnt() */
  356. + new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
  357. + up_read(&namespace_sem);
  358. + if (IS_ERR(new_mnt))
  359. + return ERR_CAST(new_mnt); /* pass the clone_mnt() error through */
  360. +
  361. + return &new_mnt->mnt; /* the vfsmount embedded in the new mount */
  362. +}
  363. +EXPORT_SYMBOL_GPL(clone_private_mount);
  364. +
  365. int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
  366. struct vfsmount *root)
  367. {
  368. --- a/fs/open.c
  369. +++ b/fs/open.c
  370. @@ -830,8 +830,7 @@ struct file *dentry_open(const struct pa
  371. f = get_empty_filp();
  372. if (!IS_ERR(f)) {
  373. f->f_flags = flags;
  374. - f->f_path = *path;
  375. - error = do_dentry_open(f, NULL, cred);
  376. + error = vfs_open(path, f, cred);
  377. if (!error) {
  378. /* from now on we need fput() to dispose of f */
  379. error = open_check_o_direct(f);
  380. @@ -848,6 +847,26 @@ struct file *dentry_open(const struct pa
  381. }
  382. EXPORT_SYMBOL(dentry_open);
  383. +/**
  384. + * vfs_open - open the file at the given path
  385. + * @path: path to open
  386. + * @filp: newly allocated file with f_flags initialized
  387. + * @cred: credentials to use
  388. + */
  389. +int vfs_open(const struct path *path, struct file *filp,
  390. + const struct cred *cred)
  391. +{
  392. + struct inode *inode = path->dentry->d_inode;
  393. +
  394. + if (inode->i_op->dentry_open) /* inode provides its own open method */
  395. + return inode->i_op->dentry_open(path->dentry, filp, cred); /* note: f_path is not set on this branch */
  396. + else {
  397. + filp->f_path = *path; /* default: open the file at @path itself */
  398. + return do_dentry_open(filp, NULL, cred);
  399. + }
  400. +}
  401. +EXPORT_SYMBOL(vfs_open);
  402. +
  403. static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
  404. {
  405. int lookup_flags = 0;
  406. --- /dev/null
  407. +++ b/fs/overlayfs/Kconfig
  408. @@ -0,0 +1,10 @@
  409. +config OVERLAYFS_FS
  410. + tristate "Overlay filesystem support"
  411. + help
  412. + An overlay filesystem combines two filesystems - an 'upper' filesystem
  413. + and a 'lower' filesystem. When a name exists in both filesystems, the
  414. + object in the 'upper' filesystem is visible while the object in the
  415. + 'lower' filesystem is either hidden or, in the case of directories,
  416. + merged with the 'upper' object.
  417. +
  418. + For more information see Documentation/filesystems/overlayfs.txt
  419. --- /dev/null
  420. +++ b/fs/overlayfs/Makefile
  421. @@ -0,0 +1,7 @@
  422. +#
  423. +# Makefile for the overlay filesystem.
  424. +#
  425. +
  426. +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
  427. +
  428. +overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
  429. --- /dev/null
  430. +++ b/fs/overlayfs/copy_up.c
  431. @@ -0,0 +1,388 @@
  432. +/*
  433. + *
  434. + * Copyright (C) 2011 Novell Inc.
  435. + *
  436. + * This program is free software; you can redistribute it and/or modify it
  437. + * under the terms of the GNU General Public License version 2 as published by
  438. + * the Free Software Foundation.
  439. + */
  440. +
  441. +#include <linux/fs.h>
  442. +#include <linux/slab.h>
  443. +#include <linux/file.h>
  444. +#include <linux/splice.h>
  445. +#include <linux/xattr.h>
  446. +#include <linux/security.h>
  447. +#include <linux/uaccess.h>
  448. +#include <linux/sched.h>
  449. +#include "overlayfs.h"
  450. +
  451. +#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  452. +/* Copy every xattr of @old onto @new; returns 0 or a negative errno. */
  453. +static int ovl_copy_up_xattr(struct dentry *old, struct dentry *new)
  454. +{
  455. + ssize_t list_size, size;
  456. + char *buf, *name, *value;
  457. + int error;
  458. +
  459. + if (!old->d_inode->i_op->getxattr || /* either side lacks getxattr: skip */
  460. + !new->d_inode->i_op->getxattr)
  461. + return 0;
  462. +
  463. + list_size = vfs_listxattr(old, NULL, 0); /* size used for the name buffer */
  464. + if (list_size <= 0) {
  465. + if (list_size == -EOPNOTSUPP) /* unsupported is not a failure here */
  466. + return 0;
  467. + return list_size;
  468. + }
  469. +
  470. + buf = kzalloc(list_size, GFP_KERNEL);
  471. + if (!buf)
  472. + return -ENOMEM;
  473. +
  474. + error = -ENOMEM;
  475. + value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); /* one buffer reused per value */
  476. + if (!value)
  477. + goto out;
  478. +
  479. + list_size = vfs_listxattr(old, buf, list_size); /* NUL-terminated names */
  480. + if (list_size <= 0) {
  481. + error = list_size;
  482. + goto out_free_value;
  483. + }
  484. +
  485. + for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
  486. + size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
  487. + if (size < 0) { /* 0 is a valid empty value; only <0 is an error */
  488. + error = size;
  489. + goto out_free_value;
  490. + }
  491. + error = vfs_setxattr(new, name, value, size, 0);
  492. + if (error) /* stop at the first attribute that fails */
  493. + goto out_free_value;
  494. + }
  495. +
  496. +out_free_value:
  497. + kfree(value);
  498. +out:
  499. + kfree(buf);
  500. + return error;
  501. +}
  502. +
  503. +static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
  504. +{
  505. + struct file *old_file;
  506. + struct file *new_file;
  507. + loff_t old_pos = 0;
  508. + loff_t new_pos = 0;
  509. + int error = 0;
  510. +
  511. + if (len == 0)
  512. + return 0;
  513. +
  514. + old_file = ovl_path_open(old, O_RDONLY);
  515. + if (IS_ERR(old_file))
  516. + return PTR_ERR(old_file);
  517. +
  518. + new_file = ovl_path_open(new, O_WRONLY);
  519. + if (IS_ERR(new_file)) {
  520. + error = PTR_ERR(new_file);
  521. + goto out_fput;
  522. + }
  523. +
  524. + /* FIXME: copy up sparse files efficiently */
  525. + while (len) {
  526. + size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
  527. + long bytes;
  528. +
  529. + if (len < this_len)
  530. + this_len = len;
  531. +
  532. + if (signal_pending_state(TASK_KILLABLE, current)) {
  533. + error = -EINTR;
  534. + break;
  535. + }
  536. +
  537. + bytes = do_splice_direct(old_file, &old_pos,
  538. + new_file, &new_pos,
  539. + this_len, SPLICE_F_MOVE);
  540. + if (bytes <= 0) {
  541. + error = bytes;
  542. + break;
  543. + }
  544. + WARN_ON(old_pos != new_pos);
  545. +
  546. + len -= bytes;
  547. + }
  548. +
  549. + fput(new_file);
  550. +out_fput:
  551. + fput(old_file);
  552. + return error;
  553. +}
  554. +
  555. +static char *ovl_read_symlink(struct dentry *realdentry)
  556. +{
  557. + int res;
  558. + char *buf;
  559. + struct inode *inode = realdentry->d_inode;
  560. + mm_segment_t old_fs;
  561. +
  562. + res = -EINVAL;
  563. + if (!inode->i_op->readlink)
  564. + goto err;
  565. +
  566. + res = -ENOMEM;
  567. + buf = (char *) __get_free_page(GFP_KERNEL);
  568. + if (!buf)
  569. + goto err;
  570. +
  571. + old_fs = get_fs();
  572. + set_fs(get_ds());
  573. + /* The cast to a user pointer is valid due to the set_fs() */
  574. + res = inode->i_op->readlink(realdentry,
  575. + (char __user *)buf, PAGE_SIZE - 1);
  576. + set_fs(old_fs);
  577. + if (res < 0) {
  578. + free_page((unsigned long) buf);
  579. + goto err;
  580. + }
  581. + buf[res] = '\0';
  582. +
  583. + return buf;
  584. +
  585. +err:
  586. + return ERR_PTR(res);
  587. +}
  588. +
  589. +static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
  590. +{
  591. + struct iattr attr = {
  592. + .ia_valid =
  593. + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
  594. + .ia_atime = stat->atime,
  595. + .ia_mtime = stat->mtime,
  596. + };
  597. +
  598. + return notify_change(upperdentry, &attr, NULL);
  599. +}
  600. +
  601. +static int ovl_set_mode(struct dentry *upperdentry, umode_t mode)
  602. +{
  603. + struct iattr attr = {
  604. + .ia_valid = ATTR_MODE,
  605. + .ia_mode = mode,
  606. + };
  607. +
  608. + return notify_change(upperdentry, &attr, NULL);
  609. +}
  610. +
  611. +static int ovl_copy_up_locked(struct dentry *upperdir, struct dentry *dentry,
  612. + struct path *lowerpath, struct kstat *stat,
  613. + const char *link)
  614. +{
  615. + int err;
  616. + struct path newpath;
  617. + umode_t mode = stat->mode;
  618. +
  619. + /* Can't properly set mode on creation because of the umask */
  620. + stat->mode &= S_IFMT;
  621. +
  622. + ovl_path_upper(dentry, &newpath);
  623. + WARN_ON(newpath.dentry);
  624. + newpath.dentry = ovl_upper_create(upperdir, dentry, stat, link);
  625. + if (IS_ERR(newpath.dentry))
  626. + return PTR_ERR(newpath.dentry);
  627. +
  628. + if (S_ISREG(stat->mode)) {
  629. + err = ovl_copy_up_data(lowerpath, &newpath, stat->size);
  630. + if (err)
  631. + goto err_remove;
  632. + }
  633. +
  634. + err = ovl_copy_up_xattr(lowerpath->dentry, newpath.dentry);
  635. + if (err)
  636. + goto err_remove;
  637. +
  638. + mutex_lock(&newpath.dentry->d_inode->i_mutex);
  639. + if (!S_ISLNK(stat->mode))
  640. + err = ovl_set_mode(newpath.dentry, mode);
  641. + if (!err)
  642. + err = ovl_set_timestamps(newpath.dentry, stat);
  643. + mutex_unlock(&newpath.dentry->d_inode->i_mutex);
  644. + if (err)
  645. + goto err_remove;
  646. +
  647. + ovl_dentry_update(dentry, newpath.dentry);
  648. +
  649. + /*
  650. + * Easiest way to get rid of the lower dentry reference is to
  651. + * drop this dentry. This is neither needed nor possible for
  652. + * directories.
  653. + */
  654. + if (!S_ISDIR(stat->mode))
  655. + d_drop(dentry);
  656. +
  657. + return 0;
  658. +
  659. +err_remove:
  660. + if (S_ISDIR(stat->mode))
  661. + vfs_rmdir(upperdir->d_inode, newpath.dentry);
  662. + else
  663. + vfs_unlink(upperdir->d_inode, newpath.dentry, NULL);
  664. +
  665. + dput(newpath.dentry);
  666. +
  667. + return err;
  668. +}
  669. +
  670. +/*
  671. + * Copy up a single dentry
  672. + *
  673. + * Directory renames only allowed on "pure upper" (already created on
  674. + * upper filesystem, never copied up). Directories which are on lower or
  675. + * are merged may not be renamed. For these -EXDEV is returned and
  676. + * userspace has to deal with it. This means, when copying up a
  677. + * directory we can rely on it and ancestors being stable.
  678. + *
  679. + * Non-directory renames start with copy up of source if necessary. The
  680. + * actual rename will only proceed once the copy up was successful. Copy
  681. + * up uses upper parent i_mutex for exclusion. Since rename can change
  682. + * d_parent it is possible that the copy up will lock the old parent. At
  683. + * that point the file will have already been copied up anyway.
  684. + */
  685. +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
  686. + struct path *lowerpath, struct kstat *stat)
  687. +{
  688. + int err;
  689. + struct kstat pstat;
  690. + struct path parentpath;
  691. + struct dentry *upperdir;
  692. + const struct cred *old_cred;
  693. + struct cred *override_cred;
  694. + char *link = NULL;
  695. +
  696. + ovl_path_upper(parent, &parentpath);
  697. + upperdir = parentpath.dentry;
  698. +
  699. + err = vfs_getattr(&parentpath, &pstat);
  700. + if (err)
  701. + return err;
  702. +
  703. + if (S_ISLNK(stat->mode)) {
  704. + link = ovl_read_symlink(lowerpath->dentry);
  705. + if (IS_ERR(link))
  706. + return PTR_ERR(link);
  707. + }
  708. +
  709. + err = -ENOMEM;
  710. + override_cred = prepare_creds();
  711. + if (!override_cred)
  712. + goto out_free_link;
  713. +
  714. + override_cred->fsuid = stat->uid;
  715. + override_cred->fsgid = stat->gid;
  716. + /*
  717. + * CAP_SYS_ADMIN for copying up extended attributes
  718. + * CAP_DAC_OVERRIDE for create
  719. + * CAP_FOWNER for chmod, timestamp update
  720. + * CAP_FSETID for chmod
  721. + * CAP_MKNOD for mknod
  722. + */
  723. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  724. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  725. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  726. + cap_raise(override_cred->cap_effective, CAP_FSETID);
  727. + cap_raise(override_cred->cap_effective, CAP_MKNOD);
  728. + old_cred = override_creds(override_cred);
  729. +
  730. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  731. + if (ovl_path_type(dentry) != OVL_PATH_LOWER) {
  732. + err = 0;
  733. + } else {
  734. + err = ovl_copy_up_locked(upperdir, dentry, lowerpath,
  735. + stat, link);
  736. + if (!err) {
  737. + /* Restore timestamps on parent (best effort) */
  738. + ovl_set_timestamps(upperdir, &pstat);
  739. + }
  740. + }
  741. +
  742. + mutex_unlock(&upperdir->d_inode->i_mutex);
  743. +
  744. + revert_creds(old_cred);
  745. + put_cred(override_cred);
  746. +
  747. +out_free_link:
  748. + if (link)
  749. + free_page((unsigned long) link);
  750. +
  751. + return err;
  752. +}
  753. +
  754. +int ovl_copy_up(struct dentry *dentry)
  755. +{
  756. + int err;
  757. +
  758. + err = 0;
  759. + while (!err) {
  760. + struct dentry *next;
  761. + struct dentry *parent;
  762. + struct path lowerpath;
  763. + struct kstat stat;
  764. + enum ovl_path_type type = ovl_path_type(dentry);
  765. +
  766. + if (type != OVL_PATH_LOWER)
  767. + break;
  768. +
  769. + next = dget(dentry);
  770. + /* find the topmost dentry not yet copied up */
  771. + for (;;) {
  772. + parent = dget_parent(next);
  773. +
  774. + type = ovl_path_type(parent);
  775. + if (type != OVL_PATH_LOWER)
  776. + break;
  777. +
  778. + dput(next);
  779. + next = parent;
  780. + }
  781. +
  782. + ovl_path_lower(next, &lowerpath);
  783. + err = vfs_getattr(&lowerpath, &stat);
  784. + if (!err)
  785. + err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
  786. +
  787. + dput(parent);
  788. + dput(next);
  789. + }
  790. +
  791. + return err;
  792. +}
  793. +
  794. +/* Optimize by not copying up the file first and truncating later */
  795. +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size)
  796. +{
  797. + int err;
  798. + struct kstat stat;
  799. + struct path lowerpath;
  800. + struct dentry *parent = dget_parent(dentry);
  801. +
  802. + err = ovl_copy_up(parent);
  803. + if (err)
  804. + goto out_dput_parent;
  805. +
  806. + ovl_path_lower(dentry, &lowerpath);
  807. + err = vfs_getattr(&lowerpath, &stat);
  808. + if (err)
  809. + goto out_dput_parent;
  810. +
  811. + if (size < stat.size)
  812. + stat.size = size;
  813. +
  814. + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
  815. +
  816. +out_dput_parent:
  817. + dput(parent);
  818. + return err;
  819. +}
  820. --- /dev/null
  821. +++ b/fs/overlayfs/dir.c
  822. @@ -0,0 +1,606 @@
  823. +/*
  824. + *
  825. + * Copyright (C) 2011 Novell Inc.
  826. + *
  827. + * This program is free software; you can redistribute it and/or modify it
  828. + * under the terms of the GNU General Public License version 2 as published by
  829. + * the Free Software Foundation.
  830. + */
  831. +
  832. +#include <linux/fs.h>
  833. +#include <linux/namei.h>
  834. +#include <linux/xattr.h>
  835. +#include <linux/security.h>
  836. +#include <linux/cred.h>
  837. +#include "overlayfs.h"
  838. +
  839. +static const char *ovl_whiteout_symlink = "(overlay-whiteout)";
  840. +
  841. +static int ovl_whiteout(struct dentry *upperdir, struct dentry *dentry)
  842. +{
  843. + int err;
  844. + struct dentry *newdentry;
  845. + const struct cred *old_cred;
  846. + struct cred *override_cred;
  847. +
  848. + /* FIXME: recheck lower dentry to see if whiteout is really needed */
  849. +
  850. + err = -ENOMEM;
  851. + override_cred = prepare_creds();
  852. + if (!override_cred)
  853. + goto out;
  854. +
  855. + /*
  856. + * CAP_SYS_ADMIN for setxattr
  857. + * CAP_DAC_OVERRIDE for symlink creation
  858. + * CAP_FOWNER for unlink in sticky directory
  859. + */
  860. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  861. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  862. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  863. + override_cred->fsuid = GLOBAL_ROOT_UID;
  864. + override_cred->fsgid = GLOBAL_ROOT_GID;
  865. + old_cred = override_creds(override_cred);
  866. +
  867. + newdentry = lookup_one_len(dentry->d_name.name, upperdir,
  868. + dentry->d_name.len);
  869. + err = PTR_ERR(newdentry);
  870. + if (IS_ERR(newdentry))
  871. + goto out_put_cred;
  872. +
  873. + /* Just been removed within the same locked region */
  874. + WARN_ON(newdentry->d_inode);
  875. +
  876. + err = vfs_symlink(upperdir->d_inode, newdentry, ovl_whiteout_symlink);
  877. + if (err)
  878. + goto out_dput;
  879. +
  880. + ovl_dentry_version_inc(dentry->d_parent);
  881. +
  882. + err = vfs_setxattr(newdentry, ovl_whiteout_xattr, "y", 1, 0);
  883. + if (err)
  884. + vfs_unlink(upperdir->d_inode, newdentry, NULL);
  885. +
  886. +out_dput:
  887. + dput(newdentry);
  888. +out_put_cred:
  889. + revert_creds(old_cred);
  890. + put_cred(override_cred);
  891. +out:
  892. + if (err) {
  893. + /*
  894. + * There's no way to recover from failure to whiteout.
  895. + * What should we do? Log a big fat error and... ?
  896. + */
  897. + pr_err("overlayfs: ERROR - failed to whiteout '%s'\n",
  898. + dentry->d_name.name);
  899. + }
  900. +
  901. + return err;
  902. +}
  903. +
  904. +static struct dentry *ovl_lookup_create(struct dentry *upperdir,
  905. + struct dentry *template)
  906. +{
  907. + int err;
  908. + struct dentry *newdentry;
  909. + struct qstr *name = &template->d_name;
  910. +
  911. + newdentry = lookup_one_len(name->name, upperdir, name->len);
  912. + if (IS_ERR(newdentry))
  913. + return newdentry;
  914. +
  915. + if (newdentry->d_inode) {
  916. + const struct cred *old_cred;
  917. + struct cred *override_cred;
  918. +
  919. + /* No need to check whiteout if lower parent is non-existent */
  920. + err = -EEXIST;
  921. + if (!ovl_dentry_lower(template->d_parent))
  922. + goto out_dput;
  923. +
  924. + if (!S_ISLNK(newdentry->d_inode->i_mode))
  925. + goto out_dput;
  926. +
  927. + err = -ENOMEM;
  928. + override_cred = prepare_creds();
  929. + if (!override_cred)
  930. + goto out_dput;
  931. +
  932. + /*
  933. + * CAP_SYS_ADMIN for getxattr
  934. + * CAP_FOWNER for unlink in sticky directory
  935. + */
  936. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  937. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  938. + old_cred = override_creds(override_cred);
  939. +
  940. + err = -EEXIST;
  941. + if (ovl_is_whiteout(newdentry))
  942. + err = vfs_unlink(upperdir->d_inode, newdentry, NULL);
  943. +
  944. + revert_creds(old_cred);
  945. + put_cred(override_cred);
  946. + if (err)
  947. + goto out_dput;
  948. +
  949. + dput(newdentry);
  950. + newdentry = lookup_one_len(name->name, upperdir, name->len);
  951. + if (IS_ERR(newdentry)) {
  952. + ovl_whiteout(upperdir, template);
  953. + return newdentry;
  954. + }
  955. +
  956. + /*
  957. + * Whiteout just been successfully removed, parent
  958. + * i_mutex is still held, there's no way the lookup
  959. + * could return positive.
  960. + */
  961. + WARN_ON(newdentry->d_inode);
  962. + }
  963. +
  964. + return newdentry;
  965. +
  966. +out_dput:
  967. + dput(newdentry);
  968. + return ERR_PTR(err);
  969. +}
  970. +
  971. +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
  972. + struct kstat *stat, const char *link)
  973. +{
  974. + int err;
  975. + struct dentry *newdentry;
  976. + struct inode *dir = upperdir->d_inode;
  977. +
  978. + newdentry = ovl_lookup_create(upperdir, dentry);
  979. + if (IS_ERR(newdentry))
  980. + goto out;
  981. +
  982. + switch (stat->mode & S_IFMT) {
  983. + case S_IFREG:
  984. + err = vfs_create(dir, newdentry, stat->mode, NULL);
  985. + break;
  986. +
  987. + case S_IFDIR:
  988. + err = vfs_mkdir(dir, newdentry, stat->mode);
  989. + break;
  990. +
  991. + case S_IFCHR:
  992. + case S_IFBLK:
  993. + case S_IFIFO:
  994. + case S_IFSOCK:
  995. + err = vfs_mknod(dir, newdentry, stat->mode, stat->rdev);
  996. + break;
  997. +
  998. + case S_IFLNK:
  999. + err = vfs_symlink(dir, newdentry, link);
  1000. + break;
  1001. +
  1002. + default:
  1003. + err = -EPERM;
  1004. + }
  1005. + if (err) {
  1006. + if (ovl_dentry_is_opaque(dentry))
  1007. + ovl_whiteout(upperdir, dentry);
  1008. + dput(newdentry);
  1009. + newdentry = ERR_PTR(err);
  1010. + } else if (WARN_ON(!newdentry->d_inode)) {
  1011. + /*
  1012. + * Not quite sure if non-instantiated dentry is legal or not.
  1013. + * VFS doesn't seem to care so check and warn here.
  1014. + */
  1015. + dput(newdentry);
  1016. + newdentry = ERR_PTR(-ENOENT);
  1017. + }
  1018. +
  1019. +out:
  1020. + return newdentry;
  1021. +
  1022. +}
  1023. +
  1024. +static int ovl_set_opaque(struct dentry *upperdentry)
  1025. +{
  1026. + int err;
  1027. + const struct cred *old_cred;
  1028. + struct cred *override_cred;
  1029. +
  1030. + override_cred = prepare_creds();
  1031. + if (!override_cred)
  1032. + return -ENOMEM;
  1033. +
  1034. + /* CAP_SYS_ADMIN for setxattr of "trusted" namespace */
  1035. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  1036. + old_cred = override_creds(override_cred);
  1037. + err = vfs_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
  1038. + revert_creds(old_cred);
  1039. + put_cred(override_cred);
  1040. +
  1041. + return err;
  1042. +}
  1043. +
  1044. +static int ovl_remove_opaque(struct dentry *upperdentry)
  1045. +{
  1046. + int err;
  1047. + const struct cred *old_cred;
  1048. + struct cred *override_cred;
  1049. +
  1050. + override_cred = prepare_creds();
  1051. + if (!override_cred)
  1052. + return -ENOMEM;
  1053. +
  1054. + /* CAP_SYS_ADMIN for removexattr of "trusted" namespace */
  1055. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  1056. + old_cred = override_creds(override_cred);
  1057. + err = vfs_removexattr(upperdentry, ovl_opaque_xattr);
  1058. + revert_creds(old_cred);
  1059. + put_cred(override_cred);
  1060. +
  1061. + return err;
  1062. +}
  1063. +
  1064. +static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
  1065. + struct kstat *stat)
  1066. +{
  1067. + int err;
  1068. + enum ovl_path_type type;
  1069. + struct path realpath;
  1070. +
  1071. + type = ovl_path_real(dentry, &realpath);
  1072. + err = vfs_getattr(&realpath, stat);
  1073. + if (err)
  1074. + return err;
  1075. +
  1076. + stat->dev = dentry->d_sb->s_dev;
  1077. + stat->ino = dentry->d_inode->i_ino;
  1078. +
  1079. + /*
  1080. + * It's probably not worth it to count subdirs to get the
  1081. + * correct link count. nlink=1 seems to pacify 'find' and
  1082. + * other utilities.
  1083. + */
  1084. + if (type == OVL_PATH_MERGE)
  1085. + stat->nlink = 1;
  1086. +
  1087. + return 0;
  1088. +}
  1089. +
  1090. +static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
  1091. + const char *link)
  1092. +{
  1093. + int err;
  1094. + struct dentry *newdentry;
  1095. + struct dentry *upperdir;
  1096. + struct inode *inode;
  1097. + struct kstat stat = {
  1098. + .mode = mode,
  1099. + .rdev = rdev,
  1100. + };
  1101. +
  1102. + err = -ENOMEM;
  1103. + inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
  1104. + if (!inode)
  1105. + goto out;
  1106. +
  1107. + err = ovl_copy_up(dentry->d_parent);
  1108. + if (err)
  1109. + goto out_iput;
  1110. +
  1111. + upperdir = ovl_dentry_upper(dentry->d_parent);
  1112. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1113. +
  1114. + newdentry = ovl_upper_create(upperdir, dentry, &stat, link);
  1115. + err = PTR_ERR(newdentry);
  1116. + if (IS_ERR(newdentry))
  1117. + goto out_unlock;
  1118. +
  1119. + ovl_dentry_version_inc(dentry->d_parent);
  1120. + if (ovl_dentry_is_opaque(dentry) && S_ISDIR(mode)) {
  1121. + err = ovl_set_opaque(newdentry);
  1122. + if (err) {
  1123. + vfs_rmdir(upperdir->d_inode, newdentry);
  1124. + ovl_whiteout(upperdir, dentry);
  1125. + goto out_dput;
  1126. + }
  1127. + }
  1128. + ovl_dentry_update(dentry, newdentry);
  1129. + ovl_copyattr(newdentry->d_inode, inode);
  1130. + d_instantiate(dentry, inode);
  1131. + inode = NULL;
  1132. + newdentry = NULL;
  1133. + err = 0;
  1134. +
  1135. +out_dput:
  1136. + dput(newdentry);
  1137. +out_unlock:
  1138. + mutex_unlock(&upperdir->d_inode->i_mutex);
  1139. +out_iput:
  1140. + iput(inode);
  1141. +out:
  1142. + return err;
  1143. +}
  1144. +
  1145. +static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  1146. + bool excl)
  1147. +{
  1148. + return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
  1149. +}
  1150. +
  1151. +static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  1152. +{
  1153. + return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
  1154. +}
  1155. +
  1156. +static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
  1157. + dev_t rdev)
  1158. +{
  1159. + return ovl_create_object(dentry, mode, rdev, NULL);
  1160. +}
  1161. +
  1162. +static int ovl_symlink(struct inode *dir, struct dentry *dentry,
  1163. + const char *link)
  1164. +{
  1165. + return ovl_create_object(dentry, S_IFLNK, 0, link);
  1166. +}
  1167. +
  1168. +static int ovl_do_remove(struct dentry *dentry, bool is_dir)
  1169. +{
  1170. + int err;
  1171. + enum ovl_path_type type;
  1172. + struct path realpath;
  1173. + struct dentry *upperdir;
  1174. +
  1175. + err = ovl_copy_up(dentry->d_parent);
  1176. + if (err)
  1177. + return err;
  1178. +
  1179. + upperdir = ovl_dentry_upper(dentry->d_parent);
  1180. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1181. + type = ovl_path_real(dentry, &realpath);
  1182. + if (type != OVL_PATH_LOWER) {
  1183. + err = -ESTALE;
  1184. + if (realpath.dentry->d_parent != upperdir)
  1185. + goto out_d_drop;
  1186. +
  1187. + /* FIXME: create whiteout up front and rename to target */
  1188. +
  1189. + if (is_dir)
  1190. + err = vfs_rmdir(upperdir->d_inode, realpath.dentry);
  1191. + else
  1192. + err = vfs_unlink(upperdir->d_inode, realpath.dentry,
  1193. + NULL);
  1194. + if (err)
  1195. + goto out_d_drop;
  1196. +
  1197. + ovl_dentry_version_inc(dentry->d_parent);
  1198. + }
  1199. +
  1200. + if (type != OVL_PATH_UPPER || ovl_dentry_is_opaque(dentry))
  1201. + err = ovl_whiteout(upperdir, dentry);
  1202. +
  1203. + /*
  1204. + * Keeping this dentry hashed would mean having to release
  1205. + * upperpath/lowerpath, which could only be done if we are the
  1206. + * sole user of this dentry. Too tricky... Just unhash for
  1207. + * now.
  1208. + */
  1209. +out_d_drop:
  1210. + d_drop(dentry);
  1211. + mutex_unlock(&upperdir->d_inode->i_mutex);
  1212. +
  1213. + return err;
  1214. +}
  1215. +
  1216. +static int ovl_unlink(struct inode *dir, struct dentry *dentry)
  1217. +{
  1218. + return ovl_do_remove(dentry, false);
  1219. +}
  1220. +
  1221. +
  1222. +static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
  1223. +{
  1224. + int err;
  1225. + enum ovl_path_type type;
  1226. +
  1227. + type = ovl_path_type(dentry);
  1228. + if (type != OVL_PATH_UPPER) {
  1229. + err = ovl_check_empty_and_clear(dentry, type);
  1230. + if (err)
  1231. + return err;
  1232. + }
  1233. +
  1234. + return ovl_do_remove(dentry, true);
  1235. +}
  1236. +
  1237. +static int ovl_link(struct dentry *old, struct inode *newdir,
  1238. + struct dentry *new)
  1239. +{
  1240. + int err;
  1241. + struct dentry *olddentry;
  1242. + struct dentry *newdentry;
  1243. + struct dentry *upperdir;
  1244. + struct inode *newinode;
  1245. +
  1246. + err = ovl_copy_up(old);
  1247. + if (err)
  1248. + goto out;
  1249. +
  1250. + err = ovl_copy_up(new->d_parent);
  1251. + if (err)
  1252. + goto out;
  1253. +
  1254. + upperdir = ovl_dentry_upper(new->d_parent);
  1255. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1256. + newdentry = ovl_lookup_create(upperdir, new);
  1257. + err = PTR_ERR(newdentry);
  1258. + if (IS_ERR(newdentry))
  1259. + goto out_unlock;
  1260. +
  1261. + olddentry = ovl_dentry_upper(old);
  1262. + err = vfs_link(olddentry, upperdir->d_inode, newdentry, NULL);
  1263. + if (!err) {
  1264. + if (WARN_ON(!newdentry->d_inode)) {
  1265. + dput(newdentry);
  1266. + err = -ENOENT;
  1267. + goto out_unlock;
  1268. + }
  1269. + newinode = ovl_new_inode(old->d_sb, newdentry->d_inode->i_mode,
  1270. + new->d_fsdata);
  1271. + if (!newinode) {
  1272. + err = -ENOMEM;
  1273. + goto link_fail;
  1274. + }
  1275. + ovl_copyattr(upperdir->d_inode, newinode);
  1276. +
  1277. + ovl_dentry_version_inc(new->d_parent);
  1278. + ovl_dentry_update(new, newdentry);
  1279. +
  1280. + d_instantiate(new, newinode);
  1281. + } else {
  1282. +link_fail:
  1283. + if (ovl_dentry_is_opaque(new))
  1284. + ovl_whiteout(upperdir, new);
  1285. + dput(newdentry);
  1286. + }
  1287. +out_unlock:
  1288. + mutex_unlock(&upperdir->d_inode->i_mutex);
  1289. +out:
  1290. + return err;
  1291. +}
  1292. +
  1293. +static int ovl_rename(struct inode *olddir, struct dentry *old,
  1294. + struct inode *newdir, struct dentry *new)
  1295. +{
  1296. + int err;
  1297. + enum ovl_path_type old_type;
  1298. + enum ovl_path_type new_type;
  1299. + struct dentry *old_upperdir;
  1300. + struct dentry *new_upperdir;
  1301. + struct dentry *olddentry;
  1302. + struct dentry *newdentry;
  1303. + struct dentry *trap;
  1304. + bool old_opaque;
  1305. + bool new_opaque;
  1306. + bool new_create = false;
  1307. + bool is_dir = S_ISDIR(old->d_inode->i_mode);
  1308. +
  1309. + /* Don't copy up directory trees */
  1310. + old_type = ovl_path_type(old);
  1311. + if (old_type != OVL_PATH_UPPER && is_dir)
  1312. + return -EXDEV;
  1313. +
  1314. + if (new->d_inode) {
  1315. + new_type = ovl_path_type(new);
  1316. +
  1317. + if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
  1318. + if (ovl_dentry_lower(old)->d_inode ==
  1319. + ovl_dentry_lower(new)->d_inode)
  1320. + return 0;
  1321. + }
  1322. + if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
  1323. + if (ovl_dentry_upper(old)->d_inode ==
  1324. + ovl_dentry_upper(new)->d_inode)
  1325. + return 0;
  1326. + }
  1327. +
  1328. + if (new_type != OVL_PATH_UPPER &&
  1329. + S_ISDIR(new->d_inode->i_mode)) {
  1330. + err = ovl_check_empty_and_clear(new, new_type);
  1331. + if (err)
  1332. + return err;
  1333. + }
  1334. + } else {
  1335. + new_type = OVL_PATH_UPPER;
  1336. + }
  1337. +
  1338. + err = ovl_copy_up(old);
  1339. + if (err)
  1340. + return err;
  1341. +
  1342. + err = ovl_copy_up(new->d_parent);
  1343. + if (err)
  1344. + return err;
  1345. +
  1346. + old_upperdir = ovl_dentry_upper(old->d_parent);
  1347. + new_upperdir = ovl_dentry_upper(new->d_parent);
  1348. +
  1349. + trap = lock_rename(new_upperdir, old_upperdir);
  1350. +
  1351. + olddentry = ovl_dentry_upper(old);
  1352. + newdentry = ovl_dentry_upper(new);
  1353. + if (newdentry) {
  1354. + dget(newdentry);
  1355. + } else {
  1356. + new_create = true;
  1357. + newdentry = ovl_lookup_create(new_upperdir, new);
  1358. + err = PTR_ERR(newdentry);
  1359. + if (IS_ERR(newdentry))
  1360. + goto out_unlock;
  1361. + }
  1362. +
  1363. + err = -ESTALE;
  1364. + if (olddentry->d_parent != old_upperdir)
  1365. + goto out_dput;
  1366. + if (newdentry->d_parent != new_upperdir)
  1367. + goto out_dput;
  1368. + if (olddentry == trap)
  1369. + goto out_dput;
  1370. + if (newdentry == trap)
  1371. + goto out_dput;
  1372. +
  1373. + old_opaque = ovl_dentry_is_opaque(old);
  1374. + new_opaque = ovl_dentry_is_opaque(new) || new_type != OVL_PATH_UPPER;
  1375. +
  1376. + if (is_dir && !old_opaque && new_opaque) {
  1377. + err = ovl_set_opaque(olddentry);
  1378. + if (err)
  1379. + goto out_dput;
  1380. + }
  1381. +
  1382. + err = vfs_rename(old_upperdir->d_inode, olddentry,
  1383. + new_upperdir->d_inode, newdentry, NULL);
  1384. +
  1385. + if (err) {
  1386. + if (new_create && ovl_dentry_is_opaque(new))
  1387. + ovl_whiteout(new_upperdir, new);
  1388. + if (is_dir && !old_opaque && new_opaque)
  1389. + ovl_remove_opaque(olddentry);
  1390. + goto out_dput;
  1391. + }
  1392. +
  1393. + if (old_type != OVL_PATH_UPPER || old_opaque)
  1394. + err = ovl_whiteout(old_upperdir, old);
  1395. + if (is_dir && old_opaque && !new_opaque)
  1396. + ovl_remove_opaque(olddentry);
  1397. +
  1398. + if (old_opaque != new_opaque)
  1399. + ovl_dentry_set_opaque(old, new_opaque);
  1400. +
  1401. + ovl_dentry_version_inc(old->d_parent);
  1402. + ovl_dentry_version_inc(new->d_parent);
  1403. +
  1404. +out_dput:
  1405. + dput(newdentry);
  1406. +out_unlock:
  1407. + unlock_rename(new_upperdir, old_upperdir);
  1408. + return err;
  1409. +}
  1410. +
  1411. +const struct inode_operations ovl_dir_inode_operations = {
  1412. + .lookup = ovl_lookup,
  1413. + .mkdir = ovl_mkdir,
  1414. + .symlink = ovl_symlink,
  1415. + .unlink = ovl_unlink,
  1416. + .rmdir = ovl_rmdir,
  1417. + .rename = ovl_rename,
  1418. + .link = ovl_link,
  1419. + .setattr = ovl_setattr,
  1420. + .create = ovl_create,
  1421. + .mknod = ovl_mknod,
  1422. + .permission = ovl_permission,
  1423. + .getattr = ovl_dir_getattr,
  1424. + .setxattr = ovl_setxattr,
  1425. + .getxattr = ovl_getxattr,
  1426. + .listxattr = ovl_listxattr,
  1427. + .removexattr = ovl_removexattr,
  1428. +};
  1429. --- /dev/null
  1430. +++ b/fs/overlayfs/inode.c
  1431. @@ -0,0 +1,372 @@
  1432. +/*
  1433. + *
  1434. + * Copyright (C) 2011 Novell Inc.
  1435. + *
  1436. + * This program is free software; you can redistribute it and/or modify it
  1437. + * under the terms of the GNU General Public License version 2 as published by
  1438. + * the Free Software Foundation.
  1439. + */
  1440. +
  1441. +#include <linux/fs.h>
  1442. +#include <linux/slab.h>
  1443. +#include <linux/xattr.h>
  1444. +#include "overlayfs.h"
  1445. +
  1446. +int ovl_setattr(struct dentry *dentry, struct iattr *attr)
  1447. +{
  1448. + struct dentry *upperdentry;
  1449. + int err;
  1450. +
  1451. + if ((attr->ia_valid & ATTR_SIZE) && !ovl_dentry_upper(dentry))
  1452. + err = ovl_copy_up_truncate(dentry, attr->ia_size);
  1453. + else
  1454. + err = ovl_copy_up(dentry);
  1455. + if (err)
  1456. + return err;
  1457. +
  1458. + upperdentry = ovl_dentry_upper(dentry);
  1459. +
  1460. + if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
  1461. + attr->ia_valid &= ~ATTR_MODE;
  1462. +
  1463. + mutex_lock(&upperdentry->d_inode->i_mutex);
  1464. + err = notify_change(upperdentry, attr, NULL);
  1465. + if (!err)
  1466. + ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
  1467. + mutex_unlock(&upperdentry->d_inode->i_mutex);
  1468. +
  1469. + return err;
  1470. +}
  1471. +
  1472. +static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
  1473. + struct kstat *stat)
  1474. +{
  1475. + struct path realpath;
  1476. +
  1477. + ovl_path_real(dentry, &realpath);
  1478. + return vfs_getattr(&realpath, stat);
  1479. +}
  1480. +
  1481. +int ovl_permission(struct inode *inode, int mask)
  1482. +{
  1483. + struct ovl_entry *oe;
  1484. + struct dentry *alias = NULL;
  1485. + struct inode *realinode;
  1486. + struct dentry *realdentry;
  1487. + bool is_upper;
  1488. + int err;
  1489. +
  1490. + if (S_ISDIR(inode->i_mode)) {
  1491. + oe = inode->i_private;
  1492. + } else if (mask & MAY_NOT_BLOCK) {
  1493. + return -ECHILD;
  1494. + } else {
  1495. + /*
  1496. + * For non-directories find an alias and get the info
  1497. + * from there.
  1498. + */
  1499. + alias = d_find_any_alias(inode);
  1500. + if (WARN_ON(!alias))
  1501. + return -ENOENT;
  1502. +
  1503. + oe = alias->d_fsdata;
  1504. + }
  1505. +
  1506. + realdentry = ovl_entry_real(oe, &is_upper);
  1507. +
  1508. + /* Careful in RCU walk mode */
  1509. + realinode = ACCESS_ONCE(realdentry->d_inode);
  1510. + if (!realinode) {
  1511. + WARN_ON(!(mask & MAY_NOT_BLOCK));
  1512. + err = -ENOENT;
  1513. + goto out_dput;
  1514. + }
  1515. +
  1516. + if (mask & MAY_WRITE) {
  1517. + umode_t mode = realinode->i_mode;
  1518. +
  1519. + /*
  1520. + * Writes will always be redirected to upper layer, so
  1521. + * ignore lower layer being read-only.
  1522. + *
  1523. + * If the overlay itself is read-only then proceed
  1524. + * with the permission check, don't return EROFS.
  1525. + * This will only happen if this is the lower layer of
  1526. + * another overlayfs.
  1527. + *
  1528. + * If upper fs becomes read-only after the overlay was
  1529. + * constructed return EROFS to prevent modification of
  1530. + * upper layer.
  1531. + */
  1532. + err = -EROFS;
  1533. + if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
  1534. + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
  1535. + goto out_dput;
  1536. + }
  1537. +
  1538. + err = __inode_permission(realinode, mask);
  1539. +out_dput:
  1540. + dput(alias);
  1541. + return err;
  1542. +}
  1543. +
  1544. +
  /*
   * Cookie handed from ovl_follow_link() to ovl_put_link() when the real
   * inode has a ->put_link operation.
   */
  struct ovl_link_data {
      struct dentry *realdentry;  /* real dentry whose ->follow_link ran */
      void *cookie;               /* value returned by that ->follow_link */
  };
  1549. +
  1550. +static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
  1551. +{
  1552. + void *ret;
  1553. + struct dentry *realdentry;
  1554. + struct inode *realinode;
  1555. +
  1556. + realdentry = ovl_dentry_real(dentry);
  1557. + realinode = realdentry->d_inode;
  1558. +
  1559. + if (WARN_ON(!realinode->i_op->follow_link))
  1560. + return ERR_PTR(-EPERM);
  1561. +
  1562. + ret = realinode->i_op->follow_link(realdentry, nd);
  1563. + if (IS_ERR(ret))
  1564. + return ret;
  1565. +
  1566. + if (realinode->i_op->put_link) {
  1567. + struct ovl_link_data *data;
  1568. +
  1569. + data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
  1570. + if (!data) {
  1571. + realinode->i_op->put_link(realdentry, nd, ret);
  1572. + return ERR_PTR(-ENOMEM);
  1573. + }
  1574. + data->realdentry = realdentry;
  1575. + data->cookie = ret;
  1576. +
  1577. + return data;
  1578. + } else {
  1579. + return NULL;
  1580. + }
  1581. +}
  1582. +
  1583. +static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
  1584. +{
  1585. + struct inode *realinode;
  1586. + struct ovl_link_data *data = c;
  1587. +
  1588. + if (!data)
  1589. + return;
  1590. +
  1591. + realinode = data->realdentry->d_inode;
  1592. + realinode->i_op->put_link(data->realdentry, nd, data->cookie);
  1593. + kfree(data);
  1594. +}
  1595. +
  1596. +static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
  1597. +{
  1598. + struct path realpath;
  1599. + struct inode *realinode;
  1600. +
  1601. + ovl_path_real(dentry, &realpath);
  1602. + realinode = realpath.dentry->d_inode;
  1603. +
  1604. + if (!realinode->i_op->readlink)
  1605. + return -EINVAL;
  1606. +
  1607. + touch_atime(&realpath);
  1608. +
  1609. + return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
  1610. +}
  1611. +
  1612. +
  /*
   * Tell whether @name lies in the overlay's private xattr namespace,
   * i.e. starts with "trusted.overlay.".
   *
   * The whole prefix is compared; its length is taken from the literal so
   * the two cannot drift apart.  (The previous hard-coded length of 14
   * only matched "trusted.overla" and so also caught unrelated names.)
   */
  static bool ovl_is_private_xattr(const char *name)
  {
      static const char prefix[] = "trusted.overlay.";

      return strncmp(name, prefix, sizeof(prefix) - 1) == 0;
  }
  1617. +
  1618. +int ovl_setxattr(struct dentry *dentry, const char *name,
  1619. + const void *value, size_t size, int flags)
  1620. +{
  1621. + int err;
  1622. + struct dentry *upperdentry;
  1623. +
  1624. + if (ovl_is_private_xattr(name))
  1625. + return -EPERM;
  1626. +
  1627. + err = ovl_copy_up(dentry);
  1628. + if (err)
  1629. + return err;
  1630. +
  1631. + upperdentry = ovl_dentry_upper(dentry);
  1632. + return vfs_setxattr(upperdentry, name, value, size, flags);
  1633. +}
  1634. +
  1635. +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
  1636. + void *value, size_t size)
  1637. +{
  1638. + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
  1639. + ovl_is_private_xattr(name))
  1640. + return -ENODATA;
  1641. +
  1642. + return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
  1643. +}
  1644. +
  1645. +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
  1646. +{
  1647. + ssize_t res;
  1648. + int off;
  1649. +
  1650. + res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
  1651. + if (res <= 0 || size == 0)
  1652. + return res;
  1653. +
  1654. + if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
  1655. + return res;
  1656. +
  1657. + /* filter out private xattrs */
  1658. + for (off = 0; off < res;) {
  1659. + char *s = list + off;
  1660. + size_t slen = strlen(s) + 1;
  1661. +
  1662. + BUG_ON(off + slen > res);
  1663. +
  1664. + if (ovl_is_private_xattr(s)) {
  1665. + res -= slen;
  1666. + memmove(s, s + slen, res - off);
  1667. + } else {
  1668. + off += slen;
  1669. + }
  1670. + }
  1671. +
  1672. + return res;
  1673. +}
  1674. +
  1675. +int ovl_removexattr(struct dentry *dentry, const char *name)
  1676. +{
  1677. + int err;
  1678. + struct path realpath;
  1679. + enum ovl_path_type type;
  1680. +
  1681. + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
  1682. + ovl_is_private_xattr(name))
  1683. + return -ENODATA;
  1684. +
  1685. + type = ovl_path_real(dentry, &realpath);
  1686. + if (type == OVL_PATH_LOWER) {
  1687. + err = vfs_getxattr(realpath.dentry, name, NULL, 0);
  1688. + if (err < 0)
  1689. + return err;
  1690. +
  1691. + err = ovl_copy_up(dentry);
  1692. + if (err)
  1693. + return err;
  1694. +
  1695. + ovl_path_upper(dentry, &realpath);
  1696. + }
  1697. +
  1698. + return vfs_removexattr(realpath.dentry, name);
  1699. +}
  1700. +
  1701. +static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
  1702. + struct dentry *realdentry)
  1703. +{
  1704. + if (type != OVL_PATH_LOWER)
  1705. + return false;
  1706. +
  1707. + if (special_file(realdentry->d_inode->i_mode))
  1708. + return false;
  1709. +
  1710. + if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
  1711. + return false;
  1712. +
  1713. + return true;
  1714. +}
  1715. +
  1716. +static int ovl_dentry_open(struct dentry *dentry, struct file *file,
  1717. + const struct cred *cred)
  1718. +{
  1719. + int err;
  1720. + struct path realpath;
  1721. + enum ovl_path_type type;
  1722. +
  1723. + type = ovl_path_real(dentry, &realpath);
  1724. + if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
  1725. + if (file->f_flags & O_TRUNC)
  1726. + err = ovl_copy_up_truncate(dentry, 0);
  1727. + else
  1728. + err = ovl_copy_up(dentry);
  1729. + if (err)
  1730. + return err;
  1731. +
  1732. + ovl_path_upper(dentry, &realpath);
  1733. + }
  1734. +
  1735. + return vfs_open(&realpath, file, cred);
  1736. +}
  1737. +
  1738. +static const struct inode_operations ovl_file_inode_operations = {
  1739. + .setattr = ovl_setattr,
  1740. + .permission = ovl_permission,
  1741. + .getattr = ovl_getattr,
  1742. + .setxattr = ovl_setxattr,
  1743. + .getxattr = ovl_getxattr,
  1744. + .listxattr = ovl_listxattr,
  1745. + .removexattr = ovl_removexattr,
  1746. + .dentry_open = ovl_dentry_open,
  1747. +};
  1748. +
  1749. +static const struct inode_operations ovl_symlink_inode_operations = {
  1750. + .setattr = ovl_setattr,
  1751. + .follow_link = ovl_follow_link,
  1752. + .put_link = ovl_put_link,
  1753. + .readlink = ovl_readlink,
  1754. + .getattr = ovl_getattr,
  1755. + .setxattr = ovl_setxattr,
  1756. + .getxattr = ovl_getxattr,
  1757. + .listxattr = ovl_listxattr,
  1758. + .removexattr = ovl_removexattr,
  1759. +};
  1760. +
  1761. +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
  1762. + struct ovl_entry *oe)
  1763. +{
  1764. + struct inode *inode;
  1765. +
  1766. + inode = new_inode(sb);
  1767. + if (!inode)
  1768. + return NULL;
  1769. +
  1770. + mode &= S_IFMT;
  1771. +
  1772. + inode->i_ino = get_next_ino();
  1773. + inode->i_mode = mode;
  1774. + inode->i_flags |= S_NOATIME | S_NOCMTIME;
  1775. +
  1776. + switch (mode) {
  1777. + case S_IFDIR:
  1778. + inode->i_private = oe;
  1779. + inode->i_op = &ovl_dir_inode_operations;
  1780. + inode->i_fop = &ovl_dir_operations;
  1781. + break;
  1782. +
  1783. + case S_IFLNK:
  1784. + inode->i_op = &ovl_symlink_inode_operations;
  1785. + break;
  1786. +
  1787. + case S_IFREG:
  1788. + case S_IFSOCK:
  1789. + case S_IFBLK:
  1790. + case S_IFCHR:
  1791. + case S_IFIFO:
  1792. + inode->i_op = &ovl_file_inode_operations;
  1793. + break;
  1794. +
  1795. + default:
  1796. + WARN(1, "illegal file type: %i\n", mode);
  1797. + iput(inode);
  1798. + inode = NULL;
  1799. + }
  1800. +
  1801. + return inode;
  1802. +
  1803. +}
  1804. --- /dev/null
  1805. +++ b/fs/overlayfs/overlayfs.h
  1806. @@ -0,0 +1,70 @@
  1807. +/*
  1808. + *
  1809. + * Copyright (C) 2011 Novell Inc.
  1810. + *
  1811. + * This program is free software; you can redistribute it and/or modify it
  1812. + * under the terms of the GNU General Public License version 2 as published by
  1813. + * the Free Software Foundation.
  1814. + */
  1815. +
  1816. +struct ovl_entry;
  1817. +
  /* Classification of an overlay dentry, as computed by ovl_path_type(). */
  enum ovl_path_type {
      OVL_PATH_UPPER,  /* has an upper dentry and is not a merged directory */
      OVL_PATH_MERGE,  /* directory with both an upper and a lower dentry */
      OVL_PATH_LOWER,  /* has no upper dentry */
  };
  1823. +
  1824. +extern const char *ovl_opaque_xattr;
  1825. +extern const char *ovl_whiteout_xattr;
  1826. +extern const struct dentry_operations ovl_dentry_operations;
  1827. +
  1828. +enum ovl_path_type ovl_path_type(struct dentry *dentry);
  1829. +u64 ovl_dentry_version_get(struct dentry *dentry);
  1830. +void ovl_dentry_version_inc(struct dentry *dentry);
  1831. +void ovl_path_upper(struct dentry *dentry, struct path *path);
  1832. +void ovl_path_lower(struct dentry *dentry, struct path *path);
  1833. +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
  1834. +struct dentry *ovl_dentry_upper(struct dentry *dentry);
  1835. +struct dentry *ovl_dentry_lower(struct dentry *dentry);
  1836. +struct dentry *ovl_dentry_real(struct dentry *dentry);
  1837. +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
  1838. +bool ovl_dentry_is_opaque(struct dentry *dentry);
  1839. +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
  1840. +bool ovl_is_whiteout(struct dentry *dentry);
  1841. +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
  1842. +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
  1843. + unsigned int flags);
  1844. +struct file *ovl_path_open(struct path *path, int flags);
  1845. +
  1846. +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
  1847. + struct kstat *stat, const char *link);
  1848. +
  1849. +/* readdir.c */
  1850. +extern const struct file_operations ovl_dir_operations;
  1851. +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type);
  1852. +
  1853. +/* inode.c */
  1854. +int ovl_setattr(struct dentry *dentry, struct iattr *attr);
  1855. +int ovl_permission(struct inode *inode, int mask);
  1856. +int ovl_setxattr(struct dentry *dentry, const char *name,
  1857. + const void *value, size_t size, int flags);
  1858. +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
  1859. + void *value, size_t size);
  1860. +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
  1861. +int ovl_removexattr(struct dentry *dentry, const char *name);
  1862. +
  1863. +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
  1864. + struct ovl_entry *oe);
  1865. +static inline void ovl_copyattr(struct inode *from, struct inode *to)
  1866. +{
  1867. + to->i_uid = from->i_uid;
  1868. + to->i_gid = from->i_gid;
  1869. +}
  1870. +
  1871. +/* dir.c */
  1872. +extern const struct inode_operations ovl_dir_inode_operations;
  1873. +
  1874. +/* copy_up.c */
  1875. +int ovl_copy_up(struct dentry *dentry);
  1876. +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size);
  1877. --- /dev/null
  1878. +++ b/fs/overlayfs/readdir.c
  1879. @@ -0,0 +1,567 @@
  1880. +/*
  1881. + *
  1882. + * Copyright (C) 2011 Novell Inc.
  1883. + *
  1884. + * This program is free software; you can redistribute it and/or modify it
  1885. + * under the terms of the GNU General Public License version 2 as published by
  1886. + * the Free Software Foundation.
  1887. + */
  1888. +
  1889. +#include <linux/fs.h>
  1890. +#include <linux/slab.h>
  1891. +#include <linux/namei.h>
  1892. +#include <linux/file.h>
  1893. +#include <linux/xattr.h>
  1894. +#include <linux/rbtree.h>
  1895. +#include <linux/security.h>
  1896. +#include <linux/cred.h>
  1897. +#include "overlayfs.h"
  1898. +
  1899. +struct ovl_cache_entry {
  1900. + const char *name;
  1901. + unsigned int len;
  1902. + unsigned int type;
  1903. + u64 ino;
  1904. + bool is_whiteout;
  1905. + struct list_head l_node;
  1906. + struct rb_node node;
  1907. +};
  1908. +
  1909. +struct ovl_readdir_data {
  1910. + struct dir_context ctx;
  1911. + bool is_merge;
  1912. + struct rb_root *root;
  1913. + struct list_head *list;
  1914. + struct list_head *middle;
  1915. + struct dentry *dir;
  1916. + int count;
  1917. + int err;
  1918. +};
  1919. +
  1920. +struct ovl_dir_file {
  1921. + bool is_real;
  1922. + bool is_cached;
  1923. + struct list_head cursor;
  1924. + u64 cache_version;
  1925. + struct list_head cache;
  1926. + struct file *realfile;
  1927. +};
  1928. +
  1929. +static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
  1930. +{
  1931. + return container_of(n, struct ovl_cache_entry, node);
  1932. +}
  1933. +
  1934. +static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
  1935. + const char *name, int len)
  1936. +{
  1937. + struct rb_node *node = root->rb_node;
  1938. + int cmp;
  1939. +
  1940. + while (node) {
  1941. + struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
  1942. +
  1943. + cmp = strncmp(name, p->name, len);
  1944. + if (cmp > 0)
  1945. + node = p->node.rb_right;
  1946. + else if (cmp < 0 || len < p->len)
  1947. + node = p->node.rb_left;
  1948. + else
  1949. + return p;
  1950. + }
  1951. +
  1952. + return NULL;
  1953. +}
  1954. +
  1955. +static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
  1956. + u64 ino, unsigned int d_type)
  1957. +{
  1958. + struct ovl_cache_entry *p;
  1959. +
  1960. + p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL);
  1961. + if (p) {
  1962. + char *name_copy = (char *) (p + 1);
  1963. + memcpy(name_copy, name, len);
  1964. + name_copy[len] = '\0';
  1965. + p->name = name_copy;
  1966. + p->len = len;
  1967. + p->type = d_type;
  1968. + p->ino = ino;
  1969. + p->is_whiteout = false;
  1970. + }
  1971. +
  1972. + return p;
  1973. +}
  1974. +
  1975. +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
  1976. + const char *name, int len, u64 ino,
  1977. + unsigned int d_type)
  1978. +{
  1979. + struct rb_node **newp = &rdd->root->rb_node;
  1980. + struct rb_node *parent = NULL;
  1981. + struct ovl_cache_entry *p;
  1982. +
  1983. + while (*newp) {
  1984. + int cmp;
  1985. + struct ovl_cache_entry *tmp;
  1986. +
  1987. + parent = *newp;
  1988. + tmp = ovl_cache_entry_from_node(*newp);
  1989. + cmp = strncmp(name, tmp->name, len);
  1990. + if (cmp > 0)
  1991. + newp = &tmp->node.rb_right;
  1992. + else if (cmp < 0 || len < tmp->len)
  1993. + newp = &tmp->node.rb_left;
  1994. + else
  1995. + return 0;
  1996. + }
  1997. +
  1998. + p = ovl_cache_entry_new(name, len, ino, d_type);
  1999. + if (p == NULL)
  2000. + return -ENOMEM;
  2001. +
  2002. + list_add_tail(&p->l_node, rdd->list);
  2003. + rb_link_node(&p->node, parent, newp);
  2004. + rb_insert_color(&p->node, rdd->root);
  2005. +
  2006. + return 0;
  2007. +}
  2008. +
  2009. +static int ovl_fill_lower(struct ovl_readdir_data *rdd,
  2010. + const char *name, int namelen,
  2011. + loff_t offset, u64 ino, unsigned int d_type)
  2012. +{
  2013. + struct ovl_cache_entry *p;
  2014. +
  2015. + p = ovl_cache_entry_find(rdd->root, name, namelen);
  2016. + if (p) {
  2017. + list_move_tail(&p->l_node, rdd->middle);
  2018. + } else {
  2019. + p = ovl_cache_entry_new(name, namelen, ino, d_type);
  2020. + if (p == NULL)
  2021. + rdd->err = -ENOMEM;
  2022. + else
  2023. + list_add_tail(&p->l_node, rdd->middle);
  2024. + }
  2025. +
  2026. + return rdd->err;
  2027. +}
  2028. +
  2029. +static void ovl_cache_free(struct list_head *list)
  2030. +{
  2031. + struct ovl_cache_entry *p;
  2032. + struct ovl_cache_entry *n;
  2033. +
  2034. + list_for_each_entry_safe(p, n, list, l_node)
  2035. + kfree(p);
  2036. +
  2037. + INIT_LIST_HEAD(list);
  2038. +}
  2039. +
  2040. +static int ovl_fill_merge(void *buf, const char *name, int namelen,
  2041. + loff_t offset, u64 ino, unsigned int d_type)
  2042. +{
  2043. + struct ovl_readdir_data *rdd = buf;
  2044. +
  2045. + rdd->count++;
  2046. + if (!rdd->is_merge)
  2047. + return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
  2048. + else
  2049. + return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
  2050. +}
  2051. +
  2052. +static inline int ovl_dir_read(struct path *realpath,
  2053. + struct ovl_readdir_data *rdd)
  2054. +{
  2055. + struct file *realfile;
  2056. + int err;
  2057. +
  2058. + realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
  2059. + if (IS_ERR(realfile))
  2060. + return PTR_ERR(realfile);
  2061. +
  2062. + rdd->ctx.pos = 0;
  2063. + do {
  2064. + rdd->count = 0;
  2065. + rdd->err = 0;
  2066. + err = iterate_dir(realfile, &rdd->ctx);
  2067. + if (err >= 0)
  2068. + err = rdd->err;
  2069. + } while (!err && rdd->count);
  2070. + fput(realfile);
  2071. +
  2072. + return 0;
  2073. +}
  2074. +
  2075. +static void ovl_dir_reset(struct file *file)
  2076. +{
  2077. + struct ovl_dir_file *od = file->private_data;
  2078. + enum ovl_path_type type = ovl_path_type(file->f_path.dentry);
  2079. +
  2080. + if (ovl_dentry_version_get(file->f_path.dentry) != od->cache_version) {
  2081. + list_del_init(&od->cursor);
  2082. + ovl_cache_free(&od->cache);
  2083. + od->is_cached = false;
  2084. + }
  2085. + WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
  2086. + if (od->is_real && type == OVL_PATH_MERGE) {
  2087. + fput(od->realfile);
  2088. + od->realfile = NULL;
  2089. + od->is_real = false;
  2090. + }
  2091. +}
  2092. +
  2093. +static int ovl_dir_mark_whiteouts(struct ovl_readdir_data *rdd)
  2094. +{
  2095. + struct ovl_cache_entry *p;
  2096. + struct dentry *dentry;
  2097. + const struct cred *old_cred;
  2098. + struct cred *override_cred;
  2099. +
  2100. + override_cred = prepare_creds();
  2101. + if (!override_cred) {
  2102. + ovl_cache_free(rdd->list);
  2103. + return -ENOMEM;
  2104. + }
  2105. +
  2106. + /*
  2107. + * CAP_SYS_ADMIN for getxattr
  2108. + * CAP_DAC_OVERRIDE for lookup
  2109. + */
  2110. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2111. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  2112. + old_cred = override_creds(override_cred);
  2113. +
  2114. + mutex_lock(&rdd->dir->d_inode->i_mutex);
  2115. + list_for_each_entry(p, rdd->list, l_node) {
  2116. + if (p->type != DT_LNK)
  2117. + continue;
  2118. +
  2119. + dentry = lookup_one_len(p->name, rdd->dir, p->len);
  2120. + if (IS_ERR(dentry))
  2121. + continue;
  2122. +
  2123. + p->is_whiteout = ovl_is_whiteout(dentry);
  2124. + dput(dentry);
  2125. + }
  2126. + mutex_unlock(&rdd->dir->d_inode->i_mutex);
  2127. +
  2128. + revert_creds(old_cred);
  2129. + put_cred(override_cred);
  2130. +
  2131. + return 0;
  2132. +}
  2133. +
  2134. +static inline int ovl_dir_read_merged(struct path *upperpath,
  2135. + struct path *lowerpath,
  2136. + struct list_head *list)
  2137. +{
  2138. + int err;
  2139. + struct rb_root root = RB_ROOT;
  2140. + struct list_head middle;
  2141. + struct ovl_readdir_data rdd = {
  2142. + .ctx.actor = ovl_fill_merge,
  2143. + .list = list,
  2144. + .root = &root,
  2145. + .is_merge = false,
  2146. + };
  2147. +
  2148. + if (upperpath->dentry) {
  2149. + rdd.dir = upperpath->dentry;
  2150. + err = ovl_dir_read(upperpath, &rdd);
  2151. + if (err)
  2152. + goto out;
  2153. +
  2154. + err = ovl_dir_mark_whiteouts(&rdd);
  2155. + if (err)
  2156. + goto out;
  2157. + }
  2158. + /*
  2159. + * Insert lowerpath entries before upperpath ones, this allows
  2160. + * offsets to be reasonably constant
  2161. + */
  2162. + list_add(&middle, rdd.list);
  2163. + rdd.middle = &middle;
  2164. + rdd.is_merge = true;
  2165. + err = ovl_dir_read(lowerpath, &rdd);
  2166. + list_del(&middle);
  2167. +out:
  2168. + return err;
  2169. +}
  2170. +
  2171. +static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
  2172. +{
  2173. + struct list_head *l;
  2174. + loff_t off;
  2175. +
  2176. + l = od->cache.next;
  2177. + for (off = 0; off < pos; off++) {
  2178. + if (l == &od->cache)
  2179. + break;
  2180. + l = l->next;
  2181. + }
  2182. + list_move_tail(&od->cursor, l);
  2183. +}
  2184. +
  2185. +static int ovl_iterate(struct file *file, struct dir_context *ctx)
  2186. +{
  2187. + struct ovl_dir_file *od = file->private_data;
  2188. + int res;
  2189. +
  2190. + if (!ctx->pos)
  2191. + ovl_dir_reset(file);
  2192. +
  2193. + if (od->is_real) {
  2194. + res = iterate_dir(od->realfile, ctx);
  2195. +
  2196. + return res;
  2197. + }
  2198. +
  2199. + if (!od->is_cached) {
  2200. + struct path lowerpath;
  2201. + struct path upperpath;
  2202. +
  2203. + ovl_path_lower(file->f_path.dentry, &lowerpath);
  2204. + ovl_path_upper(file->f_path.dentry, &upperpath);
  2205. +
  2206. + res = ovl_dir_read_merged(&upperpath, &lowerpath, &od->cache);
  2207. + if (res) {
  2208. + ovl_cache_free(&od->cache);
  2209. + return res;
  2210. + }
  2211. +
  2212. + od->cache_version = ovl_dentry_version_get(file->f_path.dentry);
  2213. + od->is_cached = true;
  2214. +
  2215. + ovl_seek_cursor(od, ctx->pos);
  2216. + }
  2217. +
  2218. + while (od->cursor.next != &od->cache) {
  2219. + struct ovl_cache_entry *p;
  2220. +
  2221. + p = list_entry(od->cursor.next, struct ovl_cache_entry, l_node);
  2222. + if (!p->is_whiteout) {
  2223. + if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
  2224. + break;
  2225. + }
  2226. + ctx->pos++;
  2227. + list_move(&od->cursor, &p->l_node);
  2228. + }
  2229. +
  2230. + return 0;
  2231. +}
  2232. +
  2233. +static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
  2234. +{
  2235. + loff_t res;
  2236. + struct ovl_dir_file *od = file->private_data;
  2237. +
  2238. + mutex_lock(&file_inode(file)->i_mutex);
  2239. + if (!file->f_pos)
  2240. + ovl_dir_reset(file);
  2241. +
  2242. + if (od->is_real) {
  2243. + res = vfs_llseek(od->realfile, offset, origin);
  2244. + file->f_pos = od->realfile->f_pos;
  2245. + } else {
  2246. + res = -EINVAL;
  2247. +
  2248. + switch (origin) {
  2249. + case SEEK_CUR:
  2250. + offset += file->f_pos;
  2251. + break;
  2252. + case SEEK_SET:
  2253. + break;
  2254. + default:
  2255. + goto out_unlock;
  2256. + }
  2257. + if (offset < 0)
  2258. + goto out_unlock;
  2259. +
  2260. + if (offset != file->f_pos) {
  2261. + file->f_pos = offset;
  2262. + if (od->is_cached)
  2263. + ovl_seek_cursor(od, offset);
  2264. + }
  2265. + res = offset;
  2266. + }
  2267. +out_unlock:
  2268. + mutex_unlock(&file_inode(file)->i_mutex);
  2269. +
  2270. + return res;
  2271. +}
  2272. +
  2273. +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
  2274. + int datasync)
  2275. +{
  2276. + struct ovl_dir_file *od = file->private_data;
  2277. +
  2278. + /* May need to reopen directory if it got copied up */
  2279. + if (!od->realfile) {
  2280. + struct path upperpath;
  2281. +
  2282. + ovl_path_upper(file->f_path.dentry, &upperpath);
  2283. + od->realfile = ovl_path_open(&upperpath, O_RDONLY);
  2284. + if (IS_ERR(od->realfile))
  2285. + return PTR_ERR(od->realfile);
  2286. + }
  2287. +
  2288. + return vfs_fsync_range(od->realfile, start, end, datasync);
  2289. +}
  2290. +
  2291. +static int ovl_dir_release(struct inode *inode, struct file *file)
  2292. +{
  2293. + struct ovl_dir_file *od = file->private_data;
  2294. +
  2295. + list_del(&od->cursor);
  2296. + ovl_cache_free(&od->cache);
  2297. + if (od->realfile)
  2298. + fput(od->realfile);
  2299. + kfree(od);
  2300. +
  2301. + return 0;
  2302. +}
  2303. +
  2304. +static int ovl_dir_open(struct inode *inode, struct file *file)
  2305. +{
  2306. + struct path realpath;
  2307. + struct file *realfile;
  2308. + struct ovl_dir_file *od;
  2309. + enum ovl_path_type type;
  2310. +
  2311. + od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
  2312. + if (!od)
  2313. + return -ENOMEM;
  2314. +
  2315. + type = ovl_path_real(file->f_path.dentry, &realpath);
  2316. + realfile = ovl_path_open(&realpath, file->f_flags);
  2317. + if (IS_ERR(realfile)) {
  2318. + kfree(od);
  2319. + return PTR_ERR(realfile);
  2320. + }
  2321. + INIT_LIST_HEAD(&od->cache);
  2322. + INIT_LIST_HEAD(&od->cursor);
  2323. + od->is_cached = false;
  2324. + od->realfile = realfile;
  2325. + od->is_real = (type != OVL_PATH_MERGE);
  2326. + file->private_data = od;
  2327. +
  2328. + return 0;
  2329. +}
  2330. +
  2331. +const struct file_operations ovl_dir_operations = {
  2332. + .read = generic_read_dir,
  2333. + .open = ovl_dir_open,
  2334. + .iterate = ovl_iterate,
  2335. + .llseek = ovl_dir_llseek,
  2336. + .fsync = ovl_dir_fsync,
  2337. + .release = ovl_dir_release,
  2338. +};
  2339. +
  2340. +static int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
  2341. +{
  2342. + int err;
  2343. + struct path lowerpath;
  2344. + struct path upperpath;
  2345. + struct ovl_cache_entry *p;
  2346. +
  2347. + ovl_path_upper(dentry, &upperpath);
  2348. + ovl_path_lower(dentry, &lowerpath);
  2349. +
  2350. + err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
  2351. + if (err)
  2352. + return err;
  2353. +
  2354. + err = 0;
  2355. +
  2356. + list_for_each_entry(p, list, l_node) {
  2357. + if (p->is_whiteout)
  2358. + continue;
  2359. +
  2360. + if (p->name[0] == '.') {
  2361. + if (p->len == 1)
  2362. + continue;
  2363. + if (p->len == 2 && p->name[1] == '.')
  2364. + continue;
  2365. + }
  2366. + err = -ENOTEMPTY;
  2367. + break;
  2368. + }
  2369. +
  2370. + return err;
  2371. +}
  2372. +
  2373. +static int ovl_remove_whiteouts(struct dentry *dir, struct list_head *list)
  2374. +{
  2375. + struct path upperpath;
  2376. + struct dentry *upperdir;
  2377. + struct ovl_cache_entry *p;
  2378. + const struct cred *old_cred;
  2379. + struct cred *override_cred;
  2380. + int err;
  2381. +
  2382. + ovl_path_upper(dir, &upperpath);
  2383. + upperdir = upperpath.dentry;
  2384. +
  2385. + override_cred = prepare_creds();
  2386. + if (!override_cred)
  2387. + return -ENOMEM;
  2388. +
  2389. + /*
  2390. + * CAP_DAC_OVERRIDE for lookup and unlink
  2391. + * CAP_SYS_ADMIN for setxattr of "trusted" namespace
  2392. + * CAP_FOWNER for unlink in sticky directory
  2393. + */
  2394. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  2395. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2396. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  2397. + old_cred = override_creds(override_cred);
  2398. +
  2399. + err = vfs_setxattr(upperdir, ovl_opaque_xattr, "y", 1, 0);
  2400. + if (err)
  2401. + goto out_revert_creds;
  2402. +
  2403. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  2404. + list_for_each_entry(p, list, l_node) {
  2405. + struct dentry *dentry;
  2406. + int ret;
  2407. +
  2408. + if (!p->is_whiteout)
  2409. + continue;
  2410. +
  2411. + dentry = lookup_one_len(p->name, upperdir, p->len);
  2412. + if (IS_ERR(dentry)) {
  2413. + pr_warn(
  2414. + "overlayfs: failed to lookup whiteout %.*s: %li\n",
  2415. + p->len, p->name, PTR_ERR(dentry));
  2416. + continue;
  2417. + }
  2418. + ret = vfs_unlink(upperdir->d_inode, dentry, NULL);
  2419. + dput(dentry);
  2420. + if (ret)
  2421. + pr_warn(
  2422. + "overlayfs: failed to unlink whiteout %.*s: %i\n",
  2423. + p->len, p->name, ret);
  2424. + }
  2425. + mutex_unlock(&upperdir->d_inode->i_mutex);
  2426. +
  2427. +out_revert_creds:
  2428. + revert_creds(old_cred);
  2429. + put_cred(override_cred);
  2430. +
  2431. + return err;
  2432. +}
  2433. +
  2434. +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type)
  2435. +{
  2436. + int err;
  2437. + LIST_HEAD(list);
  2438. +
  2439. + err = ovl_check_empty_dir(dentry, &list);
  2440. + if (!err && type == OVL_PATH_MERGE)
  2441. + err = ovl_remove_whiteouts(dentry, &list);
  2442. +
  2443. + ovl_cache_free(&list);
  2444. +
  2445. + return err;
  2446. +}
  2447. --- /dev/null
  2448. +++ b/fs/overlayfs/super.c
  2449. @@ -0,0 +1,685 @@
  2450. +/*
  2451. + *
  2452. + * Copyright (C) 2011 Novell Inc.
  2453. + *
  2454. + * This program is free software; you can redistribute it and/or modify it
  2455. + * under the terms of the GNU General Public License version 2 as published by
  2456. + * the Free Software Foundation.
  2457. + */
  2458. +
  2459. +#include <linux/fs.h>
  2460. +#include <linux/namei.h>
  2461. +#include <linux/xattr.h>
  2462. +#include <linux/security.h>
  2463. +#include <linux/mount.h>
  2464. +#include <linux/slab.h>
  2465. +#include <linux/parser.h>
  2466. +#include <linux/module.h>
  2467. +#include <linux/cred.h>
  2468. +#include <linux/sched.h>
  2469. +#include <linux/statfs.h>
  2470. +#include <linux/seq_file.h>
  2471. +#include "overlayfs.h"
  2472. +
  2473. +MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  2474. +MODULE_DESCRIPTION("Overlay filesystem");
  2475. +MODULE_LICENSE("GPL");
  2476. +
  2477. +#define OVERLAYFS_SUPER_MAGIC 0x794c764f
  2478. +
  /* Directory pathnames kept in struct ovl_fs for show_options. */
  struct ovl_config {
      char *lowerdir;  /* pathname of the lower directory */
      char *upperdir;  /* pathname of the upper directory */
  };
  2483. +
  2484. +/* private information held for overlayfs's superblock */
  2485. +struct ovl_fs {
  2486. + struct vfsmount *upper_mnt;
  2487. + struct vfsmount *lower_mnt;
  2488. + long lower_namelen;
  2489. + /* pathnames of lower and upper dirs, for show_options */
  2490. + struct ovl_config config;
  2491. +};
  2492. +
  2493. +/* private information held for every overlayfs dentry */
  2494. +struct ovl_entry {
  2495. + /*
  2496. + * Keep "double reference" on upper dentries, so that
  2497. + * d_delete() doesn't think it's OK to reset d_inode to NULL.
  2498. + */
  2499. + struct dentry *__upperdentry;
  2500. + struct dentry *lowerdentry;
  2501. + union {
  2502. + struct {
  2503. + u64 version;
  2504. + bool opaque;
  2505. + };
  2506. + struct rcu_head rcu;
  2507. + };
  2508. +};
  2509. +
  /* Names of the private xattrs used for whiteouts and opaque directories. */
  const char *ovl_whiteout_xattr = "trusted.overlay.whiteout";
  const char *ovl_opaque_xattr = "trusted.overlay.opaque";
  2512. +
  2513. +
  2514. +enum ovl_path_type ovl_path_type(struct dentry *dentry)
  2515. +{
  2516. + struct ovl_entry *oe = dentry->d_fsdata;
  2517. +
  2518. + if (oe->__upperdentry) {
  2519. + if (oe->lowerdentry && S_ISDIR(dentry->d_inode->i_mode))
  2520. + return OVL_PATH_MERGE;
  2521. + else
  2522. + return OVL_PATH_UPPER;
  2523. + } else {
  2524. + return OVL_PATH_LOWER;
  2525. + }
  2526. +}
  2527. +
  2528. +static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
  2529. +{
  2530. + struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
  2531. + smp_read_barrier_depends();
  2532. + return upperdentry;
  2533. +}
  2534. +
  2535. +void ovl_path_upper(struct dentry *dentry, struct path *path) /* fill *path with the upper mount and upper dentry */
  2536. +{
  2537. + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
  2538. + struct ovl_entry *oe = dentry->d_fsdata;
  2539. +
  2540. + path->mnt = ofs->upper_mnt; /* no mntget(): *path does not own a reference */
  2541. + path->dentry = ovl_upperdentry_dereference(oe); /* may be NULL; no dget() either */
  2542. +}
  2543. +
  2544. +void ovl_path_lower(struct dentry *dentry, struct path *path) /* fill *path with the lower mount and lower dentry */
  2545. +{
  2546. + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
  2547. + struct ovl_entry *oe = dentry->d_fsdata;
  2548. +
  2549. + path->mnt = ofs->lower_mnt; /* no mntget(): *path does not own a reference */
  2550. + path->dentry = oe->lowerdentry; /* may be NULL; no dget() either */
  2551. +}
  2552. +
  2553. +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) /* fill *path for the layer chosen by ovl_path_type() and return that type */
  2554. +{
  2555. +
  2556. + enum ovl_path_type type = ovl_path_type(dentry);
  2557. +
  2558. + if (type == OVL_PATH_LOWER)
  2559. + ovl_path_lower(dentry, path);
  2560. + else
  2561. + ovl_path_upper(dentry, path); /* both OVL_PATH_UPPER and OVL_PATH_MERGE use the upper path */
  2562. +
  2563. + return type;
  2564. +}
  2565. +
  2566. +struct dentry *ovl_dentry_upper(struct dentry *dentry) /* upper dentry of an overlay dentry, or NULL; no reference taken */
  2567. +{
  2568. + struct ovl_entry *oe = dentry->d_fsdata;
  2569. +
  2570. + return ovl_upperdentry_dereference(oe);
  2571. +}
  2572. +
  2573. +struct dentry *ovl_dentry_lower(struct dentry *dentry) /* lower dentry of an overlay dentry, or NULL; no reference taken */
  2574. +{
  2575. + struct ovl_entry *oe = dentry->d_fsdata;
  2576. +
  2577. + return oe->lowerdentry;
  2578. +}
  2579. +
  2580. +struct dentry *ovl_dentry_real(struct dentry *dentry) /* upper dentry if set, otherwise the lower one (which may be NULL) */
  2581. +{
  2582. + struct ovl_entry *oe = dentry->d_fsdata;
  2583. + struct dentry *realdentry;
  2584. +
  2585. + realdentry = ovl_upperdentry_dereference(oe);
  2586. + if (!realdentry)
  2587. + realdentry = oe->lowerdentry; /* fall back to the lower layer */
  2588. +
  2589. + return realdentry;
  2590. +}
  2591. +
  2592. +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) /* like ovl_dentry_real(), and reports the chosen layer in *is_upper */
  2593. +{
  2594. + struct dentry *realdentry;
  2595. +
  2596. + realdentry = ovl_upperdentry_dereference(oe);
  2597. + if (realdentry) {
  2598. + *is_upper = true;
  2599. + } else {
  2600. + realdentry = oe->lowerdentry; /* may still be NULL */
  2601. + *is_upper = false;
  2602. + }
  2603. + return realdentry;
  2604. +}
  2605. +
  2606. +bool ovl_dentry_is_opaque(struct dentry *dentry) /* return the opaque flag stored in the dentry's ovl_entry */
  2607. +{
  2608. + struct ovl_entry *oe = dentry->d_fsdata;
  2609. + return oe->opaque;
  2610. +}
  2611. +
  2612. +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) /* store the opaque flag in the dentry's ovl_entry; no locking done here */
  2613. +{
  2614. + struct ovl_entry *oe = dentry->d_fsdata;
  2615. + oe->opaque = opaque;
  2616. +}
  2617. +
  2618. +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) /* publish upperdentry as the upper dentry of an overlay dentry */
  2619. +{
  2620. + struct ovl_entry *oe = dentry->d_fsdata;
  2621. +
  2622. + WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); /* expects the upper parent's i_mutex to be held */
  2623. + WARN_ON(oe->__upperdentry); /* expects that no upper dentry has been set yet */
  2624. + BUG_ON(!upperdentry->d_inode); /* upperdentry must be positive */
  2625. + smp_wmb(); /* order earlier stores before the pointer becomes visible */
  2626. + oe->__upperdentry = dget(upperdentry); /* NOTE(review): one dget() here vs. two dput() in ovl_dentry_release(); presumably the caller donates the other reference - confirm */
  2627. +}
  2628. +
  2629. +void ovl_dentry_version_inc(struct dentry *dentry) /* increment the version counter in the dentry's ovl_entry */
  2630. +{
  2631. + struct ovl_entry *oe = dentry->d_fsdata;
  2632. +
  2633. + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); /* expects the overlay inode's i_mutex to be held */
  2634. + oe->version++;
  2635. +}
  2636. +
  2637. +u64 ovl_dentry_version_get(struct dentry *dentry) /* read the version counter in the dentry's ovl_entry */
  2638. +{
  2639. + struct ovl_entry *oe = dentry->d_fsdata;
  2640. +
  2641. + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); /* expects the overlay inode's i_mutex to be held */
  2642. + return oe->version;
  2643. +}
  2644. +
  2645. +bool ovl_is_whiteout(struct dentry *dentry) /* true if dentry is a symlink whose whiteout xattr is the single byte 'y' */
  2646. +{
  2647. + int res;
  2648. + char val;
  2649. +
  2650. + if (!dentry) /* NULL and negative dentries are never whiteouts */
  2651. + return false;
  2652. + if (!dentry->d_inode)
  2653. + return false;
  2654. + if (!S_ISLNK(dentry->d_inode->i_mode)) /* only symlinks can be whiteouts */
  2655. + return false;
  2656. +
  2657. + res = vfs_getxattr(dentry, ovl_whiteout_xattr, &val, 1);
  2658. + if (res == 1 && val == 'y') /* any other result, including an error, means "not a whiteout" */
  2659. + return true;
  2660. +
  2661. + return false;
  2662. +}
  2663. +
  2664. +static bool ovl_is_opaquedir(struct dentry *dentry) /* true if dentry is a directory whose opaque xattr is the single byte 'y' */
  2665. +{
  2666. + int res;
  2667. + char val;
  2668. +
  2669. + if (!S_ISDIR(dentry->d_inode->i_mode)) /* dereferences d_inode unchecked: dentry must be non-NULL and positive */
  2670. + return false;
  2671. +
  2672. + res = vfs_getxattr(dentry, ovl_opaque_xattr, &val, 1);
  2673. + if (res == 1 && val == 'y') /* any other result, including an error, means "not opaque" */
  2674. + return true;
  2675. +
  2676. + return false;
  2677. +}
  2678. +
  2679. +static void ovl_entry_free(struct rcu_head *head) /* call_rcu() callback queued by ovl_dentry_release() */
  2680. +{
  2681. + struct ovl_entry *oe = container_of(head, struct ovl_entry, rcu); /* recover the entry from its embedded rcu_head */
  2682. + kfree(oe);
  2683. +}
  2684. +
  2685. +static void ovl_dentry_release(struct dentry *dentry) /* .d_release: drop the real dentries and free the ovl_entry */
  2686. +{
  2687. + struct ovl_entry *oe = dentry->d_fsdata;
  2688. +
  2689. + if (oe) {
  2690. + dput(oe->__upperdentry); /* the upper dentry is put twice, matching the "double reference" */
  2691. + dput(oe->__upperdentry);
  2692. + dput(oe->lowerdentry);
  2693. + call_rcu(&oe->rcu, ovl_entry_free); /* the kfree() is deferred through RCU */
  2694. + }
  2695. +}
  2696. +
  2697. +const struct dentry_operations ovl_dentry_operations = { /* installed as d_op on the root and on every looked-up dentry */
  2698. + .d_release = ovl_dentry_release,
  2699. +};
  2700. +
  2701. +static struct ovl_entry *ovl_alloc_entry(void) /* zero-filled ovl_entry, or NULL on allocation failure */
  2702. +{
  2703. + return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
  2704. +}
  2705. +
  2706. +static inline struct dentry *ovl_lookup_real(struct dentry *dir, /* look up name in real directory dir */
  2707. + struct qstr *name) /* returns a referenced positive dentry, NULL if absent, or ERR_PTR() */
  2708. +{
  2709. + struct dentry *dentry;
  2710. +
  2711. + mutex_lock(&dir->d_inode->i_mutex); /* lookup_one_len() is called with the directory's i_mutex held */
  2712. + dentry = lookup_one_len(name->name, dir, name->len);
  2713. + mutex_unlock(&dir->d_inode->i_mutex);
  2714. +
  2715. + if (IS_ERR(dentry)) {
  2716. + if (PTR_ERR(dentry) == -ENOENT) /* -ENOENT is folded into "not found"; other errors pass through */
  2717. + dentry = NULL;
  2718. + } else if (!dentry->d_inode) {
  2719. + dput(dentry); /* a negative dentry is also reported as "not found" */
  2720. + dentry = NULL;
  2721. + }
  2722. + return dentry;
  2723. +}
  2724. +
  2725. +static int ovl_do_lookup(struct dentry *dentry) /* look dentry->d_name up in both layers and d_add() the result; 0 or -errno */
  2726. +{
  2727. + struct ovl_entry *oe;
  2728. + struct dentry *upperdir;
  2729. + struct dentry *lowerdir;
  2730. + struct dentry *upperdentry = NULL;
  2731. + struct dentry *lowerdentry = NULL;
  2732. + struct inode *inode = NULL; /* stays NULL when neither layer has the name (negative dentry) */
  2733. + int err;
  2734. +
  2735. + err = -ENOMEM;
  2736. + oe = ovl_alloc_entry();
  2737. + if (!oe)
  2738. + goto out;
  2739. +
  2740. + upperdir = ovl_dentry_upper(dentry->d_parent); /* real parent directories; either may be NULL */
  2741. + lowerdir = ovl_dentry_lower(dentry->d_parent);
  2742. +
  2743. + if (upperdir) {
  2744. + upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
  2745. + err = PTR_ERR(upperdentry);
  2746. + if (IS_ERR(upperdentry))
  2747. + goto out_put_dir;
  2748. +
  2749. + if (lowerdir && upperdentry && /* xattrs only matter when a lower layer could show through */
  2750. + (S_ISLNK(upperdentry->d_inode->i_mode) ||
  2751. + S_ISDIR(upperdentry->d_inode->i_mode))) {
  2752. + const struct cred *old_cred;
  2753. + struct cred *override_cred;
  2754. +
  2755. + err = -ENOMEM;
  2756. + override_cred = prepare_creds();
  2757. + if (!override_cred)
  2758. + goto out_dput_upper;
  2759. +
  2760. + /* CAP_SYS_ADMIN needed for getxattr */
  2761. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2762. + old_cred = override_creds(override_cred);
  2763. +
  2764. + if (ovl_is_opaquedir(upperdentry)) {
  2765. + oe->opaque = true; /* opaque directory: the lower lookup below is skipped */
  2766. + } else if (ovl_is_whiteout(upperdentry)) {
  2767. + dput(upperdentry); /* whiteout: the name is treated as absent in the upper layer */
  2768. + upperdentry = NULL;
  2769. + oe->opaque = true;
  2770. + }
  2771. + revert_creds(old_cred);
  2772. + put_cred(override_cred);
  2773. + }
  2774. + }
  2775. + if (lowerdir && !oe->opaque) {
  2776. + lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
  2777. + err = PTR_ERR(lowerdentry);
  2778. + if (IS_ERR(lowerdentry))
  2779. + goto out_dput_upper;
  2780. + }
  2781. +
  2782. + if (lowerdentry && upperdentry && /* both layers are kept only when both are directories */
  2783. + (!S_ISDIR(upperdentry->d_inode->i_mode) ||
  2784. + !S_ISDIR(lowerdentry->d_inode->i_mode))) {
  2785. + dput(lowerdentry); /* otherwise the upper object hides the lower one */
  2786. + lowerdentry = NULL;
  2787. + oe->opaque = true;
  2788. + }
  2789. +
  2790. + if (lowerdentry || upperdentry) {
  2791. + struct dentry *realdentry;
  2792. +
  2793. + realdentry = upperdentry ? upperdentry : lowerdentry; /* upper takes precedence */
  2794. + err = -ENOMEM;
  2795. + inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
  2796. + oe);
  2797. + if (!inode)
  2798. + goto out_dput;
  2799. + ovl_copyattr(realdentry->d_inode, inode);
  2800. + }
  2801. +
  2802. + if (upperdentry)
  2803. + oe->__upperdentry = dget(upperdentry); /* lookup reference plus this dget() form the "double reference" */
  2804. +
  2805. + if (lowerdentry)
  2806. + oe->lowerdentry = lowerdentry; /* the lookup reference is handed over to oe */
  2807. +
  2808. + dentry->d_fsdata = oe;
  2809. + dentry->d_op = &ovl_dentry_operations;
  2810. + d_add(dentry, inode);
  2811. +
  2812. + return 0;
  2813. +
  2814. +out_dput:
  2815. + dput(lowerdentry);
  2816. +out_dput_upper:
  2817. + dput(upperdentry);
  2818. +out_put_dir: /* despite the label name, this only frees oe */
  2819. + kfree(oe);
  2820. +out:
  2821. + return err;
  2822. +}
  2823. +
  2824. +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, /* wrapper around ovl_do_lookup(); dir is not used */
  2825. + unsigned int flags) /* flags is not used */
  2826. +{
  2827. + int err = ovl_do_lookup(dentry);
  2828. +
  2829. + if (err)
  2830. + return ERR_PTR(err);
  2831. +
  2832. + return NULL; /* success: ovl_do_lookup() has already d_add()ed dentry */
  2833. +}
  2834. +
  2835. +struct file *ovl_path_open(struct path *path, int flags) /* dentry_open() on path with the current task's credentials */
  2836. +{
  2837. + return dentry_open(path, flags, current_cred());
  2838. +}
  2839. +
  2840. +static void ovl_put_super(struct super_block *sb) /* .put_super: release everything ovl_fill_super() attached to sb */
  2841. +{
  2842. + struct ovl_fs *ufs = sb->s_fs_info;
  2843. +
  2844. + if (!(sb->s_flags & MS_RDONLY)) /* write access to upper_mnt is held only while mounted read-write */
  2845. + mnt_drop_write(ufs->upper_mnt);
  2846. +
  2847. + mntput(ufs->upper_mnt);
  2848. + mntput(ufs->lower_mnt);
  2849. +
  2850. + kfree(ufs->config.lowerdir);
  2851. + kfree(ufs->config.upperdir);
  2852. + kfree(ufs);
  2853. +}
  2854. +
  2855. +static int ovl_remount_fs(struct super_block *sb, int *flagsp, char *data) /* .remount_fs; data is not used */
  2856. +{
  2857. + int flags = *flagsp;
  2858. + struct ovl_fs *ufs = sb->s_fs_info;
  2859. +
  2860. + /* When remounting rw or ro, we need to adjust the write access to the
  2861. + * upper fs.
  2862. + */
  2863. + if (((flags ^ sb->s_flags) & MS_RDONLY) == 0)
  2864. + /* No change to readonly status */
  2865. + return 0;
  2866. +
  2867. + if (flags & MS_RDONLY) {
  2868. + mnt_drop_write(ufs->upper_mnt); /* rw -> ro: give up write access to the upper mount */
  2869. + return 0;
  2870. + } else
  2871. + return mnt_want_write(ufs->upper_mnt); /* ro -> rw: acquire write access; its error is returned as-is */
  2872. +}
  2873. +
  2874. +/**
  2875. + * ovl_statfs
  2876. + * @dentry: A dentry on the overlayfs super block
  2877. + * @buf: The struct kstatfs to fill in with stats
  2878. + *
  2879. + * Get the filesystem statistics. As writes always target the upper layer
  2880. + * filesystem, pass the statfs to that filesystem.
  2881. + */
  2882. +static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
  2883. +{
  2884. + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
  2885. + struct dentry *root_dentry = dentry->d_sb->s_root;
  2886. + struct path path;
  2887. + int err;
  2888. +
  2889. + ovl_path_upper(root_dentry, &path); /* always stat the upper layer's root */
  2890. +
  2891. + err = vfs_statfs(&path, buf);
  2892. + if (!err) {
  2893. + buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); /* report the larger of the two layers' name limits */
  2894. + buf->f_type = OVERLAYFS_SUPER_MAGIC;
  2895. + }
  2896. +
  2897. + return err;
  2898. +}
  2899. +
  2900. +/**
  2901. + * ovl_show_options
  2902. + * @m: seq_file to print into; @dentry: a dentry on the overlayfs super block
  2903. + * Prints the mount options for a given superblock.
  2904. + * Returns zero; does not fail.
  2905. + */
  2906. +static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
  2907. +{
  2908. + struct super_block *sb = dentry->d_sb;
  2909. + struct ovl_fs *ufs = sb->s_fs_info;
  2910. +
  2911. + seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); /* NOTE(review): path is printed verbatim, with no escaping */
  2912. + seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
  2913. + return 0;
  2914. +}
  2915. +
  2916. +static const struct super_operations ovl_super_operations = { /* installed as sb->s_op by ovl_fill_super() */
  2917. + .put_super = ovl_put_super,
  2918. + .remount_fs = ovl_remount_fs,
  2919. + .statfs = ovl_statfs,
  2920. + .show_options = ovl_show_options,
  2921. +};
  2922. +
  2923. +enum { /* token ids used in ovl_tokens and switched on in ovl_parse_opt() */
  2924. + OPT_LOWERDIR,
  2925. + OPT_UPPERDIR,
  2926. + OPT_ERR, /* paired with the NULL pattern in ovl_tokens */
  2927. +};
  2928. +
  2929. +static const match_table_t ovl_tokens = { /* option patterns handed to match_token() */
  2930. + {OPT_LOWERDIR, "lowerdir=%s"},
  2931. + {OPT_UPPERDIR, "upperdir=%s"},
  2932. + {OPT_ERR, NULL} /* NULL pattern ends the table */
  2933. +};
  2934. +
  2935. +static int ovl_parse_opt(char *opt, struct ovl_config *config) /* parse the comma-separated option string into *config; 0 or -errno */
  2936. +{
  2937. + char *p;
  2938. +
  2939. + config->upperdir = NULL; /* both fields are reset first, so they are always NULL or a kfree()-able string */
  2940. + config->lowerdir = NULL;
  2941. +
  2942. + while ((p = strsep(&opt, ",")) != NULL) { /* strsep() modifies opt in place */
  2943. + int token;
  2944. + substring_t args[MAX_OPT_ARGS];
  2945. +
  2946. + if (!*p) /* skip empty fields such as ",," */
  2947. + continue;
  2948. +
  2949. + token = match_token(p, ovl_tokens, args);
  2950. + switch (token) {
  2951. + case OPT_UPPERDIR:
  2952. + kfree(config->upperdir); /* a repeated option replaces the earlier value */
  2953. + config->upperdir = match_strdup(&args[0]);
  2954. + if (!config->upperdir)
  2955. + return -ENOMEM; /* a lowerdir duplicated earlier stays in *config for the caller to free */
  2956. + break;
  2957. +
  2958. + case OPT_LOWERDIR:
  2959. + kfree(config->lowerdir); /* a repeated option replaces the earlier value */
  2960. + config->lowerdir = match_strdup(&args[0]);
  2961. + if (!config->lowerdir)
  2962. + return -ENOMEM; /* an upperdir duplicated earlier stays in *config for the caller to free */
  2963. + break;
  2964. +
  2965. + default:
  2966. + return -EINVAL; /* unrecognized option; strings duplicated so far stay in *config */
  2967. + }
  2968. + }
  2969. + return 0;
  2970. +}
  2971. +
  2972. +static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* mount-time setup: parse options, pin both layers, build the root dentry; 0 or -errno */
  2973. +{
  2974. + struct path lowerpath;
  2975. + struct path upperpath;
  2976. + struct inode *root_inode;
  2977. + struct dentry *root_dentry;
  2978. + struct ovl_entry *oe;
  2979. + struct ovl_fs *ufs;
  2980. + struct kstatfs statfs;
  2981. + int err;
  2982. +
  2983. + err = -ENOMEM;
  2984. + ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL);
  2985. + if (!ufs)
  2986. + goto out;
  2987. +
  2988. + err = ovl_parse_opt((char *) data, &ufs->config);
  2989. + if (err)
  2990. + goto out_free_config; /* one option may already be duplicated; ovl_parse_opt() leaves both fields NULL or valid */
  2991. +
  2992. + err = -EINVAL;
  2993. + if (!ufs->config.upperdir || !ufs->config.lowerdir) {
  2994. + pr_err("overlayfs: missing upperdir or lowerdir\n");
  2995. + goto out_free_config;
  2996. + }
  2997. + err = -ENOMEM; /* an allocation failure must not report the stale -EINVAL */
  2998. + oe = ovl_alloc_entry();
  2999. + if (oe == NULL)
  3000. + goto out_free_config;
  3001. +
  3002. + err = kern_path(ufs->config.upperdir, LOOKUP_FOLLOW, &upperpath);
  3003. + if (err)
  3004. + goto out_free_oe;
  3005. +
  3006. + err = kern_path(ufs->config.lowerdir, LOOKUP_FOLLOW, &lowerpath);
  3007. + if (err)
  3008. + goto out_put_upperpath;
  3009. +
  3010. + err = -ENOTDIR;
  3011. + if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
  3012. + !S_ISDIR(lowerpath.dentry->d_inode->i_mode))
  3013. + goto out_put_lowerpath;
  3014. +
  3015. + err = vfs_statfs(&lowerpath, &statfs);
  3016. + if (err) {
  3017. + pr_err("overlayfs: statfs failed on lowerpath\n");
  3018. + goto out_put_lowerpath;
  3019. + }
  3020. + ufs->lower_namelen = statfs.f_namelen; /* used later by ovl_statfs() */
  3021. +
  3022. + sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
  3023. + lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
  3024. +
  3025. + err = -EINVAL;
  3026. + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
  3027. + pr_err("overlayfs: maximum fs stacking depth exceeded\n");
  3028. + goto out_put_lowerpath;
  3029. + }
  3030. +
  3031. +
  3032. + ufs->upper_mnt = clone_private_mount(&upperpath);
  3033. + err = PTR_ERR(ufs->upper_mnt);
  3034. + if (IS_ERR(ufs->upper_mnt)) {
  3035. + pr_err("overlayfs: failed to clone upperpath\n");
  3036. + goto out_put_lowerpath;
  3037. + }
  3038. +
  3039. + ufs->lower_mnt = clone_private_mount(&lowerpath);
  3040. + err = PTR_ERR(ufs->lower_mnt);
  3041. + if (IS_ERR(ufs->lower_mnt)) {
  3042. + pr_err("overlayfs: failed to clone lowerpath\n");
  3043. + goto out_put_upper_mnt;
  3044. + }
  3045. +
  3046. + /*
  3047. + * Make lower_mnt R/O. That way fchmod/fchown on lower file
  3048. + * will fail instead of modifying lower fs.
  3049. + */
  3050. + ufs->lower_mnt->mnt_flags |= MNT_READONLY;
  3051. +
  3052. + /* If the upper fs is r/o, we mark overlayfs r/o too */
  3053. + if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
  3054. + sb->s_flags |= MS_RDONLY;
  3055. +
  3056. + if (!(sb->s_flags & MS_RDONLY)) {
  3057. + err = mnt_want_write(ufs->upper_mnt); /* held until ovl_put_super() or a remount to r/o */
  3058. + if (err)
  3059. + goto out_put_lower_mnt;
  3060. + }
  3061. +
  3062. + err = -ENOMEM;
  3063. + root_inode = ovl_new_inode(sb, S_IFDIR, oe);
  3064. + if (!root_inode)
  3065. + goto out_drop_write;
  3066. +
  3067. + root_dentry = d_make_root(root_inode);
  3068. + if (!root_dentry)
  3069. + goto out_drop_write;
  3070. +
  3071. + mntput(upperpath.mnt); /* the private clones replace the kern_path() mount references */
  3072. + mntput(lowerpath.mnt);
  3073. +
  3074. + oe->__upperdentry = dget(upperpath.dentry); /* kern_path() reference plus this dget() form the "double reference" */
  3075. + oe->lowerdentry = lowerpath.dentry; /* the kern_path() dentry reference is handed over to oe */
  3076. +
  3077. + root_dentry->d_fsdata = oe;
  3078. + root_dentry->d_op = &ovl_dentry_operations;
  3079. +
  3080. + sb->s_magic = OVERLAYFS_SUPER_MAGIC;
  3081. + sb->s_op = &ovl_super_operations;
  3082. + sb->s_root = root_dentry;
  3083. + sb->s_fs_info = ufs;
  3084. +
  3085. + return 0;
  3086. +
  3087. +out_drop_write:
  3088. + if (!(sb->s_flags & MS_RDONLY))
  3089. + mnt_drop_write(ufs->upper_mnt);
  3090. +out_put_lower_mnt:
  3091. + mntput(ufs->lower_mnt);
  3092. +out_put_upper_mnt:
  3093. + mntput(ufs->upper_mnt);
  3094. +out_put_lowerpath:
  3095. + path_put(&lowerpath);
  3096. +out_put_upperpath:
  3097. + path_put(&upperpath);
  3098. +out_free_oe:
  3099. + kfree(oe);
  3100. +out_free_config:
  3101. + kfree(ufs->config.lowerdir); /* kfree(NULL) is a no-op, so unset options are fine here */
  3102. + kfree(ufs->config.upperdir);
  3103. +out_free_ufs:
  3104. + kfree(ufs);
  3105. +out:
  3106. + return err;
  3107. +}
  3108. +
  3109. +static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, /* .mount: delegate to mount_nodev() with ovl_fill_super */
  3110. + const char *dev_name, void *raw_data) /* dev_name is not used; raw_data is the option string */
  3111. +{
  3112. + return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
  3113. +}
  3114. +
  3115. +static struct file_system_type ovl_fs_type = { /* registered in ovl_init(), unregistered in ovl_exit() */
  3116. + .owner = THIS_MODULE,
  3117. + .name = "overlayfs",
  3118. + .mount = ovl_mount,
  3119. + .kill_sb = kill_anon_super,
  3120. +};
  3121. +MODULE_ALIAS_FS("overlayfs");
  3122. +
  3123. +static int __init ovl_init(void) /* module init: register the "overlayfs" filesystem type */
  3124. +{
  3125. + return register_filesystem(&ovl_fs_type);
  3126. +}
  3127. +
  3128. +static void __exit ovl_exit(void) /* module exit: unregister the "overlayfs" filesystem type */
  3129. +{
  3130. + unregister_filesystem(&ovl_fs_type);
  3131. +}
  3132. +
  3133. +module_init(ovl_init);
  3134. +module_exit(ovl_exit);
  3135. --- a/fs/splice.c
  3136. +++ b/fs/splice.c
  3137. @@ -1327,6 +1327,7 @@ long do_splice_direct(struct file *in, l
  3138. return ret;
  3139. }
  3140. +EXPORT_SYMBOL(do_splice_direct);
  3141. static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
  3142. struct pipe_inode_info *opipe,
  3143. --- a/include/linux/fs.h
  3144. +++ b/include/linux/fs.h
  3145. @@ -246,6 +246,12 @@ struct iattr {
  3146. */
  3147. #include <linux/quota.h>
  3148. +/*
  3149. + * Maximum number of layers of fs stack. Needs to be limited to
  3150. + * prevent kernel stack overflow
  3151. + */
  3152. +#define FILESYSTEM_MAX_STACK_DEPTH 2
  3153. +
  3154. /**
  3155. * enum positive_aop_returns - aop return codes with specific semantics
  3156. *
  3157. @@ -1325,6 +1331,11 @@ struct super_block {
  3158. struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
  3159. struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
  3160. struct rcu_head rcu;
  3161. +
  3162. + /*
  3163. + * Indicates how deep in a filesystem stack this SB is
  3164. + */
  3165. + int s_stack_depth;
  3166. };
  3167. extern struct timespec current_fs_time(struct super_block *sb);
  3168. @@ -1580,6 +1591,7 @@ struct inode_operations {
  3169. struct file *, unsigned open_flag,
  3170. umode_t create_mode, int *opened);
  3171. int (*tmpfile) (struct inode *, struct dentry *, umode_t);
  3172. + int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
  3173. } ____cacheline_aligned;
  3174. ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
  3175. @@ -2074,6 +2086,7 @@ extern struct file *file_open_name(struc
  3176. extern struct file *filp_open(const char *, int, umode_t);
  3177. extern struct file *file_open_root(struct dentry *, struct vfsmount *,
  3178. const char *, int);
  3179. +extern int vfs_open(const struct path *, struct file *, const struct cred *);
  3180. extern struct file * dentry_open(const struct path *, int, const struct cred *);
  3181. extern int filp_close(struct file *, fl_owner_t id);
  3182. @@ -2280,6 +2293,7 @@ extern sector_t bmap(struct inode *, sec
  3183. #endif
  3184. extern int notify_change(struct dentry *, struct iattr *, struct inode **);
  3185. extern int inode_permission(struct inode *, int);
  3186. +extern int __inode_permission(struct inode *, int);
  3187. extern int generic_permission(struct inode *, int);
  3188. static inline bool execute_ok(struct inode *inode)
  3189. @@ -2488,6 +2502,9 @@ extern ssize_t generic_file_splice_write
  3190. struct file *, loff_t *, size_t, unsigned int);
  3191. extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
  3192. struct file *out, loff_t *, size_t len, unsigned int flags);
  3193. +extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
  3194. + loff_t *opos, size_t len, unsigned int flags);
  3195. +
  3196. extern void
  3197. file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
  3198. --- a/include/linux/mount.h
  3199. +++ b/include/linux/mount.h
  3200. @@ -71,6 +71,9 @@ extern void mnt_pin(struct vfsmount *mnt
  3201. extern void mnt_unpin(struct vfsmount *mnt);
  3202. extern int __mnt_is_readonly(struct vfsmount *mnt);
  3203. +struct path;
  3204. +extern struct vfsmount *clone_private_mount(struct path *path);
  3205. +
  3206. struct file_system_type;
  3207. extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
  3208. int flags, const char *name,