|
|
@@ -1,5203 +0,0 @@
|
|
|
---- /dev/null
|
|
|
-+++ b/Documentation/filesystems/union-mounts.txt
|
|
|
-@@ -0,0 +1,187 @@
|
|
|
-+VFS based Union Mounts
|
|
|
-+----------------------
|
|
|
-+
|
|
|
-+ 1. What are "Union Mounts"
|
|
|
-+ 2. The Union Stack
|
|
|
-+ 3. Whiteouts, Opaque Directories, and Fallthrus
|
|
|
-+ 4. Copy-up
|
|
|
-+ 5. Directory Reading
|
|
|
-+ 6. Known Problems
|
|
|
-+ 7. References
|
|
|
-+
|
|
|
-+-------------------------------------------------------------------------------
|
|
|
-+
|
|
|
-+1. What are "Union Mounts"
|
|
|
-+==========================
|
|
|
-+
|
|
|
-+Please note: this is NOT about UnionFS and it is NOT derived work!
|
|
|
-+
|
|
|
-+Traditionally the mount operation is opaque, which means that the content of
|
|
|
-+the mount point, the directory where the file system is mounted on, is hidden
|
|
|
-+by the content of the mounted file system's root directory until the file
|
|
|
-+system is unmounted again. Unlike the traditional UNIX mount mechanism, which
|
|
|
-+hides the contents of the mount point, a union mount presents a view as if
|
|
|
-+both filesystems are merged together. Although only the topmost layer of the
|
|
|
-+mount stack can be altered, it appears as if transparent file system mounts
|
|
|
-+allow any file to be created, modified or deleted.
|
|
|
-+
|
|
|
-+Most people know the concepts and features of union mounts from other
|
|
|
-+operating systems like Sun's Translucent Filesystem, Plan9 or BSD. For an
|
|
|
-+in-depth review of union mounts and other unioning file systems, see:
|
|
|
-+
|
|
|
-+http://lwn.net/Articles/324291/
|
|
|
-+http://lwn.net/Articles/325369/
|
|
|
-+http://lwn.net/Articles/327738/
|
|
|
-+
|
|
|
-+Here are the key features of this implementation:
|
|
|
-+- completely VFS based
|
|
|
-+- does not change the namespace stacking
|
|
|
-+- directory listings have duplicate entries removed in the kernel
|
|
|
-+- writable unions: only the topmost file system layer may be writable
|
|
|
-+- writable unions: new whiteout filetype handled inside the kernel
|
|
|
-+
|
|
|
-+-------------------------------------------------------------------------------
|
|
|
-+
|
|
|
-+2. The Union Stack
|
|
|
-+==================
|
|
|
-+
|
|
|
-+The mounted file systems are organized in the "file system hierarchy" (tree of
|
|
|
-+vfsmount structures), which keeps track of the stacking of file systems
|
|
|
-+upon each other. The per-directory view on the file system hierarchy is called
|
|
|
-+"mount stack" and reflects the order of file systems, which are mounted on a
|
|
|
-+specific directory.
|
|
|
-+
|
|
|
-+Union mounts present a single unified view of the contents of two or more file
|
|
|
-+systems as if they are merged together. Since the information which file
|
|
|
-+system objects are part of a unified view is not directly available from the
|
|
|
-+file system hierarchy there is a need for a new structure. The file system
|
|
|
-+objects, which are part of a unified view are ordered in a so-called "union
|
|
|
-+stack". Only directories can be part of a unified view.
|
|
|
-+
|
|
|
-+The link between two layers of the union stack is maintained using the
|
|
|
-+union_mount structure (#include <linux/union.h>):
|
|
|
-+
|
|
|
-+struct union_mount {
|
|
|
-+ atomic_t u_count; /* reference count */
|
|
|
-+ struct mutex u_mutex;
|
|
|
-+ struct list_head u_unions; /* list head for d_unions */
|
|
|
-+ struct hlist_node u_hash; /* list head for searching */
|
|
|
-+ struct hlist_node u_rhash; /* list head for reverse searching */
|
|
|
-+
|
|
|
-+ struct path u_this; /* this is me */
|
|
|
-+ struct path u_next; /* this is what I overlay */
|
|
|
-+};
|
|
|
-+
|
|
|
-+The union_mount structure holds a reference (dget,mntget) to the next lower
|
|
|
-+layer of the union stack. Since a dentry can be part of multiple unions
|
|
|
-+(e.g. with bind mounts) they are tied together via the d_unions field of the
|
|
|
-+dentry structure.
|
|
|
-+
|
|
|
-+All union_mount structures are cached in two hash tables, one for lookups of
|
|
|
-+the next lower layer of the union stack and one for reverse lookups of the
|
|
|
-+next upper layer of the union stack. The reverse lookup is necessary to
|
|
|
-+resolve CWD relative path lookups. For calculation of the hash value, the
|
|
|
-+(dentry,vfsmount) pair is used. The u_this field is used for the hash table
|
|
|
-+which is used in forward lookups and the u_next field for the reverse lookups.
|
|
|
-+
|
|
|
-+During every new mount (or mount propagation), a new union_mount structure is
|
|
|
-+allocated. A reference to the mountpoint's vfsmount and dentry is taken and
|
|
|
-+stored in the u_next field. In almost the same manner a union_mount
|
|
|
-+structure is created during the first time lookup of a directory within a
|
|
|
-+union mount point. In this case the lookup proceeds to all lower layers of the
|
|
|
-+union. Therefore the complete union stack is constructed during lookups.
|
|
|
-+
|
|
|
-+The union_mount structures of a dentry are destroyed when the dentry itself is
|
|
|
-+destroyed. Therefore the dentry cache is indirectly driving the union_mount
|
|
|
-+cache, as is done for inodes too. Please note that lower layer
|
|
|
-+union_mount structures are kept in memory until the topmost dentry is
|
|
|
-+destroyed.
|
|
|
-+
|
|
|
-+-------------------------------------------------------------------------------
|
|
|
-+
|
|
|
-+3. Whiteouts, Opaque Directories, and Fallthrus
|
|
|
-+===============================================
|
|
|
-+
|
|
|
-+The whiteout filetype isn't new. It has been there for quite some time now
|
|
|
-+but Linux's VFS hasn't used it yet. With the availability of union mount code
|
|
|
-+inside the VFS the whiteout filetype is getting important to support writable
|
|
|
-+union mounts. For read-only union mounts, support for whiteouts or
|
|
|
-+copy-on-open is not necessary.
|
|
|
-+
|
|
|
-+The whiteout filetype has the same function as negative dentries: they
|
|
|
-+describe a filename which isn't there. The creation of whiteouts needs
|
|
|
-+lowlevel filesystem support. At the time of writing this, there is whiteout
|
|
|
-+support for tmpfs, ext2 and ext3 available. The VFS is extended to make the
|
|
|
-+whiteout handling transparent to all its users. The whiteouts are not
|
|
|
-+visible to user-space.
|
|
|
-+
|
|
|
-+What happens when we create a directory that was previously whited-out? We
|
|
|
-+don't want the directory entries from underlying filesystems to suddenly appear
|
|
|
-+in the newly created directory. So we mark the directory opaque (the file
|
|
|
-+system must support storage of the opaque flag).
|
|
|
-+
|
|
|
-+Fallthrus are directory entries that override the opaque flag on a directory
|
|
|
-+for that specific directory entry name (the lookup "falls through" to the next
|
|
|
-+layer of the union mount). Fallthrus are mainly useful for implementing
|
|
|
-+readdir().
|
|
|
-+
|
|
|
-+-------------------------------------------------------------------------------
|
|
|
-+
|
|
|
-+4. Copy-up
|
|
|
-+==========
|
|
|
-+
|
|
|
-+Any write to an object on any layer other than the topmost triggers a copy-up
|
|
|
-+of the object to the topmost file system. For regular files, the copy-up
|
|
|
-+happens when it is opened in writable mode.
|
|
|
-+
|
|
|
-+Directories are copied up on open, regardless of intent to write, to simplify
|
|
|
-+copy-up of any object located below it in the namespace. Otherwise we have to
|
|
|
-+walk the entire pathname to create intermediate directories whenever we do a
|
|
|
-+copy-up. This is the same approach as BSD union mounts and uses a negligible
|
|
|
-+amount of disk space. Note that the actual directory entries themselves are
|
|
|
-+not copied-up from the lower levels until (a) the directory is written to, or
|
|
|
-+(b) the first readdir() of the directory (more on that later).
|
|
|
-+
|
|
|
-+Rename across different levels of the union is implemented as a copy-up
|
|
|
-+operation for regular files. Rename of directories simply returns EXDEV, the
|
|
|
-+same as if we tried to rename across different mounts. Most applications have
|
|
|
-+to handle this case anyway. Some applications do not expect EXDEV on
|
|
|
-+rename operations within the same directory, but these applications will also
|
|
|
-+be broken with bind mounts.
|
|
|
-+
|
|
|
-+-------------------------------------------------------------------------------
|
|
|
-+
|
|
|
-+5. Directory Reading
|
|
|
-+====================
|
|
|
-+
|
|
|
-+readdir() is somewhat difficult to implement in a unioning file system. We must
|
|
|
-+eliminate duplicates, apply whiteouts, and start up readdir() where we left
|
|
|
-+off, given a single f_pos value. Our solution is to copy up all the directory
|
|
|
-+entries to the topmost directory the first time readdir() is called on a
|
|
|
-+directory. During this copy-up, we skip duplicates and entries covered by
|
|
|
-+whiteouts, and then create fallthru entries for each remaining visible dentry.
|
|
|
-+Then we mark the whole directory opaque. From then on, we just use the topmost
|
|
|
-+file system's normal readdir() operation.
|
|
|
-+
|
|
|
-+-------------------------------------------------------------------------------
|
|
|
-+
|
|
|
-+6. Known Problems
|
|
|
-+=================
|
|
|
-+
|
|
|
-+- copyup() for filetypes other than reg and dir (e.g. for chown() on devices)
|
|
|
-+- symlinks are untested
|
|
|
-+
|
|
|
-+-------------------------------------------------------------------------------
|
|
|
-+
|
|
|
-+7. References
|
|
|
-+=============
|
|
|
-+
|
|
|
-+[1] http://marc.info/?l=linux-fsdevel&m=96035682927821&w=2
|
|
|
-+[2] http://marc.info/?l=linux-fsdevel&m=117681527820133&w=2
|
|
|
-+[3] http://marc.info/?l=linux-fsdevel&m=117913503200362&w=2
|
|
|
-+[4] http://marc.info/?l=linux-fsdevel&m=118231827024394&w=2
|
|
|
-+
|
|
|
-+Authors:
|
|
|
-+Jan Blunck <[email protected]>
|
|
|
-+Bharata B Rao <[email protected]>
|
|
|
-+Valerie Aurora <[email protected]>
|
|
|
---- a/fs/autofs4/autofs_i.h
|
|
|
-+++ b/fs/autofs4/autofs_i.h
|
|
|
-@@ -130,6 +130,7 @@ struct autofs_sb_info {
|
|
|
- int reghost_enabled;
|
|
|
- int needs_reghost;
|
|
|
- struct super_block *sb;
|
|
|
-+ struct vfsmount *mnt;
|
|
|
- struct mutex wq_mutex;
|
|
|
- spinlock_t fs_lock;
|
|
|
- struct autofs_wait_queue *queues; /* Wait queue pointer */
|
|
|
---- a/fs/autofs4/init.c
|
|
|
-+++ b/fs/autofs4/init.c
|
|
|
-@@ -17,7 +17,16 @@
|
|
|
- static int autofs_get_sb(struct file_system_type *fs_type,
|
|
|
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
|
|
|
- {
|
|
|
-- return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
|
|
|
-+ struct autofs_sb_info *sbi;
|
|
|
-+ int ret;
|
|
|
-+
|
|
|
-+ ret = get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
|
|
|
-+ if (ret)
|
|
|
-+ return ret;
|
|
|
-+
|
|
|
-+ sbi = autofs4_sbi(mnt->mnt_sb);
|
|
|
-+ sbi->mnt = mnt;
|
|
|
-+ return 0;
|
|
|
- }
|
|
|
-
|
|
|
- static struct file_system_type autofs_fs_type = {
|
|
|
---- a/fs/autofs4/root.c
|
|
|
-+++ b/fs/autofs4/root.c
|
|
|
-@@ -179,6 +179,12 @@ static void *autofs4_follow_link(struct
|
|
|
- DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
|
|
|
- dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
|
|
|
- nd->flags);
|
|
|
-+
|
|
|
-+ dput(nd->path.dentry);
|
|
|
-+ mntput(nd->path.mnt);
|
|
|
-+ nd->path.mnt = mntget(sbi->mnt);
|
|
|
-+ nd->path.dentry = dget(dentry);
|
|
|
-+
|
|
|
- /*
|
|
|
- * For an expire of a covered direct or offset mount we need
|
|
|
- * to break out of follow_down() at the autofs mount trigger
|
|
|
---- a/fs/compat.c
|
|
|
-+++ b/fs/compat.c
|
|
|
-@@ -840,6 +840,9 @@ static int compat_fillonedir(void *__buf
|
|
|
- struct compat_old_linux_dirent __user *dirent;
|
|
|
- compat_ulong_t d_ino;
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT)
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
- if (buf->result)
|
|
|
- return -EINVAL;
|
|
|
- d_ino = ino;
|
|
|
-@@ -911,6 +914,9 @@ static int compat_filldir(void *__buf, c
|
|
|
- compat_ulong_t d_ino;
|
|
|
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t));
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT)
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
- buf->error = -EINVAL; /* only used if we fail.. */
|
|
|
- if (reclen > buf->count)
|
|
|
- return -EINVAL;
|
|
|
-@@ -1000,6 +1006,9 @@ static int compat_filldir64(void * __buf
|
|
|
- int reclen = ALIGN(jj + namlen + 1, sizeof(u64));
|
|
|
- u64 off;
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT)
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
- buf->error = -EINVAL; /* only used if we fail.. */
|
|
|
- if (reclen > buf->count)
|
|
|
- return -EINVAL;
|
|
|
---- a/fs/dcache.c
|
|
|
-+++ b/fs/dcache.c
|
|
|
-@@ -18,6 +18,7 @@
|
|
|
- #include <linux/string.h>
|
|
|
- #include <linux/mm.h>
|
|
|
- #include <linux/fs.h>
|
|
|
-+#include <linux/union.h>
|
|
|
- #include <linux/fsnotify.h>
|
|
|
- #include <linux/slab.h>
|
|
|
- #include <linux/init.h>
|
|
|
-@@ -158,14 +159,19 @@ static void dentry_lru_del_init(struct d
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
-- * d_kill - kill dentry and return parent
|
|
|
-+ * __d_kill - kill dentry and return parent
|
|
|
- * @dentry: dentry to kill
|
|
|
-+ * @list: kill list
|
|
|
-+ * @greedy: return parent instead of putting it on the kill list
|
|
|
- *
|
|
|
- * The dentry must already be unhashed and removed from the LRU.
|
|
|
- *
|
|
|
-- * If this is the root of the dentry tree, return NULL.
|
|
|
-+ * If this is the root of the dentry tree, return NULL. If greedy is zero, we
|
|
|
-+ * put the parent of this dentry on the kill list instead. The callers must
|
|
|
-+ * make sure that __d_kill_final() is called on all dentries on the kill list.
|
|
|
- */
|
|
|
--static struct dentry *d_kill(struct dentry *dentry)
|
|
|
-+static struct dentry *__d_kill(struct dentry *dentry, struct list_head *list,
|
|
|
-+ int greedy)
|
|
|
- __releases(dentry->d_lock)
|
|
|
- __releases(dcache_lock)
|
|
|
- {
|
|
|
-@@ -173,13 +179,78 @@ static struct dentry *d_kill(struct dent
|
|
|
-
|
|
|
- list_del(&dentry->d_u.d_child);
|
|
|
- dentry_stat.nr_dentry--; /* For d_free, below */
|
|
|
-- /*drops the locks, at that point nobody can reach this dentry */
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * If we are not greedy we just put this on a list for later processing
|
|
|
-+ * (follow up to parent, releasing of inode and freeing dentry memory).
|
|
|
-+ */
|
|
|
-+ if (!greedy) {
|
|
|
-+ list_del_init(&dentry->d_alias);
|
|
|
-+ /* at this point nobody can reach this dentry */
|
|
|
-+ list_add(&dentry->d_lru, list);
|
|
|
-+ spin_unlock(&dentry->d_lock);
|
|
|
-+ spin_unlock(&dcache_lock);
|
|
|
-+ __shrink_d_unions(dentry, list);
|
|
|
-+ return NULL;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* drops the locks, at that point nobody can reach this dentry */
|
|
|
- dentry_iput(dentry);
|
|
|
-+ /* If the dentry was part of any unions, delete them */
|
|
|
-+ __shrink_d_unions(dentry, list);
|
|
|
-+ if (IS_ROOT(dentry))
|
|
|
-+ parent = NULL;
|
|
|
-+ else
|
|
|
-+ parent = dentry->d_parent;
|
|
|
-+ d_free(dentry);
|
|
|
-+ return parent;
|
|
|
-+}
|
|
|
-+
|
|
|
-+void __dput(struct dentry *, struct list_head *, int);
|
|
|
-+
|
|
|
-+static void __d_kill_final(struct dentry *dentry, struct list_head *list)
|
|
|
-+{
|
|
|
-+ struct dentry *parent;
|
|
|
-+ struct inode *inode = dentry->d_inode;
|
|
|
-+
|
|
|
-+ if (inode) {
|
|
|
-+ dentry->d_inode = NULL;
|
|
|
-+ if (!inode->i_nlink)
|
|
|
-+ fsnotify_inoderemove(inode);
|
|
|
-+ if (dentry->d_op && dentry->d_op->d_iput)
|
|
|
-+ dentry->d_op->d_iput(dentry, inode);
|
|
|
-+ else
|
|
|
-+ iput(inode);
|
|
|
-+ }
|
|
|
-+
|
|
|
- if (IS_ROOT(dentry))
|
|
|
- parent = NULL;
|
|
|
- else
|
|
|
- parent = dentry->d_parent;
|
|
|
- d_free(dentry);
|
|
|
-+ __dput(parent, list, 1);
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * d_kill - kill dentry and return parent
|
|
|
-+ * @dentry: dentry to kill
|
|
|
-+ *
|
|
|
-+ * The dentry must already be unhashed and removed from the LRU.
|
|
|
-+ *
|
|
|
-+ * If this is the root of the dentry tree, return NULL.
|
|
|
-+ */
|
|
|
-+static struct dentry *d_kill(struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ LIST_HEAD(mortuary);
|
|
|
-+ struct dentry *parent;
|
|
|
-+
|
|
|
-+ parent = __d_kill(dentry, &mortuary, 1);
|
|
|
-+ while (!list_empty(&mortuary)) {
|
|
|
-+ dentry = list_entry(mortuary.next, struct dentry, d_lru);
|
|
|
-+ list_del(&dentry->d_lru);
|
|
|
-+ __d_kill_final(dentry, &mortuary);
|
|
|
-+ }
|
|
|
-+
|
|
|
- return parent;
|
|
|
- }
|
|
|
-
|
|
|
-@@ -200,19 +271,24 @@ static struct dentry *d_kill(struct dent
|
|
|
- * Real recursion would eat up our stack space.
|
|
|
- */
|
|
|
-
|
|
|
--/*
|
|
|
-- * dput - release a dentry
|
|
|
-- * @dentry: dentry to release
|
|
|
-+/**
|
|
|
-+ * __dput - release a dentry
|
|
|
-+ * @dentry: dentry to release
|
|
|
-+ * @list: kill list argument for __d_kill()
|
|
|
-+ * @greedy: greedy argument for __d_kill()
|
|
|
- *
|
|
|
- * Release a dentry. This will drop the usage count and if appropriate
|
|
|
- * call the dentry unlink method as well as removing it from the queues and
|
|
|
- * releasing its resources. If the parent dentries were scheduled for release
|
|
|
-- * they too may now get deleted.
|
|
|
-+ * they too may now get deleted if @greedy is not zero. Otherwise parent is
|
|
|
-+ * added to the kill list. The callers must make sure that __d_kill_final() is
|
|
|
-+ * called on all dentries on the kill list.
|
|
|
-+ *
|
|
|
-+ * You probably want to use dput() instead.
|
|
|
- *
|
|
|
- * no dcache lock, please.
|
|
|
- */
|
|
|
--
|
|
|
--void dput(struct dentry *dentry)
|
|
|
-+void __dput(struct dentry *dentry, struct list_head *list, int greedy)
|
|
|
- {
|
|
|
- if (!dentry)
|
|
|
- return;
|
|
|
-@@ -253,12 +329,35 @@ unhash_it:
|
|
|
- kill_it:
|
|
|
- /* if dentry was on the d_lru list delete it from there */
|
|
|
- dentry_lru_del(dentry);
|
|
|
-- dentry = d_kill(dentry);
|
|
|
-+ dentry = __d_kill(dentry, list, greedy);
|
|
|
- if (dentry)
|
|
|
- goto repeat;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
-+ * dput - release a dentry
|
|
|
-+ * @dentry: dentry to release
|
|
|
-+ *
|
|
|
-+ * Release a dentry. This will drop the usage count and if appropriate
|
|
|
-+ * call the dentry unlink method as well as removing it from the queues and
|
|
|
-+ * releasing its resources. If the parent dentries were scheduled for release
|
|
|
-+ * they too may now get deleted.
|
|
|
-+ *
|
|
|
-+ * no dcache lock, please.
|
|
|
-+ */
|
|
|
-+void dput(struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ LIST_HEAD(mortuary);
|
|
|
-+
|
|
|
-+ __dput(dentry, &mortuary, 1);
|
|
|
-+ while (!list_empty(&mortuary)) {
|
|
|
-+ dentry = list_entry(mortuary.next, struct dentry, d_lru);
|
|
|
-+ list_del(&dentry->d_lru);
|
|
|
-+ __d_kill_final(dentry, &mortuary);
|
|
|
-+ }
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
- * d_invalidate - invalidate a dentry
|
|
|
- * @dentry: dentry to invalidate
|
|
|
- *
|
|
|
-@@ -690,6 +789,7 @@ static void shrink_dcache_for_umount_sub
|
|
|
- iput(inode);
|
|
|
- }
|
|
|
-
|
|
|
-+ shrink_d_unions(dentry);
|
|
|
- d_free(dentry);
|
|
|
-
|
|
|
- /* finished when we fall off the top of the tree,
|
|
|
-@@ -952,6 +1052,10 @@ struct dentry *d_alloc(struct dentry * p
|
|
|
- INIT_LIST_HEAD(&dentry->d_lru);
|
|
|
- INIT_LIST_HEAD(&dentry->d_subdirs);
|
|
|
- INIT_LIST_HEAD(&dentry->d_alias);
|
|
|
-+#ifdef CONFIG_UNION_MOUNT
|
|
|
-+ INIT_LIST_HEAD(&dentry->d_unions);
|
|
|
-+ dentry->d_unionized = 0;
|
|
|
-+#endif
|
|
|
-
|
|
|
- if (parent) {
|
|
|
- dentry->d_parent = dget(parent);
|
|
|
-@@ -982,8 +1086,10 @@ struct dentry *d_alloc_name(struct dentr
|
|
|
- /* the caller must hold dcache_lock */
|
|
|
- static void __d_instantiate(struct dentry *dentry, struct inode *inode)
|
|
|
- {
|
|
|
-- if (inode)
|
|
|
-+ if (inode) {
|
|
|
-+ dentry->d_flags &= ~(DCACHE_WHITEOUT|DCACHE_FALLTHRU);
|
|
|
- list_add(&dentry->d_alias, &inode->i_dentry);
|
|
|
-+ }
|
|
|
- dentry->d_inode = inode;
|
|
|
- fsnotify_d_instantiate(dentry, inode);
|
|
|
- }
|
|
|
-@@ -1514,7 +1620,9 @@ void d_delete(struct dentry * dentry)
|
|
|
- spin_lock(&dentry->d_lock);
|
|
|
- isdir = S_ISDIR(dentry->d_inode->i_mode);
|
|
|
- if (atomic_read(&dentry->d_count) == 1) {
|
|
|
-+ __d_drop_unions(dentry);
|
|
|
- dentry_iput(dentry);
|
|
|
-+ shrink_d_unions(dentry);
|
|
|
- fsnotify_nameremove(dentry, isdir);
|
|
|
- return;
|
|
|
- }
|
|
|
-@@ -1525,14 +1633,14 @@ void d_delete(struct dentry * dentry)
|
|
|
- spin_unlock(&dentry->d_lock);
|
|
|
- spin_unlock(&dcache_lock);
|
|
|
-
|
|
|
-+ shrink_d_unions(dentry);
|
|
|
- fsnotify_nameremove(dentry, isdir);
|
|
|
- }
|
|
|
-
|
|
|
- static void __d_rehash(struct dentry * entry, struct hlist_head *list)
|
|
|
- {
|
|
|
--
|
|
|
-- entry->d_flags &= ~DCACHE_UNHASHED;
|
|
|
-- hlist_add_head_rcu(&entry->d_hash, list);
|
|
|
-+ entry->d_flags &= ~DCACHE_UNHASHED;
|
|
|
-+ hlist_add_head_rcu(&entry->d_hash, list);
|
|
|
- }
|
|
|
-
|
|
|
- static void _d_rehash(struct dentry * entry)
|
|
|
-@@ -1551,6 +1659,7 @@ void d_rehash(struct dentry * entry)
|
|
|
- {
|
|
|
- spin_lock(&dcache_lock);
|
|
|
- spin_lock(&entry->d_lock);
|
|
|
-+ BUG_ON(!d_unhashed(entry));
|
|
|
- _d_rehash(entry);
|
|
|
- spin_unlock(&entry->d_lock);
|
|
|
- spin_unlock(&dcache_lock);
|
|
|
-@@ -2183,7 +2292,9 @@ resume:
|
|
|
- struct list_head *tmp = next;
|
|
|
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
|
|
|
- next = tmp->next;
|
|
|
-- if (d_unhashed(dentry)||!dentry->d_inode)
|
|
|
-+ if (d_unhashed(dentry)||(!dentry->d_inode &&
|
|
|
-+ !d_is_whiteout(dentry) &&
|
|
|
-+ !d_is_fallthru(dentry)))
|
|
|
- continue;
|
|
|
- if (!list_empty(&dentry->d_subdirs)) {
|
|
|
- this_parent = dentry;
|
|
|
---- a/fs/ext2/dir.c
|
|
|
-+++ b/fs/ext2/dir.c
|
|
|
-@@ -219,7 +219,8 @@ static inline int ext2_match (int len, c
|
|
|
- {
|
|
|
- if (len != de->name_len)
|
|
|
- return 0;
|
|
|
-- if (!de->inode)
|
|
|
-+ if (!de->inode && ((de->file_type != EXT2_FT_WHT) &&
|
|
|
-+ (de->file_type != EXT2_FT_FALLTHRU)))
|
|
|
- return 0;
|
|
|
- return !memcmp(name, de->name, len);
|
|
|
- }
|
|
|
-@@ -255,6 +256,8 @@ static unsigned char ext2_filetype_table
|
|
|
- [EXT2_FT_FIFO] = DT_FIFO,
|
|
|
- [EXT2_FT_SOCK] = DT_SOCK,
|
|
|
- [EXT2_FT_SYMLINK] = DT_LNK,
|
|
|
-+ [EXT2_FT_WHT] = DT_WHT,
|
|
|
-+ [EXT2_FT_FALLTHRU] = DT_UNKNOWN,
|
|
|
- };
|
|
|
-
|
|
|
- #define S_SHIFT 12
|
|
|
-@@ -341,6 +344,18 @@ ext2_readdir (struct file * filp, void *
|
|
|
- ext2_put_page(page);
|
|
|
- return 0;
|
|
|
- }
|
|
|
-+ } else if (de->file_type == EXT2_FT_FALLTHRU) {
|
|
|
-+ int over;
|
|
|
-+ unsigned char d_type = DT_UNKNOWN;
|
|
|
-+
|
|
|
-+ offset = (char *)de - kaddr;
|
|
|
-+ over = filldir(dirent, de->name, de->name_len,
|
|
|
-+ (n<<PAGE_CACHE_SHIFT) | offset,
|
|
|
-+ 123, d_type);
|
|
|
-+ if (over) {
|
|
|
-+ ext2_put_page(page);
|
|
|
-+ return 0;
|
|
|
-+ }
|
|
|
- }
|
|
|
- filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
|
|
|
- }
|
|
|
-@@ -448,6 +463,30 @@ ino_t ext2_inode_by_name(struct inode *d
|
|
|
- return res;
|
|
|
- }
|
|
|
-
|
|
|
-+/* Special version for filetype based whiteout support */
|
|
|
-+ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ ino_t res = 0;
|
|
|
-+ struct ext2_dir_entry_2 *de;
|
|
|
-+ struct page *page;
|
|
|
-+
|
|
|
-+ de = ext2_find_entry (dir, &dentry->d_name, &page);
|
|
|
-+ if (de) {
|
|
|
-+ res = le32_to_cpu(de->inode);
|
|
|
-+ if (!res && de->file_type == EXT2_FT_WHT) {
|
|
|
-+ spin_lock(&dentry->d_lock);
|
|
|
-+ dentry->d_flags |= DCACHE_WHITEOUT;
|
|
|
-+ spin_unlock(&dentry->d_lock);
|
|
|
-+ } else if(!res && de->file_type == EXT2_FT_FALLTHRU) {
|
|
|
-+ spin_lock(&dentry->d_lock);
|
|
|
-+ dentry->d_flags |= DCACHE_FALLTHRU;
|
|
|
-+ spin_unlock(&dentry->d_lock);
|
|
|
-+ }
|
|
|
-+ ext2_put_page(page);
|
|
|
-+ }
|
|
|
-+ return res;
|
|
|
-+}
|
|
|
-+
|
|
|
- /* Releases the page */
|
|
|
- void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
|
|
|
- struct page *page, struct inode *inode, int update_times)
|
|
|
-@@ -472,9 +511,10 @@ void ext2_set_link(struct inode *dir, st
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-- * Parent is locked.
|
|
|
-+ * Find or append a given dentry to the parent directory
|
|
|
- */
|
|
|
--int ext2_add_link (struct dentry *dentry, struct inode *inode)
|
|
|
-+static ext2_dirent * ext2_append_entry(struct dentry * dentry,
|
|
|
-+ struct page ** page)
|
|
|
- {
|
|
|
- struct inode *dir = dentry->d_parent->d_inode;
|
|
|
- const char *name = dentry->d_name.name;
|
|
|
-@@ -482,13 +522,10 @@ int ext2_add_link (struct dentry *dentry
|
|
|
- unsigned chunk_size = ext2_chunk_size(dir);
|
|
|
- unsigned reclen = EXT2_DIR_REC_LEN(namelen);
|
|
|
- unsigned short rec_len, name_len;
|
|
|
-- struct page *page = NULL;
|
|
|
-- ext2_dirent * de;
|
|
|
-+ ext2_dirent * de = NULL;
|
|
|
- unsigned long npages = dir_pages(dir);
|
|
|
- unsigned long n;
|
|
|
- char *kaddr;
|
|
|
-- loff_t pos;
|
|
|
-- int err;
|
|
|
-
|
|
|
- /*
|
|
|
- * We take care of directory expansion in the same loop.
|
|
|
-@@ -498,55 +535,97 @@ int ext2_add_link (struct dentry *dentry
|
|
|
- for (n = 0; n <= npages; n++) {
|
|
|
- char *dir_end;
|
|
|
-
|
|
|
-- page = ext2_get_page(dir, n, 0);
|
|
|
-- err = PTR_ERR(page);
|
|
|
-- if (IS_ERR(page))
|
|
|
-+ *page = ext2_get_page(dir, n, 0);
|
|
|
-+ de = ERR_PTR(PTR_ERR(*page));
|
|
|
-+ if (IS_ERR(*page))
|
|
|
- goto out;
|
|
|
-- lock_page(page);
|
|
|
-- kaddr = page_address(page);
|
|
|
-+ lock_page(*page);
|
|
|
-+ kaddr = page_address(*page);
|
|
|
- dir_end = kaddr + ext2_last_byte(dir, n);
|
|
|
- de = (ext2_dirent *)kaddr;
|
|
|
- kaddr += PAGE_CACHE_SIZE - reclen;
|
|
|
- while ((char *)de <= kaddr) {
|
|
|
- if ((char *)de == dir_end) {
|
|
|
- /* We hit i_size */
|
|
|
-- name_len = 0;
|
|
|
-- rec_len = chunk_size;
|
|
|
-+ de->name_len = 0;
|
|
|
- de->rec_len = ext2_rec_len_to_disk(chunk_size);
|
|
|
- de->inode = 0;
|
|
|
-+ de->file_type = 0;
|
|
|
- goto got_it;
|
|
|
- }
|
|
|
- if (de->rec_len == 0) {
|
|
|
- ext2_error(dir->i_sb, __func__,
|
|
|
- "zero-length directory entry");
|
|
|
-- err = -EIO;
|
|
|
-+ de = ERR_PTR(-EIO);
|
|
|
- goto out_unlock;
|
|
|
- }
|
|
|
-- err = -EEXIST;
|
|
|
- if (ext2_match (namelen, name, de))
|
|
|
-- goto out_unlock;
|
|
|
-+ goto got_it;
|
|
|
- name_len = EXT2_DIR_REC_LEN(de->name_len);
|
|
|
- rec_len = ext2_rec_len_from_disk(de->rec_len);
|
|
|
-- if (!de->inode && rec_len >= reclen)
|
|
|
-+ if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
|
|
|
-+ (de->file_type != EXT2_FT_FALLTHRU) &&
|
|
|
-+ (rec_len >= reclen))
|
|
|
- goto got_it;
|
|
|
- if (rec_len >= name_len + reclen)
|
|
|
- goto got_it;
|
|
|
- de = (ext2_dirent *) ((char *) de + rec_len);
|
|
|
- }
|
|
|
-- unlock_page(page);
|
|
|
-- ext2_put_page(page);
|
|
|
-+ unlock_page(*page);
|
|
|
-+ ext2_put_page(*page);
|
|
|
- }
|
|
|
-+
|
|
|
- BUG();
|
|
|
-- return -EINVAL;
|
|
|
-
|
|
|
- got_it:
|
|
|
-+ return de;
|
|
|
-+ /* OFFSET_CACHE */
|
|
|
-+out_unlock:
|
|
|
-+ unlock_page(*page);
|
|
|
-+ ext2_put_page(*page);
|
|
|
-+out:
|
|
|
-+ return de;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Parent is locked.
|
|
|
-+ */
|
|
|
-+int ext2_add_link (struct dentry *dentry, struct inode *inode)
|
|
|
-+{
|
|
|
-+ struct inode *dir = dentry->d_parent->d_inode;
|
|
|
-+ const char *name = dentry->d_name.name;
|
|
|
-+ int namelen = dentry->d_name.len;
|
|
|
-+ unsigned short rec_len, name_len;
|
|
|
-+ ext2_dirent * de;
|
|
|
-+ struct page *page;
|
|
|
-+ loff_t pos;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ de = ext2_append_entry(dentry, &page);
|
|
|
-+ if (IS_ERR(de))
|
|
|
-+ return PTR_ERR(de);
|
|
|
-+
|
|
|
-+ err = -EEXIST;
|
|
|
-+ if (ext2_match (namelen, name, de)) {
|
|
|
-+ if ((de->file_type == EXT2_FT_WHT) ||
|
|
|
-+ (de->file_type == EXT2_FT_FALLTHRU))
|
|
|
-+ goto got_it;
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+got_it:
|
|
|
-+ name_len = EXT2_DIR_REC_LEN(de->name_len);
|
|
|
-+ rec_len = ext2_rec_len_from_disk(de->rec_len);
|
|
|
-+
|
|
|
- pos = page_offset(page) +
|
|
|
- (char*)de - (char*)page_address(page);
|
|
|
- err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
|
|
|
- &page, NULL);
|
|
|
- if (err)
|
|
|
- goto out_unlock;
|
|
|
-- if (de->inode) {
|
|
|
-+ if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
|
|
|
-+ (de->file_type == EXT2_FT_FALLTHRU)) &&
|
|
|
-+ !ext2_match (namelen, name, de))) {
|
|
|
- ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
|
|
|
- de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
|
|
|
- de->rec_len = ext2_rec_len_to_disk(name_len);
|
|
|
-@@ -563,7 +642,60 @@ got_it:
|
|
|
- /* OFFSET_CACHE */
|
|
|
- out_put:
|
|
|
- ext2_put_page(page);
|
|
|
--out:
|
|
|
-+ return err;
|
|
|
-+out_unlock:
|
|
|
-+ unlock_page(page);
|
|
|
-+ goto out_put;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Create a fallthru entry.
|
|
|
-+ */
|
|
|
-+int ext2_fallthru_entry (struct inode *dir, struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ const char *name = dentry->d_name.name;
|
|
|
-+ int namelen = dentry->d_name.len;
|
|
|
-+ unsigned short rec_len, name_len;
|
|
|
-+ ext2_dirent * de;
|
|
|
-+ struct page *page;
|
|
|
-+ loff_t pos;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ de = ext2_append_entry(dentry, &page);
|
|
|
-+ if (IS_ERR(de))
|
|
|
-+ return PTR_ERR(de);
|
|
|
-+
|
|
|
-+ err = -EEXIST;
|
|
|
-+ if (ext2_match (namelen, name, de))
|
|
|
-+ goto out_unlock;
|
|
|
-+
|
|
|
-+ name_len = EXT2_DIR_REC_LEN(de->name_len);
|
|
|
-+ rec_len = ext2_rec_len_from_disk(de->rec_len);
|
|
|
-+
|
|
|
-+ pos = page_offset(page) +
|
|
|
-+ (char*)de - (char*)page_address(page);
|
|
|
-+ err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
|
|
|
-+ &page, NULL);
|
|
|
-+ if (err)
|
|
|
-+ goto out_unlock;
|
|
|
-+ if (de->inode || (de->file_type == EXT2_FT_WHT) ||
|
|
|
-+ (de->file_type == EXT2_FT_FALLTHRU)) {
|
|
|
-+ ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
|
|
|
-+ de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
|
|
|
-+ de->rec_len = ext2_rec_len_to_disk(name_len);
|
|
|
-+ de = de1;
|
|
|
-+ }
|
|
|
-+ de->name_len = namelen;
|
|
|
-+ memcpy(de->name, name, namelen);
|
|
|
-+ de->inode = 0;
|
|
|
-+ de->file_type = EXT2_FT_FALLTHRU;
|
|
|
-+ err = ext2_commit_chunk(page, pos, rec_len);
|
|
|
-+ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
|
|
|
-+ EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
|
|
|
-+ mark_inode_dirty(dir);
|
|
|
-+ /* OFFSET_CACHE */
|
|
|
-+out_put:
|
|
|
-+ ext2_put_page(page);
|
|
|
- return err;
|
|
|
- out_unlock:
|
|
|
- unlock_page(page);
|
|
|
-@@ -616,6 +748,70 @@ out:
|
|
|
- return err;
|
|
|
- }
|
|
|
-
|
|
|
-+int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
|
|
|
-+ struct ext2_dir_entry_2 * de, struct page * page)
|
|
|
-+{
|
|
|
-+ const char *name = dentry->d_name.name;
|
|
|
-+ int namelen = dentry->d_name.len;
|
|
|
-+ unsigned short rec_len, name_len;
|
|
|
-+ loff_t pos;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ if (!de) {
|
|
|
-+ de = ext2_append_entry(dentry, &page);
|
|
|
-+ BUG_ON(!de);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ err = -EEXIST;
|
|
|
-+ if (ext2_match (namelen, name, de) &&
|
|
|
-+ (de->file_type == EXT2_FT_WHT)) {
|
|
|
-+ ext2_error(dir->i_sb, __func__,
|
|
|
-+ "entry is already a whiteout in directory #%lu",
|
|
|
-+ dir->i_ino);
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ name_len = EXT2_DIR_REC_LEN(de->name_len);
|
|
|
-+ rec_len = ext2_rec_len_from_disk(de->rec_len);
|
|
|
-+
|
|
|
-+ pos = page_offset(page) +
|
|
|
-+ (char*)de - (char*)page_address(page);
|
|
|
-+ err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
|
|
|
-+ &page, NULL);
|
|
|
-+ if (err)
|
|
|
-+ goto out_unlock;
|
|
|
-+ /*
|
|
|
-+ * We whiteout an existing entry. Do what ext2_delete_entry() would do,
|
|
|
-+ * except that we don't need to merge with the previous entry since
|
|
|
-+ * we are going to reuse it.
|
|
|
-+ */
|
|
|
-+ if (ext2_match (namelen, name, de))
|
|
|
-+ de->inode = 0;
|
|
|
-+ if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
|
|
|
-+ (de->file_type == EXT2_FT_FALLTHRU)) &&
|
|
|
-+ !ext2_match (namelen, name, de))) {
|
|
|
-+ ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
|
|
|
-+ de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
|
|
|
-+ de->rec_len = ext2_rec_len_to_disk(name_len);
|
|
|
-+ de = de1;
|
|
|
-+ }
|
|
|
-+ de->name_len = namelen;
|
|
|
-+ memcpy(de->name, name, namelen);
|
|
|
-+ de->inode = 0;
|
|
|
-+ de->file_type = EXT2_FT_WHT;
|
|
|
-+ err = ext2_commit_chunk(page, pos, rec_len);
|
|
|
-+ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
|
|
|
-+ EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
|
|
|
-+ mark_inode_dirty(dir);
|
|
|
-+ /* OFFSET_CACHE */
|
|
|
-+out_put:
|
|
|
-+ ext2_put_page(page);
|
|
|
-+ return err;
|
|
|
-+out_unlock:
|
|
|
-+ unlock_page(page);
|
|
|
-+ goto out_put;
|
|
|
-+}
|
|
|
-+
|
|
|
- /*
|
|
|
- * Set the first fragment of directory.
|
|
|
- */
|
|
|
---- a/fs/ext2/ext2.h
|
|
|
-+++ b/fs/ext2/ext2.h
|
|
|
-@@ -102,9 +102,13 @@ extern void ext2_rsv_window_add(struct s
|
|
|
- /* dir.c */
|
|
|
- extern int ext2_add_link (struct dentry *, struct inode *);
|
|
|
- extern ino_t ext2_inode_by_name(struct inode *, struct qstr *);
|
|
|
-+extern ino_t ext2_inode_by_dentry(struct inode *, struct dentry *);
|
|
|
- extern int ext2_make_empty(struct inode *, struct inode *);
|
|
|
- extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **);
|
|
|
- extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
|
|
|
-+extern int ext2_whiteout_entry (struct inode *, struct dentry *,
|
|
|
-+ struct ext2_dir_entry_2 *, struct page *);
|
|
|
-+extern int ext2_fallthru_entry (struct inode *, struct dentry *);
|
|
|
- extern int ext2_empty_dir (struct inode *);
|
|
|
- extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
|
|
|
- extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
|
|
|
---- a/fs/ext2/inode.c
|
|
|
-+++ b/fs/ext2/inode.c
|
|
|
-@@ -1178,7 +1178,8 @@ void ext2_set_inode_flags(struct inode *
|
|
|
- {
|
|
|
- unsigned int flags = EXT2_I(inode)->i_flags;
|
|
|
-
|
|
|
-- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
|
|
|
-+ inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
|
|
|
-+ S_OPAQUE);
|
|
|
- if (flags & EXT2_SYNC_FL)
|
|
|
- inode->i_flags |= S_SYNC;
|
|
|
- if (flags & EXT2_APPEND_FL)
|
|
|
-@@ -1189,6 +1190,8 @@ void ext2_set_inode_flags(struct inode *
|
|
|
- inode->i_flags |= S_NOATIME;
|
|
|
- if (flags & EXT2_DIRSYNC_FL)
|
|
|
- inode->i_flags |= S_DIRSYNC;
|
|
|
-+ if (flags & EXT2_OPAQUE_FL)
|
|
|
-+ inode->i_flags |= S_OPAQUE;
|
|
|
- }
|
|
|
-
|
|
|
- /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
|
|
|
-@@ -1196,8 +1199,8 @@ void ext2_get_inode_flags(struct ext2_in
|
|
|
- {
|
|
|
- unsigned int flags = ei->vfs_inode.i_flags;
|
|
|
-
|
|
|
-- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
|
|
|
-- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
|
|
|
-+ ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|EXT2_IMMUTABLE_FL|
|
|
|
-+ EXT2_NOATIME_FL|EXT2_DIRSYNC_FL|EXT2_OPAQUE_FL);
|
|
|
- if (flags & S_SYNC)
|
|
|
- ei->i_flags |= EXT2_SYNC_FL;
|
|
|
- if (flags & S_APPEND)
|
|
|
-@@ -1208,6 +1211,8 @@ void ext2_get_inode_flags(struct ext2_in
|
|
|
- ei->i_flags |= EXT2_NOATIME_FL;
|
|
|
- if (flags & S_DIRSYNC)
|
|
|
- ei->i_flags |= EXT2_DIRSYNC_FL;
|
|
|
-+ if (flags & S_OPAQUE)
|
|
|
-+ ei->i_flags |= EXT2_OPAQUE_FL;
|
|
|
- }
|
|
|
-
|
|
|
- struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
|
|
|
---- a/fs/ext2/namei.c
|
|
|
-+++ b/fs/ext2/namei.c
|
|
|
-@@ -54,15 +54,16 @@ static inline int ext2_add_nondir(struct
|
|
|
- * Methods themselves.
|
|
|
- */
|
|
|
-
|
|
|
--static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
|
|
|
-+static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry,
|
|
|
-+ struct nameidata *nd)
|
|
|
- {
|
|
|
- struct inode * inode;
|
|
|
- ino_t ino;
|
|
|
--
|
|
|
-+
|
|
|
- if (dentry->d_name.len > EXT2_NAME_LEN)
|
|
|
- return ERR_PTR(-ENAMETOOLONG);
|
|
|
-
|
|
|
-- ino = ext2_inode_by_name(dir, &dentry->d_name);
|
|
|
-+ ino = ext2_inode_by_dentry(dir, dentry);
|
|
|
- inode = NULL;
|
|
|
- if (ino) {
|
|
|
- inode = ext2_iget(dir->i_sb, ino);
|
|
|
-@@ -230,6 +231,10 @@ static int ext2_mkdir(struct inode * dir
|
|
|
- else
|
|
|
- inode->i_mapping->a_ops = &ext2_aops;
|
|
|
-
|
|
|
-+ /* if we call mkdir on a whiteout create an opaque directory */
|
|
|
-+ if (dentry->d_flags & DCACHE_WHITEOUT)
|
|
|
-+ inode->i_flags |= S_OPAQUE;
|
|
|
-+
|
|
|
- inode_inc_link_count(inode);
|
|
|
-
|
|
|
- err = ext2_make_empty(inode, dir);
|
|
|
-@@ -293,6 +298,78 @@ static int ext2_rmdir (struct inode * di
|
|
|
- return err;
|
|
|
- }
|
|
|
-
|
|
|
-+/*
|
|
|
-+ * Create a whiteout for the dentry
|
|
|
-+ */
|
|
|
-+static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
|
|
|
-+ struct dentry *new_dentry)
|
|
|
-+{
|
|
|
-+ struct inode * inode = dentry->d_inode;
|
|
|
-+ struct ext2_dir_entry_2 * de = NULL;
|
|
|
-+ struct page * page;
|
|
|
-+ int err = -ENOTEMPTY;
|
|
|
-+
|
|
|
-+ if (!EXT2_HAS_INCOMPAT_FEATURE(dir->i_sb,
|
|
|
-+ EXT2_FEATURE_INCOMPAT_FILETYPE)) {
|
|
|
-+ ext2_error (dir->i_sb, "ext2_whiteout",
|
|
|
-+ "can't set whiteout filetype");
|
|
|
-+ err = -EPERM;
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (inode) {
|
|
|
-+ if (S_ISDIR(inode->i_mode) && !ext2_empty_dir(inode))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ err = -ENOENT;
|
|
|
-+ de = ext2_find_entry (dir, &dentry->d_name, &page);
|
|
|
-+ if (!de)
|
|
|
-+ goto out;
|
|
|
-+ lock_page(page);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ err = ext2_whiteout_entry (dir, dentry, de, page);
|
|
|
-+ if (err)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ spin_lock(&new_dentry->d_lock);
|
|
|
-+ new_dentry->d_flags &= ~DCACHE_FALLTHRU;
|
|
|
-+ new_dentry->d_flags |= DCACHE_WHITEOUT;
|
|
|
-+ spin_unlock(&new_dentry->d_lock);
|
|
|
-+ d_add(new_dentry, NULL);
|
|
|
-+
|
|
|
-+ if (inode) {
|
|
|
-+ inode->i_ctime = dir->i_ctime;
|
|
|
-+ inode_dec_link_count(inode);
|
|
|
-+ if (S_ISDIR(inode->i_mode)) {
|
|
|
-+ inode->i_size = 0;
|
|
|
-+ inode_dec_link_count(inode);
|
|
|
-+ inode_dec_link_count(dir);
|
|
|
-+ }
|
|
|
-+ }
|
|
|
-+ err = 0;
|
|
|
-+out:
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Create a fallthru entry.
|
|
|
-+ */
|
|
|
-+static int ext2_fallthru (struct inode *dir, struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ err = ext2_fallthru_entry(dir, dentry);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ d_instantiate(dentry, NULL);
|
|
|
-+ spin_lock(&dentry->d_lock);
|
|
|
-+ dentry->d_flags |= DCACHE_FALLTHRU;
|
|
|
-+ spin_unlock(&dentry->d_lock);
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
- static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
|
|
|
- struct inode * new_dir, struct dentry * new_dentry )
|
|
|
- {
|
|
|
-@@ -392,6 +469,8 @@ const struct inode_operations ext2_dir_i
|
|
|
- .mkdir = ext2_mkdir,
|
|
|
- .rmdir = ext2_rmdir,
|
|
|
- .mknod = ext2_mknod,
|
|
|
-+ .whiteout = ext2_whiteout,
|
|
|
-+ .fallthru = ext2_fallthru,
|
|
|
- .rename = ext2_rename,
|
|
|
- #ifdef CONFIG_EXT2_FS_XATTR
|
|
|
- .setxattr = generic_setxattr,
|
|
|
---- a/fs/ext2/super.c
|
|
|
-+++ b/fs/ext2/super.c
|
|
|
-@@ -1062,6 +1062,13 @@ static int ext2_fill_super(struct super_
|
|
|
- if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
|
|
|
- ext2_warning(sb, __func__,
|
|
|
- "mounting ext3 filesystem as ext2");
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Whiteouts (and fallthrus) require explicit whiteout support.
|
|
|
-+ */
|
|
|
-+ if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_WHITEOUT))
|
|
|
-+ sb->s_flags |= MS_WHITEOUT;
|
|
|
-+
|
|
|
- ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
|
|
|
- return 0;
|
|
|
-
|
|
|
---- a/fs/Kconfig
|
|
|
-+++ b/fs/Kconfig
|
|
|
-@@ -59,6 +59,14 @@ source "fs/notify/Kconfig"
|
|
|
-
|
|
|
- source "fs/quota/Kconfig"
|
|
|
-
|
|
|
-+config UNION_MOUNT
|
|
|
-+ bool "Union mount support (EXPERIMENTAL)"
|
|
|
-+ depends on EXPERIMENTAL
|
|
|
-+ ---help---
|
|
|
-+ If you say Y here, you will be able to mount file systems as
|
|
|
-+ union mount stacks. This is a VFS based implementation and
|
|
|
-+ should work with all file systems. If unsure, say N.
|
|
|
-+
|
|
|
- source "fs/autofs/Kconfig"
|
|
|
- source "fs/autofs4/Kconfig"
|
|
|
- source "fs/fuse/Kconfig"
|
|
|
---- a/fs/libfs.c
|
|
|
-+++ b/fs/libfs.c
|
|
|
-@@ -133,6 +133,7 @@ int dcache_readdir(struct file * filp, v
|
|
|
- struct dentry *cursor = filp->private_data;
|
|
|
- struct list_head *p, *q = &cursor->d_u.d_child;
|
|
|
- ino_t ino;
|
|
|
-+ int d_type;
|
|
|
- int i = filp->f_pos;
|
|
|
-
|
|
|
- switch (i) {
|
|
|
-@@ -158,14 +159,25 @@ int dcache_readdir(struct file * filp, v
|
|
|
- for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
|
|
|
- struct dentry *next;
|
|
|
- next = list_entry(p, struct dentry, d_u.d_child);
|
|
|
-- if (d_unhashed(next) || !next->d_inode)
|
|
|
-+ if (d_unhashed(next) || (!next->d_inode && !d_is_fallthru(next)))
|
|
|
- continue;
|
|
|
-
|
|
|
-+ if (d_is_fallthru(next)) {
|
|
|
-+ /* XXX Make up things we can
|
|
|
-+ * only get out of the inode.
|
|
|
-+ * Should probably really do a
|
|
|
-+ * lookup instead. */
|
|
|
-+ ino = 100; /* XXX Made up number of no significance */
|
|
|
-+ d_type = DT_UNKNOWN;
|
|
|
-+ } else {
|
|
|
-+ ino = next->d_inode->i_ino;
|
|
|
-+ d_type = dt_type(next->d_inode);
|
|
|
-+ }
|
|
|
-+
|
|
|
- spin_unlock(&dcache_lock);
|
|
|
- if (filldir(dirent, next->d_name.name,
|
|
|
- next->d_name.len, filp->f_pos,
|
|
|
-- next->d_inode->i_ino,
|
|
|
-- dt_type(next->d_inode)) < 0)
|
|
|
-+ ino, d_type) < 0)
|
|
|
- return 0;
|
|
|
- spin_lock(&dcache_lock);
|
|
|
- /* next is still alive */
|
|
|
---- a/fs/Makefile
|
|
|
-+++ b/fs/Makefile
|
|
|
-@@ -52,6 +52,7 @@ obj-$(CONFIG_NFS_COMMON) += nfs_common/
|
|
|
- obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
|
|
|
-
|
|
|
- obj-y += quota/
|
|
|
-+obj-$(CONFIG_UNION_MOUNT) += union.o
|
|
|
-
|
|
|
- obj-$(CONFIG_PROC_FS) += proc/
|
|
|
- obj-y += partitions/
|
|
|
---- a/fs/namei.c
|
|
|
-+++ b/fs/namei.c
|
|
|
-@@ -33,6 +33,7 @@
|
|
|
- #include <linux/fcntl.h>
|
|
|
- #include <linux/device_cgroup.h>
|
|
|
- #include <linux/fs_struct.h>
|
|
|
-+#include <linux/union.h>
|
|
|
- #include <asm/uaccess.h>
|
|
|
-
|
|
|
- #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
|
|
|
-@@ -242,16 +243,17 @@ int generic_permission(struct inode *ino
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
-- * inode_permission - check for access rights to a given inode
|
|
|
-+ * __inode_permission - check for access rights to a given inode
|
|
|
- * @inode: inode to check permission on
|
|
|
- * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
|
|
|
-+ * @rofs: check for read-only fs
|
|
|
- *
|
|
|
- * Used to check for read/write/execute permissions on an inode.
|
|
|
- * We use "fsuid" for this, letting us set arbitrary permissions
|
|
|
- * for filesystem access without changing the "normal" uids which
|
|
|
- * are used for other things.
|
|
|
- */
|
|
|
--int inode_permission(struct inode *inode, int mask)
|
|
|
-+int __inode_permission(struct inode *inode, int mask, int rofs)
|
|
|
- {
|
|
|
- int retval;
|
|
|
-
|
|
|
-@@ -261,7 +263,7 @@ int inode_permission(struct inode *inode
|
|
|
- /*
|
|
|
- * Nobody gets write access to a read-only fs.
|
|
|
- */
|
|
|
-- if (IS_RDONLY(inode) &&
|
|
|
-+ if ((rofs & IS_RDONLY(inode)) &&
|
|
|
- (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
|
|
|
- return -EROFS;
|
|
|
-
|
|
|
-@@ -289,6 +291,18 @@ int inode_permission(struct inode *inode
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
-+ * inode_permission - check for access rights to a given inode
|
|
|
-+ * @inode: inode to check permission on
|
|
|
-+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
|
|
|
-+ *
|
|
|
-+ * This version pays attention to the MS_RDONLY flag on the fs.
|
|
|
-+ */
|
|
|
-+int inode_permission(struct inode *inode, int mask)
|
|
|
-+{
|
|
|
-+ return __inode_permission(inode, mask, 1);
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
- * file_permission - check for additional access rights to a given file
|
|
|
- * @file: file to check access rights for
|
|
|
- * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
|
|
|
-@@ -417,15 +431,10 @@ do_revalidate(struct dentry *dentry, str
|
|
|
- * Internal lookup() using the new generic dcache.
|
|
|
- * SMP-safe
|
|
|
- */
|
|
|
--static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
|
|
|
-+static struct dentry *cache_lookup(struct dentry *parent, struct qstr *name,
|
|
|
-+ struct nameidata *nd)
|
|
|
- {
|
|
|
-- struct dentry * dentry = __d_lookup(parent, name);
|
|
|
--
|
|
|
-- /* lockess __d_lookup may fail due to concurrent d_move()
|
|
|
-- * in some unrelated directory, so try with d_lookup
|
|
|
-- */
|
|
|
-- if (!dentry)
|
|
|
-- dentry = d_lookup(parent, name);
|
|
|
-+ struct dentry *dentry = d_lookup(parent, name);
|
|
|
-
|
|
|
- if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
|
|
|
- dentry = do_revalidate(dentry, nd);
|
|
|
-@@ -434,6 +443,208 @@ static struct dentry * cached_lookup(str
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-+ * Theory of operation for opaque, whiteout, and fallthru:
|
|
|
-+ *
|
|
|
-+ * whiteout: Unconditionally stop lookup here - ENOENT
|
|
|
-+ *
|
|
|
-+ * opaque: Don't lookup in directories lower in the union stack
|
|
|
-+ *
|
|
|
-+ * fallthru: While looking up an entry, ignore the opaque flag for the
|
|
|
-+ * current directory only.
|
|
|
-+ *
|
|
|
-+ * A union stack is a linked list of directory dentries which appear
|
|
|
-+ * in the same place in the namespace. When constructing the union
|
|
|
-+ * stack, we include directories below opaque directories so that we
|
|
|
-+ * can properly handle fallthrus. All non-fallthru lookups have to
|
|
|
-+ * check for the opaque flag on the parent directory and obey it.
|
|
|
-+ *
|
|
|
-+ * In general, the code pattern is to lookup the the topmost entry
|
|
|
-+ * first (either the first visible non-negative dentry or a negative
|
|
|
-+ * dentry in the topmost layer of the union), then build the union
|
|
|
-+ * stack for the newly looked-up entry (if it is a directory).
|
|
|
-+ */
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * __cache_lookup_topmost - lookup the topmost (non-)negative dentry
|
|
|
-+ *
|
|
|
-+ * @nd - parent's nameidata
|
|
|
-+ * @name - pathname part to lookup
|
|
|
-+ * @path - found dentry for pathname part
|
|
|
-+ *
|
|
|
-+ * This is used for union mount lookups from dcache. The first non-negative
|
|
|
-+ * dentry is searched on all layers of the union stack. Otherwise the topmost
|
|
|
-+ * negative dentry is returned.
|
|
|
-+ */
|
|
|
-+static int __cache_lookup_topmost(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct dentry *dentry;
|
|
|
-+
|
|
|
-+ dentry = d_lookup(nd->path.dentry, name);
|
|
|
-+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
|
|
|
-+ dentry = do_revalidate(dentry, nd);
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Remember the topmost negative dentry in case we don't find anything
|
|
|
-+ */
|
|
|
-+ path->dentry = dentry;
|
|
|
-+ path->mnt = dentry ? nd->path.mnt : NULL;
|
|
|
-+
|
|
|
-+ if (!dentry || (dentry->d_inode || d_is_whiteout(dentry)))
|
|
|
-+ return !dentry;
|
|
|
-+
|
|
|
-+ /* Keep going through opaque directories if we found a fallthru */
|
|
|
-+ if (IS_OPAQUE(nd->path.dentry->d_inode) && !d_is_fallthru(dentry))
|
|
|
-+ return !dentry;
|
|
|
-+
|
|
|
-+ /* look for the first non-negative or whiteout dentry */
|
|
|
-+
|
|
|
-+ while (follow_union_down(&nd->path)) {
|
|
|
-+ dentry = d_hash_and_lookup(nd->path.dentry, name);
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * If parts of the union stack are not in the dcache we need
|
|
|
-+ * to do a real lookup
|
|
|
-+ */
|
|
|
-+ if (!dentry)
|
|
|
-+ goto out_dput;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * If parts of the union don't survive the revalidation we
|
|
|
-+ * need to do a real lookup
|
|
|
-+ */
|
|
|
-+ if (dentry->d_op && dentry->d_op->d_revalidate) {
|
|
|
-+ dentry = do_revalidate(dentry, nd);
|
|
|
-+ if (!dentry)
|
|
|
-+ goto out_dput;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (dentry->d_inode || d_is_whiteout(dentry))
|
|
|
-+ goto out_dput;
|
|
|
-+
|
|
|
-+ /* Stop the lookup on opaque parent and non-fallthru child */
|
|
|
-+ if (IS_OPAQUE(nd->path.dentry->d_inode) && !d_is_fallthru(dentry))
|
|
|
-+ goto out_dput;
|
|
|
-+
|
|
|
-+ dput(dentry);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ return !dentry;
|
|
|
-+
|
|
|
-+out_dput:
|
|
|
-+ dput(path->dentry);
|
|
|
-+ path->dentry = dentry;
|
|
|
-+ path->mnt = dentry ? mntget(nd->path.mnt) : NULL;
|
|
|
-+ return !dentry;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * __cache_lookup_build_union - build the union stack for this part,
|
|
|
-+ * cached version
|
|
|
-+ *
|
|
|
-+ * This is called after you have the topmost dentry in @path.
|
|
|
-+ */
|
|
|
-+static int __cache_lookup_build_union(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path last = *path;
|
|
|
-+ struct dentry *dentry;
|
|
|
-+
|
|
|
-+ while (follow_union_down(&nd->path)) {
|
|
|
-+ dentry = d_hash_and_lookup(nd->path.dentry, name);
|
|
|
-+ if (!dentry)
|
|
|
-+ return 1;
|
|
|
-+
|
|
|
-+ if (dentry->d_op && dentry->d_op->d_revalidate) {
|
|
|
-+ dentry = do_revalidate(dentry, nd);
|
|
|
-+ if (!dentry)
|
|
|
-+ return 1;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (d_is_whiteout(dentry)) {
|
|
|
-+ dput(dentry);
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (!dentry->d_inode) {
|
|
|
-+ dput(dentry);
|
|
|
-+ continue;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* only directories can be part of a union stack */
|
|
|
-+ if (!S_ISDIR(dentry->d_inode->i_mode)) {
|
|
|
-+ dput(dentry);
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* Add the newly discovered dir to the union stack */
|
|
|
-+ append_to_union(last.mnt, last.dentry, nd->path.mnt, dentry);
|
|
|
-+
|
|
|
-+ if (last.dentry != path->dentry)
|
|
|
-+ path_put(&last);
|
|
|
-+ last.dentry = dentry;
|
|
|
-+ last.mnt = mntget(nd->path.mnt);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (last.dentry != path->dentry)
|
|
|
-+ path_put(&last);
|
|
|
-+
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * cache_lookup_union - lookup a single pathname part from dcache
|
|
|
-+ *
|
|
|
-+ * This is a union mount capable version of what d_lookup() & revalidate()
|
|
|
-+ * would do. This function returns a valid (union) dentry on success.
|
|
|
-+ *
|
|
|
-+ * Remember: On failure it means that parts of the union aren't cached. You
|
|
|
-+ * should call real_lookup() afterwards to find the proper (union) dentry.
|
|
|
-+ */
|
|
|
-+static int cache_lookup_union(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ int res ;
|
|
|
-+
|
|
|
-+ if (!IS_MNT_UNION(nd->path.mnt)) {
|
|
|
-+ path->dentry = cache_lookup(nd->path.dentry, name, nd);
|
|
|
-+ path->mnt = path->dentry ? nd->path.mnt : NULL;
|
|
|
-+ res = path->dentry ? 0 : 1;
|
|
|
-+ } else {
|
|
|
-+ struct path safe = {
|
|
|
-+ .dentry = nd->path.dentry,
|
|
|
-+ .mnt = nd->path.mnt
|
|
|
-+ };
|
|
|
-+
|
|
|
-+ path_get(&safe);
|
|
|
-+ res = __cache_lookup_topmost(nd, name, path);
|
|
|
-+ if (res)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ /* only directories can be part of a union stack */
|
|
|
-+ if (!path->dentry->d_inode ||
|
|
|
-+ !S_ISDIR(path->dentry->d_inode->i_mode))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ /* Build the union stack for this part */
|
|
|
-+ res = __cache_lookup_build_union(nd, name, path);
|
|
|
-+ if (res) {
|
|
|
-+ dput(path->dentry);
|
|
|
-+ if (path->mnt != safe.mnt)
|
|
|
-+ mntput(path->mnt);
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+out:
|
|
|
-+ path_put(&nd->path);
|
|
|
-+ nd->path.dentry = safe.dentry;
|
|
|
-+ nd->path.mnt = safe.mnt;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ return res;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
- * Short-cut version of permission(), for calling by
|
|
|
- * path_walk(), when dcache lock is held. Combines parts
|
|
|
- * of permission() and generic_permission(), and tests ONLY for
|
|
|
-@@ -473,10 +684,11 @@ ok:
|
|
|
- * make sure that nobody added the entry to the dcache in the meantime..
|
|
|
- * SMP-safe
|
|
|
- */
|
|
|
--static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
|
|
|
-+static int real_lookup(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
- {
|
|
|
-- struct dentry * result;
|
|
|
-- struct inode *dir = parent->d_inode;
|
|
|
-+ struct inode *dir = nd->path.dentry->d_inode;
|
|
|
-+ int res = 0;
|
|
|
-
|
|
|
- mutex_lock(&dir->i_mutex);
|
|
|
- /*
|
|
|
-@@ -493,27 +705,36 @@ static struct dentry * real_lookup(struc
|
|
|
- *
|
|
|
- * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
|
|
|
- */
|
|
|
-- result = d_lookup(parent, name);
|
|
|
-- if (!result) {
|
|
|
-+ path->dentry = d_lookup(nd->path.dentry, name);
|
|
|
-+ path->mnt = nd->path.mnt;
|
|
|
-+ if (!path->dentry) {
|
|
|
- struct dentry *dentry;
|
|
|
-
|
|
|
- /* Don't create child dentry for a dead directory. */
|
|
|
-- result = ERR_PTR(-ENOENT);
|
|
|
-- if (IS_DEADDIR(dir))
|
|
|
-+ if (IS_DEADDIR(dir)) {
|
|
|
-+ res = -ENOENT;
|
|
|
- goto out_unlock;
|
|
|
-+ }
|
|
|
-
|
|
|
-- dentry = d_alloc(parent, name);
|
|
|
-- result = ERR_PTR(-ENOMEM);
|
|
|
-+ dentry = d_alloc(nd->path.dentry, name);
|
|
|
- if (dentry) {
|
|
|
-- result = dir->i_op->lookup(dir, dentry, nd);
|
|
|
-- if (result)
|
|
|
-+ path->dentry = dir->i_op->lookup(dir, dentry, nd);
|
|
|
-+ if (path->dentry) {
|
|
|
- dput(dentry);
|
|
|
-- else
|
|
|
-- result = dentry;
|
|
|
-+ if (IS_ERR(path->dentry)) {
|
|
|
-+ res = PTR_ERR(path->dentry);
|
|
|
-+ path->dentry = NULL;
|
|
|
-+ path->mnt = NULL;
|
|
|
-+ }
|
|
|
-+ } else
|
|
|
-+ path->dentry = dentry;
|
|
|
-+ } else {
|
|
|
-+ res = -ENOMEM;
|
|
|
-+ path->mnt = NULL;
|
|
|
- }
|
|
|
- out_unlock:
|
|
|
- mutex_unlock(&dir->i_mutex);
|
|
|
-- return result;
|
|
|
-+ return res;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-@@ -521,12 +742,170 @@ out_unlock:
|
|
|
- * we waited on the semaphore. Need to revalidate.
|
|
|
- */
|
|
|
- mutex_unlock(&dir->i_mutex);
|
|
|
-- if (result->d_op && result->d_op->d_revalidate) {
|
|
|
-- result = do_revalidate(result, nd);
|
|
|
-- if (!result)
|
|
|
-- result = ERR_PTR(-ENOENT);
|
|
|
-+ if (path->dentry->d_op && path->dentry->d_op->d_revalidate) {
|
|
|
-+ path->dentry = do_revalidate(path->dentry, nd);
|
|
|
-+ if (!path->dentry) {
|
|
|
-+ res = -ENOENT;
|
|
|
-+ path->mnt = NULL;
|
|
|
-+ }
|
|
|
-+ if (IS_ERR(path->dentry)) {
|
|
|
-+ res = PTR_ERR(path->dentry);
|
|
|
-+ path->dentry = NULL;
|
|
|
-+ path->mnt = NULL;
|
|
|
-+ }
|
|
|
- }
|
|
|
-- return result;
|
|
|
-+
|
|
|
-+ return res;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * __real_lookup_topmost - lookup topmost dentry, non-cached version
|
|
|
-+ *
|
|
|
-+ * If we reach a dentry with restricted access, we just stop the lookup
|
|
|
-+ * because we shouldn't see through that dentry. Same thing for dentry
|
|
|
-+ * type mismatch and whiteouts.
|
|
|
-+ *
|
|
|
-+ * FIXME:
|
|
|
-+ * - handle union stacks in use
|
|
|
-+ * - handle union stacks mounted upon union stacks
|
|
|
-+ * - avoid unnecessary allocations of union locks
|
|
|
-+ */
|
|
|
-+static int __real_lookup_topmost(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path next;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ err = real_lookup(nd, name, path);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ if (path->dentry->d_inode || d_is_whiteout(path->dentry))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ if (IS_OPAQUE(nd->path.dentry->d_inode) && !d_is_fallthru(path->dentry))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ while (follow_union_down(&nd->path)) {
|
|
|
-+ name->hash = full_name_hash(name->name, name->len);
|
|
|
-+ if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
|
|
|
-+ err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
|
|
|
-+ name);
|
|
|
-+ if (err < 0)
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ err = real_lookup(nd, name, &next);
|
|
|
-+ if (err)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ if (next.dentry->d_inode || d_is_whiteout(next.dentry)) {
|
|
|
-+ dput(path->dentry);
|
|
|
-+ mntget(next.mnt);
|
|
|
-+ *path = next;
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (IS_OPAQUE(nd->path.dentry->d_inode) && !d_is_fallthru(next.dentry))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ dput(next.dentry);
|
|
|
-+ }
|
|
|
-+out:
|
|
|
-+ if (err)
|
|
|
-+ dput(path->dentry);
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * __real_lookup_build_union: build the union stack for this pathname
|
|
|
-+ * part, non-cached version
|
|
|
-+ *
|
|
|
-+ * Called when not all parts of the union stack are in cache
|
|
|
-+ */
|
|
|
-+
|
|
|
-+static int __real_lookup_build_union(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path last = *path;
|
|
|
-+ struct path next;
|
|
|
-+ int err = 0;
|
|
|
-+
|
|
|
-+ while (follow_union_down(&nd->path)) {
|
|
|
-+ /* We need to recompute the hash for lower layer lookups */
|
|
|
-+ name->hash = full_name_hash(name->name, name->len);
|
|
|
-+ if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
|
|
|
-+ err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
|
|
|
-+ name);
|
|
|
-+ if (err < 0)
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ err = real_lookup(nd, name, &next);
|
|
|
-+ if (err)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ if (d_is_whiteout(next.dentry)) {
|
|
|
-+ dput(next.dentry);
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (!next.dentry->d_inode) {
|
|
|
-+ dput(next.dentry);
|
|
|
-+ continue;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* only directories can be part of a union stack */
|
|
|
-+ if (!S_ISDIR(next.dentry->d_inode->i_mode)) {
|
|
|
-+ dput(next.dentry);
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* now we know we found something "real" */
|
|
|
-+ append_to_union(last.mnt, last.dentry, next.mnt, next.dentry);
|
|
|
-+
|
|
|
-+ if (last.dentry != path->dentry)
|
|
|
-+ path_put(&last);
|
|
|
-+ last.dentry = next.dentry;
|
|
|
-+ last.mnt = mntget(next.mnt);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (last.dentry != path->dentry)
|
|
|
-+ path_put(&last);
|
|
|
-+out:
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static int real_lookup_union(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path safe = { .dentry = nd->path.dentry, .mnt = nd->path.mnt };
|
|
|
-+ int res ;
|
|
|
-+
|
|
|
-+ path_get(&safe);
|
|
|
-+ res = __real_lookup_topmost(nd, name, path);
|
|
|
-+ if (res)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ /* only directories can be part of a union stack */
|
|
|
-+ if (!path->dentry->d_inode ||
|
|
|
-+ !S_ISDIR(path->dentry->d_inode->i_mode))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ /* Build the union stack for this part */
|
|
|
-+ res = __real_lookup_build_union(nd, name, path);
|
|
|
-+ if (res) {
|
|
|
-+ dput(path->dentry);
|
|
|
-+ if (path->mnt != safe.mnt)
|
|
|
-+ mntput(path->mnt);
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+out:
|
|
|
-+ path_put(&nd->path);
|
|
|
-+ nd->path.dentry = safe.dentry;
|
|
|
-+ nd->path.mnt = safe.mnt;
|
|
|
-+ return res;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-@@ -629,11 +1008,8 @@ static __always_inline int __do_follow_l
|
|
|
- touch_atime(path->mnt, dentry);
|
|
|
- nd_set_link(nd, NULL);
|
|
|
-
|
|
|
-- if (path->mnt != nd->path.mnt) {
|
|
|
-- path_to_nameidata(path, nd);
|
|
|
-- dget(dentry);
|
|
|
-- }
|
|
|
-- mntget(path->mnt);
|
|
|
-+ if (path->mnt == nd->path.mnt)
|
|
|
-+ mntget(nd->path.mnt);
|
|
|
- cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
|
|
|
- error = PTR_ERR(cookie);
|
|
|
- if (!IS_ERR(cookie)) {
|
|
|
-@@ -721,7 +1097,7 @@ static int __follow_mount(struct path *p
|
|
|
- return res;
|
|
|
- }
|
|
|
-
|
|
|
--static void follow_mount(struct path *path)
|
|
|
-+void follow_mount(struct path *path)
|
|
|
- {
|
|
|
- while (d_mountpoint(path->dentry)) {
|
|
|
- struct vfsmount *mounted = lookup_mnt(path);
|
|
|
-@@ -786,6 +1162,7 @@ static __always_inline void follow_dotdo
|
|
|
- nd->path.mnt = parent;
|
|
|
- }
|
|
|
- follow_mount(&nd->path);
|
|
|
-+ follow_union_mount(&nd->path);
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-@@ -796,35 +1173,55 @@ static __always_inline void follow_dotdo
|
|
|
- static int do_lookup(struct nameidata *nd, struct qstr *name,
|
|
|
- struct path *path)
|
|
|
- {
|
|
|
-- struct vfsmount *mnt = nd->path.mnt;
|
|
|
-- struct dentry *dentry = __d_lookup(nd->path.dentry, name);
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ if (IS_MNT_UNION(nd->path.mnt))
|
|
|
-+ goto need_union_lookup;
|
|
|
-
|
|
|
-- if (!dentry)
|
|
|
-+ path->dentry = __d_lookup(nd->path.dentry, name);
|
|
|
-+ path->mnt = nd->path.mnt;
|
|
|
-+ if (!path->dentry)
|
|
|
- goto need_lookup;
|
|
|
-- if (dentry->d_op && dentry->d_op->d_revalidate)
|
|
|
-+ if (path->dentry->d_op && path->dentry->d_op->d_revalidate)
|
|
|
- goto need_revalidate;
|
|
|
-+
|
|
|
- done:
|
|
|
-- path->mnt = mnt;
|
|
|
-- path->dentry = dentry;
|
|
|
-- __follow_mount(path);
|
|
|
-+ if (nd->path.mnt != path->mnt) {
|
|
|
-+ nd->um_flags |= LAST_LOWLEVEL;
|
|
|
-+ follow_mount(path);
|
|
|
-+ } else
|
|
|
-+ __follow_mount(path);
|
|
|
-+ follow_union_mount(path);
|
|
|
- return 0;
|
|
|
-
|
|
|
- need_lookup:
|
|
|
-- dentry = real_lookup(nd->path.dentry, name, nd);
|
|
|
-- if (IS_ERR(dentry))
|
|
|
-+ err = real_lookup(nd, name, path);
|
|
|
-+ if (err)
|
|
|
-+ goto fail;
|
|
|
-+ goto done;
|
|
|
-+
|
|
|
-+need_union_lookup:
|
|
|
-+ err = cache_lookup_union(nd, name, path);
|
|
|
-+ if (!err && path->dentry)
|
|
|
-+ goto done;
|
|
|
-+
|
|
|
-+ err = real_lookup_union(nd, name, path);
|
|
|
-+ if (err)
|
|
|
- goto fail;
|
|
|
- goto done;
|
|
|
-
|
|
|
- need_revalidate:
|
|
|
-- dentry = do_revalidate(dentry, nd);
|
|
|
-- if (!dentry)
|
|
|
-+ path->dentry = do_revalidate(path->dentry, nd);
|
|
|
-+ if (!path->dentry)
|
|
|
- goto need_lookup;
|
|
|
-- if (IS_ERR(dentry))
|
|
|
-+ if (IS_ERR(path->dentry)) {
|
|
|
-+ err = PTR_ERR(path->dentry);
|
|
|
- goto fail;
|
|
|
-+ }
|
|
|
- goto done;
|
|
|
-
|
|
|
- fail:
|
|
|
-- return PTR_ERR(dentry);
|
|
|
-+ return err;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-@@ -851,6 +1248,8 @@ static int __link_path_walk(const char *
|
|
|
- if (nd->depth)
|
|
|
- lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
|
|
|
-
|
|
|
-+ follow_union_mount(&nd->path);
|
|
|
-+
|
|
|
- /* At this point we know we have a real path component. */
|
|
|
- for(;;) {
|
|
|
- unsigned long hash;
|
|
|
-@@ -913,6 +1312,44 @@ static int __link_path_walk(const char *
|
|
|
- if (err)
|
|
|
- break;
|
|
|
-
|
|
|
-+ /*
|
|
|
-+ * We want to create this element on the top level
|
|
|
-+ * file system in two cases:
|
|
|
-+ *
|
|
|
-+ * - We are specifically told to - LOOKUP_TOPMOST.
|
|
|
-+ * - This is a directory, and it does not yet exist on
|
|
|
-+ * the top level. Various tricks only work if
|
|
|
-+ * directories always exist on the top level.
|
|
|
-+ *
|
|
|
-+ * In either case, only create this element on the top
|
|
|
-+ * level if the last element is located on the lower
|
|
|
-+ * level. If the last element is located on the top
|
|
|
-+ * level, then every single element in the path
|
|
|
-+ * already exists on the top level.
|
|
|
-+ *
|
|
|
-+ * Note that we can assume that the parent is on the
|
|
|
-+ * top level since we always create the directory on
|
|
|
-+ * the top level.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+ if ((nd->um_flags & LAST_LOWLEVEL) &&
|
|
|
-+ ((next.dentry->d_inode &&
|
|
|
-+ S_ISDIR(next.dentry->d_inode->i_mode) &&
|
|
|
-+ (nd->path.mnt != next.mnt)) ||
|
|
|
-+ (nd->flags & LOOKUP_TOPMOST))) {
|
|
|
-+ struct dentry *dentry;
|
|
|
-+
|
|
|
-+ dentry = union_create_topmost(nd, &this, &next);
|
|
|
-+ if (IS_ERR(dentry)) {
|
|
|
-+ err = PTR_ERR(dentry);
|
|
|
-+ goto out_dput;
|
|
|
-+ }
|
|
|
-+ path_put_conditional(&next, nd);
|
|
|
-+ next.mnt = nd->path.mnt;
|
|
|
-+ next.dentry = dentry;
|
|
|
-+ nd->um_flags &= ~LAST_LOWLEVEL;
|
|
|
-+ }
|
|
|
-+
|
|
|
- err = -ENOENT;
|
|
|
- inode = next.dentry->d_inode;
|
|
|
- if (!inode)
|
|
|
-@@ -962,6 +1399,25 @@ last_component:
|
|
|
- err = do_lookup(nd, &this, &next);
|
|
|
- if (err)
|
|
|
- break;
|
|
|
-+
|
|
|
-+ if ((nd->um_flags & LAST_LOWLEVEL) &&
|
|
|
-+ ((next.dentry->d_inode &&
|
|
|
-+ S_ISDIR(next.dentry->d_inode->i_mode) &&
|
|
|
-+ (nd->path.mnt != next.mnt)) ||
|
|
|
-+ (nd->flags & LOOKUP_TOPMOST))) {
|
|
|
-+ struct dentry *dentry;
|
|
|
-+
|
|
|
-+ dentry = union_create_topmost(nd, &this, &next);
|
|
|
-+ if (IS_ERR(dentry)) {
|
|
|
-+ err = PTR_ERR(dentry);
|
|
|
-+ goto out_dput;
|
|
|
-+ }
|
|
|
-+ path_put_conditional(&next, nd);
|
|
|
-+ next.mnt = nd->path.mnt;
|
|
|
-+ next.dentry = dentry;
|
|
|
-+ nd->um_flags &= ~LAST_LOWLEVEL;
|
|
|
-+ }
|
|
|
-+
|
|
|
- inode = next.dentry->d_inode;
|
|
|
- if ((lookup_flags & LOOKUP_FOLLOW)
|
|
|
- && inode && inode->i_op->follow_link) {
|
|
|
-@@ -1029,6 +1485,7 @@ static int path_init(int dfd, const char
|
|
|
-
|
|
|
- nd->last_type = LAST_ROOT; /* if there are only slashes... */
|
|
|
- nd->flags = flags;
|
|
|
-+ nd->um_flags = 0;
|
|
|
- nd->depth = 0;
|
|
|
- nd->root.mnt = NULL;
|
|
|
-
|
|
|
-@@ -1172,61 +1629,437 @@ static int path_lookup_open(int dfd, con
|
|
|
- }
|
|
|
-
|
|
|
- static struct dentry *__lookup_hash(struct qstr *name,
|
|
|
-- struct dentry *base, struct nameidata *nd)
|
|
|
-+ struct dentry *base, struct nameidata *nd)
|
|
|
-+{
|
|
|
-+ struct dentry *dentry;
|
|
|
-+ struct inode *inode;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ inode = base->d_inode;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * See if the low-level filesystem might want
|
|
|
-+ * to use its own hash..
|
|
|
-+ */
|
|
|
-+ if (base->d_op && base->d_op->d_hash) {
|
|
|
-+ err = base->d_op->d_hash(base, name);
|
|
|
-+ dentry = ERR_PTR(err);
|
|
|
-+ if (err < 0)
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ dentry = cache_lookup(base, name, nd);
|
|
|
-+ if (!dentry) {
|
|
|
-+ struct dentry *new;
|
|
|
-+
|
|
|
-+ /* Don't create child dentry for a dead directory. */
|
|
|
-+ dentry = ERR_PTR(-ENOENT);
|
|
|
-+ if (IS_DEADDIR(inode))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ new = d_alloc(base, name);
|
|
|
-+ dentry = ERR_PTR(-ENOMEM);
|
|
|
-+ if (!new)
|
|
|
-+ goto out;
|
|
|
-+ dentry = inode->i_op->lookup(inode, new, nd);
|
|
|
-+ if (!dentry)
|
|
|
-+ dentry = new;
|
|
|
-+ else
|
|
|
-+ dput(new);
|
|
|
-+ }
|
|
|
-+out:
|
|
|
-+ return dentry;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Restricted form of lookup. Doesn't follow links, single-component only,
|
|
|
-+ * needs parent already locked. Doesn't follow mounts.
|
|
|
-+ * SMP-safe.
|
|
|
-+ */
|
|
|
-+static int lookup_hash(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+ path->mnt = nd->path.mnt;
|
|
|
-+ path->dentry = __lookup_hash(name, nd->path.dentry, nd);
|
|
|
-+ if (IS_ERR(path->dentry)) {
|
|
|
-+ err = PTR_ERR(path->dentry);
|
|
|
-+ path->dentry = NULL;
|
|
|
-+ path->mnt = NULL;
|
|
|
-+ }
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static int __hash_lookup_topmost(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path next;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ err = lookup_hash(nd, name, path);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ if (path->dentry->d_inode || d_is_whiteout(path->dentry))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ if (IS_OPAQUE(nd->path.dentry->d_inode) && !d_is_fallthru(path->dentry))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ while (follow_union_down(&nd->path)) {
|
|
|
-+ name->hash = full_name_hash(name->name, name->len);
|
|
|
-+ if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
|
|
|
-+ err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
|
|
|
-+ name);
|
|
|
-+ if (err < 0)
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ mutex_lock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ err = lookup_hash(nd, name, &next);
|
|
|
-+ mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ if (err)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ if (next.dentry->d_inode || d_is_whiteout(next.dentry)) {
|
|
|
-+ dput(path->dentry);
|
|
|
-+ mntget(next.mnt);
|
|
|
-+ *path = next;
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (IS_OPAQUE(nd->path.dentry->d_inode) && !d_is_fallthru(next.dentry))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ dput(next.dentry);
|
|
|
-+ }
|
|
|
-+out:
|
|
|
-+ if (err)
|
|
|
-+ dput(path->dentry);
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static int __hash_lookup_build_union(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path last = *path;
|
|
|
-+ struct path next;
|
|
|
-+ int err = 0;
|
|
|
-+
|
|
|
-+ while (follow_union_down(&nd->path)) {
|
|
|
-+ /* We need to recompute the hash for lower layer lookups */
|
|
|
-+ name->hash = full_name_hash(name->name, name->len);
|
|
|
-+ if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
|
|
|
-+ err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
|
|
|
-+ name);
|
|
|
-+ if (err < 0)
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ mutex_lock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ err = lookup_hash(nd, name, &next);
|
|
|
-+ mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ if (err)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ if (d_is_whiteout(next.dentry)) {
|
|
|
-+ dput(next.dentry);
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (!next.dentry->d_inode) {
|
|
|
-+ dput(next.dentry);
|
|
|
-+ continue;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* only directories can be part of a union stack */
|
|
|
-+ if (!S_ISDIR(next.dentry->d_inode->i_mode)) {
|
|
|
-+ dput(next.dentry);
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* now we know we found something "real" */
|
|
|
-+ append_to_union(last.mnt, last.dentry, next.mnt, next.dentry);
|
|
|
-+
|
|
|
-+ if (last.dentry != path->dentry)
|
|
|
-+ path_put(&last);
|
|
|
-+ last.dentry = next.dentry;
|
|
|
-+ last.mnt = mntget(next.mnt);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (last.dentry != path->dentry)
|
|
|
-+ path_put(&last);
|
|
|
-+out:
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+int hash_lookup_union(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path safe = { .dentry = nd->path.dentry, .mnt = nd->path.mnt };
|
|
|
-+ int res ;
|
|
|
-+
|
|
|
-+ path_get(&safe);
|
|
|
-+ res = __hash_lookup_topmost(nd, name, path);
|
|
|
-+ if (res)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ /* only directories can be part of a union stack */
|
|
|
-+ if (!path->dentry->d_inode ||
|
|
|
-+ !S_ISDIR(path->dentry->d_inode->i_mode))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ /* Build the union stack for this part */
|
|
|
-+ res = __hash_lookup_build_union(nd, name, path);
|
|
|
-+ if (res) {
|
|
|
-+ dput(path->dentry);
|
|
|
-+ if (path->mnt != safe.mnt)
|
|
|
-+ mntput(path->mnt);
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+out:
|
|
|
-+ path_put(&nd->path);
|
|
|
-+ nd->path.dentry = safe.dentry;
|
|
|
-+ nd->path.mnt = safe.mnt;
|
|
|
-+ return res;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * do_union_hash_lookup() - walk down the union stack and lookup_hash()
|
|
|
-+ * @nd: nameidata of parent to lookup from
|
|
|
-+ * @name: pathname component to lookup
|
|
|
-+ * @path: path to store result of lookup in
|
|
|
-+ *
|
|
|
-+ * Walk down the union stack and search for single pathname component name. It
|
|
|
-+ * is assumed that the caller already did a lookup_hash() in the topmost parent
|
|
|
-+ * that gave negative lookup result. Therefore this does call lookup_hash() in
|
|
|
-+ * every lower layer (!) of the union stack. If a directory is found the union
|
|
|
-+ * stack for that is assembled as well.
|
|
|
-+ *
|
|
|
-+ * Note:
|
|
|
-+ * The caller needs to take care of holding a valid reference to the topmost
|
|
|
-+ * parent.
|
|
|
-+ * On error we leave @path untouched as well as when we don't find anything.
|
|
|
-+ */
|
|
|
-+static int do_union_hash_lookup(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path next;
|
|
|
-+ int err = 0;
|
|
|
-+
|
|
|
-+ while (follow_union_down(&nd->path)) {
|
|
|
-+ /* rehash because of d_op->d_hash() by the previous layer */
|
|
|
-+ name->hash = full_name_hash(name->name, name->len);
|
|
|
-+
|
|
|
-+ mutex_lock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ err = lookup_hash(nd, name, &next);
|
|
|
-+ mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+
|
|
|
-+ if (err)
|
|
|
-+ break;
|
|
|
-+
|
|
|
-+ if (next.dentry->d_inode) {
|
|
|
-+ mntget(next.mnt);
|
|
|
-+ if (!S_ISDIR(next.dentry->d_inode->i_mode)) {
|
|
|
-+ *path = next;
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+ err = __hash_lookup_build_union(nd, name, &next);
|
|
|
-+ if (err)
|
|
|
-+ path_put(&next);
|
|
|
-+ else
|
|
|
-+ *path = next;
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ path_put_conditional(&next, nd);
|
|
|
-+
|
|
|
-+ if ((IS_OPAQUE(nd->path.dentry->d_inode) &&
|
|
|
-+ !d_is_fallthru(next.dentry)) ||
|
|
|
-+ d_is_whiteout(next.dentry))
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * _hash_lookup_union() - lookup single pathname component
|
|
|
-+ * @nd: nameidata of parent to lookup from
|
|
|
-+ * @name: pathname component to lookup
|
|
|
-+ * @path: path to store result of lookup in
|
|
|
-+ *
|
|
|
-+ * Returns the topmost parent locked and the target dentry found in the union
|
|
|
-+ * or the topmost negative target dentry otherwise.
|
|
|
-+ *
|
|
|
-+ * Note:
|
|
|
-+ * Returns topmost parent locked even on error.
|
|
|
-+ */
|
|
|
-+static int _hash_lookup_union(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct path parent = nd->path;
|
|
|
-+ struct path topmost;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ mutex_lock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ err = lookup_hash(nd, name, path);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ /* return if we found something and it isn't a directory we are done */
|
|
|
-+ if (path->dentry->d_inode && !S_ISDIR(path->dentry->d_inode->i_mode))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ /* stop lookup if the parent directory is marked opaque */
|
|
|
-+ if ((IS_OPAQUE(nd->path.dentry->d_inode) &&
|
|
|
-+ !d_is_fallthru(path->dentry)) ||
|
|
|
-+ d_is_whiteout(path->dentry))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ if (!strcmp(path->mnt->mnt_sb->s_type->name, "proc") ||
|
|
|
-+ !strcmp(path->mnt->mnt_sb->s_type->name, "sysfs"))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * safe a reference to the topmost parent for walking the union stack
|
|
|
-+ */
|
|
|
-+ path_get(&parent);
|
|
|
-+ topmost = *path;
|
|
|
-+
|
|
|
-+ if (path->dentry->d_inode && S_ISDIR(path->dentry->d_inode->i_mode)) {
|
|
|
-+ err = __hash_lookup_build_union(nd, name, path);
|
|
|
-+ if (err)
|
|
|
-+ goto err_lock_parent;
|
|
|
-+ goto out_lock_and_revalidate_parent;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ err = do_union_hash_lookup(nd, name, path);
|
|
|
-+ if (err)
|
|
|
-+ goto err_lock_parent;
|
|
|
-+
|
|
|
-+out_lock_and_revalidate_parent:
|
|
|
-+ /* seems that we haven't found anything, so return the topmost */
|
|
|
-+ path_to_nameidata(&parent, nd);
|
|
|
-+ mutex_lock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+
|
|
|
-+ if (topmost.dentry == path->dentry) {
|
|
|
-+ spin_lock(&path->dentry->d_lock);
|
|
|
-+ if (nd->path.dentry != path->dentry->d_parent) {
|
|
|
-+ spin_unlock(&path->dentry->d_lock);
|
|
|
-+ dput(path->dentry);
|
|
|
-+ name->hash = full_name_hash(name->name, name->len);
|
|
|
-+ err = lookup_hash(nd, name, path);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+ /* FIXME: What if we find a directory here ... */
|
|
|
-+ return err;
|
|
|
-+ }
|
|
|
-+ spin_unlock(&path->dentry->d_lock);
|
|
|
-+ } else
|
|
|
-+ dput(topmost.dentry);
|
|
|
-+
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+err_lock_parent:
|
|
|
-+ path_to_nameidata(&parent, nd);
|
|
|
-+ path_put_conditional(path, nd);
|
|
|
-+ mutex_lock(&nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * lookup_rename_source() - lookup the source used by rename
|
|
|
-+ *
|
|
|
-+ * This is a special version of _hash_lookup_union() which becomes necessary
|
|
|
-+ * for finding the source of a rename on union mounts.
|
|
|
-+ *
|
|
|
-+ * See comment for _hash_lookup_union() above.
|
|
|
-+ */
|
|
|
-+static int lookup_rename_source(struct nameidata *oldnd,
|
|
|
-+ struct nameidata *newnd,
|
|
|
-+ struct dentry **trap, struct qstr *name,
|
|
|
-+ struct path *old)
|
|
|
- {
|
|
|
-- struct dentry *dentry;
|
|
|
-- struct inode *inode;
|
|
|
-+ struct path parent = oldnd->path;
|
|
|
-+ struct path topmost;
|
|
|
- int err;
|
|
|
-
|
|
|
-- inode = base->d_inode;
|
|
|
-+ err = lookup_hash(oldnd, name, old);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ /* return if we found something and it isn't a directory we are done */
|
|
|
-+ if (old->dentry->d_inode && !S_ISDIR(old->dentry->d_inode->i_mode))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ /* stop lookup if the parent directory is marked opaque */
|
|
|
-+ if ((IS_OPAQUE(oldnd->path.dentry->d_inode) &&
|
|
|
-+ !d_is_fallthru(old->dentry)) ||
|
|
|
-+ d_is_whiteout(old->dentry))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ if (!strcmp(old->mnt->mnt_sb->s_type->name, "proc") ||
|
|
|
-+ !strcmp(old->mnt->mnt_sb->s_type->name, "sysfs"))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ unlock_rename(oldnd->path.dentry, newnd->path.dentry);
|
|
|
-
|
|
|
- /*
|
|
|
-- * See if the low-level filesystem might want
|
|
|
-- * to use its own hash..
|
|
|
-+ * safe a reference to the topmost parent for walking the union stack
|
|
|
- */
|
|
|
-- if (base->d_op && base->d_op->d_hash) {
|
|
|
-- err = base->d_op->d_hash(base, name);
|
|
|
-- dentry = ERR_PTR(err);
|
|
|
-- if (err < 0)
|
|
|
-- goto out;
|
|
|
-+ path_get(&parent);
|
|
|
-+ topmost = *old;
|
|
|
-+
|
|
|
-+ if (old->dentry->d_inode && S_ISDIR(old->dentry->d_inode->i_mode)) {
|
|
|
-+ err = __hash_lookup_build_union(oldnd, name, old);
|
|
|
-+ if (err)
|
|
|
-+ goto err_lock;
|
|
|
-+ goto out_lock_and_revalidate_parent;
|
|
|
- }
|
|
|
-
|
|
|
-- dentry = cached_lookup(base, name, nd);
|
|
|
-- if (!dentry) {
|
|
|
-- struct dentry *new;
|
|
|
-+ err = do_union_hash_lookup(oldnd, name, old);
|
|
|
-+ if (err)
|
|
|
-+ goto err_lock;
|
|
|
-
|
|
|
-- /* Don't create child dentry for a dead directory. */
|
|
|
-- dentry = ERR_PTR(-ENOENT);
|
|
|
-- if (IS_DEADDIR(inode))
|
|
|
-- goto out;
|
|
|
-+out_lock_and_revalidate_parent:
|
|
|
-+ path_to_nameidata(&parent, oldnd);
|
|
|
-+ *trap = lock_rename(oldnd->path.dentry, newnd->path.dentry);
|
|
|
-
|
|
|
-- new = d_alloc(base, name);
|
|
|
-- dentry = ERR_PTR(-ENOMEM);
|
|
|
-- if (!new)
|
|
|
-- goto out;
|
|
|
-- dentry = inode->i_op->lookup(inode, new, nd);
|
|
|
-- if (!dentry)
|
|
|
-- dentry = new;
|
|
|
-- else
|
|
|
-- dput(new);
|
|
|
-- }
|
|
|
--out:
|
|
|
-- return dentry;
|
|
|
--}
|
|
|
-+ /*
|
|
|
-+ * If we return the topmost dentry we have to make sure that it has not
|
|
|
-+ * been moved away while we gave up the topmost parents i_mutex lock.
|
|
|
-+ */
|
|
|
-+ if (topmost.dentry == old->dentry) {
|
|
|
-+ spin_lock(&old->dentry->d_lock);
|
|
|
-+ if (oldnd->path.dentry != old->dentry->d_parent) {
|
|
|
-+ spin_unlock(&old->dentry->d_lock);
|
|
|
-+ dput(old->dentry);
|
|
|
-+ name->hash = full_name_hash(name->name, name->len);
|
|
|
-+ err = lookup_hash(oldnd, name, old);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+ /* FIXME: What if we find a directory here ... */
|
|
|
-+ return err;
|
|
|
-+ }
|
|
|
-+ spin_unlock(&old->dentry->d_lock);
|
|
|
-+ } else
|
|
|
-+ dput(topmost.dentry);
|
|
|
-
|
|
|
--/*
|
|
|
-- * Restricted form of lookup. Doesn't follow links, single-component only,
|
|
|
-- * needs parent already locked. Doesn't follow mounts.
|
|
|
-- * SMP-safe.
|
|
|
-- */
|
|
|
--static struct dentry *lookup_hash(struct nameidata *nd)
|
|
|
--{
|
|
|
-- int err;
|
|
|
-+ return 0;
|
|
|
-
|
|
|
-- err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
|
|
|
-- if (err)
|
|
|
-- return ERR_PTR(err);
|
|
|
-- return __lookup_hash(&nd->last, nd->path.dentry, nd);
|
|
|
-+err_lock:
|
|
|
-+ path_to_nameidata(&parent, oldnd);
|
|
|
-+ path_put_conditional(old, oldnd);
|
|
|
-+ *trap = lock_rename(oldnd->path.dentry, newnd->path.dentry);
|
|
|
-+ return err;
|
|
|
- }
|
|
|
-
|
|
|
- static int __lookup_one_len(const char *name, struct qstr *this,
|
|
|
-@@ -1502,8 +2335,9 @@ int vfs_create(struct inode *dir, struct
|
|
|
- return error;
|
|
|
- }
|
|
|
-
|
|
|
--int may_open(struct path *path, int acc_mode, int flag)
|
|
|
-+int may_open(struct nameidata *nd, int acc_mode, int flag)
|
|
|
- {
|
|
|
-+ struct path *path = &nd->path;
|
|
|
- struct dentry *dentry = path->dentry;
|
|
|
- struct inode *inode = dentry->d_inode;
|
|
|
- int error;
|
|
|
-@@ -1529,7 +2363,7 @@ int may_open(struct path *path, int acc_
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
-- error = inode_permission(inode, acc_mode);
|
|
|
-+ error = union_permission(path, acc_mode);
|
|
|
- if (error)
|
|
|
- return error;
|
|
|
-
|
|
|
-@@ -1577,6 +2411,9 @@ int may_open(struct path *path, int acc_
|
|
|
- if (!error)
|
|
|
- error = security_path_truncate(path, 0,
|
|
|
- ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
|
|
|
-+ /* XXX don't copy up file data */
|
|
|
-+ if (is_unionized(path->dentry, path->mnt))
|
|
|
-+ error = union_copyup(nd, flag /* XXX not used */);
|
|
|
- if (!error) {
|
|
|
- vfs_dq_init(inode);
|
|
|
-
|
|
|
-@@ -1623,7 +2460,7 @@ out_unlock:
|
|
|
- if (error)
|
|
|
- return error;
|
|
|
- /* Don't check for write permission, don't truncate */
|
|
|
-- return may_open(&nd->path, 0, flag & ~O_TRUNC);
|
|
|
-+ return may_open(nd, 0, flag & ~O_TRUNC);
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-@@ -1738,12 +2575,10 @@ struct file *do_filp_open(int dfd, const
|
|
|
- if (flag & O_EXCL)
|
|
|
- nd.flags |= LOOKUP_EXCL;
|
|
|
- mutex_lock(&dir->d_inode->i_mutex);
|
|
|
-- path.dentry = lookup_hash(&nd);
|
|
|
-- path.mnt = nd.path.mnt;
|
|
|
-+ error = hash_lookup_union(&nd, &nd.last, &path);
|
|
|
-
|
|
|
- do_last:
|
|
|
-- error = PTR_ERR(path.dentry);
|
|
|
-- if (IS_ERR(path.dentry)) {
|
|
|
-+ if (error) {
|
|
|
- mutex_unlock(&dir->d_inode->i_mutex);
|
|
|
- goto exit;
|
|
|
- }
|
|
|
-@@ -1803,10 +2638,23 @@ do_last:
|
|
|
- if (path.dentry->d_inode->i_op->follow_link)
|
|
|
- goto do_link;
|
|
|
-
|
|
|
-- path_to_nameidata(&path, &nd);
|
|
|
- error = -EISDIR;
|
|
|
- if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
|
|
|
-- goto exit;
|
|
|
-+ goto exit_dput;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * If this file is on a lower layer of the union stack, copy it to the
|
|
|
-+ * topmost layer before opening it
|
|
|
-+ */
|
|
|
-+ if (path.dentry->d_inode &&
|
|
|
-+ (path.dentry->d_parent != dir) &&
|
|
|
-+ S_ISREG(path.dentry->d_inode->i_mode)) {
|
|
|
-+ error = __union_copyup(&path, &nd, &path);
|
|
|
-+ if (error)
|
|
|
-+ goto exit_dput;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ path_to_nameidata(&path, &nd);
|
|
|
- ok:
|
|
|
- /*
|
|
|
- * Consider:
|
|
|
-@@ -1824,12 +2672,18 @@ ok:
|
|
|
- if (error)
|
|
|
- goto exit;
|
|
|
- }
|
|
|
-- error = may_open(&nd.path, acc_mode, flag);
|
|
|
-+ error = may_open(&nd, acc_mode, flag);
|
|
|
- if (error) {
|
|
|
- if (will_write)
|
|
|
- mnt_drop_write(nd.path.mnt);
|
|
|
- goto exit;
|
|
|
- }
|
|
|
-+ /* Okay, all permissions go, now copy up */
|
|
|
-+ if (!(flag & O_CREAT) && (flag & FMODE_WRITE)) {
|
|
|
-+ error = union_copyup(&nd, flag /* XXX not used */);
|
|
|
-+ if (error)
|
|
|
-+ goto exit;
|
|
|
-+ }
|
|
|
- filp = nameidata_to_filp(&nd, open_flag);
|
|
|
- if (IS_ERR(filp))
|
|
|
- ima_counts_put(&nd.path,
|
|
|
-@@ -1904,8 +2758,7 @@ do_link:
|
|
|
- }
|
|
|
- dir = nd.path.dentry;
|
|
|
- mutex_lock(&dir->d_inode->i_mutex);
|
|
|
-- path.dentry = lookup_hash(&nd);
|
|
|
-- path.mnt = nd.path.mnt;
|
|
|
-+ error = hash_lookup_union(&nd, &nd.last, &path);
|
|
|
- __putname(nd.last.name);
|
|
|
- goto do_last;
|
|
|
- }
|
|
|
-@@ -1939,7 +2792,8 @@ EXPORT_SYMBOL(filp_open);
|
|
|
- */
|
|
|
- struct dentry *lookup_create(struct nameidata *nd, int is_dir)
|
|
|
- {
|
|
|
-- struct dentry *dentry = ERR_PTR(-EEXIST);
|
|
|
-+ struct path path = { .dentry = ERR_PTR(-EEXIST) } ;
|
|
|
-+ int err;
|
|
|
-
|
|
|
- mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
|
|
|
- /*
|
|
|
-@@ -1955,11 +2809,13 @@ struct dentry *lookup_create(struct name
|
|
|
- /*
|
|
|
- * Do the final lookup.
|
|
|
- */
|
|
|
-- dentry = lookup_hash(nd);
|
|
|
-- if (IS_ERR(dentry))
|
|
|
-+ err = hash_lookup_union(nd, &nd->last, &path);
|
|
|
-+ if (err) {
|
|
|
-+ path.dentry = ERR_PTR(err);
|
|
|
- goto fail;
|
|
|
-+ }
|
|
|
-
|
|
|
-- if (dentry->d_inode)
|
|
|
-+ if (path.dentry->d_inode)
|
|
|
- goto eexist;
|
|
|
- /*
|
|
|
- * Special case - lookup gave negative, but... we had foo/bar/
|
|
|
-@@ -1968,15 +2824,17 @@ struct dentry *lookup_create(struct name
|
|
|
- * been asking for (non-existent) directory. -ENOENT for you.
|
|
|
- */
|
|
|
- if (unlikely(!is_dir && nd->last.name[nd->last.len])) {
|
|
|
-- dput(dentry);
|
|
|
-- dentry = ERR_PTR(-ENOENT);
|
|
|
-+ path_put_conditional(&path, nd);
|
|
|
-+ path.dentry = ERR_PTR(-ENOENT);
|
|
|
- }
|
|
|
-- return dentry;
|
|
|
-+ if (nd->path.mnt != path.mnt)
|
|
|
-+ mntput(path.mnt);
|
|
|
-+ return path.dentry;
|
|
|
- eexist:
|
|
|
-- dput(dentry);
|
|
|
-- dentry = ERR_PTR(-EEXIST);
|
|
|
-+ path_put_conditional(&path, nd);
|
|
|
-+ path.dentry = ERR_PTR(-EEXIST);
|
|
|
- fail:
|
|
|
-- return dentry;
|
|
|
-+ return path.dentry;
|
|
|
- }
|
|
|
- EXPORT_SYMBOL_GPL(lookup_create);
|
|
|
-
|
|
|
-@@ -2088,6 +2946,7 @@ SYSCALL_DEFINE3(mknod, const char __user
|
|
|
- int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|
|
- {
|
|
|
- int error = may_create(dir, dentry);
|
|
|
-+ int opaque = 0;
|
|
|
-
|
|
|
- if (error)
|
|
|
- return error;
|
|
|
-@@ -2101,9 +2960,18 @@ int vfs_mkdir(struct inode *dir, struct
|
|
|
- return error;
|
|
|
-
|
|
|
- vfs_dq_init(dir);
|
|
|
-+
|
|
|
-+ if (d_is_whiteout(dentry))
|
|
|
-+ opaque = 1;
|
|
|
-+
|
|
|
- error = dir->i_op->mkdir(dir, dentry, mode);
|
|
|
-- if (!error)
|
|
|
-+ if (!error) {
|
|
|
- fsnotify_mkdir(dir, dentry);
|
|
|
-+ if (opaque) {
|
|
|
-+ dentry->d_inode->i_flags |= S_OPAQUE;
|
|
|
-+ mark_inode_dirty(dentry->d_inode);
|
|
|
-+ }
|
|
|
-+ }
|
|
|
- return error;
|
|
|
- }
|
|
|
-
|
|
|
-@@ -2149,6 +3017,212 @@ SYSCALL_DEFINE2(mkdir, const char __user
|
|
|
- return sys_mkdirat(AT_FDCWD, pathname, mode);
|
|
|
- }
|
|
|
-
|
|
|
-+
|
|
|
-+/* Checks on the victim for whiteout */
|
|
|
-+static inline int may_whiteout(struct inode *dir, struct dentry *victim,
|
|
|
-+ int isdir)
|
|
|
-+{
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ /* from may_create() */
|
|
|
-+ if (IS_DEADDIR(dir))
|
|
|
-+ return -ENOENT;
|
|
|
-+ err = inode_permission(dir, MAY_WRITE | MAY_EXEC);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ /* from may_delete() */
|
|
|
-+ if (IS_APPEND(dir))
|
|
|
-+ return -EPERM;
|
|
|
-+ if (!victim->d_inode)
|
|
|
-+ return 0;
|
|
|
-+ if (check_sticky(dir, victim->d_inode) ||
|
|
|
-+ IS_APPEND(victim->d_inode) ||
|
|
|
-+ IS_IMMUTABLE(victim->d_inode))
|
|
|
-+ return -EPERM;
|
|
|
-+ if (isdir) {
|
|
|
-+ if (!S_ISDIR(victim->d_inode->i_mode))
|
|
|
-+ return -ENOTDIR;
|
|
|
-+ if (IS_ROOT(victim))
|
|
|
-+ return -EBUSY;
|
|
|
-+ } else if (S_ISDIR(victim->d_inode->i_mode))
|
|
|
-+ return -EISDIR;
|
|
|
-+ if (victim->d_flags & DCACHE_NFSFS_RENAMED)
|
|
|
-+ return -EBUSY;
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * vfs_whiteout: creates a white-out for the given directory entry
|
|
|
-+ * @dir: parent inode
|
|
|
-+ * @dentry: directory entry to white-out
|
|
|
-+ *
|
|
|
-+ * Simply white-out a given directory entry. This functionality is usually used
|
|
|
-+ * in the sense of unlink. Therefore the given dentry can still be in-use and
|
|
|
-+ * contains an in-use inode. The filesystem has to do what unlink or rmdir
|
|
|
-+ * would in that case. Since the dentry still might be in-use we have to
|
|
|
-+ * provide a fresh unhashed dentry that whiteout can fill the new inode into.
|
|
|
-+ * In that case the given dentry is dropped and the fresh dentry containing the
|
|
|
-+ * whiteout is rehashed instead. If the given dentry is unused, the whiteout
|
|
|
-+ * inode is instantiated into it instead.
|
|
|
-+ *
|
|
|
-+ * After this returns with success, don't make any assumptions about the inode.
|
|
|
-+ * Just dput() it dentry.
|
|
|
-+ */
|
|
|
-+static int vfs_whiteout(struct inode *dir, struct dentry *dentry, int isdir)
|
|
|
-+{
|
|
|
-+ int err;
|
|
|
-+ struct inode *old_inode = dentry->d_inode;
|
|
|
-+ struct dentry *parent, *whiteout;
|
|
|
-+
|
|
|
-+ err = may_whiteout(dir, dentry, isdir);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ BUG_ON(dentry->d_parent->d_inode != dir);
|
|
|
-+
|
|
|
-+ if (!dir->i_op || !dir->i_op->whiteout)
|
|
|
-+ return -EOPNOTSUPP;
|
|
|
-+
|
|
|
-+ if (old_inode) {
|
|
|
-+ vfs_dq_init(dir);
|
|
|
-+
|
|
|
-+ mutex_lock(&old_inode->i_mutex);
|
|
|
-+ if (isdir)
|
|
|
-+ dentry_unhash(dentry);
|
|
|
-+ if (d_mountpoint(dentry))
|
|
|
-+ err = -EBUSY;
|
|
|
-+ else {
|
|
|
-+ if (isdir)
|
|
|
-+ err = security_inode_rmdir(dir, dentry);
|
|
|
-+ else
|
|
|
-+ err = security_inode_unlink(dir, dentry);
|
|
|
-+ }
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ parent = dget_parent(dentry);
|
|
|
-+ whiteout = d_alloc_name(parent, dentry->d_name.name);
|
|
|
-+
|
|
|
-+ if (!err)
|
|
|
-+ err = dir->i_op->whiteout(dir, dentry, whiteout);
|
|
|
-+
|
|
|
-+ if (old_inode) {
|
|
|
-+ mutex_unlock(&old_inode->i_mutex);
|
|
|
-+ if (!err) {
|
|
|
-+ fsnotify_link_count(old_inode);
|
|
|
-+ d_delete(dentry);
|
|
|
-+ }
|
|
|
-+ if (isdir)
|
|
|
-+ dput(dentry);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ dput(whiteout);
|
|
|
-+ dput(parent);
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+int path_whiteout(struct path *dir_path, struct dentry *dentry, int isdir)
|
|
|
-+{
|
|
|
-+ int error = mnt_want_write(dir_path->mnt);
|
|
|
-+
|
|
|
-+ if (!error) {
|
|
|
-+ error = vfs_whiteout(dir_path->dentry->d_inode, dentry, isdir);
|
|
|
-+ mnt_drop_write(dir_path->mnt);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ return error;
|
|
|
-+}
|
|
|
-+EXPORT_SYMBOL(path_whiteout);
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * This is abusing readdir to check if a union directory is logically empty.
|
|
|
-+ * Al Viro barfed when he saw this, but Val said: "Well, at this point I'm
|
|
|
-+ * aiming for working, pretty can come later"
|
|
|
-+ */
|
|
|
-+static int filldir_is_empty(void *__buf, const char *name, int namlen,
|
|
|
-+ loff_t offset, u64 ino, unsigned int d_type)
|
|
|
-+{
|
|
|
-+ int *is_empty = (int *)__buf;
|
|
|
-+
|
|
|
-+ switch (namlen) {
|
|
|
-+ case 2:
|
|
|
-+ if (name[1] != '.')
|
|
|
-+ break;
|
|
|
-+ case 1:
|
|
|
-+ if (name[0] != '.')
|
|
|
-+ break;
|
|
|
-+ return 0;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (d_type == DT_WHT)
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ (*is_empty) = 0;
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static int directory_is_empty(struct dentry *dentry, struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ struct file *file;
|
|
|
-+ int err;
|
|
|
-+ int is_empty = 1;
|
|
|
-+
|
|
|
-+ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
|
|
|
-+
|
|
|
-+ /* references for the file pointer */
|
|
|
-+ dget(dentry);
|
|
|
-+ mntget(mnt);
|
|
|
-+
|
|
|
-+ file = dentry_open(dentry, mnt, O_RDONLY, current_cred());
|
|
|
-+ if (IS_ERR(file))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ err = vfs_readdir(file, filldir_is_empty, &is_empty);
|
|
|
-+
|
|
|
-+ fput(file);
|
|
|
-+ return is_empty;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static int do_whiteout(struct nameidata *nd, struct path *path, int isdir)
|
|
|
-+{
|
|
|
-+ struct path safe = { .dentry = dget(nd->path.dentry),
|
|
|
-+ .mnt = mntget(nd->path.mnt) };
|
|
|
-+ struct dentry *dentry = path->dentry;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ err = may_whiteout(nd->path.dentry->d_inode, dentry, isdir);
|
|
|
-+ if (err)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ err = -ENOENT;
|
|
|
-+ if (!dentry->d_inode)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ err = -ENOTEMPTY;
|
|
|
-+ if (isdir && !directory_is_empty(path->dentry, path->mnt))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ if (nd->path.dentry != dentry->d_parent) {
|
|
|
-+ dentry = __lookup_hash(&path->dentry->d_name, nd->path.dentry,
|
|
|
-+ nd);
|
|
|
-+ err = PTR_ERR(dentry);
|
|
|
-+ if (IS_ERR(dentry))
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ dput(path->dentry);
|
|
|
-+ if (path->mnt != safe.mnt)
|
|
|
-+ mntput(path->mnt);
|
|
|
-+ path->mnt = nd->path.mnt;
|
|
|
-+ path->dentry = dentry;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ err = vfs_whiteout(nd->path.dentry->d_inode, dentry, isdir);
|
|
|
-+
|
|
|
-+out:
|
|
|
-+ path_put(&safe);
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
- /*
|
|
|
- * We try to drop the dentry early: we should have
|
|
|
- * a usage count of 2 if we're the only user of this
|
|
|
-@@ -2213,7 +3287,7 @@ static long do_rmdir(int dfd, const char
|
|
|
- {
|
|
|
- int error = 0;
|
|
|
- char * name;
|
|
|
-- struct dentry *dentry;
|
|
|
-+ struct path path;
|
|
|
- struct nameidata nd;
|
|
|
-
|
|
|
- error = user_path_parent(dfd, pathname, &nd, &name);
|
|
|
-@@ -2235,21 +3309,24 @@ static long do_rmdir(int dfd, const char
|
|
|
- nd.flags &= ~LOOKUP_PARENT;
|
|
|
-
|
|
|
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
|
|
|
-- dentry = lookup_hash(&nd);
|
|
|
-- error = PTR_ERR(dentry);
|
|
|
-- if (IS_ERR(dentry))
|
|
|
-+ error = hash_lookup_union(&nd, &nd.last, &path);
|
|
|
-+ if (error)
|
|
|
- goto exit2;
|
|
|
-+ if (is_unionized(nd.path.dentry, nd.path.mnt)) {
|
|
|
-+ error = do_whiteout(&nd, &path, 1);
|
|
|
-+ goto exit3;
|
|
|
-+ }
|
|
|
- error = mnt_want_write(nd.path.mnt);
|
|
|
- if (error)
|
|
|
- goto exit3;
|
|
|
-- error = security_path_rmdir(&nd.path, dentry);
|
|
|
-+ error = security_path_rmdir(&nd.path, path.dentry);
|
|
|
- if (error)
|
|
|
- goto exit4;
|
|
|
-- error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
|
|
|
-+ error = vfs_rmdir(nd.path.dentry->d_inode, path.dentry);
|
|
|
- exit4:
|
|
|
- mnt_drop_write(nd.path.mnt);
|
|
|
- exit3:
|
|
|
-- dput(dentry);
|
|
|
-+ path_put_conditional(&path, &nd);
|
|
|
- exit2:
|
|
|
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
|
|
|
- exit1:
|
|
|
-@@ -2304,7 +3381,7 @@ static long do_unlinkat(int dfd, const c
|
|
|
- {
|
|
|
- int error;
|
|
|
- char *name;
|
|
|
-- struct dentry *dentry;
|
|
|
-+ struct path path;
|
|
|
- struct nameidata nd;
|
|
|
- struct inode *inode = NULL;
|
|
|
-
|
|
|
-@@ -2319,26 +3396,29 @@ static long do_unlinkat(int dfd, const c
|
|
|
- nd.flags &= ~LOOKUP_PARENT;
|
|
|
-
|
|
|
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
|
|
|
-- dentry = lookup_hash(&nd);
|
|
|
-- error = PTR_ERR(dentry);
|
|
|
-- if (!IS_ERR(dentry)) {
|
|
|
-+ error = hash_lookup_union(&nd, &nd.last, &path);
|
|
|
-+ if (!error) {
|
|
|
- /* Why not before? Because we want correct error value */
|
|
|
- if (nd.last.name[nd.last.len])
|
|
|
- goto slashes;
|
|
|
-- inode = dentry->d_inode;
|
|
|
-+ inode = path.dentry->d_inode;
|
|
|
- if (inode)
|
|
|
- atomic_inc(&inode->i_count);
|
|
|
-+ if (is_unionized(nd.path.dentry, nd.path.mnt)) {
|
|
|
-+ error = do_whiteout(&nd, &path, 0);
|
|
|
-+ goto exit2;
|
|
|
-+ }
|
|
|
- error = mnt_want_write(nd.path.mnt);
|
|
|
- if (error)
|
|
|
- goto exit2;
|
|
|
-- error = security_path_unlink(&nd.path, dentry);
|
|
|
-+ error = security_path_unlink(&nd.path, path.dentry);
|
|
|
- if (error)
|
|
|
- goto exit3;
|
|
|
-- error = vfs_unlink(nd.path.dentry->d_inode, dentry);
|
|
|
-+ error = vfs_unlink(nd.path.dentry->d_inode, path.dentry);
|
|
|
- exit3:
|
|
|
- mnt_drop_write(nd.path.mnt);
|
|
|
- exit2:
|
|
|
-- dput(dentry);
|
|
|
-+ path_put_conditional(&path, &nd);
|
|
|
- }
|
|
|
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
|
|
|
- if (inode)
|
|
|
-@@ -2349,8 +3429,8 @@ exit1:
|
|
|
- return error;
|
|
|
-
|
|
|
- slashes:
|
|
|
-- error = !dentry->d_inode ? -ENOENT :
|
|
|
-- S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
|
|
|
-+ error = !path.dentry->d_inode ? -ENOENT :
|
|
|
-+ S_ISDIR(path.dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
|
|
|
- goto exit2;
|
|
|
- }
|
|
|
-
|
|
|
-@@ -2686,11 +3766,96 @@ int vfs_rename(struct inode *old_dir, st
|
|
|
- return error;
|
|
|
- }
|
|
|
-
|
|
|
-+static int vfs_rename_union(struct nameidata *oldnd, struct path *old,
|
|
|
-+ struct nameidata *newnd, struct path *new)
|
|
|
-+{
|
|
|
-+ struct inode *old_dir = oldnd->path.dentry->d_inode;
|
|
|
-+ struct inode *new_dir = newnd->path.dentry->d_inode;
|
|
|
-+ struct qstr old_name;
|
|
|
-+ char *name;
|
|
|
-+ struct dentry *dentry;
|
|
|
-+ int error;
|
|
|
-+
|
|
|
-+ if (old->dentry->d_inode == new->dentry->d_inode)
|
|
|
-+ return 0;
|
|
|
-+ error = may_whiteout(old_dir, old->dentry, 0);
|
|
|
-+ if (error)
|
|
|
-+ return error;
|
|
|
-+ if (!old_dir->i_op || !old_dir->i_op->whiteout)
|
|
|
-+ return -EPERM;
|
|
|
-+
|
|
|
-+ if (!new->dentry->d_inode)
|
|
|
-+ error = may_create(new_dir, new->dentry);
|
|
|
-+ else
|
|
|
-+ error = may_delete(new_dir, new->dentry, 0);
|
|
|
-+ if (error)
|
|
|
-+ return error;
|
|
|
-+
|
|
|
-+ vfs_dq_init(old_dir);
|
|
|
-+ vfs_dq_init(new_dir);
|
|
|
-+
|
|
|
-+ error = -EBUSY;
|
|
|
-+ if (d_mountpoint(old->dentry) || d_mountpoint(new->dentry))
|
|
|
-+ return error;
|
|
|
-+
|
|
|
-+ error = -ENOMEM;
|
|
|
-+ name = kmalloc(old->dentry->d_name.len, GFP_KERNEL);
|
|
|
-+ if (!name)
|
|
|
-+ return error;
|
|
|
-+ strncpy(name, old->dentry->d_name.name, old->dentry->d_name.len);
|
|
|
-+ name[old->dentry->d_name.len] = 0;
|
|
|
-+ old_name.len = old->dentry->d_name.len;
|
|
|
-+ old_name.hash = old->dentry->d_name.hash;
|
|
|
-+ old_name.name = name;
|
|
|
-+
|
|
|
-+ /* possibly delete the existing new file */
|
|
|
-+ if ((newnd->path.dentry == new->dentry->d_parent) &&
|
|
|
-+ new->dentry->d_inode) {
|
|
|
-+ /* FIXME: inode may be truncated while we hold a lock */
|
|
|
-+ error = vfs_unlink(new_dir, new->dentry);
|
|
|
-+ if (error)
|
|
|
-+ goto freename;
|
|
|
-+
|
|
|
-+ dentry = __lookup_hash(&new->dentry->d_name,
|
|
|
-+ newnd->path.dentry, newnd);
|
|
|
-+ if (IS_ERR(dentry))
|
|
|
-+ goto freename;
|
|
|
-+
|
|
|
-+ dput(new->dentry);
|
|
|
-+ new->dentry = dentry;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* copyup to the new file */
|
|
|
-+ error = __union_copyup(old, newnd, new);
|
|
|
-+ if (error)
|
|
|
-+ goto freename;
|
|
|
-+
|
|
|
-+ /* whiteout the old file */
|
|
|
-+ dentry = __lookup_hash(&old_name, oldnd->path.dentry, oldnd);
|
|
|
-+ error = PTR_ERR(dentry);
|
|
|
-+ if (IS_ERR(dentry))
|
|
|
-+ goto freename;
|
|
|
-+ error = vfs_whiteout(old_dir, dentry, 0);
|
|
|
-+ dput(dentry);
|
|
|
-+
|
|
|
-+ /* FIXME: This is actually unlink() && create() ... */
|
|
|
-+/*
|
|
|
-+ if (!error) {
|
|
|
-+ const char *new_name = old_dentry->d_name.name;
|
|
|
-+ fsnotify_move(old_dir, new_dir, old_name.name, new_name, 0,
|
|
|
-+ new_dentry->d_inode, old_dentry->d_inode);
|
|
|
-+ }
|
|
|
-+*/
|
|
|
-+freename:
|
|
|
-+ kfree(old_name.name);
|
|
|
-+ return error;
|
|
|
-+}
|
|
|
-+
|
|
|
- SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
|
|
|
- int, newdfd, const char __user *, newname)
|
|
|
- {
|
|
|
- struct dentry *old_dir, *new_dir;
|
|
|
-- struct dentry *old_dentry, *new_dentry;
|
|
|
-+ struct path old, new;
|
|
|
- struct dentry *trap;
|
|
|
- struct nameidata oldnd, newnd;
|
|
|
- char *from;
|
|
|
-@@ -2724,16 +3889,28 @@ SYSCALL_DEFINE4(renameat, int, olddfd, c
|
|
|
-
|
|
|
- trap = lock_rename(new_dir, old_dir);
|
|
|
-
|
|
|
-- old_dentry = lookup_hash(&oldnd);
|
|
|
-- error = PTR_ERR(old_dentry);
|
|
|
-- if (IS_ERR(old_dentry))
|
|
|
-+ /*
|
|
|
-+ * For union mounts we need to call a giant lookup_rename_source()
|
|
|
-+ * instead.
|
|
|
-+ * First lock_rename() and look on the topmost fs like you would do in
|
|
|
-+ * the normal rename, if you find something which is not a directory,
|
|
|
-+ * go ahead and lookup target and do normal rename.
|
|
|
-+ * If you find a negative dentry, unlock_rename() and continue as
|
|
|
-+ * _hash_lookup_union() would do without locking the topmost parent
|
|
|
-+ * at the end. After that do lock_rename() of the source parent and the
|
|
|
-+ * target parent and do a copyup with additional whiteout creation at
|
|
|
-+ * the end.
|
|
|
-+ */
|
|
|
-+// error = hash_lookup_union(&oldnd, &oldnd.last, &old);
|
|
|
-+ error = lookup_rename_source(&oldnd, &newnd, &trap, &oldnd.last, &old);
|
|
|
-+ if (error)
|
|
|
- goto exit3;
|
|
|
- /* source must exist */
|
|
|
- error = -ENOENT;
|
|
|
-- if (!old_dentry->d_inode)
|
|
|
-+ if (!old.dentry->d_inode)
|
|
|
- goto exit4;
|
|
|
- /* unless the source is a directory trailing slashes give -ENOTDIR */
|
|
|
-- if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
|
|
|
-+ if (!S_ISDIR(old.dentry->d_inode->i_mode)) {
|
|
|
- error = -ENOTDIR;
|
|
|
- if (oldnd.last.name[oldnd.last.len])
|
|
|
- goto exit4;
|
|
|
-@@ -2742,32 +3919,44 @@ SYSCALL_DEFINE4(renameat, int, olddfd, c
|
|
|
- }
|
|
|
- /* source should not be ancestor of target */
|
|
|
- error = -EINVAL;
|
|
|
-- if (old_dentry == trap)
|
|
|
-+ if (old.dentry == trap)
|
|
|
- goto exit4;
|
|
|
-- new_dentry = lookup_hash(&newnd);
|
|
|
-- error = PTR_ERR(new_dentry);
|
|
|
-- if (IS_ERR(new_dentry))
|
|
|
-+ /* target is always on topmost fs, even with unions */
|
|
|
-+ error = lookup_hash(&newnd, &newnd.last, &new);
|
|
|
-+ if (error)
|
|
|
- goto exit4;
|
|
|
- /* target should not be an ancestor of source */
|
|
|
- error = -ENOTEMPTY;
|
|
|
-- if (new_dentry == trap)
|
|
|
-+ if (new.dentry == trap)
|
|
|
-+ goto exit5;
|
|
|
-+ /* renaming of directories on unions is done by the user-space */
|
|
|
-+ error = -EXDEV;
|
|
|
-+ if (is_unionized(oldnd.path.dentry, oldnd.path.mnt) &&
|
|
|
-+ S_ISDIR(old.dentry->d_inode->i_mode))
|
|
|
- goto exit5;
|
|
|
-+// if (is_unionized(newnd.path.dentry, newnd.path.mnt))
|
|
|
-+// goto exit5;
|
|
|
-
|
|
|
- error = mnt_want_write(oldnd.path.mnt);
|
|
|
- if (error)
|
|
|
- goto exit5;
|
|
|
-- error = security_path_rename(&oldnd.path, old_dentry,
|
|
|
-- &newnd.path, new_dentry);
|
|
|
-+ error = security_path_rename(&oldnd.path, old.dentry,
|
|
|
-+ &newnd.path, new.dentry);
|
|
|
- if (error)
|
|
|
- goto exit6;
|
|
|
-- error = vfs_rename(old_dir->d_inode, old_dentry,
|
|
|
-- new_dir->d_inode, new_dentry);
|
|
|
-+ if (is_unionized(oldnd.path.dentry, oldnd.path.mnt) &&
|
|
|
-+ (old.dentry->d_parent != oldnd.path.dentry)) {
|
|
|
-+ error = vfs_rename_union(&oldnd, &old, &newnd, &new);
|
|
|
-+ goto exit6;
|
|
|
-+ }
|
|
|
-+ error = vfs_rename(old_dir->d_inode, old.dentry,
|
|
|
-+ new_dir->d_inode, new.dentry);
|
|
|
- exit6:
|
|
|
- mnt_drop_write(oldnd.path.mnt);
|
|
|
- exit5:
|
|
|
-- dput(new_dentry);
|
|
|
-+ path_put_conditional(&new, &newnd);
|
|
|
- exit4:
|
|
|
-- dput(old_dentry);
|
|
|
-+ path_put_conditional(&old, &oldnd);
|
|
|
- exit3:
|
|
|
- unlock_rename(new_dir, old_dir);
|
|
|
- exit2:
|
|
|
---- a/fs/namespace.c
|
|
|
-+++ b/fs/namespace.c
|
|
|
-@@ -29,6 +29,7 @@
|
|
|
- #include <linux/log2.h>
|
|
|
- #include <linux/idr.h>
|
|
|
- #include <linux/fs_struct.h>
|
|
|
-+#include <linux/union.h>
|
|
|
- #include <asm/uaccess.h>
|
|
|
- #include <asm/unistd.h>
|
|
|
- #include "pnode.h"
|
|
|
-@@ -150,6 +151,9 @@ struct vfsmount *alloc_vfsmnt(const char
|
|
|
- INIT_LIST_HEAD(&mnt->mnt_share);
|
|
|
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
|
|
|
- INIT_LIST_HEAD(&mnt->mnt_slave);
|
|
|
-+#ifdef CONFIG_UNION_MOUNT
|
|
|
-+ INIT_LIST_HEAD(&mnt->mnt_unions);
|
|
|
-+#endif
|
|
|
- #ifdef CONFIG_SMP
|
|
|
- mnt->mnt_writers = alloc_percpu(int);
|
|
|
- if (!mnt->mnt_writers)
|
|
|
-@@ -469,6 +473,7 @@ static void __touch_mnt_namespace(struct
|
|
|
-
|
|
|
- static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
|
|
|
- {
|
|
|
-+ detach_mnt_union(mnt);
|
|
|
- old_path->dentry = mnt->mnt_mountpoint;
|
|
|
- old_path->mnt = mnt->mnt_parent;
|
|
|
- mnt->mnt_parent = mnt;
|
|
|
-@@ -492,6 +497,7 @@ static void attach_mnt(struct vfsmount *
|
|
|
- list_add_tail(&mnt->mnt_hash, mount_hashtable +
|
|
|
- hash(path->mnt, path->dentry));
|
|
|
- list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
|
|
|
-+ attach_mnt_union(mnt, path->mnt, path->dentry);
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-@@ -514,6 +520,7 @@ static void commit_tree(struct vfsmount
|
|
|
- list_add_tail(&mnt->mnt_hash, mount_hashtable +
|
|
|
- hash(parent, mnt->mnt_mountpoint));
|
|
|
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
|
|
|
-+ attach_mnt_union(mnt, mnt->mnt_parent, mnt->mnt_mountpoint);
|
|
|
- touch_mnt_namespace(n);
|
|
|
- }
|
|
|
-
|
|
|
-@@ -770,6 +777,7 @@ static void show_mnt_opts(struct seq_fil
|
|
|
- { MNT_NODIRATIME, ",nodiratime" },
|
|
|
- { MNT_RELATIME, ",relatime" },
|
|
|
- { MNT_STRICTATIME, ",strictatime" },
|
|
|
-+ { MNT_UNION, ",union" },
|
|
|
- { 0, NULL }
|
|
|
- };
|
|
|
- const struct proc_fs_info *fs_infop;
|
|
|
-@@ -984,6 +992,7 @@ void release_mounts(struct list_head *he
|
|
|
- struct dentry *dentry;
|
|
|
- struct vfsmount *m;
|
|
|
- spin_lock(&vfsmount_lock);
|
|
|
-+ detach_mnt_union(mnt);
|
|
|
- dentry = mnt->mnt_mountpoint;
|
|
|
- m = mnt->mnt_parent;
|
|
|
- mnt->mnt_mountpoint = mnt->mnt_root;
|
|
|
-@@ -1102,6 +1111,11 @@ static int do_umount(struct vfsmount *mn
|
|
|
- spin_unlock(&vfsmount_lock);
|
|
|
- if (retval)
|
|
|
- security_sb_umount_busy(mnt);
|
|
|
-+ /* If this was a union mount, we are no longer a read-only
|
|
|
-+ * user on the underlying mount */
|
|
|
-+ if (mnt->mnt_flags & MNT_UNION)
|
|
|
-+ mnt->mnt_parent->mnt_sb->s_readonly_users--;
|
|
|
-+
|
|
|
- up_write(&namespace_sem);
|
|
|
- release_mounts(&umount_list);
|
|
|
- return retval;
|
|
|
-@@ -1426,6 +1440,10 @@ static int do_change_type(struct path *p
|
|
|
- if (path->dentry != path->mnt->mnt_root)
|
|
|
- return -EINVAL;
|
|
|
-
|
|
|
-+ /* Don't change the type of union mounts */
|
|
|
-+ if (IS_MNT_UNION(path->mnt))
|
|
|
-+ return -EINVAL;
|
|
|
-+
|
|
|
- down_write(&namespace_sem);
|
|
|
- if (type == MS_SHARED) {
|
|
|
- err = invent_group_ids(mnt, recurse);
|
|
|
-@@ -1444,10 +1462,65 @@ static int do_change_type(struct path *p
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
-+ * Mount-time check of upper and lower layer file systems to see if we
|
|
|
-+ * can union mount one on the other.
|
|
|
-+ *
|
|
|
-+ * Union mounts must follow these rules:
|
|
|
-+ *
|
|
|
-+ * - The lower layer must be read-only. This avoids lots of nasty
|
|
|
-+ * unsolvable races where file system structures disappear suddenly.
|
|
|
-+ * XXX - Checking the vfsmnt for read-only is a temporary hack; the
|
|
|
-+ * file system could be mounted read-write elsewhere. We need to
|
|
|
-+ * enforce read-only at the superblock level (patches coming).
|
|
|
-+ *
|
|
|
-+ * - The upper layer must be writable. This isn't an absolute
|
|
|
-+ * requirement; right now we need it to make readdir() work since we
|
|
|
-+ * copy up directory entries to the top level. A possible
|
|
|
-+ * workaround is to mount a tmpfs file system transparently over the
|
|
|
-+ * top.
|
|
|
-+ *
|
|
|
-+ * - The upper layer must support whiteouts and fallthrus (if it is
|
|
|
-+ * writable).
|
|
|
-+ *
|
|
|
-+ * - The lower layer must not also be a union mount. This is just to
|
|
|
-+ * make life simpler for now, there is no inherent limitation on the
|
|
|
-+ * number of layers.
|
|
|
-+ *
|
|
|
-+ * XXX - Check other mount flags for incompatibilities - I'm sure
|
|
|
-+ * there are some.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+static int
|
|
|
-+check_union_mnt(struct path *mntpnt, struct vfsmount *top_mnt, int mnt_flags)
|
|
|
-+{
|
|
|
-+ struct vfsmount *lower_mnt = mntpnt->mnt;
|
|
|
-+
|
|
|
-+ /* Is this even a union mount? */
|
|
|
-+ if (!(mnt_flags & MNT_UNION))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ /* Lower layer must be read-only and not a union mount */
|
|
|
-+ if (!(lower_mnt->mnt_sb->s_flags & MS_RDONLY) ||
|
|
|
-+ (lower_mnt->mnt_flags & MNT_UNION))
|
|
|
-+ return -EBUSY;
|
|
|
-+
|
|
|
-+ /* Upper layer must be writable */
|
|
|
-+ if (mnt_flags & MNT_READONLY)
|
|
|
-+ return -EROFS;
|
|
|
-+
|
|
|
-+ /* Upper layer must support whiteouts and fallthrus */
|
|
|
-+ if (!(top_mnt->mnt_sb->s_flags & MS_WHITEOUT))
|
|
|
-+ return -EINVAL;
|
|
|
-+
|
|
|
-+ /* All good! */
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
- * do loopback mount.
|
|
|
- */
|
|
|
--static int do_loopback(struct path *path, char *old_name,
|
|
|
-- int recurse)
|
|
|
-+static int do_loopback(struct path *path, char *old_name, int recurse,
|
|
|
-+ int mnt_flags)
|
|
|
- {
|
|
|
- struct path old_path;
|
|
|
- struct vfsmount *mnt = NULL;
|
|
|
-@@ -1477,6 +1550,13 @@ static int do_loopback(struct path *path
|
|
|
- if (!mnt)
|
|
|
- goto out;
|
|
|
-
|
|
|
-+ err = check_union_mnt(&old_path, mnt, mnt_flags);
|
|
|
-+ if (err)
|
|
|
-+ goto out;
|
|
|
-+
|
|
|
-+ if (mnt_flags & MNT_UNION)
|
|
|
-+ mnt->mnt_flags |= MNT_UNION;
|
|
|
-+
|
|
|
- err = graft_tree(mnt, path);
|
|
|
- if (err) {
|
|
|
- LIST_HEAD(umount_list);
|
|
|
-@@ -1486,6 +1566,10 @@ static int do_loopback(struct path *path
|
|
|
- release_mounts(&umount_list);
|
|
|
- }
|
|
|
-
|
|
|
-+ /* If this is a union mount, add ourselves to the readonly users */
|
|
|
-+ if (mnt_flags & MNT_UNION)
|
|
|
-+ mnt->mnt_parent->mnt_sb->s_readonly_users++;
|
|
|
-+
|
|
|
- out:
|
|
|
- up_write(&namespace_sem);
|
|
|
- path_put(&old_path);
|
|
|
-@@ -1570,6 +1654,13 @@ static int do_move_mount(struct path *pa
|
|
|
- if (err)
|
|
|
- return err;
|
|
|
-
|
|
|
-+ /* moving to or from a union mount is not supported */
|
|
|
-+ err = -EINVAL;
|
|
|
-+ if (IS_MNT_UNION(path->mnt))
|
|
|
-+ goto exit;
|
|
|
-+ if (IS_MNT_UNION(old_path.mnt))
|
|
|
-+ goto exit;
|
|
|
-+
|
|
|
- down_write(&namespace_sem);
|
|
|
- while (d_mountpoint(path->dentry) &&
|
|
|
- follow_down(path))
|
|
|
-@@ -1627,6 +1718,7 @@ out:
|
|
|
- up_write(&namespace_sem);
|
|
|
- if (!err)
|
|
|
- path_put(&parent_path);
|
|
|
-+exit:
|
|
|
- path_put(&old_path);
|
|
|
- return err;
|
|
|
- }
|
|
|
-@@ -1684,10 +1776,18 @@ int do_add_mount(struct vfsmount *newmnt
|
|
|
- if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
|
|
|
- goto unlock;
|
|
|
-
|
|
|
-+ err = check_union_mnt(path, newmnt, mnt_flags);
|
|
|
-+ if (err)
|
|
|
-+ goto unlock;
|
|
|
-+
|
|
|
- newmnt->mnt_flags = mnt_flags;
|
|
|
- if ((err = graft_tree(newmnt, path)))
|
|
|
- goto unlock;
|
|
|
-
|
|
|
-+ /* If this is a union mount, add ourselves to the readonly users */
|
|
|
-+ if (mnt_flags & MNT_UNION)
|
|
|
-+ newmnt->mnt_parent->mnt_sb->s_readonly_users++;
|
|
|
-+
|
|
|
- if (fslist) /* add to the specified expiration list */
|
|
|
- list_add_tail(&newmnt->mnt_expire, fslist);
|
|
|
-
|
|
|
-@@ -1940,10 +2040,12 @@ long do_mount(char *dev_name, char *dir_
|
|
|
- mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
|
|
|
- if (flags & MS_RDONLY)
|
|
|
- mnt_flags |= MNT_READONLY;
|
|
|
-+ if (flags & MS_UNION)
|
|
|
-+ mnt_flags |= MNT_UNION;
|
|
|
-
|
|
|
- flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
|
|
|
- MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
|
|
|
-- MS_STRICTATIME);
|
|
|
-+ MS_STRICTATIME | MS_UNION);
|
|
|
-
|
|
|
- /* ... and get the mountpoint */
|
|
|
- retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
|
|
|
-@@ -1959,7 +2061,8 @@ long do_mount(char *dev_name, char *dir_
|
|
|
- retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
|
|
|
- data_page);
|
|
|
- else if (flags & MS_BIND)
|
|
|
-- retval = do_loopback(&path, dev_name, flags & MS_REC);
|
|
|
-+ retval = do_loopback(&path, dev_name, flags & MS_REC,
|
|
|
-+ mnt_flags);
|
|
|
- else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
|
|
|
- retval = do_change_type(&path, flags);
|
|
|
- else if (flags & MS_MOVE)
|
|
|
-@@ -2196,6 +2299,8 @@ SYSCALL_DEFINE2(pivot_root, const char _
|
|
|
- if (d_unlinked(old.dentry))
|
|
|
- goto out2;
|
|
|
- error = -EBUSY;
|
|
|
-+ follow_union_down(&new);
|
|
|
-+ follow_union_down(&root);
|
|
|
- if (new.mnt == root.mnt ||
|
|
|
- old.mnt == root.mnt)
|
|
|
- goto out2; /* loop, on the same file system */
|
|
|
---- a/fs/nfsctl.c
|
|
|
-+++ b/fs/nfsctl.c
|
|
|
-@@ -38,10 +38,10 @@ static struct file *do_open(char *name,
|
|
|
- return ERR_PTR(error);
|
|
|
-
|
|
|
- if (flags == O_RDWR)
|
|
|
-- error = may_open(&nd.path, MAY_READ|MAY_WRITE,
|
|
|
-- FMODE_READ|FMODE_WRITE);
|
|
|
-+ error = may_open(&nd, MAY_READ|MAY_WRITE,
|
|
|
-+ FMODE_READ|FMODE_WRITE);
|
|
|
- else
|
|
|
-- error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE);
|
|
|
-+ error = may_open(&nd, MAY_WRITE, FMODE_WRITE);
|
|
|
-
|
|
|
- if (!error)
|
|
|
- return dentry_open(nd.path.dentry, nd.path.mnt, flags,
|
|
|
---- a/fs/nfsd/nfs3xdr.c
|
|
|
-+++ b/fs/nfsd/nfs3xdr.c
|
|
|
-@@ -898,6 +898,11 @@ encode_entry(struct readdir_cd *ccd, con
|
|
|
- int elen; /* estimated entry length in words */
|
|
|
- int num_entry_words = 0; /* actual number of words */
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT) {
|
|
|
-+ cd->common.err = nfs_ok;
|
|
|
-+ return 0;
|
|
|
-+ }
|
|
|
-+
|
|
|
- if (cd->offset) {
|
|
|
- u64 offset64 = offset;
|
|
|
-
|
|
|
---- a/fs/nfsd/nfs4xdr.c
|
|
|
-+++ b/fs/nfsd/nfs4xdr.c
|
|
|
-@@ -2261,7 +2261,7 @@ nfsd4_encode_dirent(void *ccdv, const ch
|
|
|
- __be32 nfserr = nfserr_toosmall;
|
|
|
-
|
|
|
- /* In nfsv4, "." and ".." never make it onto the wire.. */
|
|
|
-- if (name && isdotent(name, namlen)) {
|
|
|
-+ if (d_type == DT_WHT || (name && isdotent(name, namlen))) {
|
|
|
- cd->common.err = nfs_ok;
|
|
|
- return 0;
|
|
|
- }
|
|
|
---- a/fs/nfsd/nfsxdr.c
|
|
|
-+++ b/fs/nfsd/nfsxdr.c
|
|
|
-@@ -513,6 +513,10 @@ nfssvc_encode_entry(void *ccdv, const ch
|
|
|
- namlen, name, offset, ino);
|
|
|
- */
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT) {
|
|
|
-+ cd->common.err = nfs_ok;
|
|
|
-+ return 0;
|
|
|
-+ }
|
|
|
- if (offset > ~((u32) 0)) {
|
|
|
- cd->common.err = nfserr_fbig;
|
|
|
- return -EINVAL;
|
|
|
---- a/fs/open.c
|
|
|
-+++ b/fs/open.c
|
|
|
-@@ -30,6 +30,7 @@
|
|
|
- #include <linux/audit.h>
|
|
|
- #include <linux/falloc.h>
|
|
|
- #include <linux/fs_struct.h>
|
|
|
-+#include <linux/union.h>
|
|
|
-
|
|
|
- int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
|
|
- {
|
|
|
-@@ -224,69 +225,69 @@ int do_truncate(struct dentry *dentry, l
|
|
|
- return ret;
|
|
|
- }
|
|
|
-
|
|
|
--static long do_sys_truncate(const char __user *pathname, loff_t length)
|
|
|
-+static int __do_ftruncate(struct file *file, unsigned long length, int small)
|
|
|
- {
|
|
|
-- struct path path;
|
|
|
-- struct inode *inode;
|
|
|
-+ struct inode * inode;
|
|
|
-+ struct dentry *dentry;
|
|
|
- int error;
|
|
|
-
|
|
|
- error = -EINVAL;
|
|
|
-- if (length < 0) /* sorry, but loff_t says... */
|
|
|
-+ if (length < 0)
|
|
|
- goto out;
|
|
|
-+ /* explicitly opened as large or we are on 64-bit box */
|
|
|
-+ if (file->f_flags & O_LARGEFILE)
|
|
|
-+ small = 0;
|
|
|
-
|
|
|
-- error = user_path(pathname, &path);
|
|
|
-- if (error)
|
|
|
-+ dentry = file->f_path.dentry;
|
|
|
-+ inode = dentry->d_inode;
|
|
|
-+ error = -EINVAL;
|
|
|
-+ if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
|
|
|
- goto out;
|
|
|
-- inode = path.dentry->d_inode;
|
|
|
--
|
|
|
-- /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
|
|
|
-- error = -EISDIR;
|
|
|
-- if (S_ISDIR(inode->i_mode))
|
|
|
-- goto dput_and_out;
|
|
|
-
|
|
|
- error = -EINVAL;
|
|
|
-- if (!S_ISREG(inode->i_mode))
|
|
|
-- goto dput_and_out;
|
|
|
--
|
|
|
-- error = mnt_want_write(path.mnt);
|
|
|
-- if (error)
|
|
|
-- goto dput_and_out;
|
|
|
-+ /* Cannot ftruncate over 2^31 bytes without large file support */
|
|
|
-+ if (small && length > MAX_NON_LFS)
|
|
|
-
|
|
|
-- error = inode_permission(inode, MAY_WRITE);
|
|
|
-- if (error)
|
|
|
-- goto mnt_drop_write_and_out;
|
|
|
-+ goto out;
|
|
|
-
|
|
|
- error = -EPERM;
|
|
|
- if (IS_APPEND(inode))
|
|
|
-- goto mnt_drop_write_and_out;
|
|
|
-+ goto out;
|
|
|
-
|
|
|
-- error = get_write_access(inode);
|
|
|
-- if (error)
|
|
|
-- goto mnt_drop_write_and_out;
|
|
|
-+ error = locks_verify_truncate(inode, file, length);
|
|
|
-+ if (!error)
|
|
|
-+ error = security_path_truncate(&file->f_path, length,
|
|
|
-+ ATTR_MTIME|ATTR_CTIME);
|
|
|
-+ if (!error)
|
|
|
-+ /* Already copied up for union, opened with write */
|
|
|
-+ error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
|
|
|
-+out:
|
|
|
-+ return error;
|
|
|
-+}
|
|
|
-
|
|
|
-- /*
|
|
|
-- * Make sure that there are no leases. get_write_access() protects
|
|
|
-- * against the truncate racing with a lease-granting setlease().
|
|
|
-- */
|
|
|
-- error = break_lease(inode, FMODE_WRITE);
|
|
|
-- if (error)
|
|
|
-- goto put_write_and_out;
|
|
|
-+static long do_sys_truncate(const char __user *pathname, loff_t length)
|
|
|
-+{
|
|
|
-+ struct file *file;
|
|
|
-+ char *tmp;
|
|
|
-+ int error;
|
|
|
-
|
|
|
-- error = locks_verify_truncate(inode, NULL, length);
|
|
|
-- if (!error)
|
|
|
-- error = security_path_truncate(&path, length, 0);
|
|
|
-- if (!error) {
|
|
|
-- vfs_dq_init(inode);
|
|
|
-- error = do_truncate(path.dentry, length, 0, NULL);
|
|
|
-- }
|
|
|
-+ error = -EINVAL;
|
|
|
-+ if (length < 0) /* sorry, but loff_t says... */
|
|
|
-+ return error;
|
|
|
-
|
|
|
--put_write_and_out:
|
|
|
-- put_write_access(inode);
|
|
|
--mnt_drop_write_and_out:
|
|
|
-- mnt_drop_write(path.mnt);
|
|
|
--dput_and_out:
|
|
|
-- path_put(&path);
|
|
|
--out:
|
|
|
-+ tmp = getname(pathname);
|
|
|
-+ if (IS_ERR(tmp))
|
|
|
-+ return PTR_ERR(tmp);
|
|
|
-+
|
|
|
-+ file = filp_open(tmp, O_RDWR | O_LARGEFILE, 0);
|
|
|
-+ putname(tmp);
|
|
|
-+
|
|
|
-+ if (IS_ERR(file))
|
|
|
-+ return PTR_ERR(file);
|
|
|
-+
|
|
|
-+ error = __do_ftruncate(file, length, 0);
|
|
|
-+
|
|
|
-+ fput(file);
|
|
|
- return error;
|
|
|
- }
|
|
|
-
|
|
|
-@@ -297,45 +298,16 @@ SYSCALL_DEFINE2(truncate, const char __u
|
|
|
-
|
|
|
- static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
|
|
|
- {
|
|
|
-- struct inode * inode;
|
|
|
-- struct dentry *dentry;
|
|
|
- struct file * file;
|
|
|
- int error;
|
|
|
-
|
|
|
-- error = -EINVAL;
|
|
|
-- if (length < 0)
|
|
|
-- goto out;
|
|
|
- error = -EBADF;
|
|
|
- file = fget(fd);
|
|
|
- if (!file)
|
|
|
- goto out;
|
|
|
-
|
|
|
-- /* explicitly opened as large or we are on 64-bit box */
|
|
|
-- if (file->f_flags & O_LARGEFILE)
|
|
|
-- small = 0;
|
|
|
--
|
|
|
-- dentry = file->f_path.dentry;
|
|
|
-- inode = dentry->d_inode;
|
|
|
-- error = -EINVAL;
|
|
|
-- if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
|
|
|
-- goto out_putf;
|
|
|
--
|
|
|
-- error = -EINVAL;
|
|
|
-- /* Cannot ftruncate over 2^31 bytes without large file support */
|
|
|
-- if (small && length > MAX_NON_LFS)
|
|
|
-- goto out_putf;
|
|
|
-+ error = __do_ftruncate(file, length, small);
|
|
|
-
|
|
|
-- error = -EPERM;
|
|
|
-- if (IS_APPEND(inode))
|
|
|
-- goto out_putf;
|
|
|
--
|
|
|
-- error = locks_verify_truncate(inode, file, length);
|
|
|
-- if (!error)
|
|
|
-- error = security_path_truncate(&file->f_path, length,
|
|
|
-- ATTR_MTIME|ATTR_CTIME);
|
|
|
-- if (!error)
|
|
|
-- error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
|
|
|
--out_putf:
|
|
|
- fput(file);
|
|
|
- out:
|
|
|
- return error;
|
|
|
-@@ -494,7 +466,8 @@ SYSCALL_DEFINE3(faccessat, int, dfd, con
|
|
|
- goto out_path_release;
|
|
|
- }
|
|
|
-
|
|
|
-- res = inode_permission(inode, mode | MAY_ACCESS);
|
|
|
-+ res = union_permission(&path, mode | MAY_ACCESS);
|
|
|
-+
|
|
|
- /* SuS v2 requires we report a read only fs too */
|
|
|
- if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
|
|
|
- goto out_path_release;
|
|
|
-@@ -508,7 +481,8 @@ SYSCALL_DEFINE3(faccessat, int, dfd, con
|
|
|
- * inherently racy and know that the fs may change
|
|
|
- * state before we even see this result.
|
|
|
- */
|
|
|
-- if (__mnt_is_readonly(path.mnt))
|
|
|
-+ if ((!is_unionized(path.dentry, path.mnt) &&
|
|
|
-+ (__mnt_is_readonly(path.mnt))))
|
|
|
- res = -EROFS;
|
|
|
-
|
|
|
- out_path_release:
|
|
|
-@@ -554,20 +528,19 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd
|
|
|
- error = -EBADF;
|
|
|
- file = fget(fd);
|
|
|
- if (!file)
|
|
|
-- goto out;
|
|
|
-+ return error;
|
|
|
-
|
|
|
- inode = file->f_path.dentry->d_inode;
|
|
|
-
|
|
|
- error = -ENOTDIR;
|
|
|
- if (!S_ISDIR(inode->i_mode))
|
|
|
-- goto out_putf;
|
|
|
-+ goto out;
|
|
|
-
|
|
|
- error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
|
|
|
- if (!error)
|
|
|
- set_fs_pwd(current->fs, &file->f_path);
|
|
|
--out_putf:
|
|
|
-- fput(file);
|
|
|
- out:
|
|
|
-+ fput(file);
|
|
|
- return error;
|
|
|
- }
|
|
|
-
|
|
|
---- a/fs/readdir.c
|
|
|
-+++ b/fs/readdir.c
|
|
|
-@@ -16,6 +16,7 @@
|
|
|
- #include <linux/security.h>
|
|
|
- #include <linux/syscalls.h>
|
|
|
- #include <linux/unistd.h>
|
|
|
-+#include <linux/union.h>
|
|
|
-
|
|
|
- #include <asm/uaccess.h>
|
|
|
-
|
|
|
-@@ -36,9 +37,24 @@ int vfs_readdir(struct file *file, filld
|
|
|
-
|
|
|
- res = -ENOENT;
|
|
|
- if (!IS_DEADDIR(inode)) {
|
|
|
-+ /*
|
|
|
-+ * XXX Think harder about locking for
|
|
|
-+ * union_copyup_dir. Currently we lock the topmost
|
|
|
-+ * directory and hold that lock while sequentially
|
|
|
-+ * acquiring and dropping locks for the directories
|
|
|
-+ * below this one in the union stack.
|
|
|
-+ */
|
|
|
-+ if (is_unionized(file->f_path.dentry, file->f_path.mnt) &&
|
|
|
-+ !IS_OPAQUE(inode)) {
|
|
|
-+ res = union_copyup_dir(&file->f_path);
|
|
|
-+ if (res)
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+
|
|
|
- res = file->f_op->readdir(file, buf, filler);
|
|
|
- file_accessed(file);
|
|
|
- }
|
|
|
-+out_unlock:
|
|
|
- mutex_unlock(&inode->i_mutex);
|
|
|
- out:
|
|
|
- return res;
|
|
|
-@@ -77,6 +93,9 @@ static int fillonedir(void * __buf, cons
|
|
|
- struct old_linux_dirent __user * dirent;
|
|
|
- unsigned long d_ino;
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT)
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
- if (buf->result)
|
|
|
- return -EINVAL;
|
|
|
- d_ino = ino;
|
|
|
-@@ -154,6 +173,9 @@ static int filldir(void * __buf, const c
|
|
|
- unsigned long d_ino;
|
|
|
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(long));
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT)
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
- buf->error = -EINVAL; /* only used if we fail.. */
|
|
|
- if (reclen > buf->count)
|
|
|
- return -EINVAL;
|
|
|
-@@ -239,6 +261,9 @@ static int filldir64(void * __buf, const
|
|
|
- struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf;
|
|
|
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(u64));
|
|
|
-
|
|
|
-+ if (d_type == DT_WHT)
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
- buf->error = -EINVAL; /* only used if we fail.. */
|
|
|
- if (reclen > buf->count)
|
|
|
- return -EINVAL;
|
|
|
---- a/fs/super.c
|
|
|
-+++ b/fs/super.c
|
|
|
-@@ -596,6 +596,15 @@ int do_remount_sb(struct super_block *sb
|
|
|
- }
|
|
|
- remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
|
|
|
-
|
|
|
-+ /* If we are remounting read/write, make sure that none of the
|
|
|
-+ users require read-only for correct operation (such as
|
|
|
-+ union mounts). */
|
|
|
-+ if (remount_rw && sb->s_readonly_users) {
|
|
|
-+ printk(KERN_INFO "%s: In use by %d read-only user(s)\n",
|
|
|
-+ sb->s_id, sb->s_readonly_users);
|
|
|
-+ return -EROFS;
|
|
|
-+ }
|
|
|
-+
|
|
|
- if (sb->s_op->remount_fs) {
|
|
|
- retval = sb->s_op->remount_fs(sb, &flags, data);
|
|
|
- if (retval)
|
|
|
-@@ -953,6 +962,11 @@ vfs_kern_mount(struct file_system_type *
|
|
|
- WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
|
|
|
- "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
|
|
|
-
|
|
|
-+ error = -EROFS;
|
|
|
-+ if (!(flags & MS_RDONLY) &&
|
|
|
-+ (mnt->mnt_sb->s_readonly_users))
|
|
|
-+ goto out_sb;
|
|
|
-+
|
|
|
- mnt->mnt_mountpoint = mnt->mnt_root;
|
|
|
- mnt->mnt_parent = mnt;
|
|
|
- up_write(&mnt->mnt_sb->s_umount);
|
|
|
---- /dev/null
|
|
|
-+++ b/fs/union.c
|
|
|
-@@ -0,0 +1,981 @@
|
|
|
-+/*
|
|
|
-+ * VFS based union mount for Linux
|
|
|
-+ *
|
|
|
-+ * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
|
|
|
-+ * Copyright (C) 2007-2009 Novell Inc.
|
|
|
-+ *
|
|
|
-+ * Author(s): Jan Blunck ([email protected])
|
|
|
-+ * Valerie Aurora <[email protected]>
|
|
|
-+ *
|
|
|
-+ * This program is free software; you can redistribute it and/or modify it
|
|
|
-+ * under the terms of the GNU General Public License as published by the Free
|
|
|
-+ * Software Foundation; either version 2 of the License, or (at your option)
|
|
|
-+ * any later version.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+#include <linux/bootmem.h>
|
|
|
-+#include <linux/init.h>
|
|
|
-+#include <linux/module.h>
|
|
|
-+#include <linux/types.h>
|
|
|
-+#include <linux/hash.h>
|
|
|
-+#include <linux/fs.h>
|
|
|
-+#include <linux/mount.h>
|
|
|
-+#include <linux/fs_struct.h>
|
|
|
-+#include <linux/union.h>
|
|
|
-+#include <linux/namei.h>
|
|
|
-+#include <linux/file.h>
|
|
|
-+#include <linux/mm.h>
|
|
|
-+#include <linux/quotaops.h>
|
|
|
-+#include <linux/dnotify.h>
|
|
|
-+#include <linux/security.h>
|
|
|
-+#include <linux/pipe_fs_i.h>
|
|
|
-+#include <linux/splice.h>
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * This is borrowed from fs/inode.c. The hashtable for lookups. Somebody
|
|
|
-+ * should try to make this good - I've just made it work.
|
|
|
-+ */
|
|
|
-+static unsigned int union_hash_mask __read_mostly;
|
|
|
-+static unsigned int union_hash_shift __read_mostly;
|
|
|
-+static struct hlist_head *union_hashtable __read_mostly;
|
|
|
-+static unsigned int union_rhash_mask __read_mostly;
|
|
|
-+static unsigned int union_rhash_shift __read_mostly;
|
|
|
-+static struct hlist_head *union_rhashtable __read_mostly;
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Locking Rules:
|
|
|
-+ * - dcache_lock (for union_rlookup() only)
|
|
|
-+ * - union_lock
|
|
|
-+ */
|
|
|
-+DEFINE_SPINLOCK(union_lock);
|
|
|
-+
|
|
|
-+static struct kmem_cache *union_cache __read_mostly;
|
|
|
-+
|
|
|
-+static unsigned long hash(struct dentry *dentry, struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ unsigned long tmp;
|
|
|
-+
|
|
|
-+ tmp = ((unsigned long)mnt * (unsigned long)dentry) ^
|
|
|
-+ (GOLDEN_RATIO_PRIME + (unsigned long)mnt) / L1_CACHE_BYTES;
|
|
|
-+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> union_hash_shift);
|
|
|
-+ return tmp & union_hash_mask;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static __initdata unsigned long union_hash_entries;
|
|
|
-+
|
|
|
-+static int __init set_union_hash_entries(char *str)
|
|
|
-+{
|
|
|
-+ if (!str)
|
|
|
-+ return 0;
|
|
|
-+ union_hash_entries = simple_strtoul(str, &str, 0);
|
|
|
-+ return 1;
|
|
|
-+}
|
|
|
-+
|
|
|
-+__setup("union_hash_entries=", set_union_hash_entries);
|
|
|
-+
|
|
|
-+static int __init init_union(void)
|
|
|
-+{
|
|
|
-+ int loop;
|
|
|
-+
|
|
|
-+ union_cache = KMEM_CACHE(union_mount, SLAB_PANIC | SLAB_MEM_SPREAD);
|
|
|
-+ union_hashtable = alloc_large_system_hash("Union-cache",
|
|
|
-+ sizeof(struct hlist_head),
|
|
|
-+ union_hash_entries,
|
|
|
-+ 14,
|
|
|
-+ 0,
|
|
|
-+ &union_hash_shift,
|
|
|
-+ &union_hash_mask,
|
|
|
-+ 0);
|
|
|
-+
|
|
|
-+ for (loop = 0; loop < (1 << union_hash_shift); loop++)
|
|
|
-+ INIT_HLIST_HEAD(&union_hashtable[loop]);
|
|
|
-+
|
|
|
-+
|
|
|
-+ union_rhashtable = alloc_large_system_hash("rUnion-cache",
|
|
|
-+ sizeof(struct hlist_head),
|
|
|
-+ union_hash_entries,
|
|
|
-+ 14,
|
|
|
-+ 0,
|
|
|
-+ &union_rhash_shift,
|
|
|
-+ &union_rhash_mask,
|
|
|
-+ 0);
|
|
|
-+
|
|
|
-+ for (loop = 0; loop < (1 << union_rhash_shift); loop++)
|
|
|
-+ INIT_HLIST_HEAD(&union_rhashtable[loop]);
|
|
|
-+
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+fs_initcall(init_union);
|
|
|
-+
|
|
|
-+struct union_mount *union_alloc(struct dentry *this, struct vfsmount *this_mnt,
|
|
|
-+ struct dentry *next, struct vfsmount *next_mnt)
|
|
|
-+{
|
|
|
-+ struct union_mount *um;
|
|
|
-+
|
|
|
-+ BUG_ON(!S_ISDIR(this->d_inode->i_mode));
|
|
|
-+ BUG_ON(!S_ISDIR(next->d_inode->i_mode));
|
|
|
-+
|
|
|
-+ um = kmem_cache_alloc(union_cache, GFP_ATOMIC);
|
|
|
-+ if (!um)
|
|
|
-+ return NULL;
|
|
|
-+
|
|
|
-+ atomic_set(&um->u_count, 1);
|
|
|
-+ INIT_LIST_HEAD(&um->u_unions);
|
|
|
-+ INIT_LIST_HEAD(&um->u_list);
|
|
|
-+ INIT_HLIST_NODE(&um->u_hash);
|
|
|
-+ INIT_HLIST_NODE(&um->u_rhash);
|
|
|
-+
|
|
|
-+ um->u_this.mnt = this_mnt;
|
|
|
-+ um->u_this.dentry = this;
|
|
|
-+ um->u_next.mnt = mntget(next_mnt);
|
|
|
-+ um->u_next.dentry = dget(next);
|
|
|
-+
|
|
|
-+ return um;
|
|
|
-+}
|
|
|
-+
|
|
|
-+struct union_mount *union_get(struct union_mount *um)
|
|
|
-+{
|
|
|
-+ BUG_ON(!atomic_read(&um->u_count));
|
|
|
-+ atomic_inc(&um->u_count);
|
|
|
-+ return um;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static int __union_put(struct union_mount *um)
|
|
|
-+{
|
|
|
-+ if (!atomic_dec_and_test(&um->u_count))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ BUG_ON(!hlist_unhashed(&um->u_hash));
|
|
|
-+ BUG_ON(!hlist_unhashed(&um->u_rhash));
|
|
|
-+
|
|
|
-+ kmem_cache_free(union_cache, um);
|
|
|
-+ return 1;
|
|
|
-+}
|
|
|
-+
|
|
|
-+void union_put(struct union_mount *um)
|
|
|
-+{
|
|
|
-+ struct path tmp = um->u_next;
|
|
|
-+
|
|
|
-+ if (__union_put(um))
|
|
|
-+ path_put(&tmp);
|
|
|
-+}
|
|
|
-+
|
|
|
-+static void __union_hash(struct union_mount *um)
|
|
|
-+{
|
|
|
-+ hlist_add_head(&um->u_hash, union_hashtable +
|
|
|
-+ hash(um->u_this.dentry, um->u_this.mnt));
|
|
|
-+ hlist_add_head(&um->u_rhash, union_rhashtable +
|
|
|
-+ hash(um->u_next.dentry, um->u_next.mnt));
|
|
|
-+}
|
|
|
-+
|
|
|
-+static void __union_unhash(struct union_mount *um)
|
|
|
-+{
|
|
|
-+ hlist_del_init(&um->u_hash);
|
|
|
-+ hlist_del_init(&um->u_rhash);
|
|
|
-+}
|
|
|
-+
|
|
|
-+struct union_mount *union_lookup(struct dentry *dentry, struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ struct hlist_head *head = union_hashtable + hash(dentry, mnt);
|
|
|
-+ struct hlist_node *node;
|
|
|
-+ struct union_mount *um;
|
|
|
-+
|
|
|
-+ hlist_for_each_entry(um, node, head, u_hash) {
|
|
|
-+ if ((um->u_this.dentry == dentry) &&
|
|
|
-+ (um->u_this.mnt == mnt))
|
|
|
-+ return um;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ return NULL;
|
|
|
-+}
|
|
|
-+
|
|
|
-+struct union_mount *union_rlookup(struct dentry *dentry, struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ struct hlist_head *head = union_rhashtable + hash(dentry, mnt);
|
|
|
-+ struct hlist_node *node;
|
|
|
-+ struct union_mount *um;
|
|
|
-+
|
|
|
-+ hlist_for_each_entry(um, node, head, u_rhash) {
|
|
|
-+ if ((um->u_next.dentry == dentry) &&
|
|
|
-+ (um->u_next.mnt == mnt))
|
|
|
-+ return um;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ return NULL;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * is_unionized - check if a dentry lives on a union mounted file system
|
|
|
-+ *
|
|
|
-+ * This tests if a dentry is living on an union mounted file system by walking
|
|
|
-+ * the file system hierarchy.
|
|
|
-+ */
|
|
|
-+int is_unionized(struct dentry *dentry, struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ struct path this = { .mnt = mntget(mnt),
|
|
|
-+ .dentry = dget(dentry) };
|
|
|
-+ struct vfsmount *tmp;
|
|
|
-+
|
|
|
-+ do {
|
|
|
-+ /* check if there is an union mounted on top of us */
|
|
|
-+ spin_lock(&vfsmount_lock);
|
|
|
-+ list_for_each_entry(tmp, &this.mnt->mnt_mounts, mnt_child) {
|
|
|
-+ if (!(tmp->mnt_flags & MNT_UNION))
|
|
|
-+ continue;
|
|
|
-+ /* Isn't this a bug? */
|
|
|
-+ if (this.dentry->d_sb != tmp->mnt_mountpoint->d_sb)
|
|
|
-+ continue;
|
|
|
-+ if (is_subdir(this.dentry, tmp->mnt_mountpoint)) {
|
|
|
-+ spin_unlock(&vfsmount_lock);
|
|
|
-+ path_put(&this);
|
|
|
-+ return 1;
|
|
|
-+ }
|
|
|
-+ }
|
|
|
-+ spin_unlock(&vfsmount_lock);
|
|
|
-+
|
|
|
-+ /* check our mountpoint next */
|
|
|
-+ tmp = mntget(this.mnt->mnt_parent);
|
|
|
-+ dput(this.dentry);
|
|
|
-+ this.dentry = dget(this.mnt->mnt_mountpoint);
|
|
|
-+ mntput(this.mnt);
|
|
|
-+ this.mnt = tmp;
|
|
|
-+ } while (this.mnt != this.mnt->mnt_parent);
|
|
|
-+
|
|
|
-+ path_put(&this);
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+int append_to_union(struct vfsmount *mnt, struct dentry *dentry,
|
|
|
-+ struct vfsmount *dest_mnt, struct dentry *dest_dentry)
|
|
|
-+{
|
|
|
-+ struct union_mount *this, *um;
|
|
|
-+
|
|
|
-+ BUG_ON(!IS_MNT_UNION(mnt));
|
|
|
-+
|
|
|
-+ this = union_alloc(dentry, mnt, dest_dentry, dest_mnt);
|
|
|
-+ if (!this)
|
|
|
-+ return -ENOMEM;
|
|
|
-+
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ um = union_lookup(dentry, mnt);
|
|
|
-+ if (um) {
|
|
|
-+ BUG_ON((um->u_next.dentry != dest_dentry) ||
|
|
|
-+ (um->u_next.mnt != dest_mnt));
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ union_put(this);
|
|
|
-+ return 0;
|
|
|
-+ }
|
|
|
-+ list_add(&this->u_list, &mnt->mnt_unions);
|
|
|
-+ list_add(&this->u_unions, &dentry->d_unions);
|
|
|
-+ dest_dentry->d_unionized++;
|
|
|
-+ __union_hash(this);
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * follow_union_down - follow the union stack one layer down
|
|
|
-+ *
|
|
|
-+ * This is called to traverse the union stack from one layer to the next
|
|
|
-+ * overlayed one. follow_union_down() is called by various lookup functions
|
|
|
-+ * that are aware of union mounts.
|
|
|
-+ *
|
|
|
-+ * Returns non-zero if followed to the next layer, zero otherwise.
|
|
|
-+ */
|
|
|
-+int follow_union_down(struct path *path)
|
|
|
-+{
|
|
|
-+ struct union_mount *um;
|
|
|
-+
|
|
|
-+ if (!IS_MNT_UNION(path->mnt))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ um = union_lookup(path->dentry, path->mnt);
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ if (um) {
|
|
|
-+ path_get(&um->u_next);
|
|
|
-+ dput(path->dentry);
|
|
|
-+ path->dentry = um->u_next.dentry;
|
|
|
-+ mntput(path->mnt);
|
|
|
-+ path->mnt = um->u_next.mnt;
|
|
|
-+ return 1;
|
|
|
-+ }
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * follow_union_mount - follow the union stack to the topmost layer
|
|
|
-+ *
|
|
|
-+ * This is called to traverse the union stack to the topmost layer. This is
|
|
|
-+ * necessary for following parent pointers in an union mount.
|
|
|
-+ *
|
|
|
-+ * Returns none zero if followed to the topmost layer, zero otherwise.
|
|
|
-+ */
|
|
|
-+int follow_union_mount(struct path *path)
|
|
|
-+{
|
|
|
-+ struct union_mount *um;
|
|
|
-+ int res = 0;
|
|
|
-+
|
|
|
-+ while (IS_UNION(path->dentry)) {
|
|
|
-+ spin_lock(&dcache_lock);
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ um = union_rlookup(path->dentry, path->mnt);
|
|
|
-+ if (um)
|
|
|
-+ path_get(&um->u_this);
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ spin_unlock(&dcache_lock);
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Q: Aaargh, how do I validate the topmost dentry pointer?
|
|
|
-+ * A: Eeeeasy! We took the dcache_lock and union_lock. Since
|
|
|
-+ * this protects from any dput'ng going on, we know that the
|
|
|
-+ * dentry is valid since the union is unhashed under
|
|
|
-+ * dcache_lock too.
|
|
|
-+ */
|
|
|
-+ if (!um)
|
|
|
-+ break;
|
|
|
-+ dput(path->dentry);
|
|
|
-+ path->dentry = um->u_this.dentry;
|
|
|
-+ mntput(path->mnt);
|
|
|
-+ path->mnt = um->u_this.mnt;
|
|
|
-+ res = 1;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ return res;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Union mount copyup support
|
|
|
-+ */
|
|
|
-+
|
|
|
-+extern int hash_lookup_union(struct nameidata *, struct qstr *, struct path *);
|
|
|
-+extern void follow_mount(struct path *);
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * union_relookup_topmost - lookup and create the topmost path to dentry
|
|
|
-+ * @nd: pointer to nameidata
|
|
|
-+ * @flags: lookup flags
|
|
|
-+ */
|
|
|
-+static int union_relookup_topmost(struct nameidata *nd, int flags)
|
|
|
-+{
|
|
|
-+ int err;
|
|
|
-+ char *kbuf, *name;
|
|
|
-+ struct nameidata this;
|
|
|
-+
|
|
|
-+ kbuf = (char *)__get_free_page(GFP_KERNEL);
|
|
|
-+ if (!kbuf)
|
|
|
-+ return -ENOMEM;
|
|
|
-+
|
|
|
-+ name = d_path(&nd->path, kbuf, PAGE_SIZE);
|
|
|
-+ err = PTR_ERR(name);
|
|
|
-+ if (IS_ERR(name))
|
|
|
-+ goto free_page;
|
|
|
-+
|
|
|
-+ err = path_lookup(name, flags|LOOKUP_CREATE|LOOKUP_TOPMOST, &this);
|
|
|
-+ if (err)
|
|
|
-+ goto free_page;
|
|
|
-+
|
|
|
-+ path_put(&nd->path);
|
|
|
-+ nd->path.dentry = this.path.dentry;
|
|
|
-+ nd->path.mnt = this.path.mnt;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * the nd->flags should be unchanged
|
|
|
-+ */
|
|
|
-+ BUG_ON(this.um_flags & LAST_LOWLEVEL);
|
|
|
-+ nd->um_flags &= ~LAST_LOWLEVEL;
|
|
|
-+ free_page:
|
|
|
-+ free_page((unsigned long)kbuf);
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static void __update_fs_pwd(struct path *path, struct dentry *dentry,
|
|
|
-+ struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ struct path old = { NULL, NULL };
|
|
|
-+
|
|
|
-+ write_lock(¤t->fs->lock);
|
|
|
-+ if (current->fs->pwd.dentry == path->dentry) {
|
|
|
-+ old = current->fs->pwd;
|
|
|
-+ path_get(¤t->fs->pwd);
|
|
|
-+ }
|
|
|
-+ write_unlock(¤t->fs->lock);
|
|
|
-+
|
|
|
-+ if (old.dentry)
|
|
|
-+ path_put(&old);
|
|
|
-+
|
|
|
-+ return;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * union_permission - check for access rights to a given inode
|
|
|
-+ * @inode: inode to check permission on
|
|
|
-+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
|
|
|
-+ *
|
|
|
-+ * In a union mount, the top layer is always read-write and the bottom
|
|
|
-+ * is always read-only. Ignore the read-only flag on the lower fs.
|
|
|
-+ *
|
|
|
-+ * Only need for certain activities, like checking to see if write
|
|
|
-+ * access is ok.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+int union_permission(struct path *path, int mask)
|
|
|
-+{
|
|
|
-+ struct inode *inode = path->dentry->d_inode;
|
|
|
-+
|
|
|
-+ if (!is_unionized(path->dentry, path->mnt))
|
|
|
-+ return inode_permission(inode, mask);
|
|
|
-+
|
|
|
-+ /* Tell __inode_permission to ignore MS_RDONLY */
|
|
|
-+ return __inode_permission(inode, mask, 0);
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * union_create_topmost - create the topmost path component
|
|
|
-+ * @nd: pointer to nameidata of the base directory
|
|
|
-+ * @name: pointer to file name
|
|
|
-+ * @path: pointer to path of the overlaid file
|
|
|
-+ *
|
|
|
-+ * This is called by __link_path_walk() to create the directories on a path
|
|
|
-+ * when it is called with LOOKUP_TOPMOST.
|
|
|
-+ */
|
|
|
-+struct dentry *union_create_topmost(struct nameidata *nd, struct qstr *name,
|
|
|
-+ struct path *path)
|
|
|
-+{
|
|
|
-+ struct dentry *dentry, *parent = nd->path.dentry;
|
|
|
-+ int res, mode = path->dentry->d_inode->i_mode;
|
|
|
-+
|
|
|
-+ if (parent->d_sb == path->dentry->d_sb)
|
|
|
-+ return ERR_PTR(-EEXIST);
|
|
|
-+
|
|
|
-+ mutex_lock(&parent->d_inode->i_mutex);
|
|
|
-+ dentry = lookup_one_len(name->name, nd->path.dentry, name->len);
|
|
|
-+ if (IS_ERR(dentry))
|
|
|
-+ goto out_unlock;
|
|
|
-+
|
|
|
-+ switch (mode & S_IFMT) {
|
|
|
-+ case S_IFREG:
|
|
|
-+ /*
|
|
|
-+ * FIXME: Does this make any sense in this case?
|
|
|
-+ * Special case - lookup gave negative, but... we had foo/bar/
|
|
|
-+ * From the vfs_mknod() POV we just have a negative dentry -
|
|
|
-+ * all is fine. Let's be bastards - you had / on the end,you've
|
|
|
-+ * been asking for (non-existent) directory. -ENOENT for you.
|
|
|
-+ */
|
|
|
-+ if (name->name[name->len] && !dentry->d_inode) {
|
|
|
-+ dput(dentry);
|
|
|
-+ dentry = ERR_PTR(-ENOENT);
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ res = vfs_create(parent->d_inode, dentry, mode, nd);
|
|
|
-+ if (res) {
|
|
|
-+ dput(dentry);
|
|
|
-+ dentry = ERR_PTR(res);
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+ break;
|
|
|
-+ case S_IFDIR:
|
|
|
-+ res = vfs_mkdir(parent->d_inode, dentry, mode);
|
|
|
-+ if (res) {
|
|
|
-+ dput(dentry);
|
|
|
-+ dentry = ERR_PTR(res);
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ res = append_to_union(nd->path.mnt, dentry, path->mnt,
|
|
|
-+ path->dentry);
|
|
|
-+ if (res) {
|
|
|
-+ dput(dentry);
|
|
|
-+ dentry = ERR_PTR(res);
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+ break;
|
|
|
-+ default:
|
|
|
-+ dput(dentry);
|
|
|
-+ dentry = ERR_PTR(-EINVAL);
|
|
|
-+ goto out_unlock;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* FIXME: Really necessary ??? */
|
|
|
-+/* __update_fs_pwd(path, dentry, nd->path.mnt); */
|
|
|
-+
|
|
|
-+ out_unlock:
|
|
|
-+ mutex_unlock(&parent->d_inode->i_mutex);
|
|
|
-+ return dentry;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static int union_copy_file(struct dentry *old_dentry, struct vfsmount *old_mnt,
|
|
|
-+ struct dentry *new_dentry, struct vfsmount *new_mnt)
|
|
|
-+{
|
|
|
-+ int ret;
|
|
|
-+ size_t size;
|
|
|
-+ loff_t offset;
|
|
|
-+ struct file *old_file, *new_file;
|
|
|
-+ const struct cred *cred = current_cred();
|
|
|
-+
|
|
|
-+ dget(old_dentry);
|
|
|
-+ mntget(old_mnt);
|
|
|
-+ old_file = dentry_open(old_dentry, old_mnt, O_RDONLY, cred);
|
|
|
-+ if (IS_ERR(old_file))
|
|
|
-+ return PTR_ERR(old_file);
|
|
|
-+
|
|
|
-+ dget(new_dentry);
|
|
|
-+ mntget(new_mnt);
|
|
|
-+ new_file = dentry_open(new_dentry, new_mnt, O_WRONLY, cred);
|
|
|
-+ ret = PTR_ERR(new_file);
|
|
|
-+ if (IS_ERR(new_file))
|
|
|
-+ goto fput_old;
|
|
|
-+
|
|
|
-+ /* XXX be smart by using a length param, which indicates max
|
|
|
-+ * data we'll want (e.g., we are about to truncate to 0 or 10
|
|
|
-+ * bytes or something */
|
|
|
-+ size = i_size_read(old_file->f_path.dentry->d_inode);
|
|
|
-+ if (((size_t)size != size) || ((ssize_t)size != size)) {
|
|
|
-+ ret = -EFBIG;
|
|
|
-+ goto fput_new;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ offset = 0;
|
|
|
-+ ret = do_splice_direct(old_file, &offset, new_file, size,
|
|
|
-+ SPLICE_F_MOVE);
|
|
|
-+ if (ret >= 0)
|
|
|
-+ ret = 0;
|
|
|
-+ fput_new:
|
|
|
-+ fput(new_file);
|
|
|
-+ fput_old:
|
|
|
-+ fput(old_file);
|
|
|
-+ return ret;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * __union_copyup - copy a file to the topmost directory
|
|
|
-+ * @old: pointer to path of the old file name
|
|
|
-+ * @new_nd: pointer to nameidata of the topmost directory
|
|
|
-+ * @new: pointer to path of the new file name
|
|
|
-+ *
|
|
|
-+ * The topmost directory @new_nd must already be locked. Creates the topmost
|
|
|
-+ * file if it doesn't exist yet.
|
|
|
-+ */
|
|
|
-+int __union_copyup(struct path *old, struct nameidata *new_nd,
|
|
|
-+ struct path *new)
|
|
|
-+{
|
|
|
-+ struct dentry *dentry;
|
|
|
-+ int error;
|
|
|
-+
|
|
|
-+ /* Maybe this should be -EINVAL */
|
|
|
-+ if (S_ISDIR(old->dentry->d_inode->i_mode))
|
|
|
-+ return -EISDIR;
|
|
|
-+
|
|
|
-+ if (new_nd->path.dentry != new->dentry->d_parent) {
|
|
|
-+ mutex_lock(&new_nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ dentry = lookup_one_len(new->dentry->d_name.name,
|
|
|
-+ new_nd->path.dentry,
|
|
|
-+ new->dentry->d_name.len);
|
|
|
-+ mutex_unlock(&new_nd->path.dentry->d_inode->i_mutex);
|
|
|
-+ if (IS_ERR(dentry))
|
|
|
-+ return PTR_ERR(dentry);
|
|
|
-+ error = -EEXIST;
|
|
|
-+ if (dentry->d_inode)
|
|
|
-+ goto out_dput;
|
|
|
-+ } else
|
|
|
-+ dentry = dget(new->dentry);
|
|
|
-+
|
|
|
-+ if (!dentry->d_inode) {
|
|
|
-+ error = vfs_create(new_nd->path.dentry->d_inode, dentry,
|
|
|
-+ old->dentry->d_inode->i_mode, new_nd);
|
|
|
-+ if (error)
|
|
|
-+ goto out_dput;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ BUG_ON(!S_ISREG(old->dentry->d_inode->i_mode));
|
|
|
-+ error = union_copy_file(old->dentry, old->mnt, dentry,
|
|
|
-+ new_nd->path.mnt);
|
|
|
-+ if (error) {
|
|
|
-+ /* FIXME: are there return value we should not
|
|
|
-+ * BUG() on ? */
|
|
|
-+ BUG_ON(vfs_unlink(new_nd->path.dentry->d_inode,
|
|
|
-+ dentry));
|
|
|
-+ goto out_dput;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ dput(new->dentry);
|
|
|
-+ new->dentry = dentry;
|
|
|
-+ if (new->mnt != new_nd->path.mnt)
|
|
|
-+ mntput(new->mnt);
|
|
|
-+ new->mnt = new_nd->path.mnt;
|
|
|
-+ return error;
|
|
|
-+
|
|
|
-+out_dput:
|
|
|
-+ dput(dentry);
|
|
|
-+ return error;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * union_copyup - copy a file to the topmost layer of the union stack
|
|
|
-+ * @nd: nameidata pointer to the file
|
|
|
-+ * @flags: flags given to open_namei
|
|
|
-+ */
|
|
|
-+int union_copyup(struct nameidata *nd, int flags /* XXX not used */)
|
|
|
-+{
|
|
|
-+ struct qstr this;
|
|
|
-+ char *name;
|
|
|
-+ struct dentry *dir;
|
|
|
-+ struct path path;
|
|
|
-+ int err;
|
|
|
-+
|
|
|
-+ if (!is_unionized(nd->path.dentry, nd->path.mnt))
|
|
|
-+ return 0;
|
|
|
-+ if (!S_ISREG(nd->path.dentry->d_inode->i_mode))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ /* safe the name for hash_lookup_union() */
|
|
|
-+ this.len = nd->path.dentry->d_name.len;
|
|
|
-+ this.hash = nd->path.dentry->d_name.hash;
|
|
|
-+ name = kmalloc(this.len + 1, GFP_KERNEL);
|
|
|
-+ if (!name)
|
|
|
-+ return -ENOMEM;
|
|
|
-+ this.name = name;
|
|
|
-+ memcpy(name, nd->path.dentry->d_name.name, nd->path.dentry->d_name.len);
|
|
|
-+ name[this.len] = 0;
|
|
|
-+
|
|
|
-+ err = union_relookup_topmost(nd, nd->flags|LOOKUP_PARENT);
|
|
|
-+ if (err) {
|
|
|
-+ kfree(name);
|
|
|
-+ return err;
|
|
|
-+ }
|
|
|
-+ nd->flags &= ~LOOKUP_PARENT;
|
|
|
-+
|
|
|
-+ dir = nd->path.dentry;
|
|
|
-+ mutex_lock(&dir->d_inode->i_mutex);
|
|
|
-+ err = hash_lookup_union(nd, &this, &path);
|
|
|
-+ mutex_unlock(&dir->d_inode->i_mutex);
|
|
|
-+ kfree(name);
|
|
|
-+ if (err)
|
|
|
-+ return err;
|
|
|
-+
|
|
|
-+ err = -ENOENT;
|
|
|
-+ if (!path.dentry->d_inode)
|
|
|
-+ goto exit_dput;
|
|
|
-+
|
|
|
-+ /* Necessary?! I guess not ... */
|
|
|
-+ follow_mount(&path);
|
|
|
-+
|
|
|
-+ err = -ENOENT;
|
|
|
-+ if (!path.dentry->d_inode)
|
|
|
-+ goto exit_dput;
|
|
|
-+
|
|
|
-+ err = -EISDIR;
|
|
|
-+ if (!S_ISREG(path.dentry->d_inode->i_mode))
|
|
|
-+ goto exit_dput;
|
|
|
-+
|
|
|
-+ if (path.dentry->d_parent != nd->path.dentry) {
|
|
|
-+ err = __union_copyup(&path, nd, &path);
|
|
|
-+ if (err)
|
|
|
-+ goto exit_dput;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ dput(nd->path.dentry);
|
|
|
-+ if (nd->path.mnt != path.mnt)
|
|
|
-+ mntput(nd->path.mnt);
|
|
|
-+ nd->path = path;
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+exit_dput:
|
|
|
-+ dput(path.dentry);
|
|
|
-+ if (path.mnt != nd->path.mnt)
|
|
|
-+ mntput(path.mnt);
|
|
|
-+ return err;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * This must be called when unhashing a dentry. This is called with dcache_lock
|
|
|
-+ * and unhashes all unions this dentry is in.
|
|
|
-+ */
|
|
|
-+void __d_drop_unions(struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ struct union_mount *this, *next;
|
|
|
-+
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ list_for_each_entry_safe(this, next, &dentry->d_unions, u_unions)
|
|
|
-+ __union_unhash(this);
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+}
|
|
|
-+EXPORT_SYMBOL_GPL(__d_drop_unions);
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * This must be called after __d_drop_unions() without holding any locks.
|
|
|
-+ * Note: The dentry might still be reachable via a lookup but at that time it
|
|
|
-+ * already a negative dentry. Otherwise it would be unhashed. The union_mount
|
|
|
-+ * structure itself is still reachable through mnt->mnt_unions (which we
|
|
|
-+ * protect against with union_lock).
|
|
|
-+ */
|
|
|
-+void shrink_d_unions(struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ struct union_mount *this, *next;
|
|
|
-+
|
|
|
-+repeat:
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ list_for_each_entry_safe(this, next, &dentry->d_unions, u_unions) {
|
|
|
-+ BUG_ON(!hlist_unhashed(&this->u_hash));
|
|
|
-+ BUG_ON(!hlist_unhashed(&this->u_rhash));
|
|
|
-+ list_del(&this->u_list);
|
|
|
-+ list_del(&this->u_unions);
|
|
|
-+ this->u_next.dentry->d_unionized--;
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ union_put(this);
|
|
|
-+ goto repeat;
|
|
|
-+ }
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+}
|
|
|
-+
|
|
|
-+extern void __dput(struct dentry *, struct list_head *, int);
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * This is the special variant for use in dput() only.
|
|
|
-+ */
|
|
|
-+void __shrink_d_unions(struct dentry *dentry, struct list_head *list)
|
|
|
-+{
|
|
|
-+ struct union_mount *this, *next;
|
|
|
-+
|
|
|
-+ BUG_ON(!d_unhashed(dentry));
|
|
|
-+
|
|
|
-+repeat:
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ list_for_each_entry_safe(this, next, &dentry->d_unions, u_unions) {
|
|
|
-+ struct dentry *n_dentry = this->u_next.dentry;
|
|
|
-+ struct vfsmount *n_mnt = this->u_next.mnt;
|
|
|
-+
|
|
|
-+ BUG_ON(!hlist_unhashed(&this->u_hash));
|
|
|
-+ BUG_ON(!hlist_unhashed(&this->u_rhash));
|
|
|
-+ list_del(&this->u_list);
|
|
|
-+ list_del(&this->u_unions);
|
|
|
-+ this->u_next.dentry->d_unionized--;
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ if (__union_put(this)) {
|
|
|
-+ __dput(n_dentry, list, 0);
|
|
|
-+ mntput(n_mnt);
|
|
|
-+ }
|
|
|
-+ goto repeat;
|
|
|
-+ }
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+}
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Remove all union_mounts structures belonging to this vfsmount from the
|
|
|
-+ * union lookup hashtable and so on ...
|
|
|
-+ */
|
|
|
-+void shrink_mnt_unions(struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ struct union_mount *this, *next;
|
|
|
-+
|
|
|
-+repeat:
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ list_for_each_entry_safe(this, next, &mnt->mnt_unions, u_list) {
|
|
|
-+ if (this->u_this.dentry == mnt->mnt_root)
|
|
|
-+ continue;
|
|
|
-+ __union_unhash(this);
|
|
|
-+ list_del(&this->u_list);
|
|
|
-+ list_del(&this->u_unions);
|
|
|
-+ this->u_next.dentry->d_unionized--;
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ union_put(this);
|
|
|
-+ goto repeat;
|
|
|
-+ }
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+}
|
|
|
-+
|
|
|
-+int attach_mnt_union(struct vfsmount *mnt, struct vfsmount *dest_mnt,
|
|
|
-+ struct dentry *dest_dentry)
|
|
|
-+{
|
|
|
-+ if (!IS_MNT_UNION(mnt))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ return append_to_union(mnt, mnt->mnt_root, dest_mnt, dest_dentry);
|
|
|
-+}
|
|
|
-+
|
|
|
-+void detach_mnt_union(struct vfsmount *mnt)
|
|
|
-+{
|
|
|
-+ struct union_mount *um;
|
|
|
-+
|
|
|
-+ if (!IS_MNT_UNION(mnt))
|
|
|
-+ return;
|
|
|
-+
|
|
|
-+ shrink_mnt_unions(mnt);
|
|
|
-+
|
|
|
-+ spin_lock(&union_lock);
|
|
|
-+ um = union_lookup(mnt->mnt_root, mnt);
|
|
|
-+ __union_unhash(um);
|
|
|
-+ list_del(&um->u_list);
|
|
|
-+ list_del(&um->u_unions);
|
|
|
-+ um->u_next.dentry->d_unionized--;
|
|
|
-+ spin_unlock(&union_lock);
|
|
|
-+ union_put(um);
|
|
|
-+ return;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * union_copyup_dir_one - copy up a single directory entry
|
|
|
-+ *
|
|
|
-+ * Individual directory entry copyup function for union_copyup_dir.
|
|
|
-+ * We get the entries from higher level layers first.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+static int union_copyup_dir_one(void *buf, const char *name, int namlen,
|
|
|
-+ loff_t offset, u64 ino, unsigned int d_type)
|
|
|
-+{
|
|
|
-+ struct dentry *topmost_dentry = (struct dentry *) buf;
|
|
|
-+ struct dentry *dentry;
|
|
|
-+ int err = 0;
|
|
|
-+
|
|
|
-+ switch (namlen) {
|
|
|
-+ case 2:
|
|
|
-+ if (name[1] != '.')
|
|
|
-+ break;
|
|
|
-+ case 1:
|
|
|
-+ if (name[0] != '.')
|
|
|
-+ break;
|
|
|
-+ return 0;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /* Lookup this entry in the topmost directory */
|
|
|
-+ dentry = lookup_one_len(name, topmost_dentry, namlen);
|
|
|
-+
|
|
|
-+ if (IS_ERR(dentry)) {
|
|
|
-+ printk(KERN_INFO "error looking up %s\n", dentry->d_name.name);
|
|
|
-+ goto out;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * If the entry already exists, one of the following is true:
|
|
|
-+ * it was already copied up (due to an earlier lookup), an
|
|
|
-+ * entry with the same name already exists on the topmost file
|
|
|
-+ * system, it is a whiteout, or it is a fallthru. In each
|
|
|
-+ * case, the top level entry masks any entries from lower file
|
|
|
-+ * systems, so don't copy up this entry.
|
|
|
-+ */
|
|
|
-+ if (dentry->d_inode || d_is_whiteout(dentry) ||
|
|
|
-+ d_is_fallthru(dentry)) {
|
|
|
-+ printk(KERN_INFO "skipping copy of %s\n", dentry->d_name.name);
|
|
|
-+ goto out_dput;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * If the entry doesn't exist, create a fallthru entry in the
|
|
|
-+ * topmost file system. All possible directory types are
|
|
|
-+ * used, so each file system must implement its own way of
|
|
|
-+ * storing a fallthru entry.
|
|
|
-+ */
|
|
|
-+ printk(KERN_INFO "creating fallthru for %s\n", dentry->d_name.name);
|
|
|
-+ err = topmost_dentry->d_inode->i_op->fallthru(topmost_dentry->d_inode,
|
|
|
-+ dentry);
|
|
|
-+ /* FIXME */
|
|
|
-+ BUG_ON(err);
|
|
|
-+ /*
|
|
|
-+ * At this point, we have a negative dentry marked as fallthru
|
|
|
-+ * in the cache. We could potentially lookup the entry lower
|
|
|
-+ * level file system and turn this into a positive dentry
|
|
|
-+ * right now, but it is not clear that would be a performance
|
|
|
-+ * win and adds more opportunities to fail.
|
|
|
-+ */
|
|
|
-+out_dput:
|
|
|
-+ dput(dentry);
|
|
|
-+out:
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+/**
|
|
|
-+ * union_copyup_dir - copy up low-level directory entries to topmost dir
|
|
|
-+ *
|
|
|
-+ * readdir() is difficult to support on union file systems for two
|
|
|
-+ * reasons: We must eliminate duplicates and apply whiteouts, and we
|
|
|
-+ * must return something in f_pos that lets us restart in the same
|
|
|
-+ * place when we return. Our solution is to, on first readdir() of
|
|
|
-+ * the directory, copy up all visible entries from the low-level file
|
|
|
-+ * systems and mark the entries that refer to low-level file system
|
|
|
-+ * objects as "fallthru" entries.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+int union_copyup_dir(struct path *topmost_path)
|
|
|
-+{
|
|
|
-+ struct dentry *topmost_dentry = topmost_path->dentry;
|
|
|
-+ struct path path = *topmost_path;
|
|
|
-+ int res = 0;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Skip opaque dirs.
|
|
|
-+ */
|
|
|
-+ if (IS_OPAQUE(topmost_dentry->d_inode))
|
|
|
-+ return 0;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Mark this dir opaque to show that we have already copied up
|
|
|
-+ * the lower entries. Only fallthru entries pass through to
|
|
|
-+ * the underlying file system.
|
|
|
-+ *
|
|
|
-+ * XXX Deal with the lower file system changing. This could
|
|
|
-+ * be through running a tool over the top level file system to
|
|
|
-+ * make directories transparent again, or we could check the
|
|
|
-+ * mtime of the underlying directory.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+ topmost_dentry->d_inode->i_flags |= S_OPAQUE;
|
|
|
-+ mark_inode_dirty(topmost_dentry->d_inode);
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Loop through each dir on each level copying up the entries
|
|
|
-+ * to the topmost.
|
|
|
-+ */
|
|
|
-+
|
|
|
-+ /* Don't drop the caller's reference to the topmost path */
|
|
|
-+ path_get(&path);
|
|
|
-+ while (follow_union_down(&path)) {
|
|
|
-+ struct file * ftmp;
|
|
|
-+ struct inode * inode;
|
|
|
-+
|
|
|
-+ /* XXX Permit fallthrus on lower-level? Would need to
|
|
|
-+ * pass in opaque flag to union_copyup_dir_one() and
|
|
|
-+ * only copy up fallthru entries there. We allow
|
|
|
-+ * fallthrus in lower level opaque directories on
|
|
|
-+ * lookup, so for consistency we should do one or the
|
|
|
-+ * other in both places. */
|
|
|
-+ if (IS_OPAQUE(path.dentry->d_inode))
|
|
|
-+ break;
|
|
|
-+
|
|
|
-+ /* dentry_open() doesn't get a path reference itself */
|
|
|
-+ path_get(&path);
|
|
|
-+ ftmp = dentry_open(path.dentry, path.mnt,
|
|
|
-+ O_RDONLY | O_DIRECTORY | O_NOATIME,
|
|
|
-+ current_cred());
|
|
|
-+ if (IS_ERR(ftmp)) {
|
|
|
-+ printk (KERN_ERR "unable to open dir %s for "
|
|
|
-+ "directory copyup: %ld\n",
|
|
|
-+ path.dentry->d_name.name, PTR_ERR(ftmp));
|
|
|
-+ continue;
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ inode = path.dentry->d_inode;
|
|
|
-+ mutex_lock(&inode->i_mutex);
|
|
|
-+
|
|
|
-+ res = -ENOENT;
|
|
|
-+ if (IS_DEADDIR(inode))
|
|
|
-+ goto out_fput;
|
|
|
-+ /*
|
|
|
-+ * Read the whole directory, calling our directory
|
|
|
-+ * entry copyup function on each entry. Pass in the
|
|
|
-+ * topmost dentry as our private data so we can create
|
|
|
-+ * new entries in the topmost directory.
|
|
|
-+ */
|
|
|
-+ res = ftmp->f_op->readdir(ftmp, topmost_dentry,
|
|
|
-+ union_copyup_dir_one);
|
|
|
-+out_fput:
|
|
|
-+ mutex_unlock(&inode->i_mutex);
|
|
|
-+ fput(ftmp);
|
|
|
-+
|
|
|
-+ if (res)
|
|
|
-+ break;
|
|
|
-+ }
|
|
|
-+ path_put(&path);
|
|
|
-+ return res;
|
|
|
-+}
|
|
|
---- a/include/linux/dcache.h
|
|
|
-+++ b/include/linux/dcache.h
|
|
|
-@@ -101,6 +101,15 @@ struct dentry {
|
|
|
- struct dentry *d_parent; /* parent directory */
|
|
|
- struct qstr d_name;
|
|
|
-
|
|
|
-+#ifdef CONFIG_UNION_MOUNT
|
|
|
-+ /*
|
|
|
-+ * The following fields are used by the VFS based union mount
|
|
|
-+ * implementation. Both are protected by union_lock!
|
|
|
-+ */
|
|
|
-+ struct list_head d_unions; /* list of union_mount's */
|
|
|
-+ unsigned int d_unionized; /* unions referencing this dentry */
|
|
|
-+#endif
|
|
|
-+
|
|
|
- struct list_head d_lru; /* LRU list */
|
|
|
- /*
|
|
|
- * d_child and d_rcu can share memory
|
|
|
-@@ -186,6 +195,9 @@ d_iput: no no no yes
|
|
|
-
|
|
|
- #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */
|
|
|
-
|
|
|
-+#define DCACHE_WHITEOUT 0x0100 /* This negative dentry is a whiteout */
|
|
|
-+#define DCACHE_FALLTHRU 0x0200 /* Keep looking in the file system below */
|
|
|
-+
|
|
|
- extern spinlock_t dcache_lock;
|
|
|
- extern seqlock_t rename_lock;
|
|
|
-
|
|
|
-@@ -205,12 +217,20 @@ extern seqlock_t rename_lock;
|
|
|
- * __d_drop requires dentry->d_lock.
|
|
|
- */
|
|
|
-
|
|
|
-+#ifdef CONFIG_UNION_MOUNT
|
|
|
-+extern void __d_drop_unions(struct dentry *);
|
|
|
-+#endif
|
|
|
-+
|
|
|
- static inline void __d_drop(struct dentry *dentry)
|
|
|
- {
|
|
|
- if (!(dentry->d_flags & DCACHE_UNHASHED)) {
|
|
|
- dentry->d_flags |= DCACHE_UNHASHED;
|
|
|
- hlist_del_rcu(&dentry->d_hash);
|
|
|
- }
|
|
|
-+#ifdef CONFIG_UNION_MOUNT
|
|
|
-+ /* remove dentry from the union hashtable */
|
|
|
-+ __d_drop_unions(dentry);
|
|
|
-+#endif
|
|
|
- }
|
|
|
-
|
|
|
- static inline void d_drop(struct dentry *dentry)
|
|
|
-@@ -358,6 +378,16 @@ static inline int d_unlinked(struct dent
|
|
|
- return d_unhashed(dentry) && !IS_ROOT(dentry);
|
|
|
- }
|
|
|
-
|
|
|
-+static inline int d_is_whiteout(struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ return (dentry->d_flags & DCACHE_WHITEOUT);
|
|
|
-+}
|
|
|
-+
|
|
|
-+static inline int d_is_fallthru(struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ return (dentry->d_flags & DCACHE_FALLTHRU);
|
|
|
-+}
|
|
|
-+
|
|
|
- static inline struct dentry *dget_parent(struct dentry *dentry)
|
|
|
- {
|
|
|
- struct dentry *ret;
|
|
|
---- a/include/linux/ext2_fs.h
|
|
|
-+++ b/include/linux/ext2_fs.h
|
|
|
-@@ -189,6 +189,7 @@ struct ext2_group_desc
|
|
|
- #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
|
|
|
- #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
|
|
|
- #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
|
|
|
-+#define EXT2_OPAQUE_FL 0x00040000
|
|
|
- #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
|
|
|
-
|
|
|
- #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
|
|
|
-@@ -503,10 +504,12 @@ struct ext2_super_block {
|
|
|
- #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004
|
|
|
- #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008
|
|
|
- #define EXT2_FEATURE_INCOMPAT_META_BG 0x0010
|
|
|
-+#define EXT2_FEATURE_INCOMPAT_WHITEOUT 0x0020
|
|
|
- #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff
|
|
|
-
|
|
|
- #define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
|
|
|
- #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
|
|
|
-+ EXT2_FEATURE_INCOMPAT_WHITEOUT| \
|
|
|
- EXT2_FEATURE_INCOMPAT_META_BG)
|
|
|
- #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
|
|
|
- EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
|
|
|
-@@ -573,6 +576,8 @@ enum {
|
|
|
- EXT2_FT_FIFO,
|
|
|
- EXT2_FT_SOCK,
|
|
|
- EXT2_FT_SYMLINK,
|
|
|
-+ EXT2_FT_WHT,
|
|
|
-+ EXT2_FT_FALLTHRU,
|
|
|
- EXT2_FT_MAX
|
|
|
- };
|
|
|
-
|
|
|
---- a/include/linux/fs.h
|
|
|
-+++ b/include/linux/fs.h
|
|
|
-@@ -188,6 +188,7 @@ struct inodes_stat_t {
|
|
|
- #define MS_REMOUNT 32 /* Alter flags of a mounted FS */
|
|
|
- #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
|
|
|
- #define MS_DIRSYNC 128 /* Directory modifications are synchronous */
|
|
|
-+#define MS_UNION 256
|
|
|
- #define MS_NOATIME 1024 /* Do not update access times. */
|
|
|
- #define MS_NODIRATIME 2048 /* Do not update directory access times */
|
|
|
- #define MS_BIND 4096
|
|
|
-@@ -205,6 +206,7 @@ struct inodes_stat_t {
|
|
|
- #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
|
|
|
- #define MS_I_VERSION (1<<23) /* Update inode I_version field */
|
|
|
- #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
|
|
|
-+#define MS_WHITEOUT (1<<26) /* fs does support white-out filetype */
|
|
|
- #define MS_ACTIVE (1<<30)
|
|
|
- #define MS_NOUSER (1<<31)
|
|
|
-
|
|
|
-@@ -231,6 +233,7 @@ struct inodes_stat_t {
|
|
|
- #define S_NOCMTIME 128 /* Do not update file c/mtime */
|
|
|
- #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
|
|
|
- #define S_PRIVATE 512 /* Inode is fs-internal */
|
|
|
-+#define S_OPAQUE 1024 /* Directory is opaque */
|
|
|
-
|
|
|
- /*
|
|
|
- * Note that nosuid etc flags are inode-specific: setting some file-system
|
|
|
-@@ -266,6 +269,8 @@ struct inodes_stat_t {
|
|
|
- #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
|
|
|
- #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
|
|
|
-
|
|
|
-+#define IS_OPAQUE(inode) ((inode)->i_flags & S_OPAQUE)
|
|
|
-+
|
|
|
- /* the read-only stuff doesn't really belong here, but any other place is
|
|
|
- probably as bad and I don't want to create yet another include file. */
|
|
|
-
|
|
|
-@@ -1380,6 +1385,11 @@ struct super_block {
|
|
|
- * generic_show_options()
|
|
|
- */
|
|
|
- char *s_options;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Users who require read-only access - e.g., union mounts
|
|
|
-+ */
|
|
|
-+ int s_readonly_users;
|
|
|
- };
|
|
|
-
|
|
|
- extern struct timespec current_fs_time(struct super_block *sb);
|
|
|
-@@ -1517,6 +1527,8 @@ struct inode_operations {
|
|
|
- int (*mkdir) (struct inode *,struct dentry *,int);
|
|
|
- int (*rmdir) (struct inode *,struct dentry *);
|
|
|
- int (*mknod) (struct inode *,struct dentry *,int,dev_t);
|
|
|
-+ int (*whiteout) (struct inode *, struct dentry *, struct dentry *);
|
|
|
-+ int (*fallthru) (struct inode *, struct dentry *);
|
|
|
- int (*rename) (struct inode *, struct dentry *,
|
|
|
- struct inode *, struct dentry *);
|
|
|
- int (*readlink) (struct dentry *, char __user *,int);
|
|
|
-@@ -2108,6 +2120,7 @@ extern void emergency_remount(void);
|
|
|
- extern sector_t bmap(struct inode *, sector_t);
|
|
|
- #endif
|
|
|
- extern int notify_change(struct dentry *, struct iattr *);
|
|
|
-+extern int __inode_permission(struct inode *inode, int mask, int rofs);
|
|
|
- extern int inode_permission(struct inode *, int);
|
|
|
- extern int generic_permission(struct inode *, int,
|
|
|
- int (*check_acl)(struct inode *, int));
|
|
|
-@@ -2135,7 +2148,7 @@ extern void free_write_pipe(struct file
|
|
|
-
|
|
|
- extern struct file *do_filp_open(int dfd, const char *pathname,
|
|
|
- int open_flag, int mode, int acc_mode);
|
|
|
--extern int may_open(struct path *, int, int);
|
|
|
-+extern int may_open(struct nameidata *, int, int);
|
|
|
-
|
|
|
- extern int kernel_read(struct file *, loff_t, char *, unsigned long);
|
|
|
- extern struct file * open_exec(const char *);
|
|
|
---- a/include/linux/mount.h
|
|
|
-+++ b/include/linux/mount.h
|
|
|
-@@ -35,6 +35,7 @@ struct mnt_namespace;
|
|
|
- #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
|
|
|
- #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
|
|
|
- #define MNT_PNODE_MASK 0x3000 /* propagation flag mask */
|
|
|
-+#define MNT_UNION 0x4000 /* if the vfsmount is a union mount */
|
|
|
-
|
|
|
- struct vfsmount {
|
|
|
- struct list_head mnt_hash;
|
|
|
-@@ -53,6 +54,9 @@ struct vfsmount {
|
|
|
- struct list_head mnt_slave_list;/* list of slave mounts */
|
|
|
- struct list_head mnt_slave; /* slave list entry */
|
|
|
- struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */
|
|
|
-+#ifdef CONFIG_UNION_MOUNT
|
|
|
-+ struct list_head mnt_unions; /* list of union_mount structures */
|
|
|
-+#endif
|
|
|
- struct mnt_namespace *mnt_ns; /* containing namespace */
|
|
|
- int mnt_id; /* mount identifier */
|
|
|
- int mnt_group_id; /* peer group identifier */
|
|
|
---- a/include/linux/namei.h
|
|
|
-+++ b/include/linux/namei.h
|
|
|
-@@ -20,6 +20,7 @@ struct nameidata {
|
|
|
- struct qstr last;
|
|
|
- struct path root;
|
|
|
- unsigned int flags;
|
|
|
-+ unsigned int um_flags;
|
|
|
- int last_type;
|
|
|
- unsigned depth;
|
|
|
- char *saved_names[MAX_NESTED_LINKS + 1];
|
|
|
-@@ -35,6 +36,9 @@ struct nameidata {
|
|
|
- */
|
|
|
- enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
|
|
|
-
|
|
|
-+#define LAST_UNION 0x01
|
|
|
-+#define LAST_LOWLEVEL 0x02
|
|
|
-+
|
|
|
- /*
|
|
|
- * The bitmask for a lookup event:
|
|
|
- * - follow links at the end
|
|
|
-@@ -49,6 +53,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
|
|
|
- #define LOOKUP_CONTINUE 4
|
|
|
- #define LOOKUP_PARENT 16
|
|
|
- #define LOOKUP_REVAL 64
|
|
|
-+#define LOOKUP_TOPMOST 128
|
|
|
-+
|
|
|
- /*
|
|
|
- * Intent data
|
|
|
- */
|
|
|
---- /dev/null
|
|
|
-+++ b/include/linux/union.h
|
|
|
-@@ -0,0 +1,84 @@
|
|
|
-+/*
|
|
|
-+ * VFS based union mount for Linux
|
|
|
-+ *
|
|
|
-+ * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
|
|
|
-+ * Copyright (C) 2007 Novell Inc.
|
|
|
-+ * Author(s): Jan Blunck ([email protected])
|
|
|
-+ *
|
|
|
-+ * This program is free software; you can redistribute it and/or modify it
|
|
|
-+ * under the terms of the GNU General Public License as published by the Free
|
|
|
-+ * Software Foundation; either version 2 of the License, or (at your option)
|
|
|
-+ * any later version.
|
|
|
-+ *
|
|
|
-+ */
|
|
|
-+#ifndef __LINUX_UNION_H
|
|
|
-+#define __LINUX_UNION_H
|
|
|
-+#ifdef __KERNEL__
|
|
|
-+
|
|
|
-+#include <linux/list.h>
|
|
|
-+#include <asm/atomic.h>
|
|
|
-+
|
|
|
-+struct dentry;
|
|
|
-+struct vfsmount;
|
|
|
-+
|
|
|
-+#ifdef CONFIG_UNION_MOUNT
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * The new union mount structure.
|
|
|
-+ */
|
|
|
-+struct union_mount {
|
|
|
-+ atomic_t u_count; /* reference count */
|
|
|
-+ struct mutex u_mutex;
|
|
|
-+ struct list_head u_unions; /* list head for d_unions */
|
|
|
-+ struct list_head u_list; /* list head for mnt_unions */
|
|
|
-+ struct hlist_node u_hash; /* list head for searching */
|
|
|
-+ struct hlist_node u_rhash; /* list head for reverse searching */
|
|
|
-+
|
|
|
-+ struct path u_this; /* this is me */
|
|
|
-+ struct path u_next; /* this is what I overlay */
|
|
|
-+};
|
|
|
-+
|
|
|
-+#define IS_UNION(dentry) (!list_empty(&(dentry)->d_unions) || \
|
|
|
-+ (dentry)->d_unionized)
|
|
|
-+#define IS_MNT_UNION(mnt) ((mnt)->mnt_flags & MNT_UNION)
|
|
|
-+
|
|
|
-+extern int is_unionized(struct dentry *, struct vfsmount *);
|
|
|
-+extern int append_to_union(struct vfsmount *, struct dentry *,
|
|
|
-+ struct vfsmount *, struct dentry *);
|
|
|
-+extern int follow_union_down(struct path *);
|
|
|
-+extern int follow_union_mount(struct path *);
|
|
|
-+extern void __d_drop_unions(struct dentry *);
|
|
|
-+extern void shrink_d_unions(struct dentry *);
|
|
|
-+extern void __shrink_d_unions(struct dentry *, struct list_head *);
|
|
|
-+extern int attach_mnt_union(struct vfsmount *, struct vfsmount *,
|
|
|
-+ struct dentry *);
|
|
|
-+extern void detach_mnt_union(struct vfsmount *);
|
|
|
-+extern struct dentry *union_create_topmost(struct nameidata *, struct qstr *,
|
|
|
-+ struct path *);
|
|
|
-+extern int __union_copyup(struct path *, struct nameidata *, struct path *);
|
|
|
-+extern int union_copyup(struct nameidata *, int);
|
|
|
-+extern int union_copyup_dir(struct path *path);
|
|
|
-+extern int union_permission(struct path *, int);
|
|
|
-+
|
|
|
-+#else /* CONFIG_UNION_MOUNT */
|
|
|
-+
|
|
|
-+#define IS_UNION(x) (0)
|
|
|
-+#define IS_MNT_UNION(x) (0)
|
|
|
-+#define is_unionized(x, y) (0)
|
|
|
-+#define append_to_union(x1, y1, x2, y2) ({ BUG(); (0); })
|
|
|
-+#define follow_union_down(x) ({ (0); })
|
|
|
-+#define follow_union_mount(x) ({ (0); })
|
|
|
-+#define __d_drop_unions(x) do { } while (0)
|
|
|
-+#define shrink_d_unions(x) do { } while (0)
|
|
|
-+#define __shrink_d_unions(x,y) do { } while (0)
|
|
|
-+#define attach_mnt_union(x, y, z) do { } while (0)
|
|
|
-+#define detach_mnt_union(x) do { } while (0)
|
|
|
-+#define union_create_topmost(x, y, z) ({ BUG(); (NULL); })
|
|
|
-+#define __union_copyup(x, y, z) ({ BUG(); (0); })
|
|
|
-+#define union_copyup(x, y) ({ (0); })
|
|
|
-+#define union_copyup_dir(x) ({ BUG(); (0); })
|
|
|
-+#define union_permission(x, y) inode_permission(x->dentry->d_inode, y)
|
|
|
-+
|
|
|
-+#endif /* CONFIG_UNION_MOUNT */
|
|
|
-+#endif /* __KERNEL__ */
|
|
|
-+#endif /* __LINUX_UNION_H */
|
|
|
---- a/mm/shmem.c
|
|
|
-+++ b/mm/shmem.c
|
|
|
-@@ -1798,6 +1798,118 @@ static int shmem_statfs(struct dentry *d
|
|
|
- return 0;
|
|
|
- }
|
|
|
-
|
|
|
-+static int shmem_rmdir(struct inode *dir, struct dentry *dentry);
|
|
|
-+static int shmem_unlink(struct inode *dir, struct dentry *dentry);
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Create a dentry to signify a whiteout.
|
|
|
-+ */
|
|
|
-+static int shmem_whiteout(struct inode *dir, struct dentry *old_dentry,
|
|
|
-+ struct dentry *new_dentry)
|
|
|
-+{
|
|
|
-+ struct shmem_sb_info *sbinfo = SHMEM_SB(dir->i_sb);
|
|
|
-+ struct dentry *dentry;
|
|
|
-+
|
|
|
-+ if (!(dir->i_sb->s_flags & MS_WHITEOUT))
|
|
|
-+ return -EPERM;
|
|
|
-+
|
|
|
-+ /* This gives us a proper initialized negative dentry */
|
|
|
-+ dentry = simple_lookup(dir, new_dentry, NULL);
|
|
|
-+ if (dentry && IS_ERR(dentry))
|
|
|
-+ return PTR_ERR(dentry);
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * No ordinary (disk based) filesystem counts whiteouts as inodes;
|
|
|
-+ * but each new link needs a new dentry, pinning lowmem, and
|
|
|
-+ * tmpfs dentries cannot be pruned until they are unlinked.
|
|
|
-+ */
|
|
|
-+ if (sbinfo->max_inodes) {
|
|
|
-+ spin_lock(&sbinfo->stat_lock);
|
|
|
-+ if (!sbinfo->free_inodes) {
|
|
|
-+ spin_unlock(&sbinfo->stat_lock);
|
|
|
-+ return -ENOSPC;
|
|
|
-+ }
|
|
|
-+ sbinfo->free_inodes--;
|
|
|
-+ spin_unlock(&sbinfo->stat_lock);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ if (old_dentry->d_inode || d_is_fallthru(old_dentry)) {
|
|
|
-+ if (old_dentry->d_inode && S_ISDIR(old_dentry->d_inode->i_mode))
|
|
|
-+ shmem_rmdir(dir, old_dentry);
|
|
|
-+ else
|
|
|
-+ shmem_unlink(dir, old_dentry);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ dir->i_size += BOGO_DIRENT_SIZE;
|
|
|
-+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
|
|
-+ /* Extra pinning count for the created dentry */
|
|
|
-+ dget(new_dentry);
|
|
|
-+ spin_lock(&new_dentry->d_lock);
|
|
|
-+ new_dentry->d_flags |= DCACHE_WHITEOUT;
|
|
|
-+ spin_unlock(&new_dentry->d_lock);
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static void shmem_d_instantiate(struct inode *dir, struct dentry *dentry,
|
|
|
-+ struct inode *inode);
|
|
|
-+
|
|
|
-+/*
|
|
|
-+ * Create a dentry to signify a fallthru. A fallthru lets us read the
|
|
|
-+ * low-level dentries into the dcache once on the first readdir() and
|
|
|
-+ * then reuse them on later calls instead of reading them again.
|
|
|
-+ */
|
|
|
-+static int shmem_fallthru(struct inode *dir, struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ struct shmem_sb_info *sbinfo = SHMEM_SB(dir->i_sb);
|
|
|
-+
|
|
|
-+ /* FIXME: this is stupid */
|
|
|
-+ if (!(dir->i_sb->s_flags & MS_WHITEOUT))
|
|
|
-+ return -EPERM;
|
|
|
-+
|
|
|
-+ if (dentry->d_inode || d_is_fallthru(dentry) || d_is_whiteout(dentry))
|
|
|
-+ return -EEXIST;
|
|
|
-+
|
|
|
-+ /*
|
|
|
-+ * Each new link needs a new dentry, pinning lowmem, and tmpfs
|
|
|
-+ * dentries cannot be pruned until they are unlinked.
|
|
|
-+ */
|
|
|
-+ if (sbinfo->max_inodes) {
|
|
|
-+ spin_lock(&sbinfo->stat_lock);
|
|
|
-+ if (!sbinfo->free_inodes) {
|
|
|
-+ spin_unlock(&sbinfo->stat_lock);
|
|
|
-+ return -ENOSPC;
|
|
|
-+ }
|
|
|
-+ sbinfo->free_inodes--;
|
|
|
-+ spin_unlock(&sbinfo->stat_lock);
|
|
|
-+ }
|
|
|
-+
|
|
|
-+ shmem_d_instantiate(dir, dentry, NULL);
|
|
|
-+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
|
|
-+
|
|
|
-+ spin_lock(&dentry->d_lock);
|
|
|
-+ dentry->d_flags |= DCACHE_FALLTHRU;
|
|
|
-+ spin_unlock(&dentry->d_lock);
|
|
|
-+ return 0;
|
|
|
-+}
|
|
|
-+
|
|
|
-+static void shmem_d_instantiate(struct inode *dir, struct dentry *dentry,
|
|
|
-+ struct inode *inode)
|
|
|
-+{
|
|
|
-+ if (d_is_whiteout(dentry)) {
|
|
|
-+ /* Re-using an existing whiteout */
|
|
|
-+ shmem_free_inode(dir->i_sb);
|
|
|
-+ if (S_ISDIR(inode->i_mode))
|
|
|
-+ inode->i_mode |= S_OPAQUE;
|
|
|
-+ } else if (d_is_fallthru(dentry)) {
|
|
|
-+ shmem_free_inode(dir->i_sb);
|
|
|
-+ } else {
|
|
|
-+ /* New dentry */
|
|
|
-+ dir->i_size += BOGO_DIRENT_SIZE;
|
|
|
-+ dget(dentry); /* Extra count - pin the dentry in core */
|
|
|
-+ }
|
|
|
-+ /* Will clear DCACHE_WHITEOUT and DCACHE_FALLTHRU flags */
|
|
|
-+ d_instantiate(dentry, inode);
|
|
|
-+}
|
|
|
- /*
|
|
|
- * File creation. Allocate an inode, and we're done..
|
|
|
- */
|
|
|
-@@ -1822,15 +1934,16 @@ shmem_mknod(struct inode *dir, struct de
|
|
|
- iput(inode);
|
|
|
- return error;
|
|
|
- }
|
|
|
-+
|
|
|
- if (dir->i_mode & S_ISGID) {
|
|
|
- inode->i_gid = dir->i_gid;
|
|
|
- if (S_ISDIR(mode))
|
|
|
- inode->i_mode |= S_ISGID;
|
|
|
- }
|
|
|
-- dir->i_size += BOGO_DIRENT_SIZE;
|
|
|
-+
|
|
|
-+ shmem_d_instantiate(dir, dentry, inode);
|
|
|
-+
|
|
|
- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
|
|
-- d_instantiate(dentry, inode);
|
|
|
-- dget(dentry); /* Extra count - pin the dentry in core */
|
|
|
- }
|
|
|
- return error;
|
|
|
- }
|
|
|
-@@ -1868,12 +1981,11 @@ static int shmem_link(struct dentry *old
|
|
|
- if (ret)
|
|
|
- goto out;
|
|
|
-
|
|
|
-- dir->i_size += BOGO_DIRENT_SIZE;
|
|
|
-+ shmem_d_instantiate(dir, dentry, inode);
|
|
|
-+
|
|
|
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
|
|
- inc_nlink(inode);
|
|
|
- atomic_inc(&inode->i_count); /* New dentry reference */
|
|
|
-- dget(dentry); /* Extra pinning count for the created dentry */
|
|
|
-- d_instantiate(dentry, inode);
|
|
|
- out:
|
|
|
- return ret;
|
|
|
- }
|
|
|
-@@ -1882,21 +1994,63 @@ static int shmem_unlink(struct inode *di
|
|
|
- {
|
|
|
- struct inode *inode = dentry->d_inode;
|
|
|
-
|
|
|
-- if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
|
|
|
-- shmem_free_inode(inode->i_sb);
|
|
|
-+ if (d_is_whiteout(dentry) || d_is_fallthru(dentry) ||
|
|
|
-+ (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)))
|
|
|
-+ shmem_free_inode(dir->i_sb);
|
|
|
-
|
|
|
-+ if (inode) {
|
|
|
-+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
|
|
-+ drop_nlink(inode);
|
|
|
-+ }
|
|
|
- dir->i_size -= BOGO_DIRENT_SIZE;
|
|
|
-- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
|
|
-- drop_nlink(inode);
|
|
|
- dput(dentry); /* Undo the count from "create" - this does all the work */
|
|
|
- return 0;
|
|
|
- }
|
|
|
-
|
|
|
-+static void shmem_dir_unlink_whiteouts(struct inode *dir, struct dentry *dentry)
|
|
|
-+{
|
|
|
-+ if (!dentry->d_inode)
|
|
|
-+ return;
|
|
|
-+
|
|
|
-+ /* Remove whiteouts from logically empty directory */
|
|
|
-+ if (S_ISDIR(dentry->d_inode->i_mode) &&
|
|
|
-+ dentry->d_inode->i_sb->s_flags & MS_WHITEOUT) {
|
|
|
-+ struct dentry *child, *next;
|
|
|
-+ LIST_HEAD(list);
|
|
|
-+
|
|
|
-+ spin_lock(&dcache_lock);
|
|
|
-+ list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
|
|
|
-+ spin_lock(&child->d_lock);
|
|
|
-+ /* Unlink fallthrus too */
|
|
|
-+ if (d_is_whiteout(child) || d_is_fallthru(child)) {
|
|
|
-+ __d_drop(child);
|
|
|
-+ if (!list_empty(&child->d_lru)) {
|
|
|
-+ list_del(&child->d_lru);
|
|
|
-+ dentry_stat.nr_unused--;
|
|
|
-+ }
|
|
|
-+ list_add(&child->d_lru, &list);
|
|
|
-+ }
|
|
|
-+ spin_unlock(&child->d_lock);
|
|
|
-+ }
|
|
|
-+ spin_unlock(&dcache_lock);
|
|
|
-+
|
|
|
-+ list_for_each_entry_safe(child, next, &list, d_lru) {
|
|
|
-+ spin_lock(&child->d_lock);
|
|
|
-+ list_del_init(&child->d_lru);
|
|
|
-+ spin_unlock(&child->d_lock);
|
|
|
-+
|
|
|
-+ shmem_unlink(dentry->d_inode, child);
|
|
|
-+ }
|
|
|
-+ }
|
|
|
-+}
|
|
|
-+
|
|
|
- static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
|
|
|
- {
|
|
|
- if (!simple_empty(dentry))
|
|
|
- return -ENOTEMPTY;
|
|
|
-
|
|
|
-+ /* Remove whiteouts from logically empty directory */
|
|
|
-+ shmem_dir_unlink_whiteouts(dir, dentry);
|
|
|
- drop_nlink(dentry->d_inode);
|
|
|
- drop_nlink(dir);
|
|
|
- return shmem_unlink(dir, dentry);
|
|
|
-@@ -1905,7 +2059,7 @@ static int shmem_rmdir(struct inode *dir
|
|
|
- /*
|
|
|
- * The VFS layer already does all the dentry stuff for rename,
|
|
|
- * we just have to decrement the usage count for the target if
|
|
|
-- * it exists so that the VFS layer correctly free's it when it
|
|
|
-+ * it exists so that the VFS layer correctly frees it when it
|
|
|
- * gets overwritten.
|
|
|
- */
|
|
|
- static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
|
|
|
-@@ -1916,7 +2070,12 @@ static int shmem_rename(struct inode *ol
|
|
|
- if (!simple_empty(new_dentry))
|
|
|
- return -ENOTEMPTY;
|
|
|
-
|
|
|
-+ if (d_is_whiteout(new_dentry))
|
|
|
-+ shmem_unlink(new_dir, new_dentry);
|
|
|
-+
|
|
|
- if (new_dentry->d_inode) {
|
|
|
-+ /* Remove whiteouts from logically empty directory */
|
|
|
-+ shmem_dir_unlink_whiteouts(new_dir, new_dentry);
|
|
|
- (void) shmem_unlink(new_dir, new_dentry);
|
|
|
- if (they_are_dirs)
|
|
|
- drop_nlink(old_dir);
|
|
|
-@@ -1981,12 +2140,12 @@ static int shmem_symlink(struct inode *d
|
|
|
- unlock_page(page);
|
|
|
- page_cache_release(page);
|
|
|
- }
|
|
|
-+
|
|
|
-+ shmem_d_instantiate(dir, dentry, inode);
|
|
|
-+
|
|
|
- if (dir->i_mode & S_ISGID)
|
|
|
- inode->i_gid = dir->i_gid;
|
|
|
-- dir->i_size += BOGO_DIRENT_SIZE;
|
|
|
- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
|
|
-- d_instantiate(dentry, inode);
|
|
|
-- dget(dentry);
|
|
|
- return 0;
|
|
|
- }
|
|
|
-
|
|
|
-@@ -2363,6 +2522,12 @@ int shmem_fill_super(struct super_block
|
|
|
- if (!root)
|
|
|
- goto failed_iput;
|
|
|
- sb->s_root = root;
|
|
|
-+
|
|
|
-+#ifdef CONFIG_TMPFS
|
|
|
-+ if (!(sb->s_flags & MS_NOUSER))
|
|
|
-+ sb->s_flags |= MS_WHITEOUT;
|
|
|
-+#endif
|
|
|
-+
|
|
|
- return 0;
|
|
|
-
|
|
|
- failed_iput:
|
|
|
-@@ -2463,6 +2628,8 @@ static const struct inode_operations shm
|
|
|
- .rmdir = shmem_rmdir,
|
|
|
- .mknod = shmem_mknod,
|
|
|
- .rename = shmem_rename,
|
|
|
-+ .whiteout = shmem_whiteout,
|
|
|
-+ .fallthru = shmem_fallthru,
|
|
|
- #endif
|
|
|
- #ifdef CONFIG_TMPFS_POSIX_ACL
|
|
|
- .setattr = shmem_notify_change,
|