From b21996e36c8e3b92a84e972378bde80b43acd890 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Sep 2011 09:14:34 -0400 Subject: locks: break delegations on unlink We need to break delegations on any operation that changes the set of links pointing to an inode. Start with unlink. Such operations also hold the i_mutex on a parent directory. Breaking a delegation may require waiting for a timeout (by default 90 seconds) in the case of a unresponsive NFS client. To avoid blocking all directory operations, we therefore drop locks before waiting for the delegation. The logic then looks like: acquire locks ... test for delegation; if found: take reference on inode release locks wait for delegation break drop reference on inode retry It is possible this could never terminate. (Even if we take precautions to prevent another delegation being acquired on the same inode, we could get a different inode on each retry.) But this seems very unlikely. The initial test for a delegation happens after the lock on the target inode is acquired, but the directory inode may have been acquired further up the call stack. We therefore add a "struct inode **" argument to any intervening functions, which we use to pass the inode back up to the caller in the case it needs a delegation synchronously broken. Cc: David Howells Cc: Tyler Hicks Cc: Dustin Kirkland Acked-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/namei.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index e633a58d422..67ce331a3ed 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3615,7 +3615,25 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) return do_rmdir(AT_FDCWD, pathname); } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +/** + * vfs_unlink - unlink a filesystem object + * @dir: parent directory + * @dentry: victim + * @delegated_inode: returns victim inode, if the inode is delegated. + * + * The caller must hold dir->i_mutex. + * + * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and + * return a reference to the inode in delegated_inode. The caller + * should then break the delegation on that inode and retry. Because + * breaking a delegation may take a long time, the caller should drop + * dir->i_mutex before doing so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + */ +int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; int error = may_delete(dir, dentry, 0); @@ -3632,11 +3650,20 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) else { error = security_inode_unlink(dir, dentry); if (!error) { + error = break_deleg(target, O_WRONLY|O_NONBLOCK); + if (error) { + if (error == -EWOULDBLOCK && delegated_inode) { + *delegated_inode = target; + ihold(target); + } + goto out; + } error = dir->i_op->unlink(dir, dentry); if (!error) dont_mount(dentry); } } +out: mutex_unlock(&target->i_mutex); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ @@ -3661,6 +3688,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; + struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: name = user_path_parent(dfd, pathname, &nd, lookup_flags); @@ -3675,7 +3703,7 @@ retry: error = mnt_want_write(nd.path.mnt); if (error) goto exit1; - +retry_deleg: mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); @@ -3690,13 +3718,21 @@ retry: error = security_path_unlink(&nd.path, dentry); if (error) goto exit2; - error = vfs_unlink(nd.path.dentry->d_inode, dentry); + error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode); exit2: dput(dentry); } mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ + inode = NULL; + if (delegated_inode) { + error = break_deleg(delegated_inode, O_WRONLY); + iput(delegated_inode); + delegated_inode = NULL; + if (!error) + goto retry_deleg; + } mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); -- cgit v1.2.3