From 684999149002dd046269666a390458e0acb38280 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Feb 2009 13:52:43 -0700 Subject: Rename struct file->f_ep_lock This lock moves out of the CONFIG_EPOLL ifdef and becomes f_lock. For now, epoll remains the only user, but a future patch will use it to protect f_flags as well. Cc: Davide Libenzi Reviewed-by: Christoph Hellwig Signed-off-by: Jonathan Corbet --- fs/eventpoll.c | 12 +++++++----- fs/file_table.c | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 011b9b8c90c6..c5c424f23fd5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -417,10 +417,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) ep_unregister_pollwait(ep, epi); /* Remove the current item from the list of epoll hooks */ - spin_lock(&file->f_ep_lock); + spin_lock(&file->f_lock); if (ep_is_linked(&epi->fllink)) list_del_init(&epi->fllink); - spin_unlock(&file->f_ep_lock); + spin_unlock(&file->f_lock); rb_erase(&epi->rbn, &ep->rbr); @@ -538,7 +538,7 @@ void eventpoll_release_file(struct file *file) struct epitem *epi; /* - * We don't want to get "file->f_ep_lock" because it is not + * We don't want to get "file->f_lock" because it is not * necessary. It is not necessary because we're in the "struct file" * cleanup path, and this means that noone is using this file anymore. * So, for example, epoll_ctl() cannot hit here sicne if we reach this @@ -547,6 +547,8 @@ void eventpoll_release_file(struct file *file) * will correctly serialize the operation. We do need to acquire * "ep->mtx" after "epmutex" because ep_remove() requires it when called * from anywhere but ep_free(). + * + * Besides, ep_remove() acquires the lock, so we can't hold it here. */ mutex_lock(&epmutex); @@ -785,9 +787,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, goto error_unregister; /* Add the current item to the list of active epoll hook for this file */ - spin_lock(&tfile->f_ep_lock); + spin_lock(&tfile->f_lock); list_add_tail(&epi->fllink, &tfile->f_ep_links); - spin_unlock(&tfile->f_ep_lock); + spin_unlock(&tfile->f_lock); /* * Add the current item to the RB tree. All RB tree operations are diff --git a/fs/file_table.c b/fs/file_table.c index bbeeac6efa1a..aa1e18050282 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -127,6 +127,7 @@ struct file *get_empty_filp(void) atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); f->f_cred = get_cred(cred); + spin_lock_init(&f->f_lock); eventpoll_init_file(f); /* f->f_version: 0 */ return f; -- cgit v1.2.3 From db1dd4d376134eba0e08af523b61cc566a4ea1cd Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 6 Feb 2009 15:25:24 -0700 Subject: Use f_lock to protect f_flags Traditionally, changes to struct file->f_flags have been done under BKL protection, or with no protection at all. This patch causes all f_flags changes after file open/creation time to be done under protection of f_lock. This allows the removal of some BKL usage and fixes a number of longstanding (if microscopic) races. Reviewed-by: Christoph Hellwig Cc: Al Viro Signed-off-by: Jonathan Corbet --- fs/fcntl.c | 2 ++ fs/ioctl.c | 7 ++++--- fs/nfsd/vfs.c | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index bd215cc791da..04df8570a2d2 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -189,7 +189,9 @@ static int setfl(int fd, struct file * filp, unsigned long arg) } } + spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); + spin_unlock(&filp->f_lock); out: unlock_kernel(); return error; diff --git a/fs/ioctl.c b/fs/ioctl.c index 240ec63984cb..421aab465dab 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -404,10 +404,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp) if (O_NONBLOCK != O_NDELAY) flag |= O_NDELAY; #endif + spin_lock(&filp->f_lock); if (on) filp->f_flags |= flag; else filp->f_flags &= ~flag; + spin_unlock(&filp->f_lock); return error; } @@ -432,10 +434,12 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, if (error) return error; + spin_lock(&filp->f_lock); if (on) filp->f_flags |= FASYNC; else filp->f_flags &= ~FASYNC; + spin_unlock(&filp->f_lock); return error; } @@ -499,10 +503,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIONBIO: - /* BKL needed to avoid races tweaking f_flags */ - lock_kernel(); error = ioctl_fionbio(filp, argp); - unlock_kernel(); break; case FIOASYNC: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6e50aaa56ca2..c165a6403df0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !EX_WGATHER(exp)) + if (stable && !EX_WGATHER(exp)) { + spin_lock(&file->f_lock); file->f_flags |= O_SYNC; + spin_unlock(&file->f_lock); + } /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); -- cgit v1.2.3 From 76398425bb06b07cc3a3b1ce169c67dc9d6874ed Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 1 Feb 2009 14:26:59 -0700 Subject: Move FASYNC bit handling to f_op->fasync() Removing the BKL from FASYNC handling ran into the challenge of keeping the setting of the FASYNC bit in filp->f_flags atomic with regard to calls to the underlying fasync() function. Andi Kleen suggested moving the handling of that bit into fasync(); this patch does exactly that. As a result, we have a couple of internal API changes: fasync() must now manage the FASYNC bit, and it will be called without the BKL held. As it happens, every fasync() implementation in the kernel with one exception calls fasync_helper(). So, if we make fasync_helper() set the FASYNC bit, we can avoid making any changes to the other fasync() functions - as long as those functions, themselves, have proper locking. Most fasync() implementations do nothing but call fasync_helper() - which has its own lock - so they are easily verified as correct. The BKL had already been pushed down into the rest. The networking code has its own version of fasync_helper(), so that code has been augmented with explicit FASYNC bit handling. Cc: Al Viro Cc: David Miller Reviewed-by: Christoph Hellwig Signed-off-by: Jonathan Corbet --- fs/fcntl.c | 29 ++++++++++++++++------------- fs/ioctl.c | 13 +------------ 2 files changed, 17 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 04df8570a2d2..431bb6459273 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -141,7 +141,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) return ret; } -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) { @@ -177,23 +177,19 @@ static int setfl(int fd, struct file * filp, unsigned long arg) return error; /* - * We still need a lock here for now to keep multiple FASYNC calls - * from racing with each other. + * ->fasync() is responsible for setting the FASYNC bit. */ - lock_kernel(); - if ((arg ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); - if (error < 0) - goto out; - } + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && + filp->f_op->fasync) { + error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); + if (error < 0) + goto out; } - spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); spin_unlock(&filp->f_lock); + out: - unlock_kernel(); return error; } @@ -518,7 +514,7 @@ static DEFINE_RWLOCK(fasync_lock); static struct kmem_cache *fasync_cache __read_mostly; /* - * fasync_helper() is used by some character device drivers (mainly mice) + * fasync_helper() is used by almost all character device drivers * to set up the fasync queue. It returns negative on error, 0 if it did * no changes and positive if it added/deleted the entry. */ @@ -557,6 +553,13 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap result = 1; } out: + /* Fix up FASYNC bit while still holding fasync_lock */ + spin_lock(&filp->f_lock); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + spin_unlock(&filp->f_lock); write_unlock_irq(&fasync_lock); return result; } diff --git a/fs/ioctl.c b/fs/ioctl.c index 421aab465dab..e8e89edba576 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -427,19 +427,11 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { if (filp->f_op && filp->f_op->fasync) + /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else error = -ENOTTY; } - if (error) - return error; - - spin_lock(&filp->f_lock); - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - spin_unlock(&filp->f_lock); return error; } @@ -507,10 +499,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIOASYNC: - /* BKL needed to avoid races tweaking f_flags */ - lock_kernel(); error = ioctl_fioasync(fd, filp, argp); - unlock_kernel(); break; case FIOQSIZE: -- cgit v1.2.3 From 60aa49243d09afc873f082567d2e3c16634ced84 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 1 Feb 2009 14:52:56 -0700 Subject: Rationalize fasync return values Most fasync implementations do something like: return fasync_helper(...); But fasync_helper() will return a positive value at times - a feature used in at least one place. Thus, a number of other drivers do: err = fasync_helper(...); if (err < 0) return err; return 0; In the interests of consistency and more concise code, it makes sense to map positive return values onto zero where ->fasync() is called. Cc: Al Viro Signed-off-by: Jonathan Corbet --- fs/fcntl.c | 2 ++ fs/ioctl.c | 2 +- fs/pipe.c | 16 +++------------- 3 files changed, 6 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 431bb6459273..d865ca66ccba 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -184,6 +184,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg) error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; + if (error > 0) + error = 0; } spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); diff --git a/fs/ioctl.c b/fs/ioctl.c index e8e89edba576..ac2d47e43926 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -432,7 +432,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, else error = -ENOTTY; } - return error; + return error < 0 ? error : 0; } static int ioctl_fsfreeze(struct file *filp) diff --git a/fs/pipe.c b/fs/pipe.c index 14f502b89cf5..94ad15967cf9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -667,10 +667,7 @@ pipe_read_fasync(int fd, struct file *filp, int on) retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); mutex_unlock(&inode->i_mutex); - if (retval < 0) - return retval; - - return 0; + return retval; } @@ -684,10 +681,7 @@ pipe_write_fasync(int fd, struct file *filp, int on) retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); mutex_unlock(&inode->i_mutex); - if (retval < 0) - return retval; - - return 0; + return retval; } @@ -706,11 +700,7 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) fasync_helper(-1, filp, 0, &pipe->fasync_readers); } mutex_unlock(&inode->i_mutex); - - if (retval < 0) - return retval; - - return 0; + return retval; } -- cgit v1.2.3