aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 20:25:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 20:25:04 -0700
commitaab174f0df5d72d31caccf281af5f614fa254578 (patch)
tree2a172c5009c4ac8755e858593154c258ce7709a0 /fs
parentca41cc96b2813221b05af57d0355157924de5a07 (diff)
parent2bd2c1941f141ad780135ccc1cd08ca71a24f10a (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs update from Al Viro: - big one - consolidation of descriptor-related logics; almost all of that is moved to fs/file.c (BTW, I'm seriously tempted to rename the result to fd.c. As it is, we have a situation when file_table.c is about handling of struct file and file.c is about handling of descriptor tables; the reasons are historical - file_table.c used to be about a static array of struct file we used to have way back). A lot of stray ends got cleaned up and converted to saner primitives, disgusting mess in android/binder.c is still disgusting, but at least doesn't poke so much in descriptor table guts anymore. A bunch of relatively minor races got fixed in process, plus an ext4 struct file leak. - related thing - fget_light() partially unuglified; see fdget() in there (and yes, it generates the code as good as we used to have). - also related - bits of Cyrill's procfs stuff that got entangled into that work; _not_ all of it, just the initial move to fs/proc/fd.c and switch of fdinfo to seq_file. - Alex's fs/coredump.c spiltoff - the same story, had been easier to take that commit than mess with conflicts. The rest is a separate pile, this was just a mechanical code movement. - a few misc patches all over the place. Not all for this cycle, there'll be more (and quite a few currently sit in akpm's tree)." Fix up trivial conflicts in the android binder driver, and some fairly simple conflicts due to two different changes to the sock_alloc_file() interface ("take descriptor handling from sock_alloc_file() to callers" vs "net: Providing protocol type via system.sockprotoname xattr of /proc/PID/fd entries" adding a dentry name to the socket) * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (72 commits) MAX_LFS_FILESIZE should be a loff_t compat: fs: Generic compat_sys_sendfile implementation fs: push rcu_barrier() from deactivate_locked_super() to filesystems btrfs: reada_extent doesn't need kref for refcount coredump: move core dump functionality into its own file coredump: prevent double-free on an error path in core dumper usb/gadget: fix misannotations fcntl: fix misannotations ceph: don't abuse d_delete() on failure exits hypfs: ->d_parent is never NULL or negative vfs: delete surplus inode NULL check switch simple cases of fget_light to fdget new helpers: fdget()/fdput() switch o2hb_region_dev_write() to fget_light() proc_map_files_readdir(): don't bother with grabbing files make get_file() return its argument vhost_set_vring(): turn pollstart/pollstop into bool switch prctl_set_mm_exe_file() to fget_light() switch xfs_find_handle() to fget_light() switch xfs_swapext() to fget_light() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c5
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/super.c5
-rw-r--r--fs/affs/super.c5
-rw-r--r--fs/afs/super.c5
-rw-r--r--fs/autofs4/dev-ioctl.c18
-rw-r--r--fs/autofs4/waitq.c3
-rw-r--r--fs/befs/linuxvfs.c5
-rw-r--r--fs/bfs/inode.c5
-rw-r--r--fs/binfmt_elf.c19
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/ioctl.c32
-rw-r--r--fs/btrfs/reada.c18
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/super.c5
-rw-r--r--fs/cifs/cifsfs.c5
-rw-r--r--fs/coda/inode.c37
-rw-r--r--fs/compat.c112
-rw-r--r--fs/compat_ioctl.c27
-rw-r--r--fs/coredump.c686
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/efs/super.c5
-rw-r--r--fs/eventpoll.c23
-rw-r--r--fs/exec.c688
-rw-r--r--fs/exofs/super.c5
-rw-r--r--fs/ext2/super.c5
-rw-r--r--fs/ext3/super.c5
-rw-r--r--fs/ext4/ioctl.c15
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/fat/inode.c5
-rw-r--r--fs/fcntl.c166
-rw-r--r--fs/fhandle.c17
-rw-r--r--fs/file.c573
-rw-r--r--fs/file_table.c106
-rw-r--r--fs/freevxfs/vxfs_super.c5
-rw-r--r--fs/fuse/dev.c3
-rw-r--r--fs/fuse/inode.c6
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/super.c6
-rw-r--r--fs/hpfs/super.c5
-rw-r--r--fs/hugetlbfs/inode.c5
-rw-r--r--fs/ioctl.c25
-rw-r--r--fs/isofs/inode.c5
-rw-r--r--fs/jffs2/super.c6
-rw-r--r--fs/jfs/super.c6
-rw-r--r--fs/locks.c20
-rw-r--r--fs/logfs/inode.c5
-rw-r--r--fs/minix/inode.c5
-rw-r--r--fs/namei.c41
-rw-r--r--fs/ncpfs/inode.c5
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nilfs2/super.c6
-rw-r--r--fs/notify/fanotify/fanotify_user.c87
-rw-r--r--fs/notify/inotify/inotify_user.c28
-rw-r--r--fs/ntfs/super.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c38
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c5
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/open.c130
-rw-r--r--fs/openpromfs/inode.c5
-rw-r--r--fs/pipe.c31
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/base.c417
-rw-r--r--fs/proc/fd.c367
-rw-r--r--fs/proc/fd.h14
-rw-r--r--fs/proc/internal.h48
-rw-r--r--fs/qnx4/inode.c5
-rw-r--r--fs/qnx6/inode.c5
-rw-r--r--fs/read_write.c180
-rw-r--r--fs/read_write.h2
-rw-r--r--fs/readdir.c36
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/romfs/super.c5
-rw-r--r--fs/select.c31
-rw-r--r--fs/signalfd.c13
-rw-r--r--fs/splice.c69
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/stat.c10
-rw-r--r--fs/statfs.c9
-rw-r--r--fs/super.c6
-rw-r--r--fs/sync.c33
-rw-r--r--fs/sysv/inode.c5
-rw-r--r--fs/timerfd.c45
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/ufs/super.c5
-rw-r--r--fs/utimes.c11
-rw-r--r--fs/xattr.c52
-rw-r--r--fs/xfs/xfs_dfrag.c34
-rw-r--r--fs/xfs/xfs_ioctl.c10
-rw-r--r--fs/xfs/xfs_super.c5
94 files changed, 2486 insertions, 2092 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index b85efa77394..392c5dac198 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void)
*/
static void v9fs_destroy_inode_cache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(v9fs_inode_cache);
}
diff --git a/fs/Makefile b/fs/Makefile
index 2fb97793467..8938f825032 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o
+ stack.o fs_struct.o statfs.o coredump.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 22a0d7ed5fa..d5712293579 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -280,6 +280,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(adfs_inode_cachep);
}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 1f030825cd3..b84dc735250 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -147,6 +147,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(affs_inode_cachep);
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index df8c6047c2a..43165009428 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -123,6 +123,11 @@ void __exit afs_fs_exit(void)
BUG();
}
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(afs_inode_cachep);
_leave("");
}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index abf645c1703..a16214109d3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p)
return ino && ino->sbi->type & *(unsigned *)p;
}
-static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
-{
- struct files_struct *files = current->files;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- BUG_ON(fdt->fd[fd] != NULL);
- rcu_assign_pointer(fdt->fd[fd], file);
- __set_close_on_exec(fd, fdt);
- spin_unlock(&files->file_lock);
-}
-
-
/*
* Open a file descriptor on the autofs mount point corresponding
* to the given path and device number (aka. new_encode_dev(sb->s_dev)).
@@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
{
int err, fd;
- fd = get_unused_fd();
+ fd = get_unused_fd_flags(O_CLOEXEC);
if (likely(fd >= 0)) {
struct file *filp;
struct path path;
@@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
goto out;
}
- autofs_dev_ioctl_fd_install(fd, filp);
+ fd_install(fd, filp);
}
return fd;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7..dce436e595c 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
return;
}
- pipe = sbi->pipe;
- get_file(pipe);
+ pipe = get_file(sbi->pipe);
mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 7f73a692bfd..2b3bda8d5e6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -457,6 +457,11 @@ befs_init_inodecache(void)
static void
befs_destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(befs_inode_cachep);
}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b242beba58e..737aaa3f709 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -280,6 +280,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(bfs_inode_cachep);
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1b52956afe3..0225fddf49b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1696,30 +1696,19 @@ static int elf_note_info_init(struct elf_note_info *info)
return 0;
info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
if (!info->psinfo)
- goto notes_free;
+ return 0;
info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
if (!info->prstatus)
- goto psinfo_free;
+ return 0;
info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
if (!info->fpu)
- goto prstatus_free;
+ return 0;
#ifdef ELF_CORE_COPY_XFPREGS
info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
if (!info->xfpu)
- goto fpu_free;
+ return 0;
#endif
return 1;
-#ifdef ELF_CORE_COPY_XFPREGS
- fpu_free:
- kfree(info->fpu);
-#endif
- prstatus_free:
- kfree(info->prstatus);
- psinfo_free:
- kfree(info->psinfo);
- notes_free:
- kfree(info->notes);
- return 0;
}
static int fill_note_info(struct elfhdr *elf, int phdrs,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c878476bb9..b08ea4717e9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -107,6 +107,12 @@ void extent_io_exit(void)
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
+
+ /*
+ * Make sure all delayed rcu free are flushed before we
+ * destroy caches.
+ */
+ rcu_barrier();
if (extent_state_cache)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a028a58619..a6ed6944e50 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7076,6 +7076,11 @@ static void init_once(void *foo)
void btrfs_destroy_cachep(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
if (btrfs_inode_cachep)
kmem_cache_destroy(btrfs_inode_cachep);
if (btrfs_trans_handle_cachep)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 27bfce58da3..47127c1bd29 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
u64 *transid, bool readonly,
struct btrfs_qgroup_inherit **inherit)
{
- struct file *src_file;
int namelen;
int ret = 0;
@@ -1421,25 +1420,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
ret = btrfs_mksubvol(&file->f_path, name, namelen,
NULL, transid, readonly, inherit);
} else {
+ struct fd src = fdget(fd);
struct inode *src_inode;
- src_file = fget(fd);
- if (!src_file) {
+ if (!src.file) {
ret = -EINVAL;
goto out_drop_write;
}
- src_inode = src_file->f_path.dentry->d_inode;
+ src_inode = src.file->f_path.dentry->d_inode;
if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
printk(KERN_INFO "btrfs: Snapshot src from "
"another FS\n");
ret = -EINVAL;
- fput(src_file);
- goto out_drop_write;
+ } else {
+ ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ BTRFS_I(src_inode)->root,
+ transid, readonly, inherit);
}
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
- BTRFS_I(src_inode)->root,
- transid, readonly, inherit);
- fput(src_file);
+ fdput(src);
}
out_drop_write:
mnt_drop_write_file(file);
@@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct file *src_file;
+ struct fd src_file;
struct inode *src;
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
@@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
if (ret)
return ret;
- src_file = fget(srcfd);
- if (!src_file) {
+ src_file = fdget(srcfd);
+ if (!src_file.file) {
ret = -EBADF;
goto out_drop_write;
}
ret = -EXDEV;
- if (src_file->f_path.mnt != file->f_path.mnt)
+ if (src_file.file->f_path.mnt != file->f_path.mnt)
goto out_fput;
- src = src_file->f_dentry->d_inode;
+ src = src_file.file->f_dentry->d_inode;
ret = -EINVAL;
if (src == inode)
goto out_fput;
/* the src must be open for reading */
- if (!(src_file->f_mode & FMODE_READ))
+ if (!(src_file.file->f_mode & FMODE_READ))
goto out_fput;
/* don't make the dst file partly checksummed */
@@ -2724,7 +2722,7 @@ out_unlock:
vfree(buf);
btrfs_free_path(path);
out_fput:
- fput(src_file);
+ fdput(src_file);
out_drop_write:
mnt_drop_write_file(file);
return ret;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 48a4882d8ad..a955669519a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -68,7 +68,7 @@ struct reada_extent {
u32 blocksize;
int err;
struct list_head extctl;
- struct kref refcnt;
+ int refcnt;
spinlock_t lock;
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
@@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
- kref_get(&re->refcnt);
+ re->refcnt++;
spin_unlock(&fs_info->reada_lock);
if (!re)
@@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
- kref_get(&re->refcnt);
+ re->refcnt++;
spin_unlock(&fs_info->reada_lock);
if (re)
@@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
re->top = *top;
INIT_LIST_HEAD(&re->extctl);
spin_lock_init(&re->lock);
- kref_init(&re->refcnt);
+ re->refcnt = 1;
/*
* map block
@@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
if (ret == -EEXIST) {
re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
BUG_ON(!re_exist);
- kref_get(&re_exist->refcnt);
+ re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
goto error;
}
@@ -465,10 +465,6 @@ error:
return re_exist;
}
-static void reada_kref_dummy(struct kref *kr)
-{
-}
-
static void reada_extent_put(struct btrfs_fs_info *fs_info,
struct reada_extent *re)
{
@@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
spin_lock(&fs_info->reada_lock);
- if (!kref_put(&re->refcnt, reada_kref_dummy)) {
+ if (--re->refcnt) {
spin_unlock(&fs_info->reada_lock);
return;
}
@@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
return 0;
}
dev->reada_next = re->logical + re->blocksize;
- kref_get(&re->refcnt);
+ re->refcnt++;
spin_unlock(&fs_info->reada_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4b5762ef7c2..ba95eea201b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
pr_err("fill_trace bad get_inode "
"%llx.%llx\n", vino.ino, vino.snap);
err = PTR_ERR(in);
- d_delete(dn);
+ d_drop(dn);
goto done;
}
dn = splice_dentry(dn, in, &have_lease, true);
@@ -1277,7 +1277,7 @@ retry_lookup:
in = ceph_get_inode(parent->d_sb, vino);
if (IS_ERR(in)) {
dout("new_inode badness\n");
- d_delete(dn);
+ d_drop(dn);
dput(dn);
err = PTR_ERR(in);
goto out;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b982239f38f..3a42d932637 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -603,6 +603,11 @@ bad_cap:
static void destroy_caches(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ceph_inode_cachep);
kmem_cache_destroy(ceph_cap_cachep);
kmem_cache_destroy(ceph_dentry_cachep);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a41044a3108..e7931cc55d0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -968,6 +968,11 @@ cifs_init_inodecache(void)
static void
cifs_destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(cifs_inode_cachep);
}
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index f1813120d75..be2aa490948 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -85,6 +85,11 @@ int coda_init_inodecache(void)
void coda_destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(coda_inode_cachep);
}
@@ -107,43 +112,41 @@ static const struct super_operations coda_super_operations =
static int get_device_index(struct coda_mount_data *data)
{
- struct file *file;
+ struct fd f;
struct inode *inode;
int idx;
- if(data == NULL) {
+ if (data == NULL) {
printk("coda_read_super: Bad mount data\n");
return -1;
}
- if(data->version != CODA_MOUNT_VERSION) {
+ if (data->version != CODA_MOUNT_VERSION) {
printk("coda_read_super: Bad mount version\n");
return -1;
}
- file = fget(data->fd);
- inode = NULL;
- if(file)
- inode = file->f_path.dentry->d_inode;
-
- if(!inode || !S_ISCHR(inode->i_mode) ||
- imajor(inode) != CODA_PSDEV_MAJOR) {
- if(file)
- fput(file);
-
- printk("coda_read_super: Bad file\n");
- return -1;
+ f = fdget(data->fd);
+ if (!f.file)
+ goto Ebadf;
+ inode = f.file->f_path.dentry->d_inode;
+ if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
+ fdput(f);
+ goto Ebadf;
}
idx = iminor(inode);
- fput(file);
+ fdput(f);
- if(idx < 0 || idx >= MAX_CODADEVS) {
+ if (idx < 0 || idx >= MAX_CODADEVS) {
printk("coda_read_super: Bad minor number\n");
return -1;
}
return idx;
+Ebadf:
+ printk("coda_read_super: Bad file\n");
+ return -1;
}
static int coda_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/compat.c b/fs/compat.c
index 1bdb350ea5d..b7a24d0ca30 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
struct compat_old_linux_dirent __user *dirent, unsigned int count)
{
int error;
- struct file *file;
- int fput_needed;
+ struct fd f = fdget(fd);
struct compat_readdir_callback buf;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ if (!f.file)
return -EBADF;
buf.result = 0;
buf.dirent = dirent;
- error = vfs_readdir(file, compat_fillonedir, &buf);
+ error = vfs_readdir(f.file, compat_fillonedir, &buf);
if (buf.result)
error = buf.result;
- fput_light(file, fput_needed);
+ fdput(f);
return error;
}
@@ -949,17 +947,16 @@ efault:
asmlinkage long compat_sys_getdents(unsigned int fd,
struct compat_linux_dirent __user *dirent, unsigned int count)
{
- struct file * file;
+ struct fd f;
struct compat_linux_dirent __user * lastdirent;
struct compat_getdents_callback buf;
- int fput_needed;
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
return -EFAULT;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
return -EBADF;
buf.current_dir = dirent;
@@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
buf.count = count;
buf.error = 0;
- error = vfs_readdir(file, compat_filldir, &buf);
+ error = vfs_readdir(f.file, compat_filldir, &buf);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- if (put_user(file->f_pos, &lastdirent->d_off))
+ if (put_user(f.file->f_pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
- fput_light(file, fput_needed);
+ fdput(f);
return error;
}
@@ -1035,17 +1032,16 @@ efault:
asmlinkage long compat_sys_getdents64(unsigned int fd,
struct linux_dirent64 __user * dirent, unsigned int count)
{
- struct file * file;
+ struct fd f;
struct linux_dirent64 __user * lastdirent;
struct compat_getdents_callback64 buf;
- int fput_needed;
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
return -EFAULT;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
return -EBADF;
buf.current_dir = dirent;
@@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
buf.count = count;
buf.error = 0;
- error = vfs_readdir(file, compat_filldir64, &buf);
+ error = vfs_readdir(f.file, compat_filldir64, &buf);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- typeof(lastdirent->d_off) d_off = file->f_pos;
+ typeof(lastdirent->d_off) d_off = f.file->f_pos;
if (__put_user_unaligned(d_off, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
- fput_light(file, fput_needed);
+ fdput(f);
return error;
}
#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
@@ -1152,18 +1148,16 @@ asmlinkage ssize_t
compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
unsigned long vlen)
{
- struct file *file;
- int fput_needed;
+ struct fd f = fdget(fd);
ssize_t ret;
loff_t pos;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ if (!f.file)
return -EBADF;
- pos = file->f_pos;
- ret = compat_readv(file, vec, vlen, &pos);
- file->f_pos = pos;
- fput_light(file, fput_needed);
+ pos = f.file->f_pos;
+ ret = compat_readv(f.file, vec, vlen, &pos);
+ f.file->f_pos = pos;
+ fdput(f);
return ret;
}
@@ -1171,19 +1165,18 @@ asmlinkage ssize_t
compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
unsigned long vlen, loff_t pos)
{
- struct file *file;
- int fput_needed;
+ struct fd f;
ssize_t ret;
if (pos < 0)
return -EINVAL;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
return -EBADF;
ret = -ESPIPE;
- if (file->f_mode & FMODE_PREAD)
- ret = compat_readv(file, vec, vlen, &pos);
- fput_light(file, fput_needed);
+ if (f.file->f_mode & FMODE_PREAD)
+ ret = compat_readv(f.file, vec, vlen, &pos);
+ fdput(f);
return ret;
}
@@ -1221,18 +1214,16 @@ asmlinkage ssize_t
compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
unsigned long vlen)
{
- struct file *file;
- int fput_needed;
+ struct fd f = fdget(fd);
ssize_t ret;
loff_t pos;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ if (!f.file)
return -EBADF;
- pos = file->f_pos;
- ret = compat_writev(file, vec, vlen, &pos);
- file->f_pos = pos;
- fput_light(file, fput_needed);
+ pos = f.file->f_pos;
+ ret = compat_writev(f.file, vec, vlen, &pos);
+ f.file->f_pos = pos;
+ fdput(f);
return ret;
}
@@ -1240,19 +1231,18 @@ asmlinkage ssize_t
compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
unsigned long vlen, loff_t pos)
{
- struct file *file;
- int fput_needed;
+ struct fd f;
ssize_t ret;
if (pos < 0)
return -EINVAL;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
return -EBADF;
ret = -ESPIPE;
- if (file->f_mode & FMODE_PWRITE)
- ret = compat_writev(file, vec, vlen, &pos);
- fput_light(file, fput_needed);
+ if (f.file->f_mode & FMODE_PWRITE)
+ ret = compat_writev(f.file, vec, vlen, &pos);
+ fdput(f);
return ret;
}
@@ -1802,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd,
return do_handle_open(mountdirfd, handle, flags);
}
#endif
+
+#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE
+asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
+ compat_off_t __user *offset, compat_size_t count)
+{
+ loff_t pos;
+ off_t off;
+ ssize_t ret;
+
+ if (offset) {
+ if (unlikely(get_user(off, offset)))
+ return -EFAULT;
+ pos = off;
+ ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
+ if (unlikely(put_user(pos, offset)))
+ return -EFAULT;
+ return ret;
+ }
+
+ return do_sendfile(out_fd, in_fd, NULL, count, 0);
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9c03a3ae898..f5054025f9d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1539,16 +1539,13 @@ static int compat_ioctl_check_table(unsigned int xcmd)
asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
unsigned long arg)
{
- struct file *filp;
+ struct fd f = fdget(fd);
int error = -EBADF;
- int fput_needed;
-
- filp = fget_light(fd, &fput_needed);
- if (!filp)
+ if (!f.file)
goto out;
/* RED-PEN how should LSM module know it's handling 32bit? */
- error = security_file_ioctl(filp, cmd, arg);
+ error = security_file_ioctl(f.file, cmd, arg);
if (error)
goto out_fput;
@@ -1568,30 +1565,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
case FS_IOC_RESVSP_32:
case FS_IOC_RESVSP64_32:
- error = compat_ioctl_preallocate(filp, compat_ptr(arg));
+ error = compat_ioctl_preallocate(f.file, compat_ptr(arg));
goto out_fput;
#else
case FS_IOC_RESVSP:
case FS_IOC_RESVSP64:
- error = ioctl_preallocate(filp, compat_ptr(arg));
+ error = ioctl_preallocate(f.file, compat_ptr(arg));
goto out_fput;
#endif
case FIBMAP:
case FIGETBSZ:
case FIONREAD:
- if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+ if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode))
break;
/*FALL THROUGH*/
default:
- if (filp->f_op && filp->f_op->compat_ioctl) {
- error = filp->f_op->compat_ioctl(filp, cmd, arg);
+ if (f.file->f_op && f.file->f_op->compat_ioctl) {
+ error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
if (error != -ENOIOCTLCMD)
goto out_fput;
}
- if (!filp->f_op || !filp->f_op->unlocked_ioctl)
+ if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
goto do_ioctl;
break;
}
@@ -1599,7 +1596,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
if (compat_ioctl_check_table(XFORM(cmd)))
goto found_handler;
- error = do_ioctl_trans(fd, cmd, arg, filp);
+ error = do_ioctl_trans(fd, cmd, arg, f.file);
if (error == -ENOIOCTLCMD)
error = -ENOTTY;
@@ -1608,9 +1605,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
found_handler:
arg = (unsigned long)compat_ptr(arg);
do_ioctl:
- error = do_vfs_ioctl(filp, fd, cmd, arg);
+ error = do_vfs_ioctl(f.file, fd, cmd, arg);
out_fput:
- fput_light(filp, fput_needed);
+ fdput(f);
out:
return error;
}
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 00000000000..f045bbad682
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,686 @@
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/swap.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/perf_event.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/key.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/cn_proc.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/kmod.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/exec.h>
+
+#include <trace/events/task.h>
+#include "internal.h"
+
+#include <trace/events/sched.h>
+
+int core_uses_pid;
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
+
+struct core_name {
+ char *corename;
+ int used, size;
+};
+static atomic_t call_count = ATOMIC_INIT(1);
+
+/* The maximal length of core_pattern is also specified in sysctl.c */
+
+static int expand_corename(struct core_name *cn)
+{
+ char *old_corename = cn->corename;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+ cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+
+ if (!cn->corename) {
+ kfree(old_corename);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+ char *cur;
+ int need;
+ int ret;
+ va_list arg;
+
+ va_start(arg, fmt);
+ need = vsnprintf(NULL, 0, fmt, arg);
+ va_end(arg);
+
+ if (likely(need < cn->size - cn->used - 1))
+ goto out_printf;
+
+ ret = expand_corename(cn);
+ if (ret)
+ goto expand_fail;
+
+out_printf:
+ cur = cn->corename + cn->used;
+ va_start(arg, fmt);
+ vsnprintf(cur, need + 1, fmt, arg);
+ va_end(arg);
+ cn->used += need;
+ return 0;
+
+expand_fail:
+ return ret;
+}
+
+static void cn_escape(char *str)
+{
+ for (; *str; str++)
+ if (*str == '/')
+ *str = '!';
+}
+
+static int cn_print_exe_file(struct core_name *cn)
+{
+ struct file *exe_file;
+ char *pathbuf, *path;
+ int ret;
+
+ exe_file = get_mm_exe_file(current->mm);
+ if (!exe_file) {
+ char *commstart = cn->corename + cn->used;
+ ret = cn_printf(cn, "%s (path unknown)", current->comm);
+ cn_escape(commstart);
+ return ret;
+ }
+
+ pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+ if (!pathbuf) {
+ ret = -ENOMEM;
+ goto put_exe_file;
+ }
+
+ path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ goto free_buf;
+ }
+
+ cn_escape(path);
+
+ ret = cn_printf(cn, "%s", path);
+
+free_buf:
+ kfree(pathbuf);
+put_exe_file:
+ fput(exe_file);
+ return ret;
+}
+
+/* format_corename will inspect the pattern parameter, and output a
+ * name into corename, which must have space for at least
+ * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
+ */
+static int format_corename(struct core_name *cn, long signr)
+{
+ const struct cred *cred = current_cred();
+ const char *pat_ptr = core_pattern;
+ int ispipe = (*pat_ptr == '|');
+ int pid_in_pattern = 0;
+ int err = 0;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+ cn->corename = kmalloc(cn->size, GFP_KERNEL);
+ cn->used = 0;
+
+ if (!cn->corename)
+ return -ENOMEM;
+
+ /* Repeat as long as we have more pattern to process and more output
+ space */
+ while (*pat_ptr) {
+ if (*pat_ptr != '%') {
+ if (*pat_ptr == 0)
+ goto out;
+ err = cn_printf(cn, "%c", *pat_ptr++);
+ } else {
+ switch (*++pat_ptr) {
+ /* single % at the end, drop that */
+ case 0:
+ goto out;
+ /* Double percent, output one percent */
+ case '%':
+ err = cn_printf(cn, "%c", '%');
+ break;
+ /* pid */
+ case 'p':
+ pid_in_pattern = 1;
+ err = cn_printf(cn, "%d",
+ task_tgid_vnr(current));
+ break;
+ /* uid */
+ case 'u':
+ err = cn_printf(cn, "%d", cred->uid);
+ break;
+ /* gid */
+ case 'g':
+ err = cn_printf(cn, "%d", cred->gid);
+ break;
+ /* signal that caused the coredump */
+ case 's':
+ err = cn_printf(cn, "%ld", signr);
+ break;
+ /* UNIX time of coredump */
+ case 't': {
+ struct timeval tv;
+ do_gettimeofday(&tv);
+ err = cn_printf(cn, "%lu", tv.tv_sec);
+ break;
+ }
+ /* hostname */
+ case 'h': {
+ char *namestart = cn->corename + cn->used;
+ down_read(&uts_sem);
+ err = cn_printf(cn, "%s",
+ utsname()->nodename);
+ up_read(&uts_sem);
+ cn_escape(namestart);
+ break;
+ }
+ /* executable */
+ case 'e': {
+ char *commstart = cn->corename + cn->used;
+ err = cn_printf(cn, "%s", current->comm);
+ cn_escape(commstart);
+ break;
+ }
+ case 'E':
+ err = cn_print_exe_file(cn);
+ break;
+ /* core limit size */
+ case 'c':
+ err = cn_printf(cn, "%lu",
+ rlimit(RLIMIT_CORE));
+ break;
+ default:
+ break;
+ }
+ ++pat_ptr;
+ }
+
+ if (err)
+ return err;
+ }
+
+ /* Backward compatibility with core_uses_pid:
+ *
+ * If core_pattern does not include a %p (as is the default)
+ * and core_uses_pid is set, then .%pid will be appended to
+ * the filename. Do not do this for piped commands. */
+ if (!ispipe && !pid_in_pattern && core_uses_pid) {
+ err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+ if (err)
+ return err;
+ }
+out:
+ return ispipe;
+}
+
+static int zap_process(struct task_struct *start, int exit_code)
+{
+ struct task_struct *t;
+ int nr = 0;
+
+ start->signal->flags = SIGNAL_GROUP_EXIT;
+ start->signal->group_exit_code = exit_code;
+ start->signal->group_stop_count = 0;
+
+ t = start;
+ do {
+ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
+ if (t != current && t->mm) {
+ sigaddset(&t->pending.signal, SIGKILL);
+ signal_wake_up(t, 1);
+ nr++;
+ }
+ } while_each_thread(start, t);
+
+ return nr;
+}
+
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+ struct core_state *core_state, int exit_code)
+{
+ struct task_struct *g, *p;
+ unsigned long flags;
+ int nr = -EAGAIN;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ if (!signal_group_exit(tsk->signal)) {
+ mm->core_state = core_state;
+ nr = zap_process(tsk, exit_code);
+ }
+ spin_unlock_irq(&tsk->sighand->siglock);
+ if (unlikely(nr < 0))
+ return nr;
+
+ if (atomic_read(&mm->mm_users) == nr + 1)
+ goto done;
+ /*
+ * We should find and kill all tasks which use this mm, and we should
+ * count them correctly into ->nr_threads. We don't take tasklist
+ * lock, but this is safe wrt:
+ *
+ * fork:
+ * None of sub-threads can fork after zap_process(leader). All
+ * processes which were created before this point should be
+ * visible to zap_threads() because copy_process() adds the new
+ * process to the tail of init_task.tasks list, and lock/unlock
+ * of ->siglock provides a memory barrier.
+ *
+ * do_exit:
+ * The caller holds mm->mmap_sem. This means that the task which
+ * uses this mm can't pass exit_mm(), so it can't exit or clear
+ * its ->mm.
+ *
+ * de_thread:
+ * It does list_replace_rcu(&leader->tasks, &current->tasks),
+ * we must see either old or new leader, this does not matter.
+ * However, it can change p->sighand, so lock_task_sighand(p)
+ * must be used. Since p->mm != NULL and we hold ->mmap_sem
+ * it can't fail.
+ *
+ * Note also that "g" can be the old leader with ->mm == NULL
+ * and already unhashed and thus removed from ->thread_group.
+ * This is OK, __unhash_process()->list_del_rcu() does not
+ * clear the ->next pointer, we will find the new leader via
+ * next_thread().
+ */
+ rcu_read_lock();
+ for_each_process(g) {
+ if (g == tsk->group_leader)
+ continue;
+ if (g->flags & PF_KTHREAD)
+ continue;
+ p = g;
+ do {
+ if (p->mm) {
+ if (unlikely(p->mm == mm)) {
+ lock_task_sighand(p, &flags);
+ nr += zap_process(p, exit_code);
+ unlock_task_sighand(p, &flags);
+ }
+ break;
+ }
+ } while_each_thread(g, p);
+ }
+ rcu_read_unlock();
+done:
+ atomic_set(&core_state->nr_threads, nr);
+ return nr;
+}
+
+static int coredump_wait(int exit_code, struct core_state *core_state)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+ int core_waiters = -EBUSY;
+
+ init_completion(&core_state->startup);
+ core_state->dumper.task = tsk;
+ core_state->dumper.next = NULL;
+
+ down_write(&mm->mmap_sem);
+ if (!mm->core_state)
+ core_waiters = zap_threads(tsk, mm, core_state, exit_code);
+ up_write(&mm->mmap_sem);
+
+ if (core_waiters > 0) {
+ struct core_thread *ptr;
+
+ wait_for_completion(&core_state->startup);
+ /*
+ * Wait for all the threads to become inactive, so that
+ * all the thread context (extended register state, like
+ * fpu etc) gets copied to the memory.
+ */
+ ptr = core_state->dumper.next;
+ while (ptr != NULL) {
+ wait_task_inactive(ptr->task, 0);
+ ptr = ptr->next;
+ }
+ }
+
+ return core_waiters;
+}
+
+static void coredump_finish(struct mm_struct *mm)
+{
+ struct core_thread *curr, *next;
+ struct task_struct *task;
+
+ next = mm->core_state->dumper.next;
+ while ((curr = next) != NULL) {
+ next = curr->next;
+ task = curr->task;
+ /*
+ * see exit_mm(), curr->task must not see
+ * ->task == NULL before we read ->next.
+ */
+ smp_mb();
+ curr->task = NULL;
+ wake_up_process(task);
+ }
+
+ mm->core_state = NULL;
+}
+
+static void wait_for_dump_helpers(struct file *file)
+{
+ struct pipe_inode_info *pipe;
+
+ pipe = file->f_path.dentry->d_inode->i_pipe;
+
+ pipe_lock(pipe);
+ pipe->readers++;
+ pipe->writers--;
+
+ while ((pipe->readers > 1) && (!signal_pending(current))) {
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_wait(pipe);
+ }
+
+ pipe->readers--;
+ pipe->writers++;
+ pipe_unlock(pipe);
+
+}
+
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace. Specifically
+ * it sets up a pipe and installs it as fd 0 (stdin)
+ * for the process. Returns 0 on success, or
+ * PTR_ERR on failure.
+ * Note that it also sets the core limit to 1. This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+ struct file *files[2];
+ struct coredump_params *cp = (struct coredump_params *)info->data;
+ int err = create_pipe_files(files, 0);
+ if (err)
+ return err;
+
+ cp->file = files[1];
+
+ replace_fd(0, files[0], 0);
+ /* and disallow core files too */
+ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+
+ return 0;
+}
+
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+{
+ struct core_state core_state;
+ struct core_name cn;
+ struct mm_struct *mm = current->mm;
+ struct linux_binfmt * binfmt;
+ const struct cred *old_cred;
+ struct cred *cred;
+ int retval = 0;
+ int flag = 0;
+ int ispipe;
+ struct files_struct *displaced;
+ bool need_nonrelative = false;
+ static atomic_t core_dump_count = ATOMIC_INIT(0);
+ struct coredump_params cprm = {
+ .signr = signr,
+ .regs = regs,
+ .limit = rlimit(RLIMIT_CORE),
+ /*
+ * We must use the same mm->flags while dumping core to avoid
+ * inconsistency of bit flags, since this flag is not protected
+ * by any locks.
+ */
+ .mm_flags = mm->flags,
+ };
+
+ audit_core_dumps(signr);
+
+ binfmt = mm->binfmt;
+ if (!binfmt || !binfmt->core_dump)
+ goto fail;
+ if (!__get_dumpable(cprm.mm_flags))
+ goto fail;
+
+ cred = prepare_creds();
+ if (!cred)
+ goto fail;
+ /*
+ * We cannot trust fsuid as being the "true" uid of the process
+ * nor do we know its entire history. We only know it was tainted
+ * so we dump it as root in mode 2, and only into a controlled
+ * environment (pipe handler or fully qualified path).
+ */
+ if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+ /* Setuid core dump mode */
+ flag = O_EXCL; /* Stop rewrite attacks */
+ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
+ need_nonrelative = true;
+ }
+
+ retval = coredump_wait(exit_code, &core_state);
+ if (retval < 0)
+ goto fail_creds;
+
+ old_cred = override_creds(cred);
+
+ /*
+ * Clear any false indication of pending signals that might
+ * be seen by the filesystem code called to write the core file.
+ */
+ clear_thread_flag(TIF_SIGPENDING);
+
+ ispipe = format_corename(&cn, signr);
+
+ if (ispipe) {
+ int dump_count;
+ char **helper_argv;
+
+ if (ispipe < 0) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
+
+ if (cprm.limit == 1) {
+ /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+ *
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * cprm.limit of 1 here as a speacial value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things.
+ *
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ printk(KERN_WARNING
+ "Process %d(%s) has RLIMIT_CORE set to 1\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_unlock;
+ }
+ cprm.limit = RLIM_INFINITY;
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_dropcount;
+ }
+
+ helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+ if (!helper_argv) {
+ printk(KERN_WARNING "%s failed to allocate memory\n",
+ __func__);
+ goto fail_dropcount;
+ }
+
+ retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+ NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+ NULL, &cprm);
+ argv_free(helper_argv);
+ if (retval) {
+ printk(KERN_INFO "Core dump to %s pipe failed\n",
+ cn.corename);
+ goto close_fail;
+ }
+ } else {
+ struct inode *inode;
+
+ if (cprm.limit < binfmt->min_coredump)
+ goto fail_unlock;
+
+ if (need_nonrelative && cn.corename[0] != '/') {
+ printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+ "to fully qualified path!\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_unlock;
+ }
+
+ cprm.file = filp_open(cn.corename,
+ O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ 0600);
+ if (IS_ERR(cprm.file))
+ goto fail_unlock;
+
+ inode = cprm.file->f_path.dentry->d_inode;
+ if (inode->i_nlink > 1)
+ goto close_fail;
+ if (d_unhashed(cprm.file->f_path.dentry))
+ goto close_fail;
+ /*
+ * AK: actually i see no reason to not allow this for named
+ * pipes etc, but keep the previous behaviour for now.
+ */
+ if (!S_ISREG(inode->i_mode))
+ goto close_fail;
+ /*
+ * Dont allow local users get cute and trick others to coredump
+ * into their pre-created files.
+ */
+ if (!uid_eq(inode->i_uid, current_fsuid()))
+ goto close_fail;
+ if (!cprm.file->f_op || !cprm.file->f_op->write)
+ goto close_fail;
+ if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+ goto close_fail;
+ }
+
+ /* get us an unshared descriptor table; almost always a no-op */
+ retval = unshare_files(&displaced);
+ if (retval)
+ goto close_fail;
+ if (displaced)
+ put_files_struct(displaced);
+ retval = binfmt->core_dump(&cprm);
+ if (retval)
+ current->signal->group_exit_code |= 0x80;
+
+ if (ispipe && core_pipe_limit)
+ wait_for_dump_helpers(cprm.file);
+close_fail:
+ if (cprm.file)
+ filp_close(cprm.file, NULL);
+fail_dropcount:
+ if (ispipe)
+ atomic_dec(&core_dump_count);
+fail_unlock:
+ kfree(cn.corename);
+fail_corename:
+ coredump_finish(mm);
+ revert_creds(old_cred);
+fail_creds:
+ put_cred(cred);
+fail:
+ return;
+}
+
+/*
+ * Core dumping helper functions. These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+ return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+EXPORT_SYMBOL(dump_write);
+
+int dump_seek(struct file *file, loff_t off)
+{
+ int ret = 1;
+
+ if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+ if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+ return 0;
+ } else {
+ char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+ if (!buf)
+ return 0;
+ while (off > 0) {
+ unsigned long n = off;
+
+ if (n > PAGE_SIZE)
+ n = PAGE_SIZE;
+ if (!dump_write(file, buf, n)) {
+ ret = 0;
+ break;
+ }
+ off -= n;
+ }
+ free_page((unsigned long)buf);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(dump_seek);
diff --git a/fs/dcache.c b/fs/dcache.c
index 693f95bf1ca..3a463d0c4fe 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2113,7 +2113,7 @@ again:
inode = dentry->d_inode;
isdir = S_ISDIR(inode->i_mode);
if (dentry->d_count == 1) {
- if (inode && !spin_trylock(&inode->i_lock)) {
+ if (!spin_trylock(&inode->i_lock)) {
spin_unlock(&dentry->d_lock);
cpu_relax();
goto again;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 24bb043e50d..4e0886c9e5c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -711,6 +711,12 @@ static void ecryptfs_free_kmem_caches(void)
{
int i;
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+
for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
struct ecryptfs_cache_info *info;
diff --git a/fs/efs/super.c b/fs/efs/super.c
index e755ec746c6..2002431ef9a 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -96,6 +96,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(efs_inode_cachep);
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index eedec84c180..cd96649bfe6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1810,7 +1810,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
int, maxevents, int, timeout)
{
int error;
- struct file *file;
+ struct fd f;
struct eventpoll *ep;
/* The maximum number of event must be greater than zero */
@@ -1818,38 +1818,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
return -EINVAL;
/* Verify that the area passed by the user is writeable */
- if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) {
- error = -EFAULT;
- goto error_return;
- }
+ if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
+ return -EFAULT;
/* Get the "struct file *" for the eventpoll file */
- error = -EBADF;
- file = fget(epfd);
- if (!file)
- goto error_return;
+ f = fdget(epfd);
+ if (!f.file)
+ return -EBADF;
/*
* We have to check that the file structure underneath the fd
* the user passed to us _is_ an eventpoll file.
*/
error = -EINVAL;
- if (!is_file_epoll(file))
+ if (!is_file_epoll(f.file))
goto error_fput;
/*
* At this point it is safe to assume that the "private_data" contains
* our own data structure.
*/
- ep = file->private_data;
+ ep = f.file->private_data;
/* Time to fish for events ... */
error = ep_poll(ep, events, maxevents, timeout);
error_fput:
- fput(file);
-error_return:
-
+ fdput(f);
return error;
}
diff --git a/fs/exec.c b/fs/exec.c
index 574cf4de4ec..48fb26ef8a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,19 +66,8 @@
#include <trace/events/sched.h>
-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
int suid_dumpable = 0;
-struct core_name {
- char *corename;
- int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
@@ -1006,40 +995,6 @@ no_thread_group:
return 0;
}
-/*
- * These functions flushes out all traces of the currently running executable
- * so that a new one can be started
- */
-static void flush_old_files(struct files_struct * files)
-{
- long j = -1;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
- for (;;) {
- unsigned long set, i;
-
- j++;
- i = j * BITS_PER_LONG;
- fdt = files_fdtable(files);
- if (i >= fdt->max_fds)
- break;
- set = fdt->close_on_exec[j];
- if (!set)
- continue;
- fdt->close_on_exec[j] = 0;
- spin_unlock(&files->file_lock);
- for ( ; set ; i++,set >>= 1) {
- if (set & 1) {
- sys_close(i);
- }
- }
- spin_lock(&files->file_lock);
-
- }
- spin_unlock(&files->file_lock);
-}
-
char *get_task_comm(char *buf, struct task_struct *tsk)
{
/* buf must be at least sizeof(tsk->comm) in size */
@@ -1050,6 +1005,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(get_task_comm);
+/*
+ * These functions flushes out all traces of the currently running executable
+ * so that a new one can be started
+ */
+
void set_task_comm(struct task_struct *tsk, char *buf)
{
task_lock(tsk);
@@ -1171,7 +1131,7 @@ void setup_new_exec(struct linux_binprm * bprm)
current->self_exec_id++;
flush_signal_handlers(current, 0);
- flush_old_files(current->files);
+ do_close_on_exec(current->files);
}
EXPORT_SYMBOL(setup_new_exec);
@@ -1632,353 +1592,6 @@ void set_binfmt(struct linux_binfmt *new)
EXPORT_SYMBOL(set_binfmt);
-static int expand_corename(struct core_name *cn)
-{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
- if (!cn->corename) {
- kfree(old_corename);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
- char *cur;
- int need;
- int ret;
- va_list arg;
-
- va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
- va_end(arg);
-
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
-
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
-
-out_printf:
- cur = cn->corename + cn->used;
- va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
- va_end(arg);
- cn->used += need;
- return 0;
-
-expand_fail:
- return ret;
-}
-
-static void cn_escape(char *str)
-{
- for (; *str; str++)
- if (*str == '/')
- *str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
- struct file *exe_file;
- char *pathbuf, *path;
- int ret;
-
- exe_file = get_mm_exe_file(current->mm);
- if (!exe_file) {
- char *commstart = cn->corename + cn->used;
- ret = cn_printf(cn, "%s (path unknown)", current->comm);
- cn_escape(commstart);
- return ret;
- }
-
- pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto put_exe_file;
- }
-
- path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
- if (IS_ERR(path)) {
- ret = PTR_ERR(path);
- goto free_buf;
- }
-
- cn_escape(path);
-
- ret = cn_printf(cn, "%s", path);
-
-free_buf:
- kfree(pathbuf);
-put_exe_file:
- fput(exe_file);
- return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
- const struct cred *cred = current_cred();
- const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
- int pid_in_pattern = 0;
- int err = 0;
-
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
- cn->used = 0;
-
- if (!cn->corename)
- return -ENOMEM;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
- err = cn_printf(cn, "%c", *pat_ptr++);
- } else {
- switch (*++pat_ptr) {
- /* single % at the end, drop that */
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- err = cn_printf(cn, "%c", '%');
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- err = cn_printf(cn, "%d",
- task_tgid_vnr(current));
- break;
- /* uid */
- case 'u':
- err = cn_printf(cn, "%d", cred->uid);
- break;
- /* gid */
- case 'g':
- err = cn_printf(cn, "%d", cred->gid);
- break;
- /* signal that caused the coredump */
- case 's':
- err = cn_printf(cn, "%ld", signr);
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
- break;
- }
- /* hostname */
- case 'h': {
- char *namestart = cn->corename + cn->used;
- down_read(&uts_sem);
- err = cn_printf(cn, "%s",
- utsname()->nodename);
- up_read(&uts_sem);
- cn_escape(namestart);
- break;
- }
- /* executable */
- case 'e': {
- char *commstart = cn->corename + cn->used;
- err = cn_printf(cn, "%s", current->comm);
- cn_escape(commstart);
- break;
- }
- case 'E':
- err = cn_print_exe_file(cn);
- break;
- /* core limit size */
- case 'c':
- err = cn_printf(cn, "%lu",
- rlimit(RLIMIT_CORE));
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
-
- if (err)
- return err;
- }
-
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
-out:
- return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
- struct task_struct *t;
- int nr = 0;
-
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
- t = start;
- do {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- if (t != current && t->mm) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
- nr++;
- }
- } while_each_thread(start, t);
-
- return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- int nr = -EAGAIN;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- mm->core_state = core_state;
- nr = zap_process(tsk, exit_code);
- }
- spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(nr < 0))
- return nr;
-
- if (atomic_read(&mm->mm_users) == nr + 1)
- goto done;
- /*
- * We should find and kill all tasks which use this mm, and we should
- * count them correctly into ->nr_threads. We don't take tasklist
- * lock, but this is safe wrt:
- *
- * fork:
- * None of sub-threads can fork after zap_process(leader). All
- * processes which were created before this point should be
- * visible to zap_threads() because copy_process() adds the new
- * process to the tail of init_task.tasks list, and lock/unlock
- * of ->siglock provides a memory barrier.
- *
- * do_exit:
- * The caller holds mm->mmap_sem. This means that the task which
- * uses this mm can't pass exit_mm(), so it can't exit or clear
- * its ->mm.
- *
- * de_thread:
- * It does list_replace_rcu(&leader->tasks, &current->tasks),
- * we must see either old or new leader, this does not matter.
- * However, it can change p->sighand, so lock_task_sighand(p)
- * must be used. Since p->mm != NULL and we hold ->mmap_sem
- * it can't fail.
- *
- * Note also that "g" can be the old leader with ->mm == NULL
- * and already unhashed and thus removed from ->thread_group.
- * This is OK, __unhash_process()->list_del_rcu() does not
- * clear the ->next pointer, we will find the new leader via
- * next_thread().
- */
- rcu_read_lock();
- for_each_process(g) {
- if (g == tsk->group_leader)
- continue;
- if (g->flags & PF_KTHREAD)
- continue;
- p = g;
- do {
- if (p->mm) {
- if (unlikely(p->mm == mm)) {
- lock_task_sighand(p, &flags);
- nr += zap_process(p, exit_code);
- unlock_task_sighand(p, &flags);
- }
- break;
- }
- } while_each_thread(g, p);
- }
- rcu_read_unlock();
-done:
- atomic_set(&core_state->nr_threads, nr);
- return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- int core_waiters = -EBUSY;
-
- init_completion(&core_state->startup);
- core_state->dumper.task = tsk;
- core_state->dumper.next = NULL;
-
- down_write(&mm->mmap_sem);
- if (!mm->core_state)
- core_waiters = zap_threads(tsk, mm, core_state, exit_code);
- up_write(&mm->mmap_sem);
-
- if (core_waiters > 0) {
- struct core_thread *ptr;
-
- wait_for_completion(&core_state->startup);
- /*
- * Wait for all the threads to become inactive, so that
- * all the thread context (extended register state, like
- * fpu etc) gets copied to the memory.
- */
- ptr = core_state->dumper.next;
- while (ptr != NULL) {
- wait_task_inactive(ptr->task, 0);
- ptr = ptr->next;
- }
- }
-
- return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
- struct core_thread *curr, *next;
- struct task_struct *task;
-
- next = mm->core_state->dumper.next;
- while ((curr = next) != NULL) {
- next = curr->next;
- task = curr->task;
- /*
- * see exit_mm(), curr->task must not see
- * ->task == NULL before we read ->next.
- */
- smp_mb();
- curr->task = NULL;
- wake_up_process(task);
- }
-
- mm->core_state = NULL;
-}
-
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2020,7 +1633,7 @@ void set_dumpable(struct mm_struct *mm, int value)
}
}
-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
{
int ret;
@@ -2032,290 +1645,3 @@ int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
}
-
-static void wait_for_dump_helpers(struct file *file)
-{
- struct pipe_inode_info *pipe;
-
- pipe = file->f_path.dentry->d_inode->i_pipe;
-
- pipe_lock(pipe);
- pipe->readers++;
- pipe->writers--;
-
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
-
- pipe->readers--;
- pipe->writers++;
- pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace. Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process. Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1. This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
- struct file *files[2];
- struct fdtable *fdt;
- struct coredump_params *cp = (struct coredump_params *)info->data;
- struct files_struct *cf = current->files;
- int err = create_pipe_files(files, 0);
- if (err)
- return err;
-
- cp->file = files[1];
-
- sys_close(0);
- fd_install(0, files[0]);
- spin_lock(&cf->file_lock);
- fdt = files_fdtable(cf);
- __set_open_fd(0, fdt);
- __clear_close_on_exec(0, fdt);
- spin_unlock(&cf->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
- return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
- struct core_state core_state;
- struct core_name cn;
- struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
- const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- int flag = 0;
- int ispipe;
- bool need_nonrelative = false;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
- struct coredump_params cprm = {
- .signr = signr,
- .regs = regs,
- .limit = rlimit(RLIMIT_CORE),
- /*
- * We must use the same mm->flags while dumping core to avoid
- * inconsistency of bit flags, since this flag is not protected
- * by any locks.
- */
- .mm_flags = mm->flags,
- };
-
- audit_core_dumps(signr);
-
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
-
- cred = prepare_creds();
- if (!cred)
- goto fail;
- /*
- * We cannot trust fsuid as being the "true" uid of the process
- * nor do we know its entire history. We only know it was tainted
- * so we dump it as root in mode 2, and only into a controlled
- * environment (pipe handler or fully qualified path).
- */
- if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
- /* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
- cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
- }
-
- retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
- goto fail_creds;
-
- old_cred = override_creds(cred);
-
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
- ispipe = format_corename(&cn, signr);
-
- if (ispipe) {
- int dump_count;
- char **helper_argv;
-
- if (ispipe < 0) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
- if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_dropcount;
- }
-
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
- if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
- goto fail_dropcount;
- }
-
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
- argv_free(helper_argv);
- if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
- cn.corename);
- goto close_fail;
- }
- } else {
- struct inode *inode;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- if (need_nonrelative && cn.corename[0] != '/') {
- printk(KERN_WARNING "Pid %d(%s) can only dump core "\
- "to fully qualified path!\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_unlock;
- }
-
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = cprm.file->f_path.dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
- */
- if (!uid_eq(inode->i_uid, current_fsuid()))
- goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
- goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
- goto close_fail;
- }
-
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
-
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
-close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (ispipe)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(cn.corename);
-fail_corename:
- coredump_finish(mm);
- revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
- return;
-}
-
-/*
- * Core dumping helper functions. These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
- int ret = 1;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
- return 0;
- } else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
- }
- free_page((unsigned long)buf);
- }
- return ret;
-}
-EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index dde41a75c7c..59e3bbfac0b 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
*/
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(exofs_inode_cachep);
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index af74d9e27b7..6c205d0c565 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ext2_inode_cachep);
}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 09b8455bd7e..bd29894c8fb 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -532,6 +532,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ext3_inode_cachep);
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7f7dad78760..5439d6a56e9 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -233,7 +233,7 @@ group_extend_out:
case EXT4_IOC_MOVE_EXT: {
struct move_extent me;
- struct file *donor_filp;
+ struct fd donor;
int err;
if (!(filp->f_mode & FMODE_READ) ||
@@ -245,11 +245,11 @@ group_extend_out:
return -EFAULT;
me.moved_len = 0;
- donor_filp = fget(me.donor_fd);
- if (!donor_filp)
+ donor = fdget(me.donor_fd);
+ if (!donor.file)
return -EBADF;
- if (!(donor_filp->f_mode & FMODE_WRITE)) {
+ if (!(donor.file->f_mode & FMODE_WRITE)) {
err = -EBADF;
goto mext_out;
}
@@ -258,14 +258,15 @@ group_extend_out:
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"Online defrag not supported with bigalloc");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto mext_out;
}
err = mnt_want_write_file(filp);
if (err)
goto mext_out;
- err = ext4_move_extents(filp, donor_filp, me.orig_start,
+ err = ext4_move_extents(filp, donor.file, me.orig_start,
me.donor_start, me.len, &me.moved_len);
mnt_drop_write_file(filp);
@@ -273,7 +274,7 @@ group_extend_out:
&me, sizeof(me)))
err = -EFAULT;
mext_out:
- fput(donor_filp);
+ fdput(donor);
return err;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1f15cc836fb..69c55d4e462 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1019,6 +1019,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ext4_inode_cachep);
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 47d9eb0be88..4e5a6ac54eb 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void)
static void __exit fat_destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(fat_inode_cachep);
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 887b5ba8c9b..8f704291d4e 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -26,124 +26,6 @@
#include <asm/siginfo.h>
#include <asm/uaccess.h>
-void set_close_on_exec(unsigned int fd, int flag)
-{
- struct files_struct *files = current->files;
- struct fdtable *fdt;
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- if (flag)
- __set_close_on_exec(fd, fdt);
- else
- __clear_close_on_exec(fd, fdt);
- spin_unlock(&files->file_lock);
-}
-
-static bool get_close_on_exec(unsigned int fd)
-{
- struct files_struct *files = current->files;
- struct fdtable *fdt;
- bool res;
- rcu_read_lock();
- fdt = files_fdtable(files);
- res = close_on_exec(fd, fdt);
- rcu_read_unlock();
- return res;
-}
-
-SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
-{
- int err = -EBADF;
- struct file * file, *tofree;
- struct files_struct * files = current->files;
- struct fdtable *fdt;
-
- if ((flags & ~O_CLOEXEC) != 0)
- return -EINVAL;
-
- if (unlikely(oldfd == newfd))
- return -EINVAL;
-
- spin_lock(&files->file_lock);
- err = expand_files(files, newfd);
- file = fcheck(oldfd);
- if (unlikely(!file))
- goto Ebadf;
- if (unlikely(err < 0)) {
- if (err == -EMFILE)
- goto Ebadf;
- goto out_unlock;
- }
- /*
- * We need to detect attempts to do dup2() over allocated but still
- * not finished descriptor. NB: OpenBSD avoids that at the price of
- * extra work in their equivalent of fget() - they insert struct
- * file immediately after grabbing descriptor, mark it larval if
- * more work (e.g. actual opening) is needed and make sure that
- * fget() treats larval files as absent. Potentially interesting,
- * but while extra work in fget() is trivial, locking implications
- * and amount of surgery on open()-related paths in VFS are not.
- * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
- * deadlocks in rather amusing ways, AFAICS. All of that is out of
- * scope of POSIX or SUS, since neither considers shared descriptor
- * tables and this condition does not arise without those.
- */
- err = -EBUSY;
- fdt = files_fdtable(files);
- tofree = fdt->fd[newfd];
- if (!tofree && fd_is_open(newfd, fdt))
- goto out_unlock;
- get_file(file);
- rcu_assign_pointer(fdt->fd[newfd], file);
- __set_open_fd(newfd, fdt);
- if (flags & O_CLOEXEC)
- __set_close_on_exec(newfd, fdt);
- else
- __clear_close_on_exec(newfd, fdt);
- spin_unlock(&files->file_lock);
-
- if (tofree)
- filp_close(tofree, files);
-
- return newfd;
-
-Ebadf:
- err = -EBADF;
-out_unlock:
- spin_unlock(&files->file_lock);
- return err;
-}
-
-SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
-{
- if (unlikely(newfd == oldfd)) { /* corner case */
- struct files_struct *files = current->files;
- int retval = oldfd;
-
- rcu_read_lock();
- if (!fcheck_files(files, oldfd))
- retval = -EBADF;
- rcu_read_unlock();
- return retval;
- }
- return sys_dup3(oldfd, newfd, 0);
-}
-
-SYSCALL_DEFINE1(dup, unsigned int, fildes)
-{
- int ret = -EBADF;
- struct file *file = fget_raw(fildes);
-
- if (file) {
- ret = get_unused_fd();
- if (ret >= 0)
- fd_install(ret, file);
- else
- fput(file);
- }
- return ret;
-}
-
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
static int setfl(int fd, struct file * filp, unsigned long arg)
@@ -267,7 +149,7 @@ pid_t f_getown(struct file *filp)
static int f_setown_ex(struct file *filp, unsigned long arg)
{
- struct f_owner_ex * __user owner_p = (void * __user)arg;
+ struct f_owner_ex __user *owner_p = (void __user *)arg;
struct f_owner_ex owner;
struct pid *pid;
int type;
@@ -307,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
static int f_getown_ex(struct file *filp, unsigned long arg)
{
- struct f_owner_ex * __user owner_p = (void * __user)arg;
+ struct f_owner_ex __user *owner_p = (void __user *)arg;
struct f_owner_ex owner;
int ret = 0;
@@ -345,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
struct user_namespace *user_ns = current_user_ns();
- uid_t * __user dst = (void * __user)arg;
+ uid_t __user *dst = (void __user *)arg;
uid_t src[2];
int err;
@@ -373,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
switch (cmd) {
case F_DUPFD:
+ err = f_dupfd(arg, filp, 0);
+ break;
case F_DUPFD_CLOEXEC:
- if (arg >= rlimit(RLIMIT_NOFILE))
- break;
- err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
- if (err >= 0) {
- get_file(filp);
- fd_install(err, filp);
- }
+ err = f_dupfd(arg, filp, FD_CLOEXEC);
break;
case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
@@ -470,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd)
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
- struct file *filp;
- int fput_needed;
+ struct fd f = fdget_raw(fd);
long err = -EBADF;
- filp = fget_raw_light(fd, &fput_needed);
- if (!filp)
+ if (!f.file)
goto out;
- if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (unlikely(f.file->f_mode & FMODE_PATH)) {
if (!check_fcntl_cmd(cmd))
goto out1;
}
- err = security_file_fcntl(filp, cmd, arg);
+ err = security_file_fcntl(f.file, cmd, arg);
if (!err)
- err = do_fcntl(fd, cmd, arg, filp);
+ err = do_fcntl(fd, cmd, arg, f.file);
out1:
- fput_light(filp, fput_needed);
+ fdput(f);
out:
return err;
}
@@ -497,38 +373,36 @@ out:
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
unsigned long, arg)
{
- struct file * filp;
+ struct fd f = fdget_raw(fd);
long err = -EBADF;
- int fput_needed;
- filp = fget_raw_light(fd, &fput_needed);
- if (!filp)
+ if (!f.file)
goto out;
- if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (unlikely(f.file->f_mode & FMODE_PATH)) {
if (!check_fcntl_cmd(cmd))
goto out1;
}
- err = security_file_fcntl(filp, cmd, arg);
+ err = security_file_fcntl(f.file, cmd, arg);
if (err)
goto out1;
switch (cmd) {
case F_GETLK64:
- err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
+ err = fcntl_getlk64(f.file, (struct flock64 __user *) arg);
break;
case F_SETLK64:
case F_SETLKW64:
- err = fcntl_setlk64(fd, filp, cmd,
+ err = fcntl_setlk64(fd, f.file, cmd,
(struct flock64 __user *) arg);
break;
default:
- err = do_fcntl(fd, cmd, arg, filp);
+ err = do_fcntl(fd, cmd, arg, f.file);
break;
}
out1:
- fput_light(filp, fput_needed);
+ fdput(f);
out:
return err;
}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index a48e4a139be..f775bfdd6e4 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
static struct vfsmount *get_vfsmount_from_fd(int fd)
{
- struct path path;
+ struct vfsmount *mnt;
if (fd == AT_FDCWD) {
struct fs_struct *fs = current->fs;
spin_lock(&fs->lock);
- path = fs->pwd;
- mntget(path.mnt);
+ mnt = mntget(fs->pwd.mnt);
spin_unlock(&fs->lock);
} else {
- int fput_needed;
- struct file *file = fget_light(fd, &fput_needed);
- if (!file)
+ struct fd f = fdget(fd);
+ if (!f.file)
return ERR_PTR(-EBADF);
- path = file->f_path;
- mntget(path.mnt);
- fput_light(file, fput_needed);
+ mnt = mntget(f.file->f_path.mnt);
+ fdput(f);
}
- return path.mnt;
+ return mnt;
}
static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
diff --git a/fs/file.c b/fs/file.c
index ba3f6053025..0f1bda4bebf 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
* Manage the dynamic fd arrays in the process files_struct.
*/
+#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
@@ -84,22 +85,14 @@ static void free_fdtable_work(struct work_struct *work)
}
}
-void free_fdtable_rcu(struct rcu_head *rcu)
+static void free_fdtable_rcu(struct rcu_head *rcu)
{
struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
struct fdtable_defer *fddef;
BUG_ON(!fdt);
+ BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
- if (fdt->max_fds <= NR_OPEN_DEFAULT) {
- /*
- * This fdtable is embedded in the files structure and that
- * structure itself is getting destroyed.
- */
- kmem_cache_free(files_cachep,
- container_of(fdt, struct files_struct, fdtab));
- return;
- }
if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
kfree(fdt->fd);
kfree(fdt->open_fds);
@@ -229,7 +222,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
copy_fdtable(new_fdt, cur_fdt);
rcu_assign_pointer(files->fdt, new_fdt);
if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
- free_fdtable(cur_fdt);
+ call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
} else {
/* Somebody else expanded, so undo our attempt */
__free_fdtable(new_fdt);
@@ -245,19 +238,12 @@ static int expand_fdtable(struct files_struct *files, int nr)
* expanded and execution may have blocked.
* The files->file_lock should be held on entry, and will be held on exit.
*/
-int expand_files(struct files_struct *files, int nr)
+static int expand_files(struct files_struct *files, int nr)
{
struct fdtable *fdt;
fdt = files_fdtable(files);
- /*
- * N.B. For clone tasks sharing a files structure, this test
- * will limit the total number of files that can be opened.
- */
- if (nr >= rlimit(RLIMIT_NOFILE))
- return -EMFILE;
-
/* Do we need to expand? */
if (nr < fdt->max_fds)
return 0;
@@ -270,6 +256,26 @@ int expand_files(struct files_struct *files, int nr)
return expand_fdtable(files, nr);
}
+static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
+{
+ __set_bit(fd, fdt->close_on_exec);
+}
+
+static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
+{
+ __clear_bit(fd, fdt->close_on_exec);
+}
+
+static inline void __set_open_fd(int fd, struct fdtable *fdt)
+{
+ __set_bit(fd, fdt->open_fds);
+}
+
+static inline void __clear_open_fd(int fd, struct fdtable *fdt)
+{
+ __clear_bit(fd, fdt->open_fds);
+}
+
static int count_open_files(struct fdtable *fdt)
{
int size = fdt->max_fds;
@@ -395,6 +401,95 @@ out:
return NULL;
}
+static void close_files(struct files_struct * files)
+{
+ int i, j;
+ struct fdtable *fdt;
+
+ j = 0;
+
+ /*
+ * It is safe to dereference the fd table without RCU or
+ * ->file_lock because this is the last reference to the
+ * files structure. But use RCU to shut RCU-lockdep up.
+ */
+ rcu_read_lock();
+ fdt = files_fdtable(files);
+ rcu_read_unlock();
+ for (;;) {
+ unsigned long set;
+ i = j * BITS_PER_LONG;
+ if (i >= fdt->max_fds)
+ break;
+ set = fdt->open_fds[j++];
+ while (set) {
+ if (set & 1) {
+ struct file * file = xchg(&fdt->fd[i], NULL);
+ if (file) {
+ filp_close(file, files);
+ cond_resched();
+ }
+ }
+ i++;
+ set >>= 1;
+ }
+ }
+}
+
+struct files_struct *get_files_struct(struct task_struct *task)
+{
+ struct files_struct *files;
+
+ task_lock(task);
+ files = task->files;
+ if (files)
+ atomic_inc(&files->count);
+ task_unlock(task);
+
+ return files;
+}
+
+void put_files_struct(struct files_struct *files)
+{
+ struct fdtable *fdt;
+
+ if (atomic_dec_and_test(&files->count)) {
+ close_files(files);
+ /* not really needed, since nobody can see us */
+ rcu_read_lock();
+ fdt = files_fdtable(files);
+ rcu_read_unlock();
+ /* free the arrays if they are not embedded */
+ if (fdt != &files->fdtab)
+ __free_fdtable(fdt);
+ kmem_cache_free(files_cachep, files);
+ }
+}
+
+void reset_files_struct(struct files_struct *files)
+{
+ struct task_struct *tsk = current;
+ struct files_struct *old;
+
+ old = tsk->files;
+ task_lock(tsk);
+ tsk->files = files;
+ task_unlock(tsk);
+ put_files_struct(old);
+}
+
+void exit_files(struct task_struct *tsk)
+{
+ struct files_struct * files = tsk->files;
+
+ if (files) {
+ task_lock(tsk);
+ tsk->files = NULL;
+ task_unlock(tsk);
+ put_files_struct(files);
+ }
+}
+
static void __devinit fdtable_defer_list_init(int cpu)
{
struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
@@ -424,12 +519,18 @@ struct files_struct init_files = {
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
};
+void daemonize_descriptors(void)
+{
+ atomic_inc(&init_files.count);
+ reset_files_struct(&init_files);
+}
+
/*
* allocate a file descriptor, mark it busy.
*/
-int alloc_fd(unsigned start, unsigned flags)
+int __alloc_fd(struct files_struct *files,
+ unsigned start, unsigned end, unsigned flags)
{
- struct files_struct *files = current->files;
unsigned int fd;
int error;
struct fdtable *fdt;
@@ -444,6 +545,14 @@ repeat:
if (fd < fdt->max_fds)
fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
+ /*
+ * N.B. For clone tasks sharing a files structure, this test
+ * will limit the total number of files that can be opened.
+ */
+ error = -EMFILE;
+ if (fd >= end)
+ goto out;
+
error = expand_files(files, fd);
if (error < 0)
goto out;
@@ -477,8 +586,424 @@ out:
return error;
}
-int get_unused_fd(void)
+static int alloc_fd(unsigned start, unsigned flags)
+{
+ return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
+}
+
+int get_unused_fd_flags(unsigned flags)
+{
+ return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+}
+EXPORT_SYMBOL(get_unused_fd_flags);
+
+static void __put_unused_fd(struct files_struct *files, unsigned int fd)
+{
+ struct fdtable *fdt = files_fdtable(files);
+ __clear_open_fd(fd, fdt);
+ if (fd < files->next_fd)
+ files->next_fd = fd;
+}
+
+void put_unused_fd(unsigned int fd)
+{
+ struct files_struct *files = current->files;
+ spin_lock(&files->file_lock);
+ __put_unused_fd(files, fd);
+ spin_unlock(&files->file_lock);
+}
+
+EXPORT_SYMBOL(put_unused_fd);
+
+/*
+ * Install a file pointer in the fd array.
+ *
+ * The VFS is full of places where we drop the files lock between
+ * setting the open_fds bitmap and installing the file in the file
+ * array. At any such point, we are vulnerable to a dup2() race
+ * installing a file in the array before us. We need to detect this and
+ * fput() the struct file we are about to overwrite in this case.
+ *
+ * It should never happen - if we allow dup2() do it, _really_ bad things
+ * will follow.
+ *
+ * NOTE: __fd_install() variant is really, really low-level; don't
+ * use it unless you are forced to by truly lousy API shoved down
+ * your throat. 'files' *MUST* be either current->files or obtained
+ * by get_files_struct(current) done by whoever had given it to you,
+ * or really bad things will happen. Normally you want to use
+ * fd_install() instead.
+ */
+
+void __fd_install(struct files_struct *files, unsigned int fd,
+ struct file *file)
+{
+ struct fdtable *fdt;
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ BUG_ON(fdt->fd[fd] != NULL);
+ rcu_assign_pointer(fdt->fd[fd], file);
+ spin_unlock(&files->file_lock);
+}
+
+void fd_install(unsigned int fd, struct file *file)
{
- return alloc_fd(0, 0);
+ __fd_install(current->files, fd, file);
+}
+
+EXPORT_SYMBOL(fd_install);
+
+/*
+ * The same warnings as for __alloc_fd()/__fd_install() apply here...
+ */
+int __close_fd(struct files_struct *files, unsigned fd)
+{
+ struct file *file;
+ struct fdtable *fdt;
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ if (fd >= fdt->max_fds)
+ goto out_unlock;
+ file = fdt->fd[fd];
+ if (!file)
+ goto out_unlock;
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ __clear_close_on_exec(fd, fdt);
+ __put_unused_fd(files, fd);
+ spin_unlock(&files->file_lock);
+ return filp_close(file, files);
+
+out_unlock:
+ spin_unlock(&files->file_lock);
+ return -EBADF;
+}
+
+void do_close_on_exec(struct files_struct *files)
+{
+ unsigned i;
+ struct fdtable *fdt;
+
+ /* exec unshares first */
+ BUG_ON(atomic_read(&files->count) != 1);
+ spin_lock(&files->file_lock);
+ for (i = 0; ; i++) {
+ unsigned long set;
+ unsigned fd = i * BITS_PER_LONG;
+ fdt = files_fdtable(files);
+ if (fd >= fdt->max_fds)
+ break;
+ set = fdt->close_on_exec[i];
+ if (!set)
+ continue;
+ fdt->close_on_exec[i] = 0;
+ for ( ; set ; fd++, set >>= 1) {
+ struct file *file;
+ if (!(set & 1))
+ continue;
+ file = fdt->fd[fd];
+ if (!file)
+ continue;
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ __put_unused_fd(files, fd);
+ spin_unlock(&files->file_lock);
+ filp_close(file, files);
+ cond_resched();
+ spin_lock(&files->file_lock);
+ }
+
+ }
+ spin_unlock(&files->file_lock);
+}
+
+struct file *fget(unsigned int fd)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ /* File object ref couldn't be taken */
+ if (file->f_mode & FMODE_PATH ||
+ !atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ }
+ rcu_read_unlock();
+
+ return file;
+}
+
+EXPORT_SYMBOL(fget);
+
+struct file *fget_raw(unsigned int fd)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ /* File object ref couldn't be taken */
+ if (!atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ }
+ rcu_read_unlock();
+
+ return file;
+}
+
+EXPORT_SYMBOL(fget_raw);
+
+/*
+ * Lightweight file lookup - no refcnt increment if fd table isn't shared.
+ *
+ * You can use this instead of fget if you satisfy all of the following
+ * conditions:
+ * 1) You must call fput_light before exiting the syscall and returning control
+ * to userspace (i.e. you cannot remember the returned struct file * after
+ * returning to userspace).
+ * 2) You must not call filp_close on the returned struct file * in between
+ * calls to fget_light and fput_light.
+ * 3) You must not clone the current task in between the calls to fget_light
+ * and fput_light.
+ *
+ * The fput_needed flag returned by fget_light should be passed to the
+ * corresponding fput_light.
+ */
+struct file *fget_light(unsigned int fd, int *fput_needed)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ *fput_needed = 0;
+ if (atomic_read(&files->count) == 1) {
+ file = fcheck_files(files, fd);
+ if (file && (file->f_mode & FMODE_PATH))
+ file = NULL;
+ } else {
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ if (!(file->f_mode & FMODE_PATH) &&
+ atomic_long_inc_not_zero(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
+ }
+ rcu_read_unlock();
+ }
+
+ return file;
+}
+EXPORT_SYMBOL(fget_light);
+
+struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ *fput_needed = 0;
+ if (atomic_read(&files->count) == 1) {
+ file = fcheck_files(files, fd);
+ } else {
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ if (atomic_long_inc_not_zero(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
+ }
+ rcu_read_unlock();
+ }
+
+ return file;
+}
+
+void set_close_on_exec(unsigned int fd, int flag)
+{
+ struct files_struct *files = current->files;
+ struct fdtable *fdt;
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ if (flag)
+ __set_close_on_exec(fd, fdt);
+ else
+ __clear_close_on_exec(fd, fdt);
+ spin_unlock(&files->file_lock);
+}
+
+bool get_close_on_exec(unsigned int fd)
+{
+ struct files_struct *files = current->files;
+ struct fdtable *fdt;
+ bool res;
+ rcu_read_lock();
+ fdt = files_fdtable(files);
+ res = close_on_exec(fd, fdt);
+ rcu_read_unlock();
+ return res;
+}
+
+static int do_dup2(struct files_struct *files,
+ struct file *file, unsigned fd, unsigned flags)
+{
+ struct file *tofree;
+ struct fdtable *fdt;
+
+ /*
+ * We need to detect attempts to do dup2() over allocated but still
+ * not finished descriptor. NB: OpenBSD avoids that at the price of
+ * extra work in their equivalent of fget() - they insert struct
+ * file immediately after grabbing descriptor, mark it larval if
+ * more work (e.g. actual opening) is needed and make sure that
+ * fget() treats larval files as absent. Potentially interesting,
+ * but while extra work in fget() is trivial, locking implications
+ * and amount of surgery on open()-related paths in VFS are not.
+ * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+ * deadlocks in rather amusing ways, AFAICS. All of that is out of
+ * scope of POSIX or SUS, since neither considers shared descriptor
+ * tables and this condition does not arise without those.
+ */
+ fdt = files_fdtable(files);
+ tofree = fdt->fd[fd];
+ if (!tofree && fd_is_open(fd, fdt))
+ goto Ebusy;
+ get_file(file);
+ rcu_assign_pointer(fdt->fd[fd], file);
+ __set_open_fd(fd, fdt);
+ if (flags & O_CLOEXEC)
+ __set_close_on_exec(fd, fdt);
+ else
+ __clear_close_on_exec(fd, fdt);
+ spin_unlock(&files->file_lock);
+
+ if (tofree)
+ filp_close(tofree, files);
+
+ return fd;
+
+Ebusy:
+ spin_unlock(&files->file_lock);
+ return -EBUSY;
+}
+
+int replace_fd(unsigned fd, struct file *file, unsigned flags)
+{
+ int err;
+ struct files_struct *files = current->files;
+
+ if (!file)
+ return __close_fd(files, fd);
+
+ if (fd >= rlimit(RLIMIT_NOFILE))
+ return -EMFILE;
+
+ spin_lock(&files->file_lock);
+ err = expand_files(files, fd);
+ if (unlikely(err < 0))
+ goto out_unlock;
+ return do_dup2(files, file, fd, flags);
+
+out_unlock:
+ spin_unlock(&files->file_lock);
+ return err;
+}
+
+SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+{
+ int err = -EBADF;
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ if ((flags & ~O_CLOEXEC) != 0)
+ return -EINVAL;
+
+ if (newfd >= rlimit(RLIMIT_NOFILE))
+ return -EMFILE;
+
+ spin_lock(&files->file_lock);
+ err = expand_files(files, newfd);
+ file = fcheck(oldfd);
+ if (unlikely(!file))
+ goto Ebadf;
+ if (unlikely(err < 0)) {
+ if (err == -EMFILE)
+ goto Ebadf;
+ goto out_unlock;
+ }
+ return do_dup2(files, file, newfd, flags);
+
+Ebadf:
+ err = -EBADF;
+out_unlock:
+ spin_unlock(&files->file_lock);
+ return err;
+}
+
+SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
+{
+ if (unlikely(newfd == oldfd)) { /* corner case */
+ struct files_struct *files = current->files;
+ int retval = oldfd;
+
+ rcu_read_lock();
+ if (!fcheck_files(files, oldfd))
+ retval = -EBADF;
+ rcu_read_unlock();
+ return retval;
+ }
+ return sys_dup3(oldfd, newfd, 0);
+}
+
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
+{
+ int ret = -EBADF;
+ struct file *file = fget_raw(fildes);
+
+ if (file) {
+ ret = get_unused_fd();
+ if (ret >= 0)
+ fd_install(ret, file);
+ else
+ fput(file);
+ }
+ return ret;
+}
+
+int f_dupfd(unsigned int from, struct file *file, unsigned flags)
+{
+ int err;
+ if (from >= rlimit(RLIMIT_NOFILE))
+ return -EINVAL;
+ err = alloc_fd(from, flags);
+ if (err >= 0) {
+ get_file(file);
+ fd_install(err, file);
+ }
+ return err;
+}
+
+int iterate_fd(struct files_struct *files, unsigned n,
+ int (*f)(const void *, struct file *, unsigned),
+ const void *p)
+{
+ struct fdtable *fdt;
+ struct file *file;
+ int res = 0;
+ if (!files)
+ return 0;
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ while (!res && n < fdt->max_fds) {
+ file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
+ if (file)
+ res = f(p, file, n);
+ }
+ spin_unlock(&files->file_lock);
+ return res;
}
-EXPORT_SYMBOL(get_unused_fd);
+EXPORT_SYMBOL(iterate_fd);
diff --git a/fs/file_table.c b/fs/file_table.c
index 701985e4ccd..c6780163bf3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,112 +339,6 @@ void __fput_sync(struct file *file)
EXPORT_SYMBOL(fput);
-struct file *fget(unsigned int fd)
-{
- struct file *file;
- struct files_struct *files = current->files;
-
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- /* File object ref couldn't be taken */
- if (file->f_mode & FMODE_PATH ||
- !atomic_long_inc_not_zero(&file->f_count))
- file = NULL;
- }
- rcu_read_unlock();
-
- return file;
-}
-
-EXPORT_SYMBOL(fget);
-
-struct file *fget_raw(unsigned int fd)
-{
- struct file *file;
- struct files_struct *files = current->files;
-
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- /* File object ref couldn't be taken */
- if (!atomic_long_inc_not_zero(&file->f_count))
- file = NULL;
- }
- rcu_read_unlock();
-
- return file;
-}
-
-EXPORT_SYMBOL(fget_raw);
-
-/*
- * Lightweight file lookup - no refcnt increment if fd table isn't shared.
- *
- * You can use this instead of fget if you satisfy all of the following
- * conditions:
- * 1) You must call fput_light before exiting the syscall and returning control
- * to userspace (i.e. you cannot remember the returned struct file * after
- * returning to userspace).
- * 2) You must not call filp_close on the returned struct file * in between
- * calls to fget_light and fput_light.
- * 3) You must not clone the current task in between the calls to fget_light
- * and fput_light.
- *
- * The fput_needed flag returned by fget_light should be passed to the
- * corresponding fput_light.
- */
-struct file *fget_light(unsigned int fd, int *fput_needed)
-{
- struct file *file;
- struct files_struct *files = current->files;
-
- *fput_needed = 0;
- if (atomic_read(&files->count) == 1) {
- file = fcheck_files(files, fd);
- if (file && (file->f_mode & FMODE_PATH))
- file = NULL;
- } else {
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- if (!(file->f_mode & FMODE_PATH) &&
- atomic_long_inc_not_zero(&file->f_count))
- *fput_needed = 1;
- else
- /* Didn't get the reference, someone's freed */
- file = NULL;
- }
- rcu_read_unlock();
- }
-
- return file;
-}
-
-struct file *fget_raw_light(unsigned int fd, int *fput_needed)
-{
- struct file *file;
- struct files_struct *files = current->files;
-
- *fput_needed = 0;
- if (atomic_read(&files->count) == 1) {
- file = fcheck_files(files, fd);
- } else {
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- if (atomic_long_inc_not_zero(&file->f_count))
- *fput_needed = 1;
- else
- /* Didn't get the reference, someone's freed */
- file = NULL;
- }
- rcu_read_unlock();
- }
-
- return file;
-}
-
void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index d4fabd26084..fed2c8afb3a 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -279,6 +279,11 @@ static void __exit
vxfs_cleanup(void)
{
unregister_filesystem(&vxfs_fs_type);
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(vxfs_inode_cachep);
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f4246cfc8d8..8c23fa7a91e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
if (ff->reserved_req) {
req = ff->reserved_req;
ff->reserved_req = NULL;
- get_file(file);
- req->stolen_file = file;
+ req->stolen_file = get_file(file);
}
spin_unlock(&fc->lock);
} while (!req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fca222dabe3..f0eda124cff 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void)
{
unregister_filesystem(&fuse_fs_type);
unregister_fuseblk();
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(fuse_inode_cachep);
}
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0b63d135a09..e93ddaadfd1 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -492,6 +492,12 @@ static int __init init_hfs_fs(void)
static void __exit exit_hfs_fs(void)
{
unregister_filesystem(&hfs_fs_type);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(hfs_inode_cachep);
}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index fdafb2d7165..811a84d2d96 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void)
static void __exit exit_hfsplus_fs(void)
{
unregister_filesystem(&hfsplus_fs_type);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(hfsplus_inode_cachep);
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a152783602d..bc28bf077a6 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -210,6 +210,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(hpfs_inode_cachep);
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6e572c4fbf6..9460120a517 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1048,6 +1048,11 @@ static int __init init_hugetlbfs_fs(void)
static void __exit exit_hugetlbfs_fs(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(hugetlbfs_inode_cachep);
kern_unmount(hugetlbfs_vfsmount);
unregister_filesystem(&hugetlbfs_fs_type);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 29167bebe87..3bdad6d1f26 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
- struct file *filp;
- int error = -EBADF;
- int fput_needed;
-
- filp = fget_light(fd, &fput_needed);
- if (!filp)
- goto out;
-
- error = security_file_ioctl(filp, cmd, arg);
- if (error)
- goto out_fput;
-
- error = do_vfs_ioctl(filp, fd, cmd, arg);
- out_fput:
- fput_light(filp, fput_needed);
- out:
+ int error;
+ struct fd f = fdget(fd);
+
+ if (!f.file)
+ return -EBADF;
+ error = security_file_ioctl(f.file, cmd, arg);
+ if (!error)
+ error = do_vfs_ioctl(f.file, fd, cmd, arg);
+ fdput(f);
return error;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a7d8e6cc5e0..67ce52507d7 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -115,6 +115,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(isofs_inode_cachep);
}
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 61ea41389f9..ff487954cd9 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void)
unregister_filesystem(&jffs2_fs_type);
jffs2_destroy_slab_caches();
jffs2_compressors_exit();
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(jffs2_inode_cachep);
}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 706692f2403..efdf8835dfc 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -911,6 +911,12 @@ static void __exit exit_jfs_fs(void)
jfs_proc_clean();
#endif
unregister_filesystem(&jfs_fs_type);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(jfs_inode_cachep);
}
diff --git a/fs/locks.c b/fs/locks.c
index 7e81bfc7516..abc7dc6c490 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait);
*/
SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
{
- struct file *filp;
- int fput_needed;
+ struct fd f = fdget(fd);
struct file_lock *lock;
int can_sleep, unlock;
int error;
error = -EBADF;
- filp = fget_light(fd, &fput_needed);
- if (!filp)
+ if (!f.file)
goto out;
can_sleep = !(cmd & LOCK_NB);
@@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
unlock = (cmd == LOCK_UN);
if (!unlock && !(cmd & LOCK_MAND) &&
- !(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
+ !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
goto out_putf;
- error = flock_make_lock(filp, &lock, cmd);
+ error = flock_make_lock(f.file, &lock, cmd);
if (error)
goto out_putf;
if (can_sleep)
lock->fl_flags |= FL_SLEEP;
- error = security_file_lock(filp, lock->fl_type);
+ error = security_file_lock(f.file, lock->fl_type);
if (error)
goto out_free;
- if (filp->f_op && filp->f_op->flock)
- error = filp->f_op->flock(filp,
+ if (f.file->f_op && f.file->f_op->flock)
+ error = f.file->f_op->flock(f.file,
(can_sleep) ? F_SETLKW : F_SETLK,
lock);
else
- error = flock_lock_file_wait(filp, lock);
+ error = flock_lock_file_wait(f.file, lock);
out_free:
locks_free_lock(lock);
out_putf:
- fput_light(filp, fput_needed);
+ fdput(f);
out:
return error;
}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index bda39085309..adb90116d36 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -417,5 +417,10 @@ int logfs_init_inode_cache(void)
void logfs_destroy_inode_cache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(logfs_inode_cache);
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d0e42c67892..4fc5f8ab1c4 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -100,6 +100,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(minix_inode_cachep);
}
diff --git a/fs/namei.c b/fs/namei.c
index a856e7f7b6e..aa30d19e9ed 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags,
struct nameidata *nd, struct file **fp)
{
int retval = 0;
- int fput_needed;
- struct file *file;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED;
@@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags,
get_fs_pwd(current->fs, &nd->path);
}
} else {
+ struct fd f = fdget_raw(dfd);
struct dentry *dentry;
- file = fget_raw_light(dfd, &fput_needed);
- retval = -EBADF;
- if (!file)
- goto out_fail;
+ if (!f.file)
+ return -EBADF;
- dentry = file->f_path.dentry;
+ dentry = f.file->f_path.dentry;
if (*name) {
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
+ if (!S_ISDIR(dentry->d_inode->i_mode)) {
+ fdput(f);
+ return -ENOTDIR;
+ }
retval = inode_permission(dentry->d_inode, MAY_EXEC);
- if (retval)
- goto fput_fail;
+ if (retval) {
+ fdput(f);
+ return retval;
+ }
}
- nd->path = file->f_path;
+ nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
- if (fput_needed)
- *fp = file;
+ if (f.need_put)
+ *fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
lock_rcu_walk();
} else {
- path_get(&file->f_path);
- fput_light(file, fput_needed);
+ path_get(&nd->path);
+ fdput(f);
}
}
nd->inode = nd->path.dentry->d_inode;
return 0;
-
-fput_fail:
- fput_light(file, fput_needed);
-out_fail:
- return retval;
}
static inline int lookup_last(struct nameidata *nd, struct path *path)
@@ -3971,7 +3966,7 @@ EXPORT_SYMBOL(user_path_at);
EXPORT_SYMBOL(follow_down_one);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
-EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
+EXPORT_SYMBOL(get_write_access); /* nfsd */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_one_len);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index eaa74323663..d7e9fe77188 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -89,6 +89,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ncp_inode_cachep);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9b47610338f..e4c716d374a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void)
static void nfs_destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(nfs_inode_cachep);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cc894eda385..48a1bad3733 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
return -ENOMEM;
}
fp->fi_lease = fl;
- fp->fi_deleg_file = fl->fl_file;
- get_file(fp->fi_deleg_file);
+ fp->fi_deleg_file = get_file(fl->fl_file);
atomic_set(&fp->fi_delegees, 1);
list_add(&dp->dl_perfile, &fp->fi_delegations);
return 0;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6a10812711c..3c991dc84f2 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj)
static void nilfs_destroy_cachep(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+
if (nilfs_inode_cachep)
kmem_cache_destroy(nilfs_inode_cachep);
if (nilfs_transaction_cachep)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index d4380366973..721d692fa8d 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
return fsnotify_remove_notify_event(group);
}
-static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
+static int create_fd(struct fsnotify_group *group,
+ struct fsnotify_event *event,
+ struct file **file)
{
int client_fd;
struct file *new_file;
@@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
put_unused_fd(client_fd);
client_fd = PTR_ERR(new_file);
} else {
- fd_install(client_fd, new_file);
+ *file = new_file;
}
return client_fd;
@@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
static int fill_event_metadata(struct fsnotify_group *group,
struct fanotify_event_metadata *metadata,
- struct fsnotify_event *event)
+ struct fsnotify_event *event,
+ struct file **file)
{
int ret = 0;
pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
group, metadata, event);
+ *file = NULL;
metadata->event_len = FAN_EVENT_METADATA_LEN;
metadata->metadata_len = FAN_EVENT_METADATA_LEN;
metadata->vers = FANOTIFY_METADATA_VERSION;
@@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
if (unlikely(event->mask & FAN_Q_OVERFLOW))
metadata->fd = FAN_NOFD;
else {
- metadata->fd = create_fd(group, event);
+ metadata->fd = create_fd(group, event, file);
if (metadata->fd < 0)
ret = metadata->fd;
}
@@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
return 0;
}
-static void remove_access_response(struct fsnotify_group *group,
- struct fsnotify_event *event,
- __s32 fd)
-{
- struct fanotify_response_event *re;
-
- if (!(event->mask & FAN_ALL_PERM_EVENTS))
- return;
-
- re = dequeue_re(group, fd);
- if (!re)
- return;
-
- BUG_ON(re->event != event);
-
- kmem_cache_free(fanotify_response_event_cache, re);
-
- return;
-}
#else
static int prepare_for_access_response(struct fsnotify_group *group,
struct fsnotify_event *event,
@@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
return 0;
}
-static void remove_access_response(struct fsnotify_group *group,
- struct fsnotify_event *event,
- __s32 fd)
-{
- return;
-}
#endif
static ssize_t copy_event_to_user(struct fsnotify_group *group,
@@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
char __user *buf)
{
struct fanotify_event_metadata fanotify_event_metadata;
+ struct file *f;
int fd, ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- ret = fill_event_metadata(group, &fanotify_event_metadata, event);
+ ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
if (ret < 0)
goto out;
fd = fanotify_event_metadata.fd;
- ret = prepare_for_access_response(group, event, fd);
- if (ret)
- goto out_close_fd;
-
ret = -EFAULT;
if (copy_to_user(buf, &fanotify_event_metadata,
fanotify_event_metadata.event_len))
- goto out_kill_access_response;
+ goto out_close_fd;
+ ret = prepare_for_access_response(group, event, fd);
+ if (ret)
+ goto out_close_fd;
+
+ fd_install(fd, f);
return fanotify_event_metadata.event_len;
-out_kill_access_response:
- remove_access_response(group, event, fd);
out_close_fd:
- if (fd != FAN_NOFD)
- sys_close(fd);
+ if (fd != FAN_NOFD) {
+ put_unused_fd(fd);
+ fput(f);
+ }
out:
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
@@ -470,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename,
dfd, filename, flags);
if (filename == NULL) {
- struct file *file;
- int fput_needed;
+ struct fd f = fdget(dfd);
ret = -EBADF;
- file = fget_light(dfd, &fput_needed);
- if (!file)
+ if (!f.file)
goto out;
ret = -ENOTDIR;
if ((flags & FAN_MARK_ONLYDIR) &&
- !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
- fput_light(file, fput_needed);
+ !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) {
+ fdput(f);
goto out;
}
- *path = file->f_path;
+ *path = f.file->f_path;
path_get(path);
- fput_light(file, fput_needed);
+ fdput(f);
} else {
unsigned int lookup_flags = 0;
@@ -767,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
struct inode *inode = NULL;
struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
- struct file *filp;
+ struct fd f;
struct path path;
- int ret, fput_needed;
+ int ret;
pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
__func__, fanotify_fd, flags, dfd, pathname, mask);
@@ -803,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
#endif
return -EINVAL;
- filp = fget_light(fanotify_fd, &fput_needed);
- if (unlikely(!filp))
+ f = fdget(fanotify_fd);
+ if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an fanotify instance */
ret = -EINVAL;
- if (unlikely(filp->f_op != &fanotify_fops))
+ if (unlikely(f.file->f_op != &fanotify_fops))
goto fput_and_out;
- group = filp->private_data;
+ group = f.file->private_data;
/*
* group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
@@ -858,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
path_put(&path);
fput_and_out:
- fput_light(filp, fput_needed);
+ fdput(f);
return ret;
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985..c311dda054a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
struct fsnotify_group *group;
struct inode *inode;
struct path path;
- struct file *filp;
- int ret, fput_needed;
+ struct fd f;
+ int ret;
unsigned flags = 0;
- filp = fget_light(fd, &fput_needed);
- if (unlikely(!filp))
+ f = fdget(fd);
+ if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an inotify instance */
- if (unlikely(filp->f_op != &inotify_fops)) {
+ if (unlikely(f.file->f_op != &inotify_fops)) {
ret = -EINVAL;
goto fput_and_out;
}
@@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
/* inode held in place by reference to path; group by fget on fd */
inode = path.dentry->d_inode;
- group = filp->private_data;
+ group = f.file->private_data;
/* create/update an inode mark */
ret = inotify_update_watch(group, inode, mask);
path_put(&path);
fput_and_out:
- fput_light(filp, fput_needed);
+ fdput(f);
return ret;
}
@@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
{
struct fsnotify_group *group;
struct inotify_inode_mark *i_mark;
- struct file *filp;
- int ret = 0, fput_needed;
+ struct fd f;
+ int ret = 0;
- filp = fget_light(fd, &fput_needed);
- if (unlikely(!filp))
+ f = fdget(fd);
+ if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an inotify instance */
ret = -EINVAL;
- if (unlikely(filp->f_op != &inotify_fops))
+ if (unlikely(f.file->f_op != &inotify_fops))
goto out;
- group = filp->private_data;
+ group = f.file->private_data;
ret = -EINVAL;
i_mark = inotify_idr_find(group, wd);
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
fsnotify_put_mark(&i_mark->fsn_mark);
out:
- fput_light(filp, fput_needed);
+ fdput(f);
return ret;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index da01c165067..4a8289f8b16 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3193,6 +3193,12 @@ static void __exit exit_ntfs_fs(void)
ntfs_debug("Unregistering NTFS driver.");
unregister_filesystem(&ntfs_fs_type);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ntfs_big_inode_cache);
kmem_cache_destroy(ntfs_inode_cache);
kmem_cache_destroy(ntfs_name_cache);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a4e855e3690..f7c648d7d6b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1746,8 +1746,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
long fd;
int sectsize;
char *p = (char *)page;
- struct file *filp = NULL;
- struct inode *inode = NULL;
+ struct fd f;
+ struct inode *inode;
ssize_t ret = -EINVAL;
int live_threshold;
@@ -1766,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
if (fd < 0 || fd >= INT_MAX)
goto out;
- filp = fget(fd);
- if (filp == NULL)
+ f = fdget(fd);
+ if (f.file == NULL)
goto out;
if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
reg->hr_block_bytes == 0)
- goto out;
+ goto out2;
- inode = igrab(filp->f_mapping->host);
+ inode = igrab(f.file->f_mapping->host);
if (inode == NULL)
- goto out;
+ goto out2;
if (!S_ISBLK(inode->i_mode))
- goto out;
+ goto out3;
- reg->hr_bdev = I_BDEV(filp->f_mapping->host);
+ reg->hr_bdev = I_BDEV(f.file->f_mapping->host);
ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
if (ret) {
reg->hr_bdev = NULL;
- goto out;
+ goto out3;
}
inode = NULL;
@@ -1797,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
"blocksize %u incorrect for device, expected %d",
reg->hr_block_bytes, sectsize);
ret = -EINVAL;
- goto out;
+ goto out3;
}
o2hb_init_region_params(reg);
@@ -1811,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
ret = o2hb_map_slot_data(reg);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto out3;
}
ret = o2hb_populate_slot_data(reg);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto out3;
}
INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
@@ -1847,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
if (IS_ERR(hb_task)) {
ret = PTR_ERR(hb_task);
mlog_errno(ret);
- goto out;
+ goto out3;
}
spin_lock(&o2hb_live_lock);
@@ -1863,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
if (reg->hr_aborted_start) {
ret = -EIO;
- goto out;
+ goto out3;
}
/* Ok, we were woken. Make sure it wasn't by drop_item() */
@@ -1882,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
config_item_name(&reg->hr_item), reg->hr_dev_name);
+out3:
+ iput(inode);
+out2:
+ fdput(f);
out:
- if (filp)
- fput(filp);
- if (inode)
- iput(inode);
if (ret < 0) {
if (reg->hr_bdev) {
blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 83b6f98e066..16b712d260d 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void)
flush_workqueue(user_dlm_worker);
destroy_workqueue(user_dlm_worker);
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(dlmfs_inode_cache);
bdi_destroy(&dlmfs_backing_dev_info);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 68f4541c2db..0e91ec22a94 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void)
static void ocfs2_free_mem_caches(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
if (ocfs2_inode_cachep)
kmem_cache_destroy(ocfs2_inode_cachep);
ocfs2_inode_cachep = NULL;
diff --git a/fs/open.c b/fs/open.c
index b0bae3a4182..44da0feeca2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -132,27 +132,27 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
- struct inode * inode;
+ struct inode *inode;
struct dentry *dentry;
- struct file * file;
+ struct fd f;
int error;
error = -EINVAL;
if (length < 0)
goto out;
error = -EBADF;
- file = fget(fd);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
goto out;
/* explicitly opened as large or we are on 64-bit box */
- if (file->f_flags & O_LARGEFILE)
+ if (f.file->f_flags & O_LARGEFILE)
small = 0;
- dentry = file->f_path.dentry;
+ dentry = f.file->f_path.dentry;
inode = dentry->d_inode;
error = -EINVAL;
- if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
+ if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
goto out_putf;
error = -EINVAL;
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
goto out_putf;
sb_start_write(inode->i_sb);
- error = locks_verify_truncate(inode, file, length);
+ error = locks_verify_truncate(inode, f.file, length);
if (!error)
- error = security_path_truncate(&file->f_path);
+ error = security_path_truncate(&f.file->f_path);
if (!error)
- error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+ error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
sb_end_write(inode->i_sb);
out_putf:
- fput(file);
+ fdput(f);
out:
return error;
}
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
{
- struct file *file;
+ struct fd f = fdget(fd);
int error = -EBADF;
- file = fget(fd);
- if (file) {
- error = do_fallocate(file, mode, offset, len);
- fput(file);
+ if (f.file) {
+ error = do_fallocate(f.file, mode, offset, len);
+ fdput(f);
}
-
return error;
}
@@ -400,16 +398,15 @@ out:
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
- struct file *file;
+ struct fd f = fdget_raw(fd);
struct inode *inode;
- int error, fput_needed;
+ int error = -EBADF;
error = -EBADF;
- file = fget_raw_light(fd, &fput_needed);
- if (!file)
+ if (!f.file)
goto out;
- inode = file->f_path.dentry->d_inode;
+ inode = f.file->f_path.dentry->d_inode;
error = -ENOTDIR;
if (!S_ISDIR(inode->i_mode))
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
if (!error)
- set_fs_pwd(current->fs, &file->f_path);
+ set_fs_pwd(current->fs, &f.file->f_path);
out_putf:
- fput_light(file, fput_needed);
+ fdput(f);
out:
return error;
}
@@ -582,23 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
{
- struct file * file;
+ struct fd f = fdget(fd);
int error = -EBADF;
- struct dentry * dentry;
- file = fget(fd);
- if (!file)
+ if (!f.file)
goto out;
- error = mnt_want_write_file(file);
+ error = mnt_want_write_file(f.file);
if (error)
goto out_fput;
- dentry = file->f_path.dentry;
- audit_inode(NULL, dentry);
- error = chown_common(&file->f_path, user, group);
- mnt_drop_write_file(file);
+ audit_inode(NULL, f.file->f_path.dentry);
+ error = chown_common(&f.file->f_path, user, group);
+ mnt_drop_write_file(f.file);
out_fput:
- fput(file);
+ fdput(f);
out:
return error;
}
@@ -803,50 +797,6 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
-static void __put_unused_fd(struct files_struct *files, unsigned int fd)
-{
- struct fdtable *fdt = files_fdtable(files);
- __clear_open_fd(fd, fdt);
- if (fd < files->next_fd)
- files->next_fd = fd;
-}
-
-void put_unused_fd(unsigned int fd)
-{
- struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
- __put_unused_fd(files, fd);
- spin_unlock(&files->file_lock);
-}
-
-EXPORT_SYMBOL(put_unused_fd);
-
-/*
- * Install a file pointer in the fd array.
- *
- * The VFS is full of places where we drop the files lock between
- * setting the open_fds bitmap and installing the file in the file
- * array. At any such point, we are vulnerable to a dup2() race
- * installing a file in the array before us. We need to detect this and
- * fput() the struct file we are about to overwrite in this case.
- *
- * It should never happen - if we allow dup2() do it, _really_ bad things
- * will follow.
- */
-
-void fd_install(unsigned int fd, struct file *file)
-{
- struct files_struct *files = current->files;
- struct fdtable *fdt;
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- BUG_ON(fdt->fd[fd] != NULL);
- rcu_assign_pointer(fdt->fd[fd], file);
- spin_unlock(&files->file_lock);
-}
-
-EXPORT_SYMBOL(fd_install);
-
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
int lookup_flags = 0;
@@ -858,7 +808,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
op->mode = 0;
/* Must never be set by userspace */
- flags &= ~FMODE_NONOTIFY;
+ flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
/*
* O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1038,23 +988,7 @@ EXPORT_SYMBOL(filp_close);
*/
SYSCALL_DEFINE1(close, unsigned int, fd)
{
- struct file * filp;
- struct files_struct *files = current->files;
- struct fdtable *fdt;
- int retval;
-
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- if (fd >= fdt->max_fds)
- goto out_unlock;
- filp = fdt->fd[fd];
- if (!filp)
- goto out_unlock;
- rcu_assign_pointer(fdt->fd[fd], NULL);
- __clear_close_on_exec(fd, fdt);
- __put_unused_fd(files, fd);
- spin_unlock(&files->file_lock);
- retval = filp_close(filp, files);
+ int retval = __close_fd(current->files, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
@@ -1064,10 +998,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
retval = -EINTR;
return retval;
-
-out_unlock:
- spin_unlock(&files->file_lock);
- return -EBADF;
}
EXPORT_SYMBOL(sys_close);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 4a3477949bc..2ad080faca3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -463,6 +463,11 @@ static int __init init_openprom_fs(void)
static void __exit exit_openprom_fs(void)
{
unregister_filesystem(&openprom_fs_type);
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(op_inode_cachep);
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 8d85d7068c1..bd3479db4b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1064,9 +1064,8 @@ err_inode:
return err;
}
-int do_pipe_flags(int *fd, int flags)
+static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
- struct file *files[2];
int error;
int fdw, fdr;
@@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags)
fdw = error;
audit_fd_pair(fdr, fdw);
- fd_install(fdr, files[0]);
- fd_install(fdw, files[1]);
fd[0] = fdr;
fd[1] = fdw;
-
return 0;
err_fdr:
@@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags)
return error;
}
+int do_pipe_flags(int *fd, int flags)
+{
+ struct file *files[2];
+ int error = __do_pipe_flags(fd, files, flags);
+ if (!error) {
+ fd_install(fd[0], files[0]);
+ fd_install(fd[1], files[1]);
+ }
+ return error;
+}
+
/*
* sys_pipe() is the normal C calling standard for creating
* a pipe. It's not the way Unix traditionally does this, though.
*/
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
+ struct file *files[2];
int fd[2];
int error;
- error = do_pipe_flags(fd, flags);
+ error = __do_pipe_flags(fd, files, flags);
if (!error) {
- if (copy_to_user(fildes, fd, sizeof(fd))) {
- sys_close(fd[0]);
- sys_close(fd[1]);
+ if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
+ fput(files[0]);
+ fput(files[1]);
+ put_unused_fd(fd[0]);
+ put_unused_fd(fd[1]);
error = -EFAULT;
+ } else {
+ fd_install(fd[0], files[0]);
+ fd_install(fd[1], files[1]);
}
}
return error;
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index c1c72933592..99349efbbc2 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o
proc-$(CONFIG_MMU) := mmu.o task_mmu.o
proc-y += inode.o root.o base.o generic.o array.o \
- proc_tty.o
+ proc_tty.o fd.o
proc-y += cmdline.o
proc-y += consoles.o
proc-y += cpuinfo.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index acd1960c28a..d295af99367 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -90,6 +90,7 @@
#endif
#include <trace/events/oom.h>
#include "internal.h"
+#include "fd.h"
/* NOTE:
* Implementing inode permission operations in /proc is almost
@@ -136,8 +137,6 @@ struct pid_entry {
NULL, &proc_single_file_operations, \
{ .proc_show = show } )
-static int proc_fd_permission(struct inode *inode, int mask);
-
/*
* Count the number of hardlinks for the pid_entry table, excluding the .
* and .. links.
@@ -1500,7 +1499,7 @@ out:
return error;
}
-static const struct inode_operations proc_pid_link_inode_operations = {
+const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
.follow_link = proc_pid_follow_link,
.setattr = proc_setattr,
@@ -1509,21 +1508,6 @@ static const struct inode_operations proc_pid_link_inode_operations = {
/* building an inode */
-static int task_dumpable(struct task_struct *task)
-{
- int dumpable = 0;
- struct mm_struct *mm;
-
- task_lock(task);
- mm = task->mm;
- if (mm)
- dumpable = get_dumpable(mm);
- task_unlock(task);
- if(dumpable == 1)
- return 1;
- return 0;
-}
-
struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
{
struct inode * inode;
@@ -1649,15 +1633,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
-static int pid_delete_dentry(const struct dentry * dentry)
-{
- /* Is the task we represent dead?
- * If so, then don't put the dentry on the lru list,
- * kill it immediately.
- */
- return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
const struct dentry_operations pid_dentry_operations =
{
.d_revalidate = pid_revalidate,
@@ -1720,289 +1695,6 @@ end_instantiate:
return filldir(dirent, name, len, filp->f_pos, ino, type);
}
-static unsigned name_to_int(struct dentry *dentry)
-{
- const char *name = dentry->d_name.name;
- int len = dentry->d_name.len;
- unsigned n = 0;
-
- if (len > 1 && *name == '0')
- goto out;
- while (len-- > 0) {
- unsigned c = *name++ - '0';
- if (c > 9)
- goto out;
- if (n >= (~0U-9)/10)
- goto out;
- n *= 10;
- n += c;
- }
- return n;
-out:
- return ~0U;
-}
-
-#define PROC_FDINFO_MAX 64
-
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
-{
- struct task_struct *task = get_proc_task(inode);
- struct files_struct *files = NULL;
- struct file *file;
- int fd = proc_fd(inode);
-
- if (task) {
- files = get_files_struct(task);
- put_task_struct(task);
- }
- if (files) {
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
- spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (file) {
- unsigned int f_flags;
- struct fdtable *fdt;
-
- fdt = files_fdtable(files);
- f_flags = file->f_flags & ~O_CLOEXEC;
- if (close_on_exec(fd, fdt))
- f_flags |= O_CLOEXEC;
-
- if (path) {
- *path = file->f_path;
- path_get(&file->f_path);
- }
- if (info)
- snprintf(info, PROC_FDINFO_MAX,
- "pos:\t%lli\n"
- "flags:\t0%o\n",
- (long long) file->f_pos,
- f_flags);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- return 0;
- }
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- }
- return -ENOENT;
-}
-
-static int proc_fd_link(struct dentry *dentry, struct path *path)
-{
- return proc_fd_info(dentry->d_inode, path, NULL);
-}
-
-static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
-{
- struct inode *inode;
- struct task_struct *task;
- int fd;
- struct files_struct *files;
- const struct cred *cred;
-
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- inode = dentry->d_inode;
- task = get_proc_task(inode);
- fd = proc_fd(inode);
-
- if (task) {
- files = get_files_struct(task);
- if (files) {
- struct file *file;
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- unsigned f_mode = file->f_mode;
-
- rcu_read_unlock();
- put_files_struct(files);
-
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
-
- if (S_ISLNK(inode->i_mode)) {
- unsigned i_mode = S_IFLNK;
- if (f_mode & FMODE_READ)
- i_mode |= S_IRUSR | S_IXUSR;
- if (f_mode & FMODE_WRITE)
- i_mode |= S_IWUSR | S_IXUSR;
- inode->i_mode = i_mode;
- }
-
- security_task_to_inode(task, inode);
- put_task_struct(task);
- return 1;
- }
- rcu_read_unlock();
- put_files_struct(files);
- }
- put_task_struct(task);
- }
- d_drop(dentry);
- return 0;
-}
-
-static const struct dentry_operations tid_fd_dentry_operations =
-{
- .d_revalidate = tid_fd_revalidate,
- .d_delete = pid_delete_dentry,
-};
-
-static struct dentry *proc_fd_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
- unsigned fd = (unsigned long)ptr;
- struct inode *inode;
- struct proc_inode *ei;
- struct dentry *error = ERR_PTR(-ENOENT);
-
- inode = proc_pid_make_inode(dir->i_sb, task);
- if (!inode)
- goto out;
- ei = PROC_I(inode);
- ei->fd = fd;
-
- inode->i_mode = S_IFLNK;
- inode->i_op = &proc_pid_link_inode_operations;
- inode->i_size = 64;
- ei->op.proc_get_link = proc_fd_link;
- d_set_d_op(dentry, &tid_fd_dentry_operations);
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (tid_fd_revalidate(dentry, 0))
- error = NULL;
-
- out:
- return error;
-}
-
-static struct dentry *proc_lookupfd_common(struct inode *dir,
- struct dentry *dentry,
- instantiate_t instantiate)
-{
- struct task_struct *task = get_proc_task(dir);
- unsigned fd = name_to_int(dentry);
- struct dentry *result = ERR_PTR(-ENOENT);
-
- if (!task)
- goto out_no_task;
- if (fd == ~0U)
- goto out;
-
- result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
-out:
- put_task_struct(task);
-out_no_task:
- return result;
-}
-
-static int proc_readfd_common(struct file * filp, void * dirent,
- filldir_t filldir, instantiate_t instantiate)
-{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- struct task_struct *p = get_proc_task(inode);
- unsigned int fd, ino;
- int retval;
- struct files_struct * files;
-
- retval = -ENOENT;
- if (!p)
- goto out_no_task;
- retval = 0;
-
- fd = filp->f_pos;
- switch (fd) {
- case 0:
- if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- default:
- files = get_files_struct(p);
- if (!files)
- goto out;
- rcu_read_lock();
- for (fd = filp->f_pos-2;
- fd < files_fdtable(files)->max_fds;
- fd++, filp->f_pos++) {
- char name[PROC_NUMBUF];
- int len;
- int rv;
-
- if (!fcheck_files(files, fd))
- continue;
- rcu_read_unlock();
-
- len = snprintf(name, sizeof(name), "%d", fd);
- rv = proc_fill_cache(filp, dirent, filldir,
- name, len, instantiate, p,
- (void *)(unsigned long)fd);
- if (rv < 0)
- goto out_fd_loop;
- rcu_read_lock();
- }
- rcu_read_unlock();
-out_fd_loop:
- put_files_struct(files);
- }
-out:
- put_task_struct(p);
-out_no_task:
- return retval;
-}
-
-static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
-{
- return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
-}
-
-static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
-{
- return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
-}
-
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- char tmp[PROC_FDINFO_MAX];
- int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
- if (!err)
- err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
- return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
- .open = nonseekable_open,
- .read = proc_fdinfo_read,
- .llseek = no_llseek,
-};
-
-static const struct file_operations proc_fd_operations = {
- .read = generic_read_dir,
- .readdir = proc_readfd,
- .llseek = default_llseek,
-};
-
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
@@ -2121,7 +1813,7 @@ out:
}
struct map_files_info {
- struct file *file;
+ fmode_t mode;
unsigned long len;
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
@@ -2130,13 +1822,10 @@ static struct dentry *
proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
- const struct file *file = ptr;
+ fmode_t mode = (fmode_t)(unsigned long)ptr;
struct proc_inode *ei;
struct inode *inode;
- if (!file)
- return ERR_PTR(-ENOENT);
-
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
return ERR_PTR(-ENOENT);
@@ -2148,9 +1837,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
inode->i_size = 64;
inode->i_mode = S_IFLNK;
- if (file->f_mode & FMODE_READ)
+ if (mode & FMODE_READ)
inode->i_mode |= S_IRUSR;
- if (file->f_mode & FMODE_WRITE)
+ if (mode & FMODE_WRITE)
inode->i_mode |= S_IWUSR;
d_set_d_op(dentry, &tid_map_files_dentry_operations);
@@ -2194,7 +1883,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
if (!vma)
goto out_no_vma;
- result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
+ result = proc_map_files_instantiate(dir, dentry, task,
+ (void *)(unsigned long)vma->vm_file->f_mode);
out_no_vma:
up_read(&mm->mmap_sem);
@@ -2295,8 +1985,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (++pos <= filp->f_pos)
continue;
- get_file(vma->vm_file);
- info.file = vma->vm_file;
+ info.mode = vma->vm_file->f_mode;
info.len = snprintf(info.name,
sizeof(info.name), "%lx-%lx",
vma->vm_start, vma->vm_end);
@@ -2311,19 +2000,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
ret = proc_fill_cache(filp, dirent, filldir,
p->name, p->len,
proc_map_files_instantiate,
- task, p->file);
+ task,
+ (void *)(unsigned long)p->mode);
if (ret)
break;
filp->f_pos++;
- fput(p->file);
- }
- for (; i < nr_files; i++) {
- /*
- * In case of error don't forget
- * to put rest of file refs.
- */
- p = flex_array_get(fa, i);
- fput(p->file);
}
if (fa)
flex_array_free(fa);
@@ -2345,82 +2026,6 @@ static const struct file_operations proc_map_files_operations = {
#endif /* CONFIG_CHECKPOINT_RESTORE */
-/*
- * /proc/pid/fd needs a special permission handler so that a process can still
- * access /proc/self/fd after it has executed a setuid().
- */
-static int proc_fd_permission(struct inode *inode, int mask)
-{
- int rv = generic_permission(inode, mask);
- if (rv == 0)
- return 0;
- if (task_pid(current) == proc_pid(inode))
- rv = 0;
- return rv;
-}
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fd_inode_operations = {
- .lookup = proc_lookupfd,
- .permission = proc_fd_permission,
- .setattr = proc_setattr,
-};
-
-static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
- unsigned fd = (unsigned long)ptr;
- struct inode *inode;
- struct proc_inode *ei;
- struct dentry *error = ERR_PTR(-ENOENT);
-
- inode = proc_pid_make_inode(dir->i_sb, task);
- if (!inode)
- goto out;
- ei = PROC_I(inode);
- ei->fd = fd;
- inode->i_mode = S_IFREG | S_IRUSR;
- inode->i_fop = &proc_fdinfo_file_operations;
- d_set_d_op(dentry, &tid_fd_dentry_operations);
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (tid_fd_revalidate(dentry, 0))
- error = NULL;
-
- out:
- return error;
-}
-
-static struct dentry *proc_lookupfdinfo(struct inode *dir,
- struct dentry *dentry,
- unsigned int flags)
-{
- return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
-}
-
-static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
-{
- return proc_readfd_common(filp, dirent, filldir,
- proc_fdinfo_instantiate);
-}
-
-static const struct file_operations proc_fdinfo_operations = {
- .read = generic_read_dir,
- .readdir = proc_readfdinfo,
- .llseek = default_llseek,
-};
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fdinfo_inode_operations = {
- .lookup = proc_lookupfdinfo,
- .setattr = proc_setattr,
-};
-
-
static struct dentry *proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
new file mode 100644
index 00000000000..f28a875f877
--- /dev/null
+++ b/fs/proc/fd.c
@@ -0,0 +1,367 @@
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/dcache.h>
+#include <linux/path.h>
+#include <linux/fdtable.h>
+#include <linux/namei.h>
+#include <linux/pid.h>
+#include <linux/security.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>
+
+#include <linux/proc_fs.h>
+
+#include "internal.h"
+#include "fd.h"
+
+static int seq_show(struct seq_file *m, void *v)
+{
+ struct files_struct *files = NULL;
+ int f_flags = 0, ret = -ENOENT;
+ struct file *file = NULL;
+ struct task_struct *task;
+
+ task = get_proc_task(m->private);
+ if (!task)
+ return -ENOENT;
+
+ files = get_files_struct(task);
+ put_task_struct(task);
+
+ if (files) {
+ int fd = proc_fd(m->private);
+
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (file) {
+ struct fdtable *fdt = files_fdtable(files);
+
+ f_flags = file->f_flags;
+ if (close_on_exec(fd, fdt))
+ f_flags |= O_CLOEXEC;
+
+ get_file(file);
+ ret = 0;
+ }
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ }
+
+ if (!ret) {
+ seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+ (long long)file->f_pos, f_flags);
+ fput(file);
+ }
+
+ return ret;
+}
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, seq_show, inode);
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+ .open = seq_fdinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct files_struct *files;
+ struct task_struct *task;
+ const struct cred *cred;
+ struct inode *inode;
+ int fd;
+
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
+ fd = proc_fd(inode);
+
+ if (task) {
+ files = get_files_struct(task);
+ if (files) {
+ struct file *file;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ unsigned f_mode = file->f_mode;
+
+ rcu_read_unlock();
+ put_files_struct(files);
+
+ if (task_dumpable(task)) {
+ rcu_read_lock();
+ cred = __task_cred(task);
+ inode->i_uid = cred->euid;
+ inode->i_gid = cred->egid;
+ rcu_read_unlock();
+ } else {
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ }
+
+ if (S_ISLNK(inode->i_mode)) {
+ unsigned i_mode = S_IFLNK;
+ if (f_mode & FMODE_READ)
+ i_mode |= S_IRUSR | S_IXUSR;
+ if (f_mode & FMODE_WRITE)
+ i_mode |= S_IWUSR | S_IXUSR;
+ inode->i_mode = i_mode;
+ }
+
+ security_task_to_inode(task, inode);
+ put_task_struct(task);
+ return 1;
+ }
+ rcu_read_unlock();
+ put_files_struct(files);
+ }
+ put_task_struct(task);
+ }
+
+ d_drop(dentry);
+ return 0;
+}
+
+static const struct dentry_operations tid_fd_dentry_operations = {
+ .d_revalidate = tid_fd_revalidate,
+ .d_delete = pid_delete_dentry,
+};
+
+static int proc_fd_link(struct dentry *dentry, struct path *path)
+{
+ struct files_struct *files = NULL;
+ struct task_struct *task;
+ int ret = -ENOENT;
+
+ task = get_proc_task(dentry->d_inode);
+ if (task) {
+ files = get_files_struct(task);
+ put_task_struct(task);
+ }
+
+ if (files) {
+ int fd = proc_fd(dentry->d_inode);
+ struct file *fd_file;
+
+ spin_lock(&files->file_lock);
+ fd_file = fcheck_files(files, fd);
+ if (fd_file) {
+ *path = fd_file->f_path;
+ path_get(&fd_file->f_path);
+ ret = 0;
+ }
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ }
+
+ return ret;
+}
+
+static struct dentry *
+proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
+{
+ struct dentry *error = ERR_PTR(-ENOENT);
+ unsigned fd = (unsigned long)ptr;
+ struct proc_inode *ei;
+ struct inode *inode;
+
+ inode = proc_pid_make_inode(dir->i_sb, task);
+ if (!inode)
+ goto out;
+
+ ei = PROC_I(inode);
+ ei->fd = fd;
+
+ inode->i_mode = S_IFLNK;
+ inode->i_op = &proc_pid_link_inode_operations;
+ inode->i_size = 64;
+
+ ei->op.proc_get_link = proc_fd_link;
+
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
+ d_add(dentry, inode);
+
+ /* Close the race of the process dying before we return the dentry */
+ if (tid_fd_revalidate(dentry, 0))
+ error = NULL;
+ out:
+ return error;
+}
+
+static struct dentry *proc_lookupfd_common(struct inode *dir,
+ struct dentry *dentry,
+ instantiate_t instantiate)
+{
+ struct task_struct *task = get_proc_task(dir);
+ struct dentry *result = ERR_PTR(-ENOENT);
+ unsigned fd = name_to_int(dentry);
+
+ if (!task)
+ goto out_no_task;
+ if (fd == ~0U)
+ goto out;
+
+ result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
+out:
+ put_task_struct(task);
+out_no_task:
+ return result;
+}
+
+static int proc_readfd_common(struct file * filp, void * dirent,
+ filldir_t filldir, instantiate_t instantiate)
+{
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *p = get_proc_task(inode);
+ struct files_struct *files;
+ unsigned int fd, ino;
+ int retval;
+
+ retval = -ENOENT;
+ if (!p)
+ goto out_no_task;
+ retval = 0;
+
+ fd = filp->f_pos;
+ switch (fd) {
+ case 0:
+ if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+ goto out;
+ filp->f_pos++;
+ case 1:
+ ino = parent_ino(dentry);
+ if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+ goto out;
+ filp->f_pos++;
+ default:
+ files = get_files_struct(p);
+ if (!files)
+ goto out;
+ rcu_read_lock();
+ for (fd = filp->f_pos - 2;
+ fd < files_fdtable(files)->max_fds;
+ fd++, filp->f_pos++) {
+ char name[PROC_NUMBUF];
+ int len;
+ int rv;
+
+ if (!fcheck_files(files, fd))
+ continue;
+ rcu_read_unlock();
+
+ len = snprintf(name, sizeof(name), "%d", fd);
+ rv = proc_fill_cache(filp, dirent, filldir,
+ name, len, instantiate, p,
+ (void *)(unsigned long)fd);
+ if (rv < 0)
+ goto out_fd_loop;
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+out_fd_loop:
+ put_files_struct(files);
+ }
+out:
+ put_task_struct(p);
+out_no_task:
+ return retval;
+}
+
+static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
+{
+ return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
+}
+
+const struct file_operations proc_fd_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_readfd,
+ .llseek = default_llseek,
+};
+
+static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
+}
+
+/*
+ * /proc/pid/fd needs a special permission handler so that a process can still
+ * access /proc/self/fd after it has executed a setuid().
+ */
+int proc_fd_permission(struct inode *inode, int mask)
+{
+ int rv = generic_permission(inode, mask);
+ if (rv == 0)
+ return 0;
+ if (task_pid(current) == proc_pid(inode))
+ rv = 0;
+ return rv;
+}
+
+const struct inode_operations proc_fd_inode_operations = {
+ .lookup = proc_lookupfd,
+ .permission = proc_fd_permission,
+ .setattr = proc_setattr,
+};
+
+static struct dentry *
+proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
+{
+ struct dentry *error = ERR_PTR(-ENOENT);
+ unsigned fd = (unsigned long)ptr;
+ struct proc_inode *ei;
+ struct inode *inode;
+
+ inode = proc_pid_make_inode(dir->i_sb, task);
+ if (!inode)
+ goto out;
+
+ ei = PROC_I(inode);
+ ei->fd = fd;
+
+ inode->i_mode = S_IFREG | S_IRUSR;
+ inode->i_fop = &proc_fdinfo_file_operations;
+
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
+ d_add(dentry, inode);
+
+ /* Close the race of the process dying before we return the dentry */
+ if (tid_fd_revalidate(dentry, 0))
+ error = NULL;
+ out:
+ return error;
+}
+
+static struct dentry *
+proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
+ return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
+}
+
+static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
+{
+ return proc_readfd_common(filp, dirent, filldir,
+ proc_fdinfo_instantiate);
+}
+
+const struct inode_operations proc_fdinfo_inode_operations = {
+ .lookup = proc_lookupfdinfo,
+ .setattr = proc_setattr,
+};
+
+const struct file_operations proc_fdinfo_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_readfdinfo,
+ .llseek = default_llseek,
+};
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
new file mode 100644
index 00000000000..cbb1d47deda
--- /dev/null
+++ b/fs/proc/fd.h
@@ -0,0 +1,14 @@
+#ifndef __PROCFS_FD_H__
+#define __PROCFS_FD_H__
+
+#include <linux/fs.h>
+
+extern const struct file_operations proc_fd_operations;
+extern const struct inode_operations proc_fd_inode_operations;
+
+extern const struct file_operations proc_fdinfo_operations;
+extern const struct inode_operations proc_fdinfo_inode_operations;
+
+extern int proc_fd_permission(struct inode *inode, int mask);
+
+#endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e1167a1c912..67925a7bd8c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -9,6 +9,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/sched.h>
#include <linux/proc_fs.h>
struct ctl_table_header;
@@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
extern const struct file_operations proc_net_operations;
extern const struct inode_operations proc_net_inode_operations;
+extern const struct inode_operations proc_pid_link_inode_operations;
struct proc_maps_private {
struct pid *pid;
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode)
return PROC_I(inode)->fd;
}
+static inline int task_dumpable(struct task_struct *task)
+{
+ int dumpable = 0;
+ struct mm_struct *mm;
+
+ task_lock(task);
+ mm = task->mm;
+ if (mm)
+ dumpable = get_dumpable(mm);
+ task_unlock(task);
+ if(dumpable == 1)
+ return 1;
+ return 0;
+}
+
+static inline int pid_delete_dentry(const struct dentry * dentry)
+{
+ /* Is the task we represent dead?
+ * If so, then don't put the dentry on the lru list,
+ * kill it immediately.
+ */
+ return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
+static inline unsigned name_to_int(struct dentry *dentry)
+{
+ const char *name = dentry->d_name.name;
+ int len = dentry->d_name.len;
+ unsigned n = 0;
+
+ if (len > 1 && *name == '0')
+ goto out;
+ while (len-- > 0) {
+ unsigned c = *name++ - '0';
+ if (c > 9)
+ goto out;
+ if (n >= (~0U-9)/10)
+ goto out;
+ n *= 10;
+ n += c;
+ }
+ return n;
+out:
+ return ~0U;
+}
+
struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
struct dentry *dentry);
int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5c3c7b02e17..43098bb5723 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -391,6 +391,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(qnx4_inode_cachep);
}
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index f4eef0b5e7b..b6addf56048 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -651,6 +651,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(qnx6_inode_cachep);
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 1adfb691e4f..d06534857e9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek);
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
{
off_t retval;
- struct file * file;
- int fput_needed;
-
- retval = -EBADF;
- file = fget_light(fd, &fput_needed);
- if (!file)
- goto bad;
+ struct fd f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
retval = -EINVAL;
if (origin <= SEEK_MAX) {
- loff_t res = vfs_llseek(file, offset, origin);
+ loff_t res = vfs_llseek(f.file, offset, origin);
retval = res;
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
- fput_light(file, fput_needed);
-bad:
+ fdput(f);
return retval;
}
@@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
unsigned int, origin)
{
int retval;
- struct file * file;
+ struct fd f = fdget(fd);
loff_t offset;
- int fput_needed;
- retval = -EBADF;
- file = fget_light(fd, &fput_needed);
- if (!file)
- goto bad;
+ if (!f.file)
+ return -EBADF;
retval = -EINVAL;
if (origin > SEEK_MAX)
goto out_putf;
- offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
+ offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
origin);
retval = (int)offset;
@@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
retval = 0;
}
out_putf:
- fput_light(file, fput_needed);
-bad:
+ fdput(f);
return retval;
}
#endif
@@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos)
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
- struct file *file;
+ struct fd f = fdget(fd);
ssize_t ret = -EBADF;
- int fput_needed;
- file = fget_light(fd, &fput_needed);
- if (file) {
- loff_t pos = file_pos_read(file);
- ret = vfs_read(file, buf, count, &pos);
- file_pos_write(file, pos);
- fput_light(file, fput_needed);
+ if (f.file) {
+ loff_t pos = file_pos_read(f.file);
+ ret = vfs_read(f.file, buf, count, &pos);
+ file_pos_write(f.file, pos);
+ fdput(f);
}
-
return ret;
}
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
- struct file *file;
+ struct fd f = fdget(fd);
ssize_t ret = -EBADF;
- int fput_needed;
- file = fget_light(fd, &fput_needed);
- if (file) {
- loff_t pos = file_pos_read(file);
- ret = vfs_write(file, buf, count, &pos);
- file_pos_write(file, pos);
- fput_light(file, fput_needed);
+ if (f.file) {
+ loff_t pos = file_pos_read(f.file);
+ ret = vfs_write(f.file, buf, count, &pos);
+ file_pos_write(f.file, pos);
+ fdput(f);
}
return ret;
@@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
size_t count, loff_t pos)
{
- struct file *file;
+ struct fd f;
ssize_t ret = -EBADF;
- int fput_needed;
if (pos < 0)
return -EINVAL;
- file = fget_light(fd, &fput_needed);
- if (file) {
+ f = fdget(fd);
+ if (f.file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PREAD)
- ret = vfs_read(file, buf, count, &pos);
- fput_light(file, fput_needed);
+ if (f.file->f_mode & FMODE_PREAD)
+ ret = vfs_read(f.file, buf, count, &pos);
+ fdput(f);
}
return ret;
@@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64);
SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
size_t count, loff_t pos)
{
- struct file *file;
+ struct fd f;
ssize_t ret = -EBADF;
- int fput_needed;
if (pos < 0)
return -EINVAL;
- file = fget_light(fd, &fput_needed);
- if (file) {
+ f = fdget(fd);
+ if (f.file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PWRITE)
- ret = vfs_write(file, buf, count, &pos);
- fput_light(file, fput_needed);
+ if (f.file->f_mode & FMODE_PWRITE)
+ ret = vfs_write(f.file, buf, count, &pos);
+ fdput(f);
}
return ret;
@@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev);
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- struct file *file;
+ struct fd f = fdget(fd);
ssize_t ret = -EBADF;
- int fput_needed;
- file = fget_light(fd, &fput_needed);
- if (file) {
- loff_t pos = file_pos_read(file);
- ret = vfs_readv(file, vec, vlen, &pos);
- file_pos_write(file, pos);
- fput_light(file, fput_needed);
+ if (f.file) {
+ loff_t pos = file_pos_read(f.file);
+ ret = vfs_readv(f.file, vec, vlen, &pos);
+ file_pos_write(f.file, pos);
+ fdput(f);
}
if (ret > 0)
@@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- struct file *file;
+ struct fd f = fdget(fd);
ssize_t ret = -EBADF;
- int fput_needed;
- file = fget_light(fd, &fput_needed);
- if (file) {
- loff_t pos = file_pos_read(file);
- ret = vfs_writev(file, vec, vlen, &pos);
- file_pos_write(file, pos);
- fput_light(file, fput_needed);
+ if (f.file) {
+ loff_t pos = file_pos_read(f.file);
+ ret = vfs_writev(f.file, vec, vlen, &pos);
+ file_pos_write(f.file, pos);
+ fdput(f);
}
if (ret > 0)
@@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
- struct file *file;
+ struct fd f;
ssize_t ret = -EBADF;
- int fput_needed;
if (pos < 0)
return -EINVAL;
- file = fget_light(fd, &fput_needed);
- if (file) {
+ f = fdget(fd);
+ if (f.file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PREAD)
- ret = vfs_readv(file, vec, vlen, &pos);
- fput_light(file, fput_needed);
+ if (f.file->f_mode & FMODE_PREAD)
+ ret = vfs_readv(f.file, vec, vlen, &pos);
+ fdput(f);
}
if (ret > 0)
@@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
- struct file *file;
+ struct fd f;
ssize_t ret = -EBADF;
- int fput_needed;
if (pos < 0)
return -EINVAL;
- file = fget_light(fd, &fput_needed);
- if (file) {
+ f = fdget(fd);
+ if (f.file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PWRITE)
- ret = vfs_writev(file, vec, vlen, &pos);
- fput_light(file, fput_needed);
+ if (f.file->f_mode & FMODE_PWRITE)
+ ret = vfs_writev(f.file, vec, vlen, &pos);
+ fdput(f);
}
if (ret > 0)
@@ -884,31 +862,31 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
-static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
- size_t count, loff_t max)
+ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
+ loff_t max)
{
- struct file * in_file, * out_file;
- struct inode * in_inode, * out_inode;
+ struct fd in, out;
+ struct inode *in_inode, *out_inode;
loff_t pos;
ssize_t retval;
- int fput_needed_in, fput_needed_out, fl;
+ int fl;
/*
* Get input file, and verify that it is ok..
*/
retval = -EBADF;
- in_file = fget_light(in_fd, &fput_needed_in);
- if (!in_file)
+ in = fdget(in_fd);
+ if (!in.file)
goto out;
- if (!(in_file->f_mode & FMODE_READ))
+ if (!(in.file->f_mode & FMODE_READ))
goto fput_in;
retval = -ESPIPE;
if (!ppos)
- ppos = &in_file->f_pos;
+ ppos = &in.file->f_pos;
else
- if (!(in_file->f_mode & FMODE_PREAD))
+ if (!(in.file->f_mode & FMODE_PREAD))
goto fput_in;
- retval = rw_verify_area(READ, in_file, ppos, count);
+ retval = rw_verify_area(READ, in.file, ppos, count);
if (retval < 0)
goto fput_in;
count = retval;
@@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
* Get output file, and verify that it is ok..
*/
retval = -EBADF;
- out_file = fget_light(out_fd, &fput_needed_out);
- if (!out_file)
+ out = fdget(out_fd);
+ if (!out.file)
goto fput_in;
- if (!(out_file->f_mode & FMODE_WRITE))
+ if (!(out.file->f_mode & FMODE_WRITE))
goto fput_out;
retval = -EINVAL;
- in_inode = in_file->f_path.dentry->d_inode;
- out_inode = out_file->f_path.dentry->d_inode;
- retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
+ in_inode = in.file->f_path.dentry->d_inode;
+ out_inode = out.file->f_path.dentry->d_inode;
+ retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
if (retval < 0)
goto fput_out;
count = retval;
@@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
* and the application is arguably buggy if it doesn't expect
* EAGAIN on a non-blocking file descriptor.
*/
- if (in_file->f_flags & O_NONBLOCK)
+ if (in.file->f_flags & O_NONBLOCK)
fl = SPLICE_F_NONBLOCK;
#endif
- retval = do_splice_direct(in_file, ppos, out_file, count, fl);
+ retval = do_splice_direct(in.file, ppos, out.file, count, fl);
if (retval > 0) {
add_rchar(current, retval);
@@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
retval = -EOVERFLOW;
fput_out:
- fput_light(out_file, fput_needed_out);
+ fdput(out);
fput_in:
- fput_light(in_file, fput_needed_in);
+ fdput(in);
out:
return retval;
}
diff --git a/fs/read_write.h b/fs/read_write.h
index d07b954c6e0..d3e00ef6742 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
+ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
+ loff_t max);
diff --git a/fs/readdir.c b/fs/readdir.c
index 39e3370d79c..5e69ef533b7 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
struct old_linux_dirent __user *, dirent, unsigned int, count)
{
int error;
- struct file * file;
+ struct fd f = fdget(fd);
struct readdir_callback buf;
- int fput_needed;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ if (!f.file)
return -EBADF;
buf.result = 0;
buf.dirent = dirent;
- error = vfs_readdir(file, fillonedir, &buf);
+ error = vfs_readdir(f.file, fillonedir, &buf);
if (buf.result)
error = buf.result;
- fput_light(file, fput_needed);
+ fdput(f);
return error;
}
@@ -191,17 +189,16 @@ efault:
SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct linux_dirent __user *, dirent, unsigned int, count)
{
- struct file * file;
+ struct fd f;
struct linux_dirent __user * lastdirent;
struct getdents_callback buf;
- int fput_needed;
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
return -EFAULT;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
return -EBADF;
buf.current_dir = dirent;
@@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
buf.count = count;
buf.error = 0;
- error = vfs_readdir(file, filldir, &buf);
+ error = vfs_readdir(f.file, filldir, &buf);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- if (put_user(file->f_pos, &lastdirent->d_off))
+ if (put_user(f.file->f_pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
- fput_light(file, fput_needed);
+ fdput(f);
return error;
}
@@ -272,17 +269,16 @@ efault:
SYSCALL_DEFINE3(getdents64, unsigned int, fd,
struct linux_dirent64 __user *, dirent, unsigned int, count)
{
- struct file * file;
+ struct fd f;
struct linux_dirent64 __user * lastdirent;
struct getdents_callback64 buf;
- int fput_needed;
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
return -EFAULT;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
return -EBADF;
buf.current_dir = dirent;
@@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
buf.count = count;
buf.error = 0;
- error = vfs_readdir(file, filldir64, &buf);
+ error = vfs_readdir(f.file, filldir64, &buf);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- typeof(lastdirent->d_off) d_off = file->f_pos;
+ typeof(lastdirent->d_off) d_off = f.file->f_pos;
if (__put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
- fput_light(file, fput_needed);
+ fdput(f);
return error;
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7a37dabf5a9..1078ae17999 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -608,6 +608,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(reiserfs_inode_cachep);
}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 77c5f217398..fd7c5f60b46 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -648,6 +648,11 @@ error_register:
static void __exit exit_romfs_fs(void)
{
unregister_filesystem(&romfs_fs_type);
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(romfs_inode_cachep);
}
diff --git a/fs/select.c b/fs/select.c
index db14c781335..2ef72d96503 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
struct poll_table_entry *entry = poll_get_entry(pwq);
if (!entry)
return;
- get_file(filp);
- entry->filp = filp;
+ entry->filp = get_file(filp);
entry->wait_address = wait_address;
entry->key = p->_key;
init_waitqueue_func_entry(&entry->wait, pollwake);
@@ -429,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
unsigned long in, out, ex, all_bits, bit = 1, mask, j;
unsigned long res_in = 0, res_out = 0, res_ex = 0;
- const struct file_operations *f_op = NULL;
- struct file *file = NULL;
in = *inp++; out = *outp++; ex = *exp++;
all_bits = in | out | ex;
@@ -440,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
}
for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
- int fput_needed;
+ struct fd f;
if (i >= n)
break;
if (!(bit & all_bits))
continue;
- file = fget_light(i, &fput_needed);
- if (file) {
- f_op = file->f_op;
+ f = fdget(i);
+ if (f.file) {
+ const struct file_operations *f_op;
+ f_op = f.file->f_op;
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {
wait_key_set(wait, in, out, bit);
- mask = (*f_op->poll)(file, wait);
+ mask = (*f_op->poll)(f.file, wait);
}
- fput_light(file, fput_needed);
+ fdput(f);
if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
retval++;
@@ -726,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
mask = 0;
fd = pollfd->fd;
if (fd >= 0) {
- int fput_needed;
- struct file * file;
-
- file = fget_light(fd, &fput_needed);
+ struct fd f = fdget(fd);
mask = POLLNVAL;
- if (file != NULL) {
+ if (f.file) {
mask = DEFAULT_POLLMASK;
- if (file->f_op && file->f_op->poll) {
+ if (f.file->f_op && f.file->f_op->poll) {
pwait->_key = pollfd->events|POLLERR|POLLHUP;
- mask = file->f_op->poll(file, pwait);
+ mask = f.file->f_op->poll(f.file, pwait);
}
/* Mask out unneeded events. */
mask &= pollfd->events | POLLERR | POLLHUP;
- fput_light(file, fput_needed);
+ fdput(f);
}
}
pollfd->revents = mask;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 9f35a37173d..8bee4e57091 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
if (ufd < 0)
kfree(ctx);
} else {
- int fput_needed;
- struct file *file = fget_light(ufd, &fput_needed);
- if (!file)
+ struct fd f = fdget(ufd);
+ if (!f.file)
return -EBADF;
- ctx = file->private_data;
- if (file->f_op != &signalfd_fops) {
- fput_light(file, fput_needed);
+ ctx = f.file->private_data;
+ if (f.file->f_op != &signalfd_fops) {
+ fdput(f);
return -EINVAL;
}
spin_lock_irq(&current->sighand->siglock);
@@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
spin_unlock_irq(&current->sighand->siglock);
wake_up(&current->sighand->signalfd_wqh);
- fput_light(file, fput_needed);
+ fdput(f);
}
return ufd;
diff --git a/fs/splice.c b/fs/splice.c
index 41514dd8946..13e5b4776e7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
unsigned long, nr_segs, unsigned int, flags)
{
- struct file *file;
+ struct fd f;
long error;
- int fput;
if (unlikely(nr_segs > UIO_MAXIOV))
return -EINVAL;
@@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
return 0;
error = -EBADF;
- file = fget_light(fd, &fput);
- if (file) {
- if (file->f_mode & FMODE_WRITE)
- error = vmsplice_to_pipe(file, iov, nr_segs, flags);
- else if (file->f_mode & FMODE_READ)
- error = vmsplice_to_user(file, iov, nr_segs, flags);
-
- fput_light(file, fput);
+ f = fdget(fd);
+ if (f.file) {
+ if (f.file->f_mode & FMODE_WRITE)
+ error = vmsplice_to_pipe(f.file, iov, nr_segs, flags);
+ else if (f.file->f_mode & FMODE_READ)
+ error = vmsplice_to_user(f.file, iov, nr_segs, flags);
+
+ fdput(f);
}
return error;
@@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
int, fd_out, loff_t __user *, off_out,
size_t, len, unsigned int, flags)
{
+ struct fd in, out;
long error;
- struct file *in, *out;
- int fput_in, fput_out;
if (unlikely(!len))
return 0;
error = -EBADF;
- in = fget_light(fd_in, &fput_in);
- if (in) {
- if (in->f_mode & FMODE_READ) {
- out = fget_light(fd_out, &fput_out);
- if (out) {
- if (out->f_mode & FMODE_WRITE)
- error = do_splice(in, off_in,
- out, off_out,
+ in = fdget(fd_in);
+ if (in.file) {
+ if (in.file->f_mode & FMODE_READ) {
+ out = fdget(fd_out);
+ if (out.file) {
+ if (out.file->f_mode & FMODE_WRITE)
+ error = do_splice(in.file, off_in,
+ out.file, off_out,
len, flags);
- fput_light(out, fput_out);
+ fdput(out);
}
}
-
- fput_light(in, fput_in);
+ fdput(in);
}
-
return error;
}
@@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len,
SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
{
- struct file *in;
- int error, fput_in;
+ struct fd in;
+ int error;
if (unlikely(!len))
return 0;
error = -EBADF;
- in = fget_light(fdin, &fput_in);
- if (in) {
- if (in->f_mode & FMODE_READ) {
- int fput_out;
- struct file *out = fget_light(fdout, &fput_out);
-
- if (out) {
- if (out->f_mode & FMODE_WRITE)
- error = do_tee(in, out, len, flags);
- fput_light(out, fput_out);
+ in = fdget(fdin);
+ if (in.file) {
+ if (in.file->f_mode & FMODE_READ) {
+ struct fd out = fdget(fdout);
+ if (out.file) {
+ if (out.file->f_mode & FMODE_WRITE)
+ error = do_tee(in.file, out.file,
+ len, flags);
+ fdput(out);
}
}
- fput_light(in, fput_in);
+ fdput(in);
}
return error;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 29cd014ed3a..260e3928d4f 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -425,6 +425,11 @@ static int __init init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(squashfs_inode_cachep);
}
diff --git a/fs/stat.c b/fs/stat.c
index 208039eec6c..eae494630a3 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
int vfs_fstat(unsigned int fd, struct kstat *stat)
{
- int fput_needed;
- struct file *f = fget_raw_light(fd, &fput_needed);
+ struct fd f = fdget_raw(fd);
int error = -EBADF;
- if (f) {
- error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
- fput_light(f, fput_needed);
+ if (f.file) {
+ error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry,
+ stat);
+ fdput(f);
}
return error;
}
diff --git a/fs/statfs.c b/fs/statfs.c
index 95ad5c0e586..f8e832e6f0a 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st)
int fd_statfs(int fd, struct kstatfs *st)
{
- int fput_needed;
- struct file *file = fget_light(fd, &fput_needed);
+ struct fd f = fdget(fd);
int error = -EBADF;
- if (file) {
- error = vfs_statfs(&file->f_path, st);
- fput_light(file, fput_needed);
+ if (f.file) {
+ error = vfs_statfs(&f.file->f_path, st);
+ fdput(f);
}
return error;
}
diff --git a/fs/super.c b/fs/super.c
index 0902cfa6a12..5fdf7ff32c4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s)
/* caches are now gone, we can safely kill the shrinker now */
unregister_shrinker(&s->s_shrink);
-
- /*
- * We need to call rcu_barrier so all the delayed rcu free
- * inodes are flushed before we release the fs module.
- */
- rcu_barrier();
put_filesystem(fs);
put_super(s);
} else {
diff --git a/fs/sync.c b/fs/sync.c
index eb8722dc556..14eefeb4463 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -148,21 +148,19 @@ void emergency_sync(void)
*/
SYSCALL_DEFINE1(syncfs, int, fd)
{
- struct file *file;
+ struct fd f = fdget(fd);
struct super_block *sb;
int ret;
- int fput_needed;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ if (!f.file)
return -EBADF;
- sb = file->f_dentry->d_sb;
+ sb = f.file->f_dentry->d_sb;
down_read(&sb->s_umount);
ret = sync_filesystem(sb);
up_read(&sb->s_umount);
- fput_light(file, fput_needed);
+ fdput(f);
return ret;
}
@@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync);
static int do_fsync(unsigned int fd, int datasync)
{
- struct file *file;
+ struct fd f = fdget(fd);
int ret = -EBADF;
- int fput_needed;
- file = fget_light(fd, &fput_needed);
- if (file) {
- ret = vfs_fsync(file, datasync);
- fput_light(file, fput_needed);
+ if (f.file) {
+ ret = vfs_fsync(f.file, datasync);
+ fdput(f);
}
return ret;
}
@@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
unsigned int flags)
{
int ret;
- struct file *file;
+ struct fd f;
struct address_space *mapping;
loff_t endbyte; /* inclusive */
- int fput_needed;
umode_t i_mode;
ret = -EINVAL;
@@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
endbyte--; /* inclusive */
ret = -EBADF;
- file = fget_light(fd, &fput_needed);
- if (!file)
+ f = fdget(fd);
+ if (!f.file)
goto out;
- i_mode = file->f_path.dentry->d_inode->i_mode;
+ i_mode = f.file->f_path.dentry->d_inode->i_mode;
ret = -ESPIPE;
if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
!S_ISLNK(i_mode))
goto out_put;
- mapping = file->f_mapping;
+ mapping = f.file->f_mapping;
if (!mapping) {
ret = -EINVAL;
goto out_put;
@@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
ret = filemap_fdatawait_range(mapping, offset, endbyte);
out_put:
- fput_light(file, fput_needed);
+ fdput(f);
out:
return ret;
}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index b23ab736685..d33e506c1ea 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -360,5 +360,10 @@ int __init sysv_init_icache(void)
void sysv_destroy_icache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(sysv_inode_cachep);
}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index dffeb3795af..d03822bbf19 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = {
.llseek = noop_llseek,
};
-static struct file *timerfd_fget(int fd)
+static int timerfd_fget(int fd, struct fd *p)
{
- struct file *file;
-
- file = fget(fd);
- if (!file)
- return ERR_PTR(-EBADF);
- if (file->f_op != &timerfd_fops) {
- fput(file);
- return ERR_PTR(-EINVAL);
+ struct fd f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+ if (f.file->f_op != &timerfd_fops) {
+ fdput(f);
+ return -EINVAL;
}
-
- return file;
+ *p = f;
+ return 0;
}
SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
@@ -284,7 +282,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
const struct itimerspec __user *, utmr,
struct itimerspec __user *, otmr)
{
- struct file *file;
+ struct fd f;
struct timerfd_ctx *ctx;
struct itimerspec ktmr, kotmr;
int ret;
@@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
!timespec_valid(&ktmr.it_interval))
return -EINVAL;
- file = timerfd_fget(ufd);
- if (IS_ERR(file))
- return PTR_ERR(file);
- ctx = file->private_data;
+ ret = timerfd_fget(ufd, &f);
+ if (ret)
+ return ret;
+ ctx = f.file->private_data;
timerfd_setup_cancel(ctx, flags);
@@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
ret = timerfd_setup(ctx, flags, &ktmr);
spin_unlock_irq(&ctx->wqh.lock);
- fput(file);
+ fdput(f);
if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
return -EFAULT;
@@ -343,14 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
{
- struct file *file;
+ struct fd f;
struct timerfd_ctx *ctx;
struct itimerspec kotmr;
-
- file = timerfd_fget(ufd);
- if (IS_ERR(file))
- return PTR_ERR(file);
- ctx = file->private_data;
+ int ret = timerfd_fget(ufd, &f);
+ if (ret)
+ return ret;
+ ctx = f.file->private_data;
spin_lock_irq(&ctx->wqh.lock);
if (ctx->expired && ctx->tintv.tv64) {
@@ -362,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
kotmr.it_interval = ktime_to_timespec(ctx->tintv);
spin_unlock_irq(&ctx->wqh.lock);
- fput(file);
+ fdput(f);
return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 681f3a94244..49825427a0e 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void)
dbg_debugfs_exit();
ubifs_compressors_exit();
unregister_shrinker(&ubifs_shrinker_info);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ubifs_inode_slab);
unregister_filesystem(&ubifs_fs_type);
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 862741dddf2..d44fb568abe 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -171,6 +171,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(udf_inode_cachep);
}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 444927e5706..f7cfecfe1ca 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1466,6 +1466,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ufs_inode_cachep);
}
diff --git a/fs/utimes.c b/fs/utimes.c
index fa4dbe451e2..bb0696a4173 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times,
goto out;
if (filename == NULL && dfd != AT_FDCWD) {
- int fput_needed;
- struct file *file;
+ struct fd f;
if (flags & AT_SYMLINK_NOFOLLOW)
goto out;
- file = fget_light(dfd, &fput_needed);
+ f = fdget(dfd);
error = -EBADF;
- if (!file)
+ if (!f.file)
goto out;
- error = utimes_common(&file->f_path, times);
- fput_light(file, fput_needed);
+ error = utimes_common(&f.file->f_path, times);
+ fdput(f);
} else {
struct path path;
int lookup_flags = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index f7f7f09b0b4..ca15fbd391c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -403,22 +403,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
const void __user *,value, size_t, size, int, flags)
{
- int fput_needed;
- struct file *f;
+ struct fd f = fdget(fd);
struct dentry *dentry;
int error = -EBADF;
- f = fget_light(fd, &fput_needed);
- if (!f)
+ if (!f.file)
return error;
- dentry = f->f_path.dentry;
+ dentry = f.file->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write_file(f);
+ error = mnt_want_write_file(f.file);
if (!error) {
error = setxattr(dentry, name, value, size, flags);
- mnt_drop_write_file(f);
+ mnt_drop_write_file(f.file);
}
- fput_light(f, fput_needed);
+ fdput(f);
return error;
}
@@ -502,16 +500,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
void __user *, value, size_t, size)
{
- int fput_needed;
- struct file *f;
+ struct fd f = fdget(fd);
ssize_t error = -EBADF;
- f = fget_light(fd, &fput_needed);
- if (!f)
+ if (!f.file)
return error;
- audit_inode(NULL, f->f_path.dentry);
- error = getxattr(f->f_path.dentry, name, value, size);
- fput_light(f, fput_needed);
+ audit_inode(NULL, f.file->f_path.dentry);
+ error = getxattr(f.file->f_path.dentry, name, value, size);
+ fdput(f);
return error;
}
@@ -583,16 +579,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
{
- int fput_needed;
- struct file *f;
+ struct fd f = fdget(fd);
ssize_t error = -EBADF;
- f = fget_light(fd, &fput_needed);
- if (!f)
+ if (!f.file)
return error;
- audit_inode(NULL, f->f_path.dentry);
- error = listxattr(f->f_path.dentry, list, size);
- fput_light(f, fput_needed);
+ audit_inode(NULL, f.file->f_path.dentry);
+ error = listxattr(f.file->f_path.dentry, list, size);
+ fdput(f);
return error;
}
@@ -652,22 +646,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
{
- int fput_needed;
- struct file *f;
+ struct fd f = fdget(fd);
struct dentry *dentry;
int error = -EBADF;
- f = fget_light(fd, &fput_needed);
- if (!f)
+ if (!f.file)
return error;
- dentry = f->f_path.dentry;
+ dentry = f.file->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write_file(f);
+ error = mnt_want_write_file(f.file);
if (!error) {
error = removexattr(dentry, name);
- mnt_drop_write_file(f);
+ mnt_drop_write_file(f.file);
}
- fput_light(f, fput_needed);
+ fdput(f);
return error;
}
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e00de08dc8a..b9b8646e62d 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -48,44 +48,44 @@ xfs_swapext(
xfs_swapext_t *sxp)
{
xfs_inode_t *ip, *tip;
- struct file *file, *tmp_file;
+ struct fd f, tmp;
int error = 0;
/* Pull information for the target fd */
- file = fget((int)sxp->sx_fdtarget);
- if (!file) {
+ f = fdget((int)sxp->sx_fdtarget);
+ if (!f.file) {
error = XFS_ERROR(EINVAL);
goto out;
}
- if (!(file->f_mode & FMODE_WRITE) ||
- !(file->f_mode & FMODE_READ) ||
- (file->f_flags & O_APPEND)) {
+ if (!(f.file->f_mode & FMODE_WRITE) ||
+ !(f.file->f_mode & FMODE_READ) ||
+ (f.file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_file;
}
- tmp_file = fget((int)sxp->sx_fdtmp);
- if (!tmp_file) {
+ tmp = fdget((int)sxp->sx_fdtmp);
+ if (!tmp.file) {
error = XFS_ERROR(EINVAL);
goto out_put_file;
}
- if (!(tmp_file->f_mode & FMODE_WRITE) ||
- !(tmp_file->f_mode & FMODE_READ) ||
- (tmp_file->f_flags & O_APPEND)) {
+ if (!(tmp.file->f_mode & FMODE_WRITE) ||
+ !(tmp.file->f_mode & FMODE_READ) ||
+ (tmp.file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_tmp_file;
}
- if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
- IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) {
+ if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) ||
+ IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) {
error = XFS_ERROR(EINVAL);
goto out_put_tmp_file;
}
- ip = XFS_I(file->f_path.dentry->d_inode);
- tip = XFS_I(tmp_file->f_path.dentry->d_inode);
+ ip = XFS_I(f.file->f_path.dentry->d_inode);
+ tip = XFS_I(tmp.file->f_path.dentry->d_inode);
if (ip->i_mount != tip->i_mount) {
error = XFS_ERROR(EINVAL);
@@ -105,9 +105,9 @@ xfs_swapext(
error = xfs_swap_extents(ip, tip, sxp);
out_put_tmp_file:
- fput(tmp_file);
+ fdput(tmp);
out_put_file:
- fput(file);
+ fdput(f);
out:
return error;
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e0232c3b6d..8305f2ac677 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,16 +70,16 @@ xfs_find_handle(
int hsize;
xfs_handle_t handle;
struct inode *inode;
- struct file *file = NULL;
+ struct fd f;
struct path path;
int error;
struct xfs_inode *ip;
if (cmd == XFS_IOC_FD_TO_HANDLE) {
- file = fget(hreq->fd);
- if (!file)
+ f = fdget(hreq->fd);
+ if (!f.file)
return -EBADF;
- inode = file->f_path.dentry->d_inode;
+ inode = f.file->f_path.dentry->d_inode;
} else {
error = user_lpath((const char __user *)hreq->path, &path);
if (error)
@@ -134,7 +134,7 @@ xfs_find_handle(
out_put:
if (cmd == XFS_IOC_FD_TO_HANDLE)
- fput(file);
+ fdput(f);
else
path_put(&path);
return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 001537f92ca..e0fd2734189 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1506,6 +1506,11 @@ xfs_init_zones(void)
STATIC void
xfs_destroy_zones(void)
{
+ /*
+ * Make sure all delayed rcu free are flushed before we
+ * destroy caches.
+ */
+ rcu_barrier();
kmem_zone_destroy(xfs_ili_zone);
kmem_zone_destroy(xfs_inode_zone);
kmem_zone_destroy(xfs_efi_zone);