aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/ceph/addr.c1
-rw-r--r--fs/ceph/mds_client.c5
-rw-r--r--fs/cifs/cifs_dfs_ref.c7
-rw-r--r--fs/cifs/file.c10
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/ext4/extents.c27
-rw-r--r--fs/ext4/inline.c75
-rw-r--r--fs/ext4/inode.c43
-rw-r--r--fs/ext4/mballoc.c7
-rw-r--r--fs/ext4/namei.c6
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/ext4/xattr.c30
-rw-r--r--fs/ext4/xattr.h32
-rw-r--r--fs/f2fs/dir.c34
-rw-r--r--fs/f2fs/extent_cache.c5
-rw-r--r--fs/f2fs/f2fs.h4
-rw-r--r--fs/f2fs/gc.c22
-rw-r--r--fs/f2fs/segment.c2
-rw-r--r--fs/f2fs/segment.h9
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/gfs2/glock.c5
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/mount.h1
-rw-r--r--fs/namei.c3
-rw-r--r--fs/namespace.c127
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c13
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs42proc.c63
-rw-r--r--fs/nfs/nfs4namespace.c2
-rw-r--r--fs/nfs/nfs4proc.c19
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfsd/vfs.c59
-rw-r--r--fs/orangefs/super.c9
-rw-r--r--fs/pnode.c61
-rw-r--r--fs/pnode.h2
-rw-r--r--fs/super.c13
40 files changed, 497 insertions, 248 deletions
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 81dd075356b9..d4fb0afc0097 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
/* try and do the mount */
_debug("--- attempting mount %s -o %s ---", devname, options);
- mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
+ mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
_debug("--- mount result %p ---", mnt);
free_page((unsigned long) devname);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index e44271dfceb6..5db6c8d745ea 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *sbi,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
- wq->uid = current_real_cred()->uid;
- wq->gid = current_real_cred()->gid;
+ wq->uid = current_cred()->uid;
+ wq->gid = current_cred()->gid;
wq->pid = pid;
wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ef3ebd780aff..1e643c718917 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -363,6 +363,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
nr_pages = i;
if (nr_pages > 0) {
len = nr_pages << PAGE_SHIFT;
+ osd_req_op_extent_update(req, 0, len);
break;
}
goto out_pages;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6a26c7bd1286..e3e1a80b351e 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ /* Never leave an unregistered request on an unsafe list! */
+ list_del_init(&req->r_unsafe_item);
+
if (req->r_tid == mdsc->oldest_tid) {
struct rb_node *p = rb_next(&req->r_node);
mdsc->oldest_tid = 0;
@@ -1036,7 +1039,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
while (!list_empty(&session->s_unsafe)) {
req = list_first_entry(&session->s_unsafe,
struct ceph_mds_request, r_unsafe_item);
- list_del_init(&req->r_unsafe_item);
pr_warn_ratelimited(" dropping unsafe request %llu\n",
req->r_tid);
__unregister_request(mdsc, req);
@@ -2423,7 +2425,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
* useful we could do with a revised return value.
*/
dout("got safe reply %llu, mds%d\n", tid, mds);
- list_del_init(&req->r_unsafe_item);
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ec9dbbcca3b9..9156be545b0f 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -245,7 +245,8 @@ compose_mount_options_err:
* @fullpath: full path in UNC format
* @ref: server's referral
*/
-static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
+static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
+ struct cifs_sb_info *cifs_sb,
const char *fullpath, const struct dfs_info3_param *ref)
{
struct vfsmount *mnt;
@@ -259,7 +260,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
- mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata);
+ mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
kfree(mountdata);
kfree(devname);
return mnt;
@@ -334,7 +335,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
mnt = ERR_PTR(-EINVAL);
break;
}
- mnt = cifs_dfs_do_refmount(cifs_sb,
+ mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
full_path, referrals + i);
cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
__func__, referrals[i].node_name, mnt);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 18a1e1d6671f..1cd0e2eefc66 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2884,7 +2884,15 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
for (i = 0; i < rdata->nr_pages; i++) {
struct page *page = rdata->pages[i];
size_t copy = min_t(size_t, remaining, PAGE_SIZE);
- size_t written = copy_page_to_iter(page, 0, copy, iter);
+ size_t written;
+
+ if (unlikely(iter->type & ITER_PIPE)) {
+ void *addr = kmap_atomic(page);
+
+ written = copy_to_iter(addr, copy, iter);
+ kunmap_atomic(addr);
+ } else
+ written = copy_page_to_iter(page, 0, copy, iter);
remaining -= written;
if (written < copy && iov_iter_count(iter) > 0)
break;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f17fcf89e18e..1e30f74a9527 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -187,9 +187,9 @@ static const struct super_operations debugfs_super_operations = {
static struct vfsmount *debugfs_automount(struct path *path)
{
- struct vfsmount *(*f)(void *);
- f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
- return f(d_inode(path->dentry)->i_private);
+ debugfs_automount_t f;
+ f = (debugfs_automount_t)path->dentry->d_fsdata;
+ return f(path->dentry, d_inode(path->dentry)->i_private);
}
static const struct dentry_operations debugfs_dops = {
@@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
*/
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data)
{
struct dentry *dentry = start_creating(name, parent);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c930a0110fb4..9fbf92ca358c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5344,7 +5344,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_lblk_t stop, *iterator, ex_start, ex_end;
/* Let path point to the last extent */
- path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -5353,15 +5354,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent)
goto out;
- stop = le32_to_cpu(extent->ee_block) +
- ext4_ext_get_actual_len(extent);
+ stop = le32_to_cpu(extent->ee_block);
/*
* In case of left shift, Don't start shifting extents until we make
* sure the hole is big enough to accommodate the shift.
*/
if (SHIFT == SHIFT_LEFT) {
- path = ext4_find_extent(inode, start - 1, &path, 0);
+ path = ext4_find_extent(inode, start - 1, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5393,9 +5394,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
else
iterator = &stop;
- /* Its safe to start updating extents */
- while (start < stop) {
- path = ext4_find_extent(inode, *iterator, &path, 0);
+ /*
+ * Its safe to start updating extents. Start and stop are unsigned, so
+ * in case of right shift if extent with 0 block is reached, iterator
+ * becomes NULL to indicate the end of the loop.
+ */
+ while (iterator && start <= stop) {
+ path = ext4_find_extent(inode, *iterator, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5422,8 +5428,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- *iterator = le32_to_cpu(extent->ee_block) > 0 ?
- le32_to_cpu(extent->ee_block) - 1 : 0;
+ if (le32_to_cpu(extent->ee_block) > 0)
+ *iterator = le32_to_cpu(extent->ee_block) - 1;
+ else
+ /* Beginning is reached, end of the loop */
+ iterator = NULL;
/* Update path extent in case we need to stop */
while (le32_to_cpu(extent->ee_block) < start)
extent++;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d8ca4b9f9dd6..37b521ed39df 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -376,7 +376,7 @@ out:
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
unsigned int len)
{
- int ret, size;
+ int ret, size, no_expand;
struct ext4_inode_info *ei = EXT4_I(inode);
if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
@@ -386,15 +386,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
if (size < len)
return -ENOSPC;
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
if (ei->i_inline_off)
ret = ext4_update_inline_data(handle, inode, len);
else
ret = ext4_create_inline_data(handle, inode, len);
- up_write(&EXT4_I(inode)->xattr_sem);
-
+ ext4_write_unlock_xattr(inode, &no_expand);
return ret;
}
@@ -523,7 +522,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
struct inode *inode,
unsigned flags)
{
- int ret, needed_blocks;
+ int ret, needed_blocks, no_expand;
handle_t *handle = NULL;
int retries = 0, sem_held = 0;
struct page *page = NULL;
@@ -563,7 +562,7 @@ retry:
goto out;
}
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
sem_held = 1;
/* If some one has already done this for us, just exit. */
if (!ext4_has_inline_data(inode)) {
@@ -600,7 +599,7 @@ retry:
put_page(page);
page = NULL;
ext4_orphan_add(handle, inode);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
sem_held = 0;
ext4_journal_stop(handle);
handle = NULL;
@@ -626,7 +625,7 @@ out:
put_page(page);
}
if (sem_held)
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
if (handle)
ext4_journal_stop(handle);
brelse(iloc.bh);
@@ -719,7 +718,7 @@ convert:
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page)
{
- int ret;
+ int ret, no_expand;
void *kaddr;
struct ext4_iloc iloc;
@@ -737,7 +736,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
goto out;
}
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
BUG_ON(!ext4_has_inline_data(inode));
kaddr = kmap_atomic(page);
@@ -747,7 +746,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
/* clear page dirty so that writepages wouldn't work for us. */
ClearPageDirty(page);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
brelse(iloc.bh);
out:
return copied;
@@ -758,7 +757,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
unsigned len,
struct page *page)
{
- int ret;
+ int ret, no_expand;
void *kaddr;
struct ext4_iloc iloc;
@@ -768,11 +767,11 @@ ext4_journalled_write_inline_data(struct inode *inode,
return NULL;
}
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
kaddr = kmap_atomic(page);
ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
kunmap_atomic(kaddr);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return iloc.bh;
}
@@ -934,8 +933,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page)
{
int i_size_changed = 0;
+ int ret;
- copied = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ return ret;
+ }
+ copied = ret;
/*
* No need to use i_size_read() here, the i_size
@@ -1249,7 +1255,7 @@ out:
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
struct inode *dir, struct inode *inode)
{
- int ret, inline_size;
+ int ret, inline_size, no_expand;
void *inline_start;
struct ext4_iloc iloc;
@@ -1257,7 +1263,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
if (ret)
return ret;
- down_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_lock_xattr(dir, &no_expand);
if (!ext4_has_inline_data(dir))
goto out;
@@ -1303,7 +1309,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
out:
ext4_mark_inode_dirty(handle, dir);
- up_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_unlock_xattr(dir, &no_expand);
brelse(iloc.bh);
return ret;
}
@@ -1663,7 +1669,7 @@ int ext4_delete_inline_entry(handle_t *handle,
struct buffer_head *bh,
int *has_inline_data)
{
- int err, inline_size;
+ int err, inline_size, no_expand;
struct ext4_iloc iloc;
void *inline_start;
@@ -1671,7 +1677,7 @@ int ext4_delete_inline_entry(handle_t *handle,
if (err)
return err;
- down_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_lock_xattr(dir, &no_expand);
if (!ext4_has_inline_data(dir)) {
*has_inline_data = 0;
goto out;
@@ -1705,7 +1711,7 @@ int ext4_delete_inline_entry(handle_t *handle,
ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
out:
- up_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_unlock_xattr(dir, &no_expand);
brelse(iloc.bh);
if (err != -ENOENT)
ext4_std_error(dir->i_sb, err);
@@ -1804,11 +1810,11 @@ out:
int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
{
- int ret;
+ int ret, no_expand;
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
ret = ext4_destroy_inline_data_nolock(handle, inode);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return ret;
}
@@ -1893,7 +1899,7 @@ out:
void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
{
handle_t *handle;
- int inline_size, value_len, needed_blocks;
+ int inline_size, value_len, needed_blocks, no_expand;
size_t i_size;
void *value = NULL;
struct ext4_xattr_ibody_find is = {
@@ -1910,7 +1916,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
if (IS_ERR(handle))
return;
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
*has_inline = 0;
ext4_journal_stop(handle);
@@ -1968,7 +1974,7 @@ out_error:
up_write(&EXT4_I(inode)->i_data_sem);
out:
brelse(is.iloc.bh);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
kfree(value);
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
@@ -1984,7 +1990,7 @@ out:
int ext4_convert_inline_data(struct inode *inode)
{
- int error, needed_blocks;
+ int error, needed_blocks, no_expand;
handle_t *handle;
struct ext4_iloc iloc;
@@ -2006,15 +2012,10 @@ int ext4_convert_inline_data(struct inode *inode)
goto out_free;
}
- down_write(&EXT4_I(inode)->xattr_sem);
- if (!ext4_has_inline_data(inode)) {
- up_write(&EXT4_I(inode)->xattr_sem);
- goto out;
- }
-
- error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
- up_write(&EXT4_I(inode)->xattr_sem);
-out:
+ ext4_write_lock_xattr(inode, &no_expand);
+ if (ext4_has_inline_data(inode))
+ error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
+ ext4_write_unlock_xattr(inode, &no_expand);
ext4_journal_stop(handle);
out_free:
brelse(iloc.bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 33a509c876ee..dc9d64ac5969 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1324,8 +1324,11 @@ static int ext4_write_end(struct file *file,
if (ext4_has_inline_data(inode)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
- if (ret < 0)
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
goto errout;
+ }
copied = ret;
} else
copied = block_write_end(file, mapping, pos,
@@ -1379,7 +1382,9 @@ errout:
* set the buffer to be dirty, since in data=journalled mode we need
* to call ext4_handle_dirty_metadata() instead.
*/
-static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
+static void ext4_journalled_zero_new_buffers(handle_t *handle,
+ struct page *page,
+ unsigned from, unsigned to)
{
unsigned int block_start = 0, block_end;
struct buffer_head *head, *bh;
@@ -1396,7 +1401,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
size = min(to, block_end) - start;
zero_user(page, start, size);
- set_buffer_uptodate(bh);
+ write_end_fn(handle, bh);
}
clear_buffer_new(bh);
}
@@ -1425,18 +1430,25 @@ static int ext4_journalled_write_end(struct file *file,
BUG_ON(!ext4_handle_valid(handle));
- if (ext4_has_inline_data(inode))
- copied = ext4_write_inline_data_end(inode, pos, len,
- copied, page);
- else {
- if (copied < len) {
- if (!PageUptodate(page))
- copied = 0;
- zero_new_buffers(page, from+copied, to);
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_write_inline_data_end(inode, pos, len,
+ copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ goto errout;
}
-
+ copied = ret;
+ } else if (unlikely(copied < len) && !PageUptodate(page)) {
+ copied = 0;
+ ext4_journalled_zero_new_buffers(handle, page, from, to);
+ } else {
+ if (unlikely(copied < len))
+ ext4_journalled_zero_new_buffers(handle, page,
+ from + copied, to);
ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
- to, &partial, write_end_fn);
+ from + copied, &partial,
+ write_end_fn);
if (!partial)
SetPageUptodate(page);
}
@@ -1462,6 +1474,7 @@ static int ext4_journalled_write_end(struct file *file,
*/
ext4_orphan_add(handle, inode);
+errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
@@ -3811,6 +3824,10 @@ static int ext4_block_truncate_page(handle_t *handle,
unsigned blocksize;
struct inode *inode = mapping->host;
+ /* If we are processing an encrypted inode during orphan list handling */
+ if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode))
+ return 0;
+
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7ae43c59bc79..2e9fc7a61048 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3123,6 +3123,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
if (ar->pright && start + size - 1 >= ar->lright)
size -= start + size - ar->lright;
+ /*
+ * Trim allocation request for filesystems with artificially small
+ * groups.
+ */
+ if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+ size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
+
end = start + size;
/* check we don't cross already preallocated blocks */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 104f8bfba718..c4a389a6027b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1616,13 +1616,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
!fscrypt_has_permitted_context(dir, inode)) {
int nokey = ext4_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
- iput(inode);
- if (nokey)
+ if (nokey) {
+ iput(inode);
return ERR_PTR(-ENOKEY);
+ }
ext4_warning(inode->i_sb,
"Inconsistent encryption contexts: %lu/%lu",
(unsigned long) dir->i_ino,
(unsigned long) inode->i_ino);
+ iput(inode);
return ERR_PTR(-EPERM);
}
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bbc316db9495..5fa9ba1de429 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -825,6 +825,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ int aborted = 0;
int i, err;
ext4_unregister_li_request(sb);
@@ -834,9 +835,10 @@ static void ext4_put_super(struct super_block *sb)
destroy_workqueue(sbi->rsv_conversion_wq);
if (sbi->s_journal) {
+ aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
- if (err < 0)
+ if ((err < 0) && !aborted)
ext4_abort(sb, "Couldn't clean up the journal");
}
@@ -847,7 +849,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!(sb->s_flags & MS_RDONLY) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
@@ -3828,7 +3830,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
if (ext4_has_feature_meta_bg(sb)) {
- if (le32_to_cpu(es->s_first_meta_bg) >= db_count) {
+ if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
ext4_msg(sb, KERN_WARNING,
"first meta block group too large: %u "
"(group descriptor block count %u)",
@@ -3911,7 +3913,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* root first: it may be modified in the journal!
*/
if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
- if (ext4_load_journal(sb, es, journal_devnum))
+ err = ext4_load_journal(sb, es, journal_devnum);
+ if (err)
goto failed_mount3a;
} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
ext4_has_feature_journal_needs_recovery(sb)) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index d77be9e9f535..4448ed37181b 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1174,16 +1174,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
struct ext4_xattr_block_find bs = {
.s = { .not_found = -ENODATA, },
};
- unsigned long no_expand;
+ int no_expand;
int error;
if (!name)
return -EINVAL;
if (strlen(name) > 255)
return -ERANGE;
- down_write(&EXT4_I(inode)->xattr_sem);
- no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
- ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_write_lock_xattr(inode, &no_expand);
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
if (error)
@@ -1251,7 +1249,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = ext4_current_time(inode);
if (!value)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ no_expand = 0;
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1265,9 +1263,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
cleanup:
brelse(is.iloc.bh);
brelse(bs.bh);
- if (no_expand == 0)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return error;
}
@@ -1484,12 +1480,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
int error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
int isize_diff; /* How much do we need to grow i_extra_isize */
+ int no_expand;
+
+ if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
+ return 0;
- down_write(&EXT4_I(inode)->xattr_sem);
- /*
- * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
- */
- ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
retry:
isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
@@ -1571,17 +1566,16 @@ shift:
EXT4_I(inode)->i_extra_isize = new_extra_isize;
brelse(bh);
out:
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return 0;
cleanup:
brelse(bh);
/*
- * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
- * size expansion failed.
+ * Inode size expansion failed; don't try again
*/
- up_write(&EXT4_I(inode)->xattr_sem);
+ no_expand = 1;
+ ext4_write_unlock_xattr(inode, &no_expand);
return error;
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index a92e783fa057..099c8b670ef5 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler;
#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
+/*
+ * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
+ * The first is to signal that there the inline xattrs and data are
+ * taking up so much space that we might as well not keep trying to
+ * expand it. The second is that xattr_sem is taken for writing, so
+ * we shouldn't try to recurse into the inode expansion. For this
+ * second case, we need to make sure that we take save and restore the
+ * NO_EXPAND state flag appropriately.
+ */
+static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
+{
+ down_write(&EXT4_I(inode)->xattr_sem);
+ *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+}
+
+static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
+{
+ if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
+ return 0;
+ *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ return 1;
+}
+
+static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
+{
+ if (*save == 0)
+ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ up_write(&EXT4_I(inode)->xattr_sem);
+}
+
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 369f4513be37..ebdc90fc71b7 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_put_page(dentry_page, 0);
}
- if (!de && room && F2FS_I(dir)->chash != namehash) {
- F2FS_I(dir)->chash = namehash;
- F2FS_I(dir)->clevel = level;
+ /* This is to increase the speed of f2fs_create */
+ if (!de && room) {
+ F2FS_I(dir)->task = current;
+ if (F2FS_I(dir)->chash != namehash) {
+ F2FS_I(dir)->chash = namehash;
+ F2FS_I(dir)->clevel = level;
+ }
}
return de;
@@ -643,14 +647,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
+ struct page *page = NULL;
+ struct f2fs_dir_entry *de = NULL;
int err;
err = fscrypt_setup_filename(dir, name, 0, &fname);
if (err)
return err;
- err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
-
+ /*
+ * An immature stakable filesystem shows a race condition between lookup
+ * and create. If we have same task when doing lookup and create, it's
+ * definitely fine as expected by VFS normally. Otherwise, let's just
+ * verify on-disk dentry one more time, which guarantees filesystem
+ * consistency more.
+ */
+ if (current != F2FS_I(dir)->task) {
+ de = __f2fs_find_entry(dir, &fname, &page);
+ F2FS_I(dir)->task = NULL;
+ }
+ if (de) {
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+ err = -EEXIST;
+ } else if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ } else {
+ err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
+ }
fscrypt_free_filename(&fname);
return err;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2b06d4fcd954..7b32ce979fe1 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -352,11 +352,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
- if (en)
- __release_extent_node(sbi, et, prev_ex);
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
+ if (en)
+ __release_extent_node(sbi, et, prev_ex);
+
en = next_ex;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 506af456412f..3a1640be7ffc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -431,6 +431,7 @@ struct f2fs_inode_info {
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
+ struct task_struct *task; /* lookup and create consistency */
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
loff_t last_disk_size; /* lastly written file size */
@@ -833,6 +834,9 @@ struct f2fs_sb_info {
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
+ /* threshold for converting bg victims for fg */
+ u64 fggc_threshold;
+
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 6f14ee923acd..34a69e7ed90b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -166,7 +166,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->ofs_unit = sbi->segs_per_sec;
}
- if (p->max_search > sbi->max_victim_search)
+ /* we need to check every dirty segments in the FG_GC case */
+ if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
@@ -199,6 +200,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
continue;
+
+ if (no_fggc_candidate(sbi, secno))
+ continue;
+
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
}
@@ -322,13 +327,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
-
secno = GET_SECNO(sbi, segno);
if (sec_usage_check(sbi, secno))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
+ if (gc_type == FG_GC && p.alloc_mode == LFS &&
+ no_fggc_candidate(sbi, secno))
+ goto next;
cost = get_gc_cost(sbi, segno, &p);
@@ -972,5 +979,16 @@ stop:
void build_gc_manager(struct f2fs_sb_info *sbi)
{
+ u64 main_count, resv_count, ovp_count, blocks_per_sec;
+
DIRTY_I(sbi)->v_ops = &default_v_ops;
+
+ /* threshold of # of valid blocks in a section for victims of FG_GC */
+ main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
+ resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
+ ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
+ blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
+
+ sbi->fggc_threshold = div_u64((main_count - ovp_count) * blocks_per_sec,
+ (main_count - resv_count));
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fc886f008449..a7943f861d68 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -813,6 +813,8 @@ next:
start = start_segno + sbi->segs_per_sec;
if (start < end)
goto next;
+ else
+ end = start - 1;
}
mutex_unlock(&dirty_i->seglist_lock);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index fecb856ad874..b164f8339281 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -688,6 +688,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
- (base + 1) + type;
}
+static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
+ unsigned int secno)
+{
+ if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
+ sbi->fggc_threshold)
+ return true;
+ return false;
+}
+
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 338d2f73eb29..a2c05f2ada6d 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1359,6 +1359,16 @@ out:
return 0;
}
+static void fat_dummy_inode_init(struct inode *inode)
+{
+ /* Initialize this dummy inode to work as no-op. */
+ MSDOS_I(inode)->mmu_private = 0;
+ MSDOS_I(inode)->i_start = 0;
+ MSDOS_I(inode)->i_logstart = 0;
+ MSDOS_I(inode)->i_attrs = 0;
+ MSDOS_I(inode)->i_pos = 0;
+}
+
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
- MSDOS_I(fat_inode)->i_pos = 0;
+ fat_dummy_inode_init(fat_inode);
sbi->fat_inode = fat_inode;
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
+ fat_dummy_inode_init(fsinfo_inode);
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2401c5dabb2a..5ec5870e423a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else if (sync) {
+ __set_bit(FR_FORCE, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
fuse_request_send(ff->fc, req);
iput(req->misc.release.inode);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 14cbf60167a7..133f322573b5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -658,9 +658,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct kmem_cache *cachep;
int ret, tries = 0;
+ rcu_read_lock();
gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (gl && !lockref_get_not_dead(&gl->gl_lockref))
gl = NULL;
+ rcu_read_unlock();
*glp = gl;
if (gl)
@@ -728,15 +730,18 @@ again:
if (ret == -EEXIST) {
ret = 0;
+ rcu_read_lock();
tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
if (++tries < 100) {
+ rcu_read_unlock();
cond_resched();
goto again;
}
tmp = NULL;
ret = -ENOMEM;
}
+ rcu_read_unlock();
} else {
WARN_ON_ONCE(ret);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a6a3389a07fc..51519c2836b5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
struct gfs2_sbd *ln_sbd;
u64 ln_number;
unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
#define lm_name_equal(name1, name2) \
(((name1)->ln_number == (name2)->ln_number) && \
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e1652665bd93..5e659ee08d6a 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1863,7 +1863,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
__blist_del_buffer(list, jh);
jh->b_jlist = BJ_None;
- if (test_clear_buffer_jbddirty(bh))
+ if (transaction && is_journal_aborted(transaction->t_journal))
+ clear_buffer_jbddirty(bh);
+ else if (test_clear_buffer_jbddirty(bh))
mark_buffer_dirty(bh); /* Expose it to the VM */
}
diff --git a/fs/mount.h b/fs/mount.h
index d2e25d7b64b3..d8295f273a2f 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -89,7 +89,6 @@ static inline int is_mounted(struct vfsmount *mnt)
}
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
-extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
extern int __legitimize_mnt(struct vfsmount *, unsigned);
extern bool legitimize_mnt(struct vfsmount *, unsigned);
diff --git a/fs/namei.c b/fs/namei.c
index 9c8dd3c83a80..150fbdd8e04c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1100,7 +1100,6 @@ static int follow_automount(struct path *path, struct nameidata *nd,
bool *need_mntput)
{
struct vfsmount *mnt;
- const struct cred *old_cred;
int err;
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
@@ -1129,9 +1128,7 @@ static int follow_automount(struct path *path, struct nameidata *nd,
if (nd->total_link_count >= 40)
return -ELOOP;
- old_cred = override_creds(&init_cred);
mnt = path->dentry->d_op->d_automount(path);
- revert_creds(old_cred);
if (IS_ERR(mnt)) {
/*
* The filesystem is allowed to return -EISDIR here to indicate
diff --git a/fs/namespace.c b/fs/namespace.c
index cb15f5397991..843d274ba167 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -645,28 +645,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
}
/*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
- struct mount *p, *res = NULL;
- p = __lookup_mnt(mnt, dentry);
- if (!p)
- goto out;
- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
- res = p;
- hlist_for_each_entry_continue(p, mnt_hash) {
- if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
- break;
- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
- res = p;
- }
-out:
- return res;
-}
-
-/*
* lookup_mnt - Return the first child mount mounted at path
*
* "First" means first mounted chronologically. If you create the
@@ -886,6 +864,13 @@ void mnt_set_mountpoint(struct mount *mnt,
hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+ hlist_add_head_rcu(&mnt->mnt_hash,
+ m_hash(&parent->mnt, mnt->mnt_mountpoint));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
/*
* vfsmount lock must be held for write
*/
@@ -894,28 +879,45 @@ static void attach_mnt(struct mount *mnt,
struct mountpoint *mp)
{
mnt_set_mountpoint(parent, mp, mnt);
- hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ __attach_mnt(mnt, parent);
}
-static void attach_shadowed(struct mount *mnt,
- struct mount *parent,
- struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
- if (shadows) {
- hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
- list_add(&mnt->mnt_child, &shadows->mnt_child);
- } else {
- hlist_add_head_rcu(&mnt->mnt_hash,
- m_hash(&parent->mnt, mnt->mnt_mountpoint));
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
- }
+ struct mountpoint *old_mp = mnt->mnt_mp;
+ struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+ struct mount *old_parent = mnt->mnt_parent;
+
+ list_del_init(&mnt->mnt_child);
+ hlist_del_init(&mnt->mnt_mp_list);
+ hlist_del_init_rcu(&mnt->mnt_hash);
+
+ attach_mnt(mnt, parent, mp);
+
+ put_mountpoint(old_mp);
+
+ /*
+ * Safely avoid even the suggestion this code might sleep or
+ * lock the mount hash by taking advantage of the knowledge that
+ * mnt_change_mountpoint will not release the final reference
+ * to a mountpoint.
+ *
+ * During mounting, the mount passed in as the parent mount will
+ * continue to use the old mountpoint and during unmounting, the
+ * old mountpoint will continue to exist until namespace_unlock,
+ * which happens well after mnt_change_mountpoint.
+ */
+ spin_lock(&old_mountpoint->d_lock);
+ old_mountpoint->d_lockref.count--;
+ spin_unlock(&old_mountpoint->d_lock);
+
+ mnt_add_count(old_parent, -1);
}
/*
* vfsmount lock must be held for write
*/
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
{
struct mount *parent = mnt->mnt_parent;
struct mount *m;
@@ -933,7 +935,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
n->mounts += n->pending_mounts;
n->pending_mounts = 0;
- attach_shadowed(mnt, parent, shadows);
+ __attach_mnt(mnt, parent);
touch_mnt_namespace(n);
}
@@ -997,6 +999,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
+struct vfsmount *
+vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
+ const char *name, void *data)
+{
+ /* Until it is worked out how to pass the user namespace
+ * through from the parent mount to the submount don't support
+ * unprivileged mounts with submounts.
+ */
+ if (mountpoint->d_sb->s_user_ns != &init_user_ns)
+ return ERR_PTR(-EPERM);
+
+ return vfs_kern_mount(type, MS_SUBMOUNT, name, data);
+}
+EXPORT_SYMBOL_GPL(vfs_submount);
+
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
{
@@ -1741,7 +1758,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue;
for (s = r; s; s = next_mnt(s, r)) {
- struct mount *t = NULL;
if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
s = skip_mnt_tree(s);
@@ -1763,14 +1779,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
goto out;
lock_mount_hash();
list_add_tail(&q->mnt_list, &res->mnt_list);
- mnt_set_mountpoint(parent, p->mnt_mp, q);
- if (!list_empty(&parent->mnt_mounts)) {
- t = list_last_entry(&parent->mnt_mounts,
- struct mount, mnt_child);
- if (t->mnt_mp != p->mnt_mp)
- t = NULL;
- }
- attach_shadowed(q, parent, t);
+ attach_mnt(q, parent, p->mnt_mp);
unlock_mount_hash();
}
}
@@ -1971,10 +1980,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
{
HLIST_HEAD(tree_list);
struct mnt_namespace *ns = dest_mnt->mnt_ns;
+ struct mountpoint *smp;
struct mount *child, *p;
struct hlist_node *n;
int err;
+ /* Preallocate a mountpoint in case the new mounts need
+ * to be tucked under other mounts.
+ */
+ smp = get_mountpoint(source_mnt->mnt.mnt_root);
+ if (IS_ERR(smp))
+ return PTR_ERR(smp);
+
/* Is there space to add these mounts to the mount namespace? */
if (!parent_path) {
err = count_mounts(ns, source_mnt);
@@ -2001,16 +2018,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
- commit_tree(source_mnt, NULL);
+ commit_tree(source_mnt);
}
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
struct mount *q;
hlist_del_init(&child->mnt_hash);
- q = __lookup_mnt_last(&child->mnt_parent->mnt,
- child->mnt_mountpoint);
- commit_tree(child, q);
+ q = __lookup_mnt(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ if (q)
+ mnt_change_mountpoint(child, smp, q);
+ commit_tree(child);
}
+ put_mountpoint(smp);
unlock_mount_hash();
return 0;
@@ -2025,6 +2045,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
cleanup_group_ids(source_mnt, NULL);
out:
ns->pending_mounts = 0;
+
+ read_seqlock_excl(&mount_lock);
+ put_mountpoint(smp);
+ read_sequnlock_excl(&mount_lock);
+
return err;
}
@@ -2773,7 +2798,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME | MS_NOREMOTELOCK);
+ MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT);
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index a5c38889e7ae..13abd608af0f 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1073,9 +1073,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- if (task->tk_status >= 0)
- return 0;
-
switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
@@ -1176,9 +1173,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
- if (task->tk_status >= 0)
- return 0;
-
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@@ -1213,6 +1207,13 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
+ if (task->tk_status >= 0)
+ return 0;
+
+ /* Handle the case of an invalid layout segment */
+ if (!pnfs_is_valid_lseg(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+
switch (vers) {
case 3:
return ff_layout_async_handle_error_v3(task, lseg, idx);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 5551e8ef67fd..e49d831c4e85 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -226,7 +226,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
const char *devname,
struct nfs_clone_mount *mountdata)
{
- return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
+ return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
}
/**
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 608501971fe0..5cda392028ce 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -128,30 +128,26 @@ out_unlock:
return err;
}
-static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
+static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_lock_context *src_lock,
- struct file *dst, loff_t pos_dst,
+ struct file *dst,
struct nfs_lock_context *dst_lock,
- size_t count)
+ struct nfs42_copy_args *args,
+ struct nfs42_copy_res *res)
{
- struct nfs42_copy_args args = {
- .src_fh = NFS_FH(file_inode(src)),
- .src_pos = pos_src,
- .dst_fh = NFS_FH(file_inode(dst)),
- .dst_pos = pos_dst,
- .count = count,
- };
- struct nfs42_copy_res res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
- .rpc_argp = &args,
- .rpc_resp = &res,
+ .rpc_argp = args,
+ .rpc_resp = res,
};
struct inode *dst_inode = file_inode(dst);
struct nfs_server *server = NFS_SERVER(dst_inode);
+ loff_t pos_src = args->src_pos;
+ loff_t pos_dst = args->dst_pos;
+ size_t count = args->count;
int status;
- status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+ status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
src_lock, FMODE_READ);
if (status)
return status;
@@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
if (status)
return status;
- status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+ status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
dst_lock, FMODE_WRITE);
if (status)
return status;
@@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
return status;
status = nfs4_call_sync(server->client, server, &msg,
- &args.seq_args, &res.seq_res, 0);
+ &args->seq_args, &res->seq_res, 0);
if (status == -ENOTSUPP)
server->caps &= ~NFS_CAP_COPY;
if (status)
return status;
- if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
- status = nfs_commit_file(dst, &res.write_res.verifier.verifier);
+ if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
+ status = nfs_commit_file(dst, &res->write_res.verifier.verifier);
if (status)
return status;
}
truncate_pagecache_range(dst_inode, pos_dst,
- pos_dst + res.write_res.count);
+ pos_dst + res->write_res.count);
- return res.write_res.count;
+ return res->write_res.count;
}
ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
@@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
struct nfs_server *server = NFS_SERVER(file_inode(dst));
struct nfs_lock_context *src_lock;
struct nfs_lock_context *dst_lock;
- struct nfs4_exception src_exception = { };
- struct nfs4_exception dst_exception = { };
+ struct nfs42_copy_args args = {
+ .src_fh = NFS_FH(file_inode(src)),
+ .src_pos = pos_src,
+ .dst_fh = NFS_FH(file_inode(dst)),
+ .dst_pos = pos_dst,
+ .count = count,
+ };
+ struct nfs42_copy_res res;
+ struct nfs4_exception src_exception = {
+ .inode = file_inode(src),
+ .stateid = &args.src_stateid,
+ };
+ struct nfs4_exception dst_exception = {
+ .inode = file_inode(dst),
+ .stateid = &args.dst_stateid,
+ };
ssize_t err, err2;
if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
@@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
if (IS_ERR(src_lock))
return PTR_ERR(src_lock);
- src_exception.inode = file_inode(src);
src_exception.state = src_lock->open_context->state;
dst_lock = nfs_get_lock_context(nfs_file_open_context(dst));
@@ -216,15 +225,17 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
goto out_put_src_lock;
}
- dst_exception.inode = file_inode(dst);
dst_exception.state = dst_lock->open_context->state;
do {
inode_lock(file_inode(dst));
- err = _nfs42_proc_copy(src, pos_src, src_lock,
- dst, pos_dst, dst_lock, count);
+ err = _nfs42_proc_copy(src, src_lock,
+ dst, dst_lock,
+ &args, &res);
inode_unlock(file_inode(dst));
+ if (err >= 0)
+ break;
if (err == -ENOTSUPP) {
err = -EOPNOTSUPP;
break;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index d21104912676..d8b040bd9814 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
mountdata->hostname,
mountdata->mnt_path);
- mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
+ mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
if (!IS_ERR(mnt))
break;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3573653fd5cc..0a8bc7eab083 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2708,6 +2708,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
ret = PTR_ERR(state);
if (IS_ERR(state))
goto out;
+ ctx->state = state;
if (server->caps & NFS_CAP_POSIX_LOCK)
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK)
@@ -2733,7 +2734,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (ret != 0)
goto out;
- ctx->state = state;
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
if (read_seqretry(&sp->so_reclaim_seqlock, seq))
@@ -4990,7 +4990,7 @@ out:
*/
static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+ struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
.acl_pages = pages,
@@ -5004,13 +5004,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
.rpc_argp = &args,
.rpc_resp = &res,
};
- unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
+ unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
int ret = -ENOMEM, i;
- /* As long as we're doing a round trip to the server anyway,
- * let's be prepared for a page of acl data. */
- if (npages == 0)
- npages = 1;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
@@ -7430,11 +7426,11 @@ static void nfs4_exchange_id_release(void *data)
struct nfs41_exchange_id_data *cdata =
(struct nfs41_exchange_id_data *)data;
- nfs_put_client(cdata->args.client);
if (cdata->xprt) {
xprt_put(cdata->xprt);
rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient);
}
+ nfs_put_client(cdata->args.client);
kfree(cdata->res.impl_id);
kfree(cdata->res.server_scope);
kfree(cdata->res.server_owner);
@@ -7541,10 +7537,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
task_setup_data.callback_data = calldata;
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- status = PTR_ERR(task);
- goto out_impl_id;
- }
+ if (IS_ERR(task))
+ return PTR_ERR(task);
if (!xprt) {
status = rpc_wait_for_completion_task(task);
@@ -7572,6 +7566,7 @@ out_server_owner:
kfree(calldata->res.server_owner);
out_calldata:
kfree(calldata);
+ nfs_put_client(clp);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index fc89e5ed07ee..c9c4d9855976 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2492,7 +2492,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 1;
+ replen = hdr.replen + op_decode_hdr_maxsz;
encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ca642fe9b21..b829cc9a9b39 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
__be32 err;
int host_err;
bool get_write_count;
- int size_change = 0;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
- goto out;
+ return err;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
- return nfserrno(host_err);
+ goto out;
}
dentry = fhp->fh_dentry;
@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~ATTR_MODE;
if (!iap->ia_valid)
- goto out;
+ return 0;
nfsd_sanitize_attrs(inode, iap);
+ if (check_guard && guardtime != inode->i_ctime.tv_sec)
+ return nfserr_notsync;
+
/*
* The size case is special, it changes the file in addition to the
- * attributes.
+ * attributes, and file systems don't expect it to be mixed with
+ * "random" attribute changes. We thus split out the size change
+ * into a separate call to ->setattr, and do the rest as a separate
+ * setattr call.
*/
- if (iap->ia_valid & ATTR_SIZE) {
+ if (size_change) {
err = nfsd_get_write_access(rqstp, fhp, iap);
if (err)
- goto out;
- size_change = 1;
+ return err;
+ }
+ fh_lock(fhp);
+ if (size_change) {
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*
* (and similar for the older RFCs)
*/
- if (iap->ia_size != i_size_read(inode))
- iap->ia_valid |= ATTR_MTIME;
- }
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
- iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(dentry, &size_attr, NULL);
+ if (host_err)
+ goto out_unlock;
+ iap->ia_valid &= ~ATTR_SIZE;
- if (check_guard && guardtime != inode->i_ctime.tv_sec) {
- err = nfserr_notsync;
- goto out_put_write_access;
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ goto out_unlock;
}
- fh_lock(fhp);
+ iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, iap, NULL);
- fh_unlock(fhp);
- err = nfserrno(host_err);
-out_put_write_access:
+out_unlock:
+ fh_unlock(fhp);
if (size_change)
put_write_access(inode);
- if (!err)
- err = nfserrno(commit_metadata(fhp));
out:
- return err;
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+ return nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index c48859f16e7b..67c24351a67f 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb)
return &orangefs_inode->vfs_inode;
}
+static void orangefs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ kmem_cache_free(orangefs_inode_cache, orangefs_inode);
+}
+
static void orangefs_destroy_inode(struct inode *inode)
{
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
@@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struct inode *inode)
"%s: deallocated %p destroying inode %pU\n",
__func__, orangefs_inode, get_khandle_from_ino(inode));
- kmem_cache_free(orangefs_inode_cache, orangefs_inode);
+ call_rcu(&inode->i_rcu, orangefs_i_callback);
}
/*
diff --git a/fs/pnode.c b/fs/pnode.c
index 234a9ac49958..b394ca5307ec 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -324,6 +324,21 @@ out:
return ret;
}
+static struct mount *find_topper(struct mount *mnt)
+{
+ /* If there is exactly one mount covering mnt completely return it. */
+ struct mount *child;
+
+ if (!list_is_singular(&mnt->mnt_mounts))
+ return NULL;
+
+ child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
+ if (child->mnt_mountpoint != mnt->mnt.mnt_root)
+ return NULL;
+
+ return child;
+}
+
/*
* return true if the refcount is greater than count
*/
@@ -344,9 +359,8 @@ static inline int do_refcount_check(struct mount *mnt, int count)
*/
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
- struct mount *m, *child;
+ struct mount *m, *child, *topper;
struct mount *parent = mnt->mnt_parent;
- int ret = 0;
if (mnt == parent)
return do_refcount_check(mnt, refcnt);
@@ -361,12 +375,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
- if (child && list_empty(&child->mnt_mounts) &&
- (ret = do_refcount_check(child, 1)))
- break;
+ int count = 1;
+ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
+ if (!child)
+ continue;
+
+ /* Is there exactly one mount on the child that covers
+ * it completely whose reference should be ignored?
+ */
+ topper = find_topper(child);
+ if (topper)
+ count += 1;
+ else if (!list_empty(&child->mnt_mounts))
+ continue;
+
+ if (do_refcount_check(child, count))
+ return 1;
}
- return ret;
+ return 0;
}
/*
@@ -383,7 +409,7 @@ void propagate_mount_unlock(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
if (child)
child->mnt.mnt_flags &= ~MNT_LOCKED;
}
@@ -401,9 +427,11 @@ static void mark_umount_candidates(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt_last(&m->mnt,
+ struct mount *child = __lookup_mnt(&m->mnt,
mnt->mnt_mountpoint);
- if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
+ if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
+ continue;
+ if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
SET_MNT_MARK(child);
}
}
@@ -422,8 +450,8 @@ static void __propagate_umount(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
-
- struct mount *child = __lookup_mnt_last(&m->mnt,
+ struct mount *topper;
+ struct mount *child = __lookup_mnt(&m->mnt,
mnt->mnt_mountpoint);
/*
* umount the child only if the child has no children
@@ -432,6 +460,15 @@ static void __propagate_umount(struct mount *mnt)
if (!child || !IS_MNT_MARKED(child))
continue;
CLEAR_MNT_MARK(child);
+
+ /* If there is exactly one mount covering all of child
+ * replace child with that mount.
+ */
+ topper = find_topper(child);
+ if (topper)
+ mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
+ topper);
+
if (list_empty(&child->mnt_mounts)) {
list_del_init(&child->mnt_child);
child->mnt.mnt_flags |= MNT_UMOUNT;
diff --git a/fs/pnode.h b/fs/pnode.h
index 550f5a8b4fcf..dc87e65becd2 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt, const struct path *root);
unsigned int mnt_get_count(struct mount *mnt);
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *);
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
+ struct mount *mnt);
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);
diff --git a/fs/super.c b/fs/super.c
index c183835566c1..1058bf3e8724 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -470,7 +470,7 @@ struct super_block *sget_userns(struct file_system_type *type,
struct super_block *old;
int err;
- if (!(flags & MS_KERNMOUNT) &&
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
!(type->fs_flags & FS_USERNS_MOUNT) &&
!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
@@ -500,7 +500,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, flags, user_ns);
+ s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
@@ -541,8 +541,15 @@ struct super_block *sget(struct file_system_type *type,
{
struct user_namespace *user_ns = current_user_ns();
+ /* We don't yet pass the user namespace of the parent
+ * mount through to here so always use &init_user_ns
+ * until that changes.
+ */
+ if (flags & MS_SUBMOUNT)
+ user_ns = &init_user_ns;
+
/* Ensure the requestor has permissions over the target filesystem */
- if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
return sget_userns(type, test, set, flags, user_ns, data);