about | summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/aio.c 44
-rw-r--r-- fs/btrfs/volumes.c 12
-rw-r--r-- fs/dcache.c 11
-rw-r--r-- fs/namei.c 5
-rw-r--r-- fs/nfs/super.c 2
-rw-r--r-- fs/reiserfs/journal.c 2
-rw-r--r-- fs/reiserfs/reiserfs.h 1
-rw-r--r-- fs/reiserfs/super.c 21
8 files changed, 67 insertions, 31 deletions
diff --git a/fs/aio.c b/fs/aio.c
index fe4f49212b99..88ede4a84ce0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
#define AIO_RING_PAGES 8
struct kioctx_table {
- struct rcu_head rcu;
- unsigned nr;
- struct kioctx *table[];
+ struct rcu_head rcu;
+ unsigned nr;
+ struct kioctx __rcu *table[];
};
struct kioctx_cpu {
@@ -115,7 +115,8 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct work_struct free_work;
+ struct rcu_head free_rcu;
+ struct work_struct free_work; /* see free_ioctx() */
/*
* signals when all in-flight requests are done
@@ -326,7 +327,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
- ctx = table->table[i];
+ ctx = rcu_dereference(table->table[i]);
if (ctx && ctx->aio_ring_file == file) {
if (!atomic_read(&ctx->dead)) {
ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -573,6 +574,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
return cancel(&kiocb->common);
}
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
+ * ->free_work.
+ */
static void free_ioctx(struct work_struct *work)
{
struct kioctx *ctx = container_of(work, struct kioctx, free_work);
@@ -586,6 +593,14 @@ static void free_ioctx(struct work_struct *work)
kmem_cache_free(kioctx_cachep, ctx);
}
+static void free_ioctx_rcufn(struct rcu_head *head)
+{
+ struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
+
+ INIT_WORK(&ctx->free_work, free_ioctx);
+ schedule_work(&ctx->free_work);
+}
+
static void free_ioctx_reqs(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -594,8 +609,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
complete(&ctx->rq_wait->comp);
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ /* Synchronize against RCU protected table->table[] dereferences */
+ call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
}
/*
@@ -636,9 +651,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
while (1) {
if (table)
for (i = 0; i < table->nr; i++)
- if (!table->table[i]) {
+ if (!rcu_access_pointer(table->table[i])) {
ctx->id = i;
- table->table[i] = ctx;
+ rcu_assign_pointer(table->table[i], ctx);
spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress,
@@ -813,11 +828,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
}
table = rcu_dereference_raw(mm->ioctx_table);
- WARN_ON(ctx != table->table[ctx->id]);
- table->table[ctx->id] = NULL;
+ WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+ RCU_INIT_POINTER(table->table[ctx->id], NULL);
spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */
+ /* free_ioctx_reqs() will do the necessary RCU synchronization */
wake_up_all(&ctx->wait);
/*
@@ -859,7 +874,8 @@ void exit_aio(struct mm_struct *mm)
skipped = 0;
for (i = 0; i < table->nr; ++i) {
- struct kioctx *ctx = table->table[i];
+ struct kioctx *ctx =
+ rcu_dereference_protected(table->table[i], true);
if (!ctx) {
skipped++;
@@ -1048,7 +1064,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
- ctx = table->table[id];
+ ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
percpu_ref_get(&ctx->users);
ret = ctx;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 600c67ef8a03..6d874b1cd53c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -568,6 +568,7 @@ void btrfs_free_stale_device(struct btrfs_device *cur_dev)
btrfs_sysfs_remove_fsid(fs_devs);
list_del(&fs_devs->list);
free_fs_devices(fs_devs);
+ break;
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
@@ -4638,10 +4639,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (devs_max && ndevs > devs_max)
ndevs = devs_max;
/*
- * the primary goal is to maximize the number of stripes, so use as many
- * devices as possible, even if the stripes are not maximum sized.
+ * The primary goal is to maximize the number of stripes, so use as
+ * many devices as possible, even if the stripes are not maximum sized.
+ *
+ * The DUP profile stores more than one stripe per device, the
+ * max_avail is the total size so we have to adjust.
*/
- stripe_size = devices_info[ndevs-1].max_avail;
+ stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
num_stripes = ndevs * dev_stripes;
/*
@@ -4681,8 +4685,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
stripe_size = devices_info[ndevs-1].max_avail;
}
- stripe_size = div_u64(stripe_size, dev_stripes);
-
/* align to BTRFS_STRIPE_LEN */
stripe_size = div_u64(stripe_size, raid_stripe_len);
stripe_size *= raid_stripe_len;
diff --git a/fs/dcache.c b/fs/dcache.c
index 5bf7b4a188e9..4d43df7721fe 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -634,11 +634,16 @@ again:
spin_unlock(&parent->d_lock);
goto again;
}
- rcu_read_unlock();
- if (parent != dentry)
+ if (parent != dentry) {
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- else
+ if (unlikely(dentry->d_lockref.count < 0)) {
+ spin_unlock(&parent->d_lock);
+ parent = NULL;
+ }
+ } else {
parent = NULL;
+ }
+ rcu_read_unlock();
return parent;
}
diff --git a/fs/namei.c b/fs/namei.c
index c54aaa759ed1..8df416aca02c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -585,9 +585,10 @@ static int __nd_alloc_stack(struct nameidata *nd)
static bool path_connected(const struct path *path)
{
struct vfsmount *mnt = path->mnt;
+ struct super_block *sb = mnt->mnt_sb;
- /* Only bind mounts can have disconnected paths */
- if (mnt->mnt_root == mnt->mnt_sb->s_root)
+ /* Bind mounts and multi-root filesystems can have disconnected paths */
+ if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
return true;
return is_subdir(path->dentry, mnt->mnt_root);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3149f7e58d6f..62f358f67764 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2581,6 +2581,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
/* initial superblock/root creation */
mount_info->fill_super(s, mount_info);
nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
+ if (!(server->flags & NFS_MOUNT_UNSHARED))
+ s->s_iflags |= SB_I_MULTIROOT;
}
mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9d6486d416a3..a72097b625ef 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1961,7 +1961,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
* will be requeued because superblock is being shutdown and doesn't
* have MS_ACTIVE set.
*/
- cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work);
+ reiserfs_cancel_old_flush(sb);
/* wait for all commits to finish */
cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work);
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 5dcf3ab83886..6ca00471afbf 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -2948,6 +2948,7 @@ int reiserfs_allocate_list_bitmaps(struct super_block *s,
struct reiserfs_list_bitmap *, unsigned int);
void reiserfs_schedule_old_flush(struct super_block *s);
+void reiserfs_cancel_old_flush(struct super_block *s);
void add_save_link(struct reiserfs_transaction_handle *th,
struct inode *inode, int truncate);
int remove_save_link(struct inode *inode, int truncate);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f9f3be50081a..ee095246da4e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -90,7 +90,9 @@ static void flush_old_commits(struct work_struct *work)
s = sbi->s_journal->j_work_sb;
spin_lock(&sbi->old_work_lock);
- sbi->work_queued = 0;
+ /* Avoid clobbering the cancel state... */
+ if (sbi->work_queued == 1)
+ sbi->work_queued = 0;
spin_unlock(&sbi->old_work_lock);
reiserfs_sync_fs(s, 1);
@@ -117,21 +119,22 @@ void reiserfs_schedule_old_flush(struct super_block *s)
spin_unlock(&sbi->old_work_lock);
}
-static void cancel_old_flush(struct super_block *s)
+void reiserfs_cancel_old_flush(struct super_block *s)
{
struct reiserfs_sb_info *sbi = REISERFS_SB(s);
- cancel_delayed_work_sync(&REISERFS_SB(s)->old_work);
spin_lock(&sbi->old_work_lock);
- sbi->work_queued = 0;
+ /* Make sure no new flushes will be queued */
+ sbi->work_queued = 2;
spin_unlock(&sbi->old_work_lock);
+ cancel_delayed_work_sync(&REISERFS_SB(s)->old_work);
}
static int reiserfs_freeze(struct super_block *s)
{
struct reiserfs_transaction_handle th;
- cancel_old_flush(s);
+ reiserfs_cancel_old_flush(s);
reiserfs_write_lock(s);
if (!(s->s_flags & MS_RDONLY)) {
@@ -152,7 +155,13 @@ static int reiserfs_freeze(struct super_block *s)
static int reiserfs_unfreeze(struct super_block *s)
{
+ struct reiserfs_sb_info *sbi = REISERFS_SB(s);
+
reiserfs_allow_writes(s);
+ spin_lock(&sbi->old_work_lock);
+ /* Allow old_work to run again */
+ sbi->work_queued = 0;
+ spin_unlock(&sbi->old_work_lock);
return 0;
}
@@ -2187,7 +2196,7 @@ error_unlocked:
if (sbi->commit_wq)
destroy_workqueue(sbi->commit_wq);
- cancel_delayed_work_sync(&REISERFS_SB(s)->old_work);
+ reiserfs_cancel_old_flush(s);
reiserfs_free_bitmap_cache(s);
if (SB_BUFFER_WITH_SB(s))