aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c36
-rw-r--r--fs/btrfs/disk-io.c10
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/relocation.c9
-rw-r--r--fs/ceph/locks.c73
-rw-r--r--fs/ceph/mds_client.c65
-rw-r--r--fs/ceph/super.h9
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/ecryptfs/file.c6
-rw-r--r--fs/hpfs/file.c4
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c1
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/kmsg.c10
-rw-r--r--fs/xfs/xfs_acl.c31
-rw-r--r--fs/xfs/xfs_acl.h31
-rw-r--r--fs/xfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/xfs_attr_leaf.h1
-rw-r--r--fs/xfs/xfs_btree.c10
-rw-r--r--fs/xfs/xfs_dir2_format.h5
-rw-r--r--fs/xfs/xfs_dquot.c37
-rw-r--r--fs/xfs/xfs_inode.c16
-rw-r--r--fs/xfs/xfs_log_recover.c103
-rw-r--r--fs/xfs/xfs_mount.c18
-rw-r--r--fs/xfs/xfs_qm.c40
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_super.c11
27 files changed, 376 insertions, 166 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 7fe5bdee163..2bbcacf74d0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,9 +141,6 @@ static void aio_free_ring(struct kioctx *ctx)
for (i = 0; i < ctx->nr_pages; i++)
put_page(ctx->ring_pages[i]);
- if (ctx->mmap_size)
- vm_munmap(ctx->mmap_base, ctx->mmap_size);
-
if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
kfree(ctx->ring_pages);
}
@@ -322,11 +319,6 @@ static void free_ioctx(struct kioctx *ctx)
aio_free_ring(ctx);
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
- aio_nr -= ctx->max_reqs;
- spin_unlock(&aio_nr_lock);
-
pr_debug("freeing %p\n", ctx);
/*
@@ -435,17 +427,24 @@ static void kill_ioctx(struct kioctx *ctx)
{
if (!atomic_xchg(&ctx->dead, 1)) {
hlist_del_rcu(&ctx->list);
- /* Between hlist_del_rcu() and dropping the initial ref */
- synchronize_rcu();
/*
- * We can't punt to workqueue here because put_ioctx() ->
- * free_ioctx() will unmap the ringbuffer, and that has to be
- * done in the original process's context. kill_ioctx_rcu/work()
- * exist for exit_aio(), as in that path free_ioctx() won't do
- * the unmap.
+ * It'd be more correct to do this in free_ioctx(), after all
+ * the outstanding kiocbs have finished - but by then io_destroy
+ * has already returned, so io_setup() could potentially return
+ * -EAGAIN with no ioctxs actually in use (as far as userspace
+ * could tell).
*/
- kill_ioctx_work(&ctx->rcu_work);
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
+ aio_nr -= ctx->max_reqs;
+ spin_unlock(&aio_nr_lock);
+
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+ /* Between hlist_del_rcu() and dropping the initial ref */
+ call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
}
}
@@ -495,10 +494,7 @@ void exit_aio(struct mm_struct *mm)
*/
ctx->mmap_size = 0;
- if (!atomic_xchg(&ctx->dead, 1)) {
- hlist_del_rcu(&ctx->list);
- call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
- }
+ kill_ioctx(ctx);
}
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b3cb5286a..b8b60b660c8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2859,8 +2859,8 @@ fail_qgroup:
btrfs_free_qgroup_config(fs_info);
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
- del_fs_roots(fs_info);
btrfs_cleanup_transaction(fs_info->tree_root);
+ del_fs_roots(fs_info);
fail_cleaner:
kthread_stop(fs_info->cleaner_kthread);
@@ -3512,15 +3512,15 @@ int close_ctree(struct btrfs_root *root)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
- free_root_pointers(fs_info, 1);
-
btrfs_free_block_groups(fs_info);
+ btrfs_stop_all_workers(fs_info);
+
del_fs_roots(fs_info);
- iput(fs_info->btree_inode);
+ free_root_pointers(fs_info, 1);
- btrfs_stop_all_workers(fs_info);
+ iput(fs_info->btree_inode);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index af978f7682b..17f3064b4a3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8012,6 +8012,9 @@ int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ if (root == NULL)
+ return 1;
+
/* the snap/subvol tree is on deleting */
if (btrfs_root_refs(&root->root_item) == 0 &&
root != root->fs_info->tree_root)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 395b82031a4..4febca4fc2d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4082,7 +4082,7 @@ out:
return inode;
}
-static struct reloc_control *alloc_reloc_control(void)
+static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
{
struct reloc_control *rc;
@@ -4093,7 +4093,8 @@ static struct reloc_control *alloc_reloc_control(void)
INIT_LIST_HEAD(&rc->reloc_roots);
backref_cache_init(&rc->backref_cache);
mapping_tree_init(&rc->reloc_root_tree);
- extent_io_tree_init(&rc->processed_blocks, NULL);
+ extent_io_tree_init(&rc->processed_blocks,
+ fs_info->btree_inode->i_mapping);
return rc;
}
@@ -4110,7 +4111,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
int rw = 0;
int err = 0;
- rc = alloc_reloc_control();
+ rc = alloc_reloc_control(fs_info);
if (!rc)
return -ENOMEM;
@@ -4311,7 +4312,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
if (list_empty(&reloc_roots))
goto out;
- rc = alloc_reloc_control();
+ rc = alloc_reloc_control(root->fs_info);
if (!rc) {
err = -ENOMEM;
goto out;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 202dd3d68be..ebbf680378e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -191,27 +191,23 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
}
/**
- * Encode the flock and fcntl locks for the given inode into the pagelist.
- * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
- * sequential flock locks.
- * Must be called with lock_flocks() already held.
- * If we encounter more of a specific lock type than expected,
- * we return the value 1.
+ * Encode the flock and fcntl locks for the given inode into the ceph_filelock
+ * array. Must be called with lock_flocks() already held.
+ * If we encounter more of a specific lock type than expected, return -ENOSPC.
*/
-int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
- int num_fcntl_locks, int num_flock_locks)
+int ceph_encode_locks_to_buffer(struct inode *inode,
+ struct ceph_filelock *flocks,
+ int num_fcntl_locks, int num_flock_locks)
{
struct file_lock *lock;
- struct ceph_filelock cephlock;
int err = 0;
int seen_fcntl = 0;
int seen_flock = 0;
+ int l = 0;
dout("encoding %d flock and %d fcntl locks", num_flock_locks,
num_fcntl_locks);
- err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32));
- if (err)
- goto fail;
+
for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
if (lock->fl_flags & FL_POSIX) {
++seen_fcntl;
@@ -219,19 +215,12 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
err = -ENOSPC;
goto fail;
}
- err = lock_to_ceph_filelock(lock, &cephlock);
+ err = lock_to_ceph_filelock(lock, &flocks[l]);
if (err)
goto fail;
- err = ceph_pagelist_append(pagelist, &cephlock,
- sizeof(struct ceph_filelock));
+ ++l;
}
- if (err)
- goto fail;
}
-
- err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32));
- if (err)
- goto fail;
for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
if (lock->fl_flags & FL_FLOCK) {
++seen_flock;
@@ -239,19 +228,51 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
err = -ENOSPC;
goto fail;
}
- err = lock_to_ceph_filelock(lock, &cephlock);
+ err = lock_to_ceph_filelock(lock, &flocks[l]);
if (err)
goto fail;
- err = ceph_pagelist_append(pagelist, &cephlock,
- sizeof(struct ceph_filelock));
+ ++l;
}
- if (err)
- goto fail;
}
fail:
return err;
}
+/**
+ * Copy the encoded flock and fcntl locks into the pagelist.
+ * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
+ * sequential flock locks.
+ * Returns zero on success.
+ */
+int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
+ struct ceph_pagelist *pagelist,
+ int num_fcntl_locks, int num_flock_locks)
+{
+ int err = 0;
+ __le32 nlocks;
+
+ nlocks = cpu_to_le32(num_fcntl_locks);
+ err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
+ if (err)
+ goto out_fail;
+
+ err = ceph_pagelist_append(pagelist, flocks,
+ num_fcntl_locks * sizeof(*flocks));
+ if (err)
+ goto out_fail;
+
+ nlocks = cpu_to_le32(num_flock_locks);
+ err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
+ if (err)
+ goto out_fail;
+
+ err = ceph_pagelist_append(pagelist,
+ &flocks[num_fcntl_locks],
+ num_flock_locks * sizeof(*flocks));
+out_fail:
+ return err;
+}
+
/*
* Given a pointer to a lock, convert it to a ceph filelock
*/
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4f22671a5bd..4d2920304be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2478,39 +2478,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
if (recon_state->flock) {
int num_fcntl_locks, num_flock_locks;
- struct ceph_pagelist_cursor trunc_point;
-
- ceph_pagelist_set_cursor(pagelist, &trunc_point);
- do {
- lock_flocks();
- ceph_count_locks(inode, &num_fcntl_locks,
- &num_flock_locks);
- rec.v2.flock_len = (2*sizeof(u32) +
- (num_fcntl_locks+num_flock_locks) *
- sizeof(struct ceph_filelock));
- unlock_flocks();
-
- /* pre-alloc pagelist */
- ceph_pagelist_truncate(pagelist, &trunc_point);
- err = ceph_pagelist_append(pagelist, &rec, reclen);
- if (!err)
- err = ceph_pagelist_reserve(pagelist,
- rec.v2.flock_len);
-
- /* encode locks */
- if (!err) {
- lock_flocks();
- err = ceph_encode_locks(inode,
- pagelist,
- num_fcntl_locks,
- num_flock_locks);
- unlock_flocks();
- }
- } while (err == -ENOSPC);
+ struct ceph_filelock *flocks;
+
+encode_again:
+ lock_flocks();
+ ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+ unlock_flocks();
+ flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
+ sizeof(struct ceph_filelock), GFP_NOFS);
+ if (!flocks) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ lock_flocks();
+ err = ceph_encode_locks_to_buffer(inode, flocks,
+ num_fcntl_locks,
+ num_flock_locks);
+ unlock_flocks();
+ if (err) {
+ kfree(flocks);
+ if (err == -ENOSPC)
+ goto encode_again;
+ goto out_free;
+ }
+ /*
+ * number of encoded locks is stable, so copy to pagelist
+ */
+ rec.v2.flock_len = cpu_to_le32(2*sizeof(u32) +
+ (num_fcntl_locks+num_flock_locks) *
+ sizeof(struct ceph_filelock));
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
+ if (!err)
+ err = ceph_locks_to_pagelist(flocks, pagelist,
+ num_fcntl_locks,
+ num_flock_locks);
+ kfree(flocks);
} else {
err = ceph_pagelist_append(pagelist, &rec, reclen);
}
-
out_free:
kfree(path);
out_dput:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 8696be2ff67..7ccfdb4aea2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -822,8 +822,13 @@ extern const struct export_operations ceph_export_ops;
extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
-extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p,
- int p_locks, int f_locks);
+extern int ceph_encode_locks_to_buffer(struct inode *inode,
+ struct ceph_filelock *flocks,
+ int num_fcntl_locks,
+ int num_flock_locks);
+extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
+ struct ceph_pagelist *pagelist,
+ int num_fcntl_locks, int num_flock_locks);
extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
/* debugfs.c */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5b97e56ddbc..e3bc39bb9d1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3279,8 +3279,8 @@ build_unc_path_to_root(const struct smb_vol *vol,
pos = full_path + unc_len;
if (pplen) {
- *pos++ = CIFS_DIR_SEP(cifs_sb);
- strncpy(pos, vol->prepath, pplen);
+ *pos = CIFS_DIR_SEP(cifs_sb);
+ strncpy(pos + 1, vol->prepath, pplen);
pos += pplen;
}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 201f0a0d6b0..a7abbea2c09 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -295,6 +295,12 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
static int
ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
+ int rc;
+
+ rc = filemap_write_and_wait(file->f_mapping);
+ if (rc)
+ return rc;
+
return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
}
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 3027f4dbbab..e4ba5fe4c3b 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -109,10 +109,14 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
+ hpfs_lock(inode->i_sb);
+
if (to > inode->i_size) {
truncate_pagecache(inode, to, inode->i_size);
hpfs_truncate(inode);
}
+
+ hpfs_unlock(inode->i_sb);
}
static int hpfs_write_begin(struct file *file, struct address_space *mapping,
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index b3fdd1a323d..e68588e6b1e 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1408,6 +1408,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
mres->lockname_len, mres->lockname);
ret = -EFAULT;
spin_unlock(&res->spinlock);
+ dlm_lockres_put(res);
goto leave;
}
res->state |= DLM_LOCK_RES_MIGRATING;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 04ee1b57c24..b4a5cdf9dbc 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -947,7 +947,7 @@ leave:
ocfs2_free_dir_lookup_result(&orphan_insert);
ocfs2_free_dir_lookup_result(&lookup);
- if (status)
+ if (status && (status != -ENOTEMPTY))
mlog_errno(status);
return status;
@@ -2216,7 +2216,7 @@ out:
brelse(orphan_dir_bh);
- return 0;
+ return ret;
}
int ocfs2_create_inode_in_orphan(struct inode *dir,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd51e50001f..c3834dad09b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2118,6 +2118,7 @@ static int show_timer(struct seq_file *m, void *v)
nstr[notify & ~SIGEV_THREAD_ID],
(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
pid_nr_ns(timer->it_pid, tp->ns));
+ seq_printf(m, "ClockID: %d\n", timer->it_clock);
return 0;
}
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index bd4b5a740ff..bdfabdaefdc 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait;
static int kmsg_open(struct inode * inode, struct file * file)
{
- return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE);
+ return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC);
}
static int kmsg_release(struct inode * inode, struct file * file)
{
- (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE);
+ (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_PROC);
return 0;
}
@@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
if ((file->f_flags & O_NONBLOCK) &&
- !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
+ !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
return -EAGAIN;
- return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE);
+ return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_PROC);
}
static unsigned int kmsg_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &log_wait, wait);
- if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
+ if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
return POLLIN | POLLRDNORM;
return 0;
}
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 1d32f1d5276..306d883d89b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -21,6 +21,8 @@
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_vnodeops.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
#include "xfs_trace.h"
#include <linux/slab.h>
#include <linux/xattr.h>
@@ -34,7 +36,9 @@
*/
STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
+xfs_acl_from_disk(
+ struct xfs_acl *aclp,
+ int max_entries)
{
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
@@ -42,7 +46,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
unsigned int count, i;
count = be32_to_cpu(aclp->acl_cnt);
- if (count > XFS_ACL_MAX_ENTRIES)
+ if (count > max_entries)
return ERR_PTR(-EFSCORRUPTED);
acl = posix_acl_alloc(count, GFP_KERNEL);
@@ -108,9 +112,9 @@ xfs_get_acl(struct inode *inode, int type)
struct xfs_inode *ip = XFS_I(inode);
struct posix_acl *acl;
struct xfs_acl *xfs_acl;
- int len = sizeof(struct xfs_acl);
unsigned char *ea_name;
int error;
+ int len;
acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED)
@@ -133,8 +137,8 @@ xfs_get_acl(struct inode *inode, int type)
* If we have a cached ACLs value just return it, not need to
* go out to the disk.
*/
-
- xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ len = XFS_ACL_MAX_SIZE(ip->i_mount);
+ xfs_acl = kzalloc(len, GFP_KERNEL);
if (!xfs_acl)
return ERR_PTR(-ENOMEM);
@@ -153,7 +157,7 @@ xfs_get_acl(struct inode *inode, int type)
goto out;
}
- acl = xfs_acl_from_disk(xfs_acl);
+ acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount));
if (IS_ERR(acl))
goto out;
@@ -189,16 +193,17 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (acl) {
struct xfs_acl *xfs_acl;
- int len;
+ int len = XFS_ACL_MAX_SIZE(ip->i_mount);
- xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ xfs_acl = kzalloc(len, GFP_KERNEL);
if (!xfs_acl)
return -ENOMEM;
xfs_acl_to_disk(xfs_acl, acl);
- len = sizeof(struct xfs_acl) -
- (sizeof(struct xfs_acl_entry) *
- (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+ /* subtract away the unused acl entries */
+ len -= sizeof(struct xfs_acl_entry) *
+ (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
len, ATTR_ROOT);
@@ -243,7 +248,7 @@ xfs_set_mode(struct inode *inode, umode_t mode)
static int
xfs_acl_exists(struct inode *inode, unsigned char *name)
{
- int len = sizeof(struct xfs_acl);
+ int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
ATTR_ROOT|ATTR_KERNOVAL) == 0);
@@ -379,7 +384,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
goto out_release;
error = -EINVAL;
- if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+ if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
goto out_release;
if (type == ACL_TYPE_ACCESS) {
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 39632d94135..4016a567b83 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -22,19 +22,36 @@ struct inode;
struct posix_acl;
struct xfs_inode;
-#define XFS_ACL_MAX_ENTRIES 25
#define XFS_ACL_NOT_PRESENT (-1)
/* On-disk XFS access control list structure */
+struct xfs_acl_entry {
+ __be32 ae_tag;
+ __be32 ae_id;
+ __be16 ae_perm;
+ __be16 ae_pad; /* fill the implicit hole in the structure */
+};
+
struct xfs_acl {
- __be32 acl_cnt;
- struct xfs_acl_entry {
- __be32 ae_tag;
- __be32 ae_id;
- __be16 ae_perm;
- } acl_entry[XFS_ACL_MAX_ENTRIES];
+ __be32 acl_cnt;
+ struct xfs_acl_entry acl_entry[0];
};
+/*
+ * The number of ACL entries allowed is defined by the on-disk format.
+ * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is
+ * limited only by the maximum size of the xattr that stores the information.
+ */
+#define XFS_ACL_MAX_ENTRIES(mp) \
+ (xfs_sb_version_hascrc(&mp->m_sb) \
+ ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
+ sizeof(struct xfs_acl_entry) \
+ : 25)
+
+#define XFS_ACL_MAX_SIZE(mp) \
+ (sizeof(struct xfs_acl) + \
+ sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
+
/* On-disk XFS extended attribute names */
#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d788302e506..31d3cd12926 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -3258,7 +3258,7 @@ xfs_attr3_leaf_inactive(
name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
if (name_rmt->valueblk) {
lp->valueblk = be32_to_cpu(name_rmt->valueblk);
- lp->valuelen = XFS_B_TO_FSB(dp->i_mount,
+ lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
be32_to_cpu(name_rmt->valuelen));
lp++;
}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index f9d7846097e..444a7704596 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -128,6 +128,7 @@ struct xfs_attr3_leaf_hdr {
__u8 holes;
__u8 pad1;
struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
+ __be32 pad2; /* 64 bit alignment */
};
#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc))
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 8804b8a3c31..0903960410a 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -2544,7 +2544,17 @@ xfs_btree_new_iroot(
if (error)
goto error0;
+ /*
+ * we can't just memcpy() the root in for CRC enabled btree blocks.
+ * In that case we also have to ensure the blkno remains correct.
+ */
memcpy(cblock, block, xfs_btree_block_len(cur));
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
+ else
+ cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
+ }
be16_add_cpu(&block->bb_level, 1);
xfs_btree_set_numrecs(block, 1);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index 995f1f505a5..7826782b8d7 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -266,6 +266,7 @@ struct xfs_dir3_blk_hdr {
struct xfs_dir3_data_hdr {
struct xfs_dir3_blk_hdr hdr;
xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT];
+ __be32 pad; /* 64 bit alignment */
};
#define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc)
@@ -477,7 +478,7 @@ struct xfs_dir3_leaf_hdr {
struct xfs_da3_blkinfo info; /* header for da routines */
__be16 count; /* count of entries */
__be16 stale; /* count of stale entries */
- __be32 pad;
+ __be32 pad; /* 64 bit alignment */
};
struct xfs_dir3_icleaf_hdr {
@@ -715,7 +716,7 @@ struct xfs_dir3_free_hdr {
__be32 firstdb; /* db of first entry */
__be32 nvalid; /* count of valid entries */
__be32 nused; /* count of used entries */
- __be32 pad; /* 64 bit alignment. */
+ __be32 pad; /* 64 bit alignment */
};
struct xfs_dir3_free {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a41f8bf1da3..044e97a33c8 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -249,8 +249,11 @@ xfs_qm_init_dquot_blk(
d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
d->dd_diskdq.d_id = cpu_to_be32(curid);
d->dd_diskdq.d_flags = type;
- if (xfs_sb_version_hascrc(&mp->m_sb))
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+ xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
}
xfs_trans_dquot_buf(tp, bp,
@@ -286,23 +289,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}
-STATIC void
-xfs_dquot_buf_calc_crc(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
- int i;
-
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return;
-
- for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
- xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
- offsetof(struct xfs_dqblk, dd_crc));
- }
-}
-
STATIC bool
xfs_dquot_buf_verify_crc(
struct xfs_mount *mp,
@@ -328,12 +314,11 @@ xfs_dquot_buf_verify_crc(
for (i = 0; i < ndquots; i++, d++) {
if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
- offsetof(struct xfs_dqblk, dd_crc)))
+ XFS_DQUOT_CRC_OFF))
return false;
if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
return false;
}
-
return true;
}
@@ -393,6 +378,11 @@ xfs_dquot_buf_read_verify(
}
}
+/*
+ * we don't calculate the CRC here as that is done when the dquot is flushed to
+ * the buffer after the update is done. This ensures that the dquot in the
+ * buffer always has an up-to-date CRC value.
+ */
void
xfs_dquot_buf_write_verify(
struct xfs_buf *bp)
@@ -404,7 +394,6 @@ xfs_dquot_buf_write_verify(
xfs_buf_ioerror(bp, EFSCORRUPTED);
return;
}
- xfs_dquot_buf_calc_crc(mp, bp);
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
@@ -1151,11 +1140,17 @@ xfs_qm_dqflush(
* copy the lsn into the on-disk dquot now while we have the in memory
* dquot here. This can't be done later in the write verifier as we
* can't get access to the log item at that point in time.
+ *
+ * We also calculate the CRC here so that the on-disk dquot in the
+ * buffer always has a valid CRC. This ensures there is no possibility
+ * of a dquot without an up-to-date CRC getting to disk.
*/
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
+ xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index efbe1accb6c..7f7be5f98f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1638,6 +1638,10 @@ xfs_iunlink(
dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, dip);
+
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
@@ -1723,6 +1727,10 @@ xfs_iunlink_remove(
dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, dip);
+
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
@@ -1796,6 +1804,10 @@ xfs_iunlink_remove(
dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, dip);
+
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
@@ -1809,6 +1821,10 @@ xfs_iunlink_remove(
last_dip->di_next_unlinked = cpu_to_be32(next_agino);
ASSERT(next_agino != 0);
offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, last_dip);
+
xfs_trans_inode_buf(tp, last_ibp);
xfs_trans_log_buf(tp, last_ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d9e4d3c3991..7cf5e4eafe2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(
}
/*
- * Sort the log items in the transaction. Cancelled buffers need
- * to be put first so they are processed before any items that might
- * modify the buffers. If they are cancelled, then the modifications
- * don't need to be replayed.
+ * Sort the log items in the transaction.
+ *
+ * The ordering constraints are defined by the inode allocation and unlink
+ * behaviour. The rules are:
+ *
+ * 1. Every item is only logged once in a given transaction. Hence it
+ * represents the last logged state of the item. Hence ordering is
+ * dependent on the order in which operations need to be performed so
+ * required initial conditions are always met.
+ *
+ * 2. Cancelled buffers are recorded in pass 1 in a separate table and
+ * there's nothing to replay from them so we can simply cull them
+ * from the transaction. However, we can't do that until after we've
+ * replayed all the other items because they may be dependent on the
+ * cancelled buffer and replaying the cancelled buffer can remove it
+ * from the cancelled buffer table. Hence they have to be done last.
+ *
+ * 3. Inode allocation buffers must be replayed before inode items that
+ * read the buffer and replay changes into it.
+ *
+ * 4. Inode unlink buffers must be replayed after inode items are replayed.
+ * This ensures that inodes are completely flushed to the inode buffer
+ * in a "free" state before we remove the unlinked inode list pointer.
+ *
+ * Hence the ordering needs to be inode allocation buffers first, inode items
+ * second, inode unlink buffers third and cancelled buffers last.
+ *
+ * But there's a problem with that - we can't tell an inode allocation buffer
+ * apart from a regular buffer, so we can't separate them. We can, however,
+ * tell an inode unlink buffer from the others, and so we can separate them out
+ * from all the other buffers and move them to last.
+ *
+ * Hence, 4 lists, in order from head to tail:
+ * - buffer_list for all buffers except cancelled/inode unlink buffers
+ * - item_list for all non-buffer items
+ * - inode_buffer_list for inode unlink buffers
+ * - cancel_list for the cancelled buffers
*/
STATIC int
xlog_recover_reorder_trans(
@@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(
{
xlog_recover_item_t *item, *n;
LIST_HEAD(sort_list);
+ LIST_HEAD(cancel_list);
+ LIST_HEAD(buffer_list);
+ LIST_HEAD(inode_buffer_list);
+ LIST_HEAD(inode_list);
list_splice_init(&trans->r_itemq, &sort_list);
list_for_each_entry_safe(item, n, &sort_list, ri_list) {
@@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(
switch (ITEM_TYPE(item)) {
case XFS_LI_BUF:
- if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
+ if (buf_f->blf_flags & XFS_BLF_CANCEL) {
trace_xfs_log_recover_item_reorder_head(log,
trans, item, pass);
- list_move(&item->ri_list, &trans->r_itemq);
+ list_move(&item->ri_list, &cancel_list);
+ break;
+ }
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+ list_move(&item->ri_list, &inode_buffer_list);
break;
}
+ list_move_tail(&item->ri_list, &buffer_list);
+ break;
case XFS_LI_INODE:
case XFS_LI_DQUOT:
case XFS_LI_QUOTAOFF:
@@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(
case XFS_LI_EFI:
trace_xfs_log_recover_item_reorder_tail(log,
trans, item, pass);
- list_move_tail(&item->ri_list, &trans->r_itemq);
+ list_move_tail(&item->ri_list, &inode_list);
break;
default:
xfs_warn(log->l_mp,
@@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(
}
}
ASSERT(list_empty(&sort_list));
+ if (!list_empty(&buffer_list))
+ list_splice(&buffer_list, &trans->r_itemq);
+ if (!list_empty(&inode_list))
+ list_splice_tail(&inode_list, &trans->r_itemq);
+ if (!list_empty(&inode_buffer_list))
+ list_splice_tail(&inode_buffer_list, &trans->r_itemq);
+ if (!list_empty(&cancel_list))
+ list_splice_tail(&cancel_list, &trans->r_itemq);
return 0;
}
@@ -1794,7 +1845,13 @@ xlog_recover_do_inode_buffer(
xfs_agino_t *buffer_nextp;
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
- bp->b_ops = &xfs_inode_buf_ops;
+
+ /*
+ * Post recovery validation only works properly on CRC enabled
+ * filesystems.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ bp->b_ops = &xfs_inode_buf_ops;
inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
for (i = 0; i < inodes_per_buf; i++) {
@@ -1861,6 +1918,15 @@ xlog_recover_do_inode_buffer(
buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
next_unlinked_offset);
*buffer_nextp = *logged_nextp;
+
+ /*
+ * If necessary, recalculate the CRC in the on-disk inode. We
+ * have to leave the inode in a consistent state for whoever
+ * reads it next....
+ */
+ xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+ xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+
}
return 0;
@@ -2145,7 +2211,16 @@ xlog_recover_do_reg_buffer(
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
- xlog_recovery_validate_buf_type(mp, bp, buf_f);
+ /*
+ * We can only do post recovery validation on items on CRC enabled
+ * filesystems as we need to know when the buffer was written to be able
+ * to determine if we should have replayed the item. If we replay old
+ * metadata over a newer buffer, then it will enter a temporarily
+ * inconsistent state resulting in verification failures. Hence for now
+ * just avoid the verification stage for non-crc filesystems
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xlog_recovery_validate_buf_type(mp, bp, buf_f);
}
/*
@@ -2266,6 +2341,12 @@ xfs_qm_dqcheck(
d->dd_diskdq.d_flags = type;
d->dd_diskdq.d_id = cpu_to_be32(id);
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+ xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
+
return errs;
}
@@ -2793,6 +2874,10 @@ xlog_recover_dquot_pass2(
}
memcpy(ddq, recddq, item->ri_buf[1].i_len);
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
ASSERT(dq_f->qlf_size == 2);
ASSERT(bp->b_target->bt_mount == mp);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f6bfbd73466..e8e310c0509 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -314,7 +314,8 @@ STATIC int
xfs_mount_validate_sb(
xfs_mount_t *mp,
xfs_sb_t *sbp,
- bool check_inprogress)
+ bool check_inprogress,
+ bool check_version)
{
/*
@@ -337,9 +338,10 @@ xfs_mount_validate_sb(
/*
* Version 5 superblock feature mask validation. Reject combinations the
- * kernel cannot support up front before checking anything else.
+ * kernel cannot support up front before checking anything else. For
+ * write validation, we don't need to check feature masks.
*/
- if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
+ if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
xfs_alert(mp,
"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
"Use of these features in this kernel is at your own risk!");
@@ -675,7 +677,8 @@ xfs_sb_to_disk(
static int
xfs_sb_verify(
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ bool check_version)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_sb sb;
@@ -686,7 +689,8 @@ xfs_sb_verify(
* Only check the in progress field for the primary superblock as
* mkfs.xfs doesn't clear it from secondary superblocks.
*/
- return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR);
+ return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
+ check_version);
}
/*
@@ -719,7 +723,7 @@ xfs_sb_read_verify(
goto out_error;
}
}
- error = xfs_sb_verify(bp);
+ error = xfs_sb_verify(bp, true);
out_error:
if (error) {
@@ -758,7 +762,7 @@ xfs_sb_write_verify(
struct xfs_buf_log_item *bip = bp->b_fspriv;
int error;
- error = xfs_sb_verify(bp);
+ error = xfs_sb_verify(bp, false);
if (error) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, error);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index f41702b4300..b75c9bb6e71 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -41,6 +41,7 @@
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_cksum.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -839,7 +840,7 @@ xfs_qm_reset_dqcounts(
xfs_dqid_t id,
uint type)
{
- xfs_disk_dquot_t *ddq;
+ struct xfs_dqblk *dqb;
int j;
trace_xfs_reset_dqcounts(bp, _RET_IP_);
@@ -853,8 +854,12 @@ xfs_qm_reset_dqcounts(
do_div(j, sizeof(xfs_dqblk_t));
ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
#endif
- ddq = bp->b_addr;
+ dqb = bp->b_addr;
for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
+ struct xfs_disk_dquot *ddq;
+
+ ddq = (struct xfs_disk_dquot *)&dqb[j];
+
/*
* Do a sanity check, and if needed, repair the dqblk. Don't
* output any warnings because it's perfectly possible to
@@ -871,7 +876,12 @@ xfs_qm_reset_dqcounts(
ddq->d_bwarns = 0;
ddq->d_iwarns = 0;
ddq->d_rtbwarns = 0;
- ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_update_cksum((char *)&dqb[j],
+ sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
}
}
@@ -907,19 +917,29 @@ xfs_qm_dqiter_bufs(
XFS_FSB_TO_DADDR(mp, bno),
mp->m_quotainfo->qi_dqchunklen, 0, &bp,
&xfs_dquot_buf_ops);
- if (error)
- break;
/*
- * XXX(hch): need to figure out if it makes sense to validate
- * the CRC here.
+ * CRC and validation errors will return an EFSCORRUPTED here. If
+ * this occurs, re-read without CRC validation so that we can
+ * repair the damage via xfs_qm_reset_dqcounts(). This process
+ * will leave a trace in the log indicating corruption has
+ * been detected.
*/
+ if (error == EFSCORRUPTED) {
+ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, bno),
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+ NULL);
+ }
+
+ if (error)
+ break;
+
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
- /*
- * goto the next block.
- */
+
+ /* goto the next block. */
bno++;
firstid += mp->m_quotainfo->qi_dqperchunk;
}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index c61e31c7d99..c38068f26c5 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,6 +87,8 @@ typedef struct xfs_dqblk {
uuid_t dd_uuid; /* location information */
} xfs_dqblk_t;
+#define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc)
+
/*
* flags for q_flags field in the dquot.
*/
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ea341cea68c..3033ba5e976 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1373,6 +1373,17 @@ xfs_finish_flags(
}
/*
+ * V5 filesystems always use attr2 format for attributes.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+ xfs_warn(mp,
+"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
+ MNTOPT_NOATTR2, MNTOPT_ATTR2);
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
* mkfs'ed attr2 will turn on attr2 mount unless explicitly
* told by noattr2 to turn it off
*/