about | summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c8
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/cifs/dir.c3
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/debugfs/inode.c55
-rw-r--r--fs/ext2/dir.c4
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/inode.c6
-rw-r--r--fs/ext2/super.c184
-rw-r--r--fs/ext2/xip.c5
-rw-r--r--fs/ext3/inode.c18
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/ext3/super.c468
-rw-r--r--fs/ext3/xattr.c7
-rw-r--r--fs/ext4/super.c30
-rw-r--r--fs/isofs/compress.c533
-rw-r--r--fs/isofs/rock.c3
-rw-r--r--fs/jbd2/commit.c2
-rw-r--r--fs/namei.c31
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nilfs2/alloc.c108
-rw-r--r--fs/nilfs2/alloc.h21
-rw-r--r--fs/nilfs2/bmap.c8
-rw-r--r--fs/nilfs2/btnode.c76
-rw-r--r--fs/nilfs2/btnode.h6
-rw-r--r--fs/nilfs2/btree.c106
-rw-r--r--fs/nilfs2/btree.h22
-rw-r--r--fs/nilfs2/cpfile.c26
-rw-r--r--fs/nilfs2/cpfile.h3
-rw-r--r--fs/nilfs2/dat.c47
-rw-r--r--fs/nilfs2/dat.h3
-rw-r--r--fs/nilfs2/dir.c24
-rw-r--r--fs/nilfs2/gcdat.c3
-rw-r--r--fs/nilfs2/gcinode.c6
-rw-r--r--fs/nilfs2/ifile.c35
-rw-r--r--fs/nilfs2/ifile.h2
-rw-r--r--fs/nilfs2/inode.c7
-rw-r--r--fs/nilfs2/mdt.c56
-rw-r--r--fs/nilfs2/mdt.h25
-rw-r--r--fs/nilfs2/namei.c83
-rw-r--r--fs/nilfs2/recovery.c34
-rw-r--r--fs/nilfs2/segbuf.c185
-rw-r--r--fs/nilfs2/segbuf.h54
-rw-r--r--fs/nilfs2/segment.c369
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/sufile.c203
-rw-r--r--fs/nilfs2/sufile.h14
-rw-r--r--fs/nilfs2/super.c88
-rw-r--r--fs/nilfs2/the_nilfs.c155
-rw-r--r--fs/nilfs2/the_nilfs.h10
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/quota.h4
-rw-r--r--fs/ocfs2/quota_local.c2
-rw-r--r--fs/quota/Kconfig8
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/quota/quota_v1.c2
-rw-r--r--fs/quota/quota_v2.c167
-rw-r--r--fs/quota/quotaio_v2.h19
-rw-r--r--fs/sync.c13
-rw-r--r--fs/sysfs/dir.c388
-rw-r--r--fs/sysfs/file.c41
-rw-r--r--fs/sysfs/inode.c176
-rw-r--r--fs/sysfs/symlink.c11
-rw-r--r--fs/sysfs/sysfs.h9
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c114
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h9
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c71
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h1
-rw-r--r--fs/xfs/support/debug.h18
-rw-r--r--fs/xfs/xfs_attr.c16
-rw-r--r--fs/xfs/xfs_attr.h1
-rw-r--r--fs/xfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/xfs_bmap_btree.c3
-rw-r--r--fs/xfs/xfs_filestream.h8
-rw-r--r--fs/xfs/xfs_fsops.c25
-rw-r--r--fs/xfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/xfs_iget.c5
-rw-r--r--fs/xfs/xfs_iomap.c9
-rw-r--r--fs/xfs/xfs_log_recover.c40
-rw-r--r--fs/xfs/xfs_mount.c18
-rw-r--r--fs/xfs/xfs_mount.h27
-rw-r--r--fs/xfs/xfs_rw.c30
-rw-r--r--fs/xfs/xfs_rw.h29
-rw-r--r--fs/xfs/xfs_trans.c7
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c13
-rw-r--r--fs/xfs/xfs_vnodeops.c79
-rw-r--r--fs/xfs/xfs_vnodeops.h1
95 files changed, 2424 insertions, 2155 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c63a3c8beb7..5e15a21dbf9 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -671,7 +671,6 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
ssize_t result;
size_t count = iov_length(iov, nr_segs);
- int ret;
_enter("{%x.%u},{%zu},%lu,",
vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
@@ -691,13 +690,6 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
return result;
}
- /* return error values for O_SYNC and IS_SYNC() */
- if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
- ret = afs_fsync(iocb->ki_filp, dentry, 1);
- if (ret < 0)
- result = ret;
- }
-
_leave(" = %zd", result);
return result;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 06550affbd2..77f759302e1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -909,7 +909,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
unsigned long last_index;
int will_write;
- will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
+ will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
@@ -1076,7 +1076,7 @@ out_nolock:
if (err)
num_written = err;
- if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
+ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 1);
ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1f42f772865..6ccf7262d1b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -214,7 +214,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
posix_flags |= SMB_O_EXCL;
if (oflags & O_TRUNC)
posix_flags |= SMB_O_TRUNC;
- if (oflags & O_SYNC)
+ /* be safe and imply O_SYNC for O_DSYNC */
+ if (oflags & O_DSYNC)
posix_flags |= SMB_O_SYNC;
if (oflags & O_DIRECTORY)
posix_flags |= SMB_O_DIRECTORY;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 429337eb7af..057e1dae12a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -76,8 +76,10 @@ static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
reopening a file. They had their effect on the original open */
if (flags & O_APPEND)
posix_flags |= (fmode_t)O_APPEND;
- if (flags & O_SYNC)
- posix_flags |= (fmode_t)O_SYNC;
+ if (flags & O_DSYNC)
+ posix_flags |= (fmode_t)O_DSYNC;
+ if (flags & __O_SYNC)
+ posix_flags |= (fmode_t)__O_SYNC;
if (flags & O_DIRECTORY)
posix_flags |= (fmode_t)O_DIRECTORY;
if (flags & O_NOFOLLOW)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 0d23b52dd22..b486169f42b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -32,7 +32,9 @@ static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
static bool debugfs_registered;
-static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev)
+static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev,
+ void *data, const struct file_operations *fops)
+
{
struct inode *inode = new_inode(sb);
@@ -44,14 +46,18 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
init_special_inode(inode, mode, dev);
break;
case S_IFREG:
- inode->i_fop = &debugfs_file_operations;
+ inode->i_fop = fops ? fops : &debugfs_file_operations;
+ inode->i_private = data;
break;
case S_IFLNK:
inode->i_op = &debugfs_link_operations;
+ inode->i_fop = fops;
+ inode->i_private = data;
break;
case S_IFDIR:
inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
+ inode->i_fop = fops ? fops : &simple_dir_operations;
+ inode->i_private = data;
/* directory inodes start off with i_nlink == 2
* (for "." entry) */
@@ -64,7 +70,8 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
/* SMP-safe */
static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
- int mode, dev_t dev)
+ int mode, dev_t dev, void *data,
+ const struct file_operations *fops)
{
struct inode *inode;
int error = -EPERM;
@@ -72,7 +79,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
if (dentry->d_inode)
return -EEXIST;
- inode = debugfs_get_inode(dir->i_sb, mode, dev);
+ inode = debugfs_get_inode(dir->i_sb, mode, dev, data, fops);
if (inode) {
d_instantiate(dentry, inode);
dget(dentry);
@@ -81,12 +88,13 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
return error;
}
-static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
+ void *data, const struct file_operations *fops)
{
int res;
mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
- res = debugfs_mknod(dir, dentry, mode, 0);
+ res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
if (!res) {
inc_nlink(dir);
fsnotify_mkdir(dir, dentry);
@@ -94,18 +102,20 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return res;
}
-static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode)
+static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode,
+ void *data, const struct file_operations *fops)
{
mode = (mode & S_IALLUGO) | S_IFLNK;
- return debugfs_mknod(dir, dentry, mode, 0);
+ return debugfs_mknod(dir, dentry, mode, 0, data, fops);
}
-static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode)
+static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode,
+ void *data, const struct file_operations *fops)
{
int res;
mode = (mode & S_IALLUGO) | S_IFREG;
- res = debugfs_mknod(dir, dentry, mode, 0);
+ res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
if (!res)
fsnotify_create(dir, dentry);
return res;
@@ -139,7 +149,9 @@ static struct file_system_type debug_fs_type = {
static int debugfs_create_by_name(const char *name, mode_t mode,
struct dentry *parent,
- struct dentry **dentry)
+ struct dentry **dentry,
+ void *data,
+ const struct file_operations *fops)
{
int error = 0;
@@ -164,13 +176,16 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
if (!IS_ERR(*dentry)) {
switch (mode & S_IFMT) {
case S_IFDIR:
- error = debugfs_mkdir(parent->d_inode, *dentry, mode);
+ error = debugfs_mkdir(parent->d_inode, *dentry, mode,
+ data, fops);
break;
case S_IFLNK:
- error = debugfs_link(parent->d_inode, *dentry, mode);
+ error = debugfs_link(parent->d_inode, *dentry, mode,
+ data, fops);
break;
default:
- error = debugfs_create(parent->d_inode, *dentry, mode);
+ error = debugfs_create(parent->d_inode, *dentry, mode,
+ data, fops);
break;
}
dput(*dentry);
@@ -221,19 +236,13 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
if (error)
goto exit;
- error = debugfs_create_by_name(name, mode, parent, &dentry);
+ error = debugfs_create_by_name(name, mode, parent, &dentry,
+ data, fops);
if (error) {
dentry = NULL;
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
goto exit;
}
-
- if (dentry->d_inode) {
- if (data)
- dentry->d_inode->i_private = data;
- if (fops)
- dentry->d_inode->i_fop = fops;
- }
exit:
return dentry;
}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 6cde970b0a1..fc2bd05d355 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -353,8 +353,8 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
* ext2_find_entry()
*
* finds an entry in the specified directory with the wanted name. It
- * returns the page in which the entry was found, and the entry itself
- * (as a parameter - res_dir). Page is returned mapped and unlocked.
+ * returns the page in which the entry was found (as a parameter - res_page),
+ * and the entry itself. Page is returned mapped and unlocked.
* Entry is guaranteed to be valid.
*/
struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir,
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9a8a8e27a06..da318b0fa63 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -142,7 +142,7 @@ struct dentry *ext2_get_parent(struct dentry *child);
/* super.c */
extern void ext2_error (struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
-extern void ext2_warning (struct super_block *, const char *, const char *, ...)
+extern void ext2_msg(struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
extern void ext2_update_dynamic_rev (struct super_block *sb);
extern void ext2_write_super (struct super_block *);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index ade634076d0..71b032c65a0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -137,7 +137,8 @@ static int ext2_block_to_path(struct inode *inode,
int final = 0;
if (i_block < 0) {
- ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0");
+ ext2_msg(inode->i_sb, KERN_WARNING,
+ "warning: %s: block < 0", __func__);
} else if (i_block < direct_blocks) {
offsets[n++] = i_block;
final = direct_blocks;
@@ -157,7 +158,8 @@ static int ext2_block_to_path(struct inode *inode,
offsets[n++] = i_block & (ptrs - 1);
final = ptrs;
} else {
- ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big");
+ ext2_msg(inode->i_sb, KERN_WARNING,
+ "warning: %s: block is too big", __func__);
}
if (boundary)
*boundary = final - 1 - (i_block & (ptrs - 1));
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1a9ffee47d5..1388802b780 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -58,27 +58,27 @@ void ext2_error (struct super_block * sb, const char * function,
}
va_start(args, fmt);
- printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function);
+ printk(KERN_CRIT "EXT2-fs (%s): error: %s: ", sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
- panic("EXT2-fs panic from previous error\n");
+ panic("EXT2-fs: panic from previous error\n");
if (test_opt(sb, ERRORS_RO)) {
- printk("Remounting filesystem read-only\n");
+ ext2_msg(sb, KERN_CRIT,
+ "error: remounting filesystem read-only");
sb->s_flags |= MS_RDONLY;
}
}
-void ext2_warning (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext2_msg(struct super_block *sb, const char *prefix,
+ const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ",
- sb->s_id, function);
+ printk("%sEXT2-fs (%s): ", prefix, sb->s_id);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -91,9 +91,9 @@ void ext2_update_dynamic_rev(struct super_block *sb)
if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
return;
- ext2_warning(sb, __func__,
- "updating to rev %d because of new feature flag, "
- "running e2fsck is recommended",
+ ext2_msg(sb, KERN_WARNING,
+ "warning: updating to rev %d because of "
+ "new feature flag, running e2fsck is recommended",
EXT2_DYNAMIC_REV);
es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
@@ -419,10 +419,10 @@ static const match_table_t tokens = {
{Opt_err, NULL}
};
-static int parse_options (char * options,
- struct ext2_sb_info *sbi)
+static int parse_options(char *options, struct super_block *sb)
{
- char * p;
+ char *p;
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
substring_t args[MAX_OPT_ARGS];
int option;
@@ -505,7 +505,8 @@ static int parse_options (char * options,
#else
case Opt_user_xattr:
case Opt_nouser_xattr:
- printk("EXT2 (no)user_xattr options not supported\n");
+ ext2_msg(sb, KERN_INFO, "(no)user_xattr options "
+ "not supported");
break;
#endif
#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -518,14 +519,15 @@ static int parse_options (char * options,
#else
case Opt_acl:
case Opt_noacl:
- printk("EXT2 (no)acl options not supported\n");
+ ext2_msg(sb, KERN_INFO,
+ "(no)acl options not supported");
break;
#endif
case Opt_xip:
#ifdef CONFIG_EXT2_FS_XIP
set_opt (sbi->s_mount_opt, XIP);
#else
- printk("EXT2 xip option not supported\n");
+ ext2_msg(sb, KERN_INFO, "xip option not supported");
#endif
break;
@@ -542,19 +544,18 @@ static int parse_options (char * options,
case Opt_quota:
case Opt_usrquota:
case Opt_grpquota:
- printk(KERN_ERR
- "EXT2-fs: quota operations not supported.\n");
-
+ ext2_msg(sb, KERN_INFO,
+ "quota operations not supported");
break;
#endif
case Opt_reservation:
set_opt(sbi->s_mount_opt, RESERVATION);
- printk("reservations ON\n");
+ ext2_msg(sb, KERN_INFO, "reservations ON");
break;
case Opt_noreservation:
clear_opt(sbi->s_mount_opt, RESERVATION);
- printk("reservations OFF\n");
+ ext2_msg(sb, KERN_INFO, "reservations OFF");
break;
case Opt_ignore:
break;
@@ -573,34 +574,40 @@ static int ext2_setup_super (struct super_block * sb,
struct ext2_sb_info *sbi = EXT2_SB(sb);
if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
- printk ("EXT2-fs warning: revision level too high, "
- "forcing read-only mode\n");
+ ext2_msg(sb, KERN_ERR,
+ "error: revision level too high, "
+ "forcing read-only mode");
res = MS_RDONLY;
}
if (read_only)
return res;
if (!(sbi->s_mount_state & EXT2_VALID_FS))
- printk ("EXT2-fs warning: mounting unchecked fs, "
- "running e2fsck is recommended\n");
+ ext2_msg(sb, KERN_WARNING,
+ "warning: mounting unchecked fs, "
+ "running e2fsck is recommended");
else if ((sbi->s_mount_state & EXT2_ERROR_FS))
- printk ("EXT2-fs warning: mounting fs with errors, "
- "running e2fsck is recommended\n");
+ ext2_msg(sb, KERN_WARNING,
+ "warning: mounting fs with errors, "
+ "running e2fsck is recommended");
else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
le16_to_cpu(es->s_mnt_count) >=
(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
- printk ("EXT2-fs warning: maximal mount count reached, "
- "running e2fsck is recommended\n");
+ ext2_msg(sb, KERN_WARNING,
+ "warning: maximal mount count reached, "
+ "running e2fsck is recommended");
else if (le32_to_cpu(es->s_checkinterval) &&
- (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds()))
- printk ("EXT2-fs warning: checktime reached, "
- "running e2fsck is recommended\n");
+ (le32_to_cpu(es->s_lastcheck) +
+ le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+ ext2_msg(sb, KERN_WARNING,
+ "warning: checktime reached, "
+ "running e2fsck is recommended");
if (!le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
ext2_write_super(sb);
if (test_opt (sb, DEBUG))
- printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, "
- "bpg=%lu, ipg=%lu, mo=%04lx]\n",
+ ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
+ "bpg=%lu, ipg=%lu, mo=%04lx",
EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
sbi->s_frag_size,
sbi->s_groups_count,
@@ -767,7 +774,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
*/
blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
if (!blocksize) {
- printk ("EXT2-fs: unable to set blocksize\n");
+ ext2_msg(sb, KERN_ERR, "error: unable to set blocksize");
goto failed_sbi;
}
@@ -783,7 +790,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
if (!(bh = sb_bread(sb, logic_sb_block))) {
- printk ("EXT2-fs: unable to read superblock\n");
+ ext2_msg(sb, KERN_ERR, "error: unable to read superblock");
goto failed_sbi;
}
/*
@@ -826,7 +833,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, RESERVATION);
- if (!parse_options ((char *) data, sbi))
+ if (!parse_options((char *) data, sb))
goto failed_mount;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -840,8 +847,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
(EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
- printk("EXT2-fs warning: feature flags set on rev 0 fs, "
- "running e2fsck is recommended\n");
+ ext2_msg(sb, KERN_WARNING,
+ "warning: feature flags set on rev 0 fs, "
+ "running e2fsck is recommended");
/*
* Check feature flags regardless of the revision level, since we
* previously didn't change the revision level when setting the flags,
@@ -849,16 +857,16 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
*/
features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
if (features) {
- printk("EXT2-fs: %s: couldn't mount because of "
- "unsupported optional features (%x).\n",
- sb->s_id, le32_to_cpu(features));
+ ext2_msg(sb, KERN_ERR, "error: couldn't mount because of "
+ "unsupported optional features (%x)",
+ le32_to_cpu(features));
goto failed_mount;
}
if (!(sb->s_flags & MS_RDONLY) &&
(features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
- printk("EXT2-fs: %s: couldn't mount RDWR because of "
- "unsupported optional features (%x).\n",
- sb->s_id, le32_to_cpu(features));
+ ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of "
+ "unsupported optional features (%x)",
+ le32_to_cpu(features));
goto failed_mount;
}
@@ -866,7 +874,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) {
if (!silent)
- printk("XIP: Unsupported blocksize\n");
+ ext2_msg(sb, KERN_ERR,
+ "error: unsupported blocksize for xip");
goto failed_mount;
}
@@ -875,7 +884,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
brelse(bh);
if (!sb_set_blocksize(sb, blocksize)) {
- printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n");
+ ext2_msg(sb, KERN_ERR, "error: blocksize is too small");
goto failed_sbi;
}
@@ -883,14 +892,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
offset = (sb_block*BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
if(!bh) {
- printk("EXT2-fs: Couldn't read superblock on "
- "2nd try.\n");
+ ext2_msg(sb, KERN_ERR, "error: couldn't read "
+ "superblock on 2nd try");
goto failed_sbi;
}
es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
- printk ("EXT2-fs: Magic mismatch, very weird !\n");
+ ext2_msg(sb, KERN_ERR, "error: magic mismatch");
goto failed_mount;
}
}
@@ -906,7 +915,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
!is_power_of_2(sbi->s_inode_size) ||
(sbi->s_inode_size > blocksize)) {
- printk ("EXT2-fs: unsupported inode size: %d\n",
+ ext2_msg(sb, KERN_ERR,
+ "error: unsupported inode size: %d",
sbi->s_inode_size);
goto failed_mount;
}
@@ -943,29 +953,33 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_blocksize != bh->b_size) {
if (!silent)
- printk ("VFS: Unsupported blocksize on dev "
- "%s.\n", sb->s_id);
+ ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
goto failed_mount;
}
if (sb->s_blocksize != sbi->s_frag_size) {
- printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n",
+ ext2_msg(sb, KERN_ERR,
+ "error: fragsize %lu != blocksize %lu "
+ "(not supported yet)",
sbi->s_frag_size, sb->s_blocksize);
goto failed_mount;
}
if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
- printk ("EXT2-fs: #blocks per group too big: %lu\n",
+ ext2_msg(sb, KERN_ERR,
+ "error: #blocks per group too big: %lu",
sbi->s_blocks_per_group);
goto failed_mount;
}
if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
- printk ("EXT2-fs: #fragments per group too big: %lu\n",
+ ext2_msg(sb, KERN_ERR,
+ "error: #fragments per group too big: %lu",
sbi->s_frags_per_group);
goto failed_mount;
}
if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
- printk ("EXT2-fs: #inodes per group too big: %lu\n",
+ ext2_msg(sb, KERN_ERR,
+ "error: #inodes per group too big: %lu",
sbi->s_inodes_per_group);
goto failed_mount;
}
@@ -979,13 +993,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
- printk ("EXT2-fs: not enough memory\n");
+ ext2_msg(sb, KERN_ERR, "error: not enough memory");
goto failed_mount;
}
bgl_lock_init(sbi->s_blockgroup_lock);
sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL);
if (!sbi->s_debts) {
- printk ("EXT2-fs: not enough memory\n");
+ ext2_msg(sb, KERN_ERR, "error: not enough memory");
goto failed_mount_group_desc;
}
for (i = 0; i < db_count; i++) {
@@ -994,12 +1008,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi->s_group_desc[i]) {
for (j = 0; j < i; j++)
brelse (sbi->s_group_desc[j]);
- printk ("EXT2-fs: unable to read group descriptors\n");
+ ext2_msg(sb, KERN_ERR,
+ "error: unable to read group descriptors");
goto failed_mount_group_desc;
}
}
if (!ext2_check_descriptors (sb)) {
- printk ("EXT2-fs: group descriptors corrupted!\n");
+ ext2_msg(sb, KERN_ERR, "error: group descriptors corrupted");
goto failed_mount2;
}
sbi->s_gdb_count = db_count;
@@ -1032,7 +1047,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_count_dirs(sb));
}
if (err) {
- printk(KERN_ERR "EXT2-fs: insufficient memory\n");
+ ext2_msg(sb, KERN_ERR, "error: insufficient memory");
goto failed_mount3;
}
/*
@@ -1048,27 +1063,28 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
iput(root);
- printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n");
+ ext2_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
goto failed_mount3;
}
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
iput(root);
- printk(KERN_ERR "EXT2-fs: get root inode failed\n");
+ ext2_msg(sb, KERN_ERR, "error: get root inode failed");
ret = -ENOMEM;
goto failed_mount3;
}
if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
- ext2_warning(sb, __func__,
- "mounting ext3 filesystem as ext2");
+ ext2_msg(sb, KERN_WARNING,
+ "warning: mounting ext3 filesystem as ext2");
ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
return 0;
cantfind_ext2:
if (!silent)
- printk("VFS: Can't find an ext2 filesystem on dev %s.\n",
- sb->s_id);
+ ext2_msg(sb, KERN_ERR,
+ "error: can't find an ext2 filesystem on dev %s.",
+ sb->s_id);
goto failed_mount;
failed_mount3:
percpu_counter_destroy(&sbi->s_freeblocks_counter);
@@ -1121,8 +1137,24 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
static int ext2_sync_fs(struct super_block *sb, int wait)
{
struct ext2_super_block *es = EXT2_SB(sb)->s_es;
+ struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
lock_kernel();
+ if (buffer_write_io_error(sbh)) {
+ /*
+ * Oh, dear. A previous attempt to write the
+ * superblock failed. This could happen because the
+ * USB device was yanked out. Or it could happen to
+ * be a transient write error and maybe the block will
+ * be remapped. Nothing we can do but to retry the
+ * write and hope for the best.
+ */
+ ext2_msg(sb, KERN_ERR,
+ "previous I/O error to superblock detected");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ }
+
if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
ext2_debug("setting valid to 0\n");
es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
@@ -1170,7 +1202,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
/*
* Allow the "check" option to be passed as a remount option.
*/
- if (!parse_options (data, sbi)) {
+ if (!parse_options(data, sb)) {
err = -EINVAL;
goto restore_opts;
}
@@ -1182,7 +1214,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
EXT2_MOUNT_XIP if not */
if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
- printk("XIP: Unsupported blocksize\n");
+ ext2_msg(sb, KERN_WARNING,
+ "warning: unsupported blocksize for xip");
err = -EINVAL;
goto restore_opts;
}
@@ -1191,8 +1224,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
(old_mount_opt & EXT2_MOUNT_XIP)) &&
invalidate_inodes(sb)) {
- ext2_warning(sb, __func__, "refusing change of xip flag "
- "with busy inodes while remounting");
+ ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
+ "xip flag with busy inodes while remounting");
sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
}
@@ -1216,9 +1249,10 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
__le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
~EXT2_FEATURE_RO_COMPAT_SUPP);
if (ret) {
- printk("EXT2-fs: %s: couldn't remount RDWR because of "
- "unsupported optional features (%x).\n",
- sb->s_id, le32_to_cpu(ret));
+ ext2_msg(sb, KERN_WARNING,
+ "warning: couldn't remount RDWR because of "
+ "unsupported optional features (%x).",
+ le32_to_cpu(ret));
err = -EROFS;
goto restore_opts;
}
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index c18fbf3e406..322a56b2dfb 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -69,8 +69,9 @@ void ext2_xip_verify_sb(struct super_block *sb)
if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
!sb->s_bdev->bd_disk->fops->direct_access) {
sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
- ext2_warning(sb, __func__,
- "ignoring xip option - not supported by bdev");
+ ext2_msg(sb, KERN_WARNING,
+ "warning: ignoring xip option - "
+ "not supported by bdev");
}
}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2db95777890..ad14227f509 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
return ext3_journal_get_write_access(handle, bh);
}
+/*
+ * Truncate blocks that were not used by write. We have to truncate the
+ * pagecache as well so that corresponding buffers get properly unmapped.
+ */
+static void ext3_truncate_failed_write(struct inode *inode)
+{
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ ext3_truncate(inode);
+}
+
static int ext3_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1209,7 +1219,7 @@ write_begin_failed:
unlock_page(page);
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
return ret ? ret : copied;
}
@@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
return ret ? ret : copied;
}
@@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
return ret ? ret : copied;
}
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 8359e7b3dc8..5f83b617917 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -266,7 +266,7 @@ static int setup_new_group_blocks(struct super_block *sb,
goto exit_bh;
if (IS_ERR(gdb = bclean(handle, sb, block))) {
- err = PTR_ERR(bh);
+ err = PTR_ERR(gdb);
goto exit_bh;
}
ext3_journal_dirty_metadata(handle, gdb);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 427496c4767..7ad1e8c30bd 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -135,12 +135,24 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
if (is_handle_aborted(handle))
return;
- printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
- caller, errstr, err_fn);
+ printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
+ caller, errstr, err_fn);
journal_abort_handle(handle);
}
+void ext3_msg(struct super_block *sb, const char *prefix,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
+ vprintk(fmt, args);
+ printk("\n");
+ va_end(args);
+}
+
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@@ -174,12 +186,13 @@ static void ext3_handle_error(struct super_block *sb)
journal_abort(journal, -EIO);
}
if (test_opt (sb, ERRORS_RO)) {
- printk (KERN_CRIT "Remounting filesystem read-only\n");
+ ext3_msg(sb, KERN_CRIT,
+ "error: remounting filesystem read-only");
sb->s_flags |= MS_RDONLY;
}
ext3_commit_super(sb, es, 1);
if (test_opt(sb, ERRORS_PANIC))
- panic("EXT3-fs (device %s): panic forced after error\n",
+ panic("EXT3-fs (%s): panic forced after error\n",
sb->s_id);
}
@@ -247,8 +260,7 @@ void __ext3_std_error (struct super_block * sb, const char * function,
return;
errstr = ext3_decode_error(sb, errno, nbuf);
- printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
- sb->s_id, function, errstr);
+ ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
ext3_handle_error(sb);
}
@@ -268,21 +280,20 @@ void ext3_abort (struct super_block * sb, const char * function,
{
va_list args;
- printk (KERN_CRIT "ext3_abort called.\n");
-
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
+ printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
- panic("EXT3-fs panic from previous error\n");
+ panic("EXT3-fs: panic from previous error\n");
if (sb->s_flags & MS_RDONLY)
return;
- printk(KERN_CRIT "Remounting filesystem read-only\n");
+ ext3_msg(sb, KERN_CRIT,
+ "error: remounting filesystem read-only");
EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
sb->s_flags |= MS_RDONLY;
EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
@@ -296,7 +307,7 @@ void ext3_warning (struct super_block * sb, const char * function,
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
+ printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
sb->s_id, function);
vprintk(fmt, args);
printk("\n");
@@ -310,10 +321,10 @@ void ext3_update_dynamic_rev(struct super_block *sb)
if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
return;
- ext3_warning(sb, __func__,
- "updating to rev %d because of new feature flag, "
- "running e2fsck is recommended",
- EXT3_DYNAMIC_REV);
+ ext3_msg(sb, KERN_WARNING,
+ "warning: updating to rev %d because of "
+ "new feature flag, running e2fsck is recommended",
+ EXT3_DYNAMIC_REV);
es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
@@ -331,7 +342,7 @@ void ext3_update_dynamic_rev(struct super_block *sb)
/*
* Open the external journal device
*/
-static struct block_device *ext3_blkdev_get(dev_t dev)
+static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
{
struct block_device *bdev;
char b[BDEVNAME_SIZE];
@@ -342,8 +353,9 @@ static struct block_device *ext3_blkdev_get(dev_t dev)
return bdev;
fail:
- printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
- __bdevname(dev, b), PTR_ERR(bdev));
+ ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
+ __bdevname(dev, b), PTR_ERR(bdev));
+
return NULL;
}
@@ -378,13 +390,13 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
{
struct list_head *l;
- printk(KERN_ERR "sb orphan head is %d\n",
+ ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
le32_to_cpu(sbi->s_es->s_last_orphan));
- printk(KERN_ERR "sb_info orphan list:\n");
+ ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
list_for_each(l, &sbi->s_orphan) {
struct inode *inode = orphan_list_entry(l);
- printk(KERN_ERR " "
- "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
+ ext3_msg(sb, KERN_ERR, " "
+ "inode %s:%lu at %p: mode %o, nlink %d, next %d",
inode->i_sb->s_id, inode->i_ino, inode,
inode->i_mode, inode->i_nlink,
@@ -527,9 +539,22 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
#if defined(CONFIG_QUOTA)
struct ext3_sb_info *sbi = EXT3_SB(sb);
- if (sbi->s_jquota_fmt)
- seq_printf(seq, ",jqfmt=%s",
- (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
+ if (sbi->s_jquota_fmt) {
+ char *fmtname = "";
+
+ switch (sbi->s_jquota_fmt) {
+ case QFMT_VFS_OLD:
+ fmtname = "vfsold";
+ break;
+ case QFMT_VFS_V0:
+ fmtname = "vfsv0";
+ break;
+ case QFMT_VFS_V1:
+ fmtname = "vfsv1";
+ break;
+ }
+ seq_printf(seq, ",jqfmt=%s", fmtname);
+ }
if (sbi->s_qf_names[USRQUOTA])
seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -636,6 +661,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, DATA_ERR_ABORT))
seq_puts(seq, ",data_err=abort");
+ if (test_opt(sb, NOLOAD))
+ seq_puts(seq, ",norecovery");
+
ext3_show_quota_options(seq, sb);
return 0;
@@ -787,9 +815,9 @@ enum {
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
- Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
- Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
- Opt_grpquota
+ Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
+ Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
+ Opt_usrquota, Opt_grpquota
};
static const match_table_t tokens = {
@@ -818,6 +846,7 @@ static const match_table_t tokens = {
{Opt_reservation, "reservation"},
{Opt_noreservation, "noreservation"},
{Opt_noload, "noload"},
+ {Opt_noload, "norecovery"},
{Opt_nobh, "nobh"},
{Opt_bh, "bh"},
{Opt_commit, "commit=%u"},
@@ -836,6 +865,7 @@ static const match_table_t tokens = {
{Opt_grpjquota, "grpjquota=%s"},
{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
+ {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
{Opt_grpquota, "grpquota"},
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
@@ -845,7 +875,7 @@ static const match_table_t tokens = {
{Opt_err, NULL},
};
-static ext3_fsblk_t get_sb_block(void **data)
+static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
{
ext3_fsblk_t sb_block;
char *options = (char *) *data;
@@ -856,7 +886,7 @@ static ext3_fsblk_t get_sb_block(void **data)
/*todo: use simple_strtoll with >32bit ext3 */
sb_block = simple_strtoul(options, &options, 0);
if (*options && *options != ',') {
- printk("EXT3-fs: Invalid sb specification: %s\n",
+ ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
(char *) *data);
return 1;
}
@@ -956,7 +986,8 @@ static int parse_options (char *options, struct super_block *sb,
#else
case Opt_user_xattr:
case Opt_nouser_xattr:
- printk("EXT3 (no)user_xattr options not supported\n");
+ ext3_msg(sb, KERN_INFO,
+ "(no)user_xattr options not supported");
break;
#endif
#ifdef CONFIG_EXT3_FS_POSIX_ACL
@@ -969,7 +1000,8 @@ static int parse_options (char *options, struct super_block *sb,
#else
case Opt_acl:
case Opt_noacl:
- printk("EXT3 (no)acl options not supported\n");
+ ext3_msg(sb, KERN_INFO,
+ "(no)acl options not supported");
break;
#endif
case Opt_reservation:
@@ -985,16 +1017,16 @@ static int parse_options (char *options, struct super_block *sb,
user to specify an existing inode to be the
journal file. */
if (is_remount) {
- printk(KERN_ERR "EXT3-fs: cannot specify "
- "journal on remount\n");
+ ext3_msg(sb, KERN_ERR, "error: cannot specify "
+ "journal on remount");
return 0;
}
set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
break;
case Opt_journal_inum:
if (is_remount) {
- printk(KERN_ERR "EXT3-fs: cannot specify "
- "journal on remount\n");
+ ext3_msg(sb, KERN_ERR, "error: cannot specify "
+ "journal on remount");
return 0;
}
if (match_int(&args[0], &option))
@@ -1003,8 +1035,8 @@ static int parse_options (char *options, struct super_block *sb,
break;
case Opt_journal_dev:
if (is_remount) {
- printk(KERN_ERR "EXT3-fs: cannot specify "
- "journal on remount\n");
+ ext3_msg(sb, KERN_ERR, "error: cannot specify "
+ "journal on remount");
return 0;
}
if (match_int(&args[0], &option))
@@ -1036,12 +1068,11 @@ static int parse_options (char *options, struct super_block *sb,
if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
== data_opt)
break;
- printk(KERN_ERR
- "EXT3-fs (device %s): Cannot change "
+ ext3_msg(sb, KERN_ERR,
+ "error: cannot change "
"data mode on remount. The filesystem "
"is mounted in data=%s mode and you "
- "try to remount it in data=%s mode.\n",
- sb->s_id,
+ "try to remount it in data=%s mode.",
data_mode_string(sbi->s_mount_opt &
EXT3_MOUNT_DATA_FLAGS),
data_mode_string(data_opt));
@@ -1066,31 +1097,31 @@ static int parse_options (char *options, struct super_block *sb,
set_qf_name:
if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
- printk(KERN_ERR
- "EXT3-fs: Cannot change journaled "
- "quota options when quota turned on.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: cannot change journaled "
+ "quota options when quota turned on.");
return 0;
}
qname = match_strdup(&args[0]);
if (!qname) {
- printk(KERN_ERR
- "EXT3-fs: not enough memory for "
- "storing quotafile name.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: not enough memory for "
+ "storing quotafile name.");
return 0;
}
if (sbi->s_qf_names[qtype] &&
strcmp(sbi->s_qf_names[qtype], qname)) {
- printk(KERN_ERR
- "EXT3-fs: %s quota file already "
- "specified.\n", QTYPE2NAME(qtype));
+ ext3_msg(sb, KERN_ERR,
+ "error: %s quota file already "
+ "specified.", QTYPE2NAME(qtype));
kfree(qname);
return 0;
}
sbi->s_qf_names[qtype] = qname;
if (strchr(sbi->s_qf_names[qtype], '/')) {
- printk(KERN_ERR
- "EXT3-fs: quotafile must be on "
- "filesystem root.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: quotafile must be on "
+ "filesystem root.");
kfree(sbi->s_qf_names[qtype]);
sbi->s_qf_names[qtype] = NULL;
return 0;
@@ -1105,9 +1136,9 @@ set_qf_name:
clear_qf_name:
if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
- printk(KERN_ERR "EXT3-fs: Cannot change "
+ ext3_msg(sb, KERN_ERR, "error: cannot change "
"journaled quota options when "
- "quota turned on.\n");
+ "quota turned on.");
return 0;
}
/*
@@ -1121,12 +1152,15 @@ clear_qf_name:
goto set_qf_format;
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
+ goto set_qf_format;
+ case Opt_jqfmt_vfsv1:
+ qfmt = QFMT_VFS_V1;
set_qf_format:
if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
- printk(KERN_ERR "EXT3-fs: Cannot change "
+ ext3_msg(sb, KERN_ERR, "error: cannot change "
"journaled quota options when "
- "quota turned on.\n");
+ "quota turned on.");
return 0;
}
sbi->s_jquota_fmt = qfmt;
@@ -1142,8 +1176,8 @@ set_qf_format:
break;
case Opt_noquota:
if (sb_any_quota_loaded(sb)) {
- printk(KERN_ERR "EXT3-fs: Cannot change quota "
- "options when quota turned on.\n");
+ ext3_msg(sb, KERN_ERR, "error: cannot change "
+ "quota options when quota turned on.");
return 0;
}
clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1154,8 +1188,8 @@ set_qf_format:
case Opt_quota:
case Opt_usrquota:
case Opt_grpquota:
- printk(KERN_ERR
- "EXT3-fs: quota options not supported.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: quota options not supported.");
break;
case Opt_usrjquota:
case Opt_grpjquota:
@@ -1163,9 +1197,10 @@ set_qf_format:
case Opt_offgrpjquota:
case Opt_jqfmt_vfsold:
case Opt_jqfmt_vfsv0:
- printk(KERN_ERR
- "EXT3-fs: journaled quota options not "
- "supported.\n");
+ case Opt_jqfmt_vfsv1:
+ ext3_msg(sb, KERN_ERR,
+ "error: journaled quota options not "
+ "supported.");
break;
case Opt_noquota:
break;
@@ -1185,8 +1220,9 @@ set_qf_format:
break;
case Opt_resize:
if (!is_remount) {
- printk("EXT3-fs: resize option only available "
- "for remount\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: resize option only available "
+ "for remount");
return 0;
}
if (match_int(&args[0], &option) != 0)
@@ -1200,9 +1236,9 @@ set_qf_format:
clear_opt(sbi->s_mount_opt, NOBH);
break;
default:
- printk (KERN_ERR
- "EXT3-fs: Unrecognized mount option \"%s\" "
- "or missing value\n", p);
+ ext3_msg(sb, KERN_ERR,
+ "error: unrecognized mount option \"%s\" "
+ "or missing value", p);
return 0;
}
}
@@ -1220,21 +1256,21 @@ set_qf_format:
(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
(sbi->s_qf_names[GRPQUOTA] &&
(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
- printk(KERN_ERR "EXT3-fs: old and new quota "
- "format mixing.\n");
+ ext3_msg(sb, KERN_ERR, "error: old and new quota "
+ "format mixing.");
return 0;
}
if (!sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journaled quota format "
- "not specified.\n");
+ ext3_msg(sb, KERN_ERR, "error: journaled quota format "
+ "not specified.");
return 0;
}
} else {
if (sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journaled quota format "
+ ext3_msg(sb, KERN_ERR, "error: journaled quota format "
"specified with no journaling "
- "enabled.\n");
+ "enabled.");
return 0;
}
}
@@ -1249,31 +1285,33 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
int res = 0;
if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
- printk (KERN_ERR "EXT3-fs warning: revision level too high, "
- "forcing read-only mode\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: revision level too high, "
+ "forcing read-only mode");
res = MS_RDONLY;
}
if (read_only)
return res;
if (!(sbi->s_mount_state & EXT3_VALID_FS))
- printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
- "running e2fsck is recommended\n");
+ ext3_msg(sb, KERN_WARNING,
+ "warning: mounting unchecked fs, "
+ "running e2fsck is recommended");
else if ((sbi->s_mount_state & EXT3_ERROR_FS))
- printk (KERN_WARNING
- "EXT3-fs warning: mounting fs with errors, "
- "running e2fsck is recommended\n");
+ ext3_msg(sb, KERN_WARNING,
+ "warning: mounting fs with errors, "
+ "running e2fsck is recommended");
else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
le16_to_cpu(es->s_mnt_count) >=
(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
- printk (KERN_WARNING
- "EXT3-fs warning: maximal mount count reached, "
- "running e2fsck is recommended\n");
+ ext3_msg(sb, KERN_WARNING,
+ "warning: maximal mount count reached, "
+ "running e2fsck is recommended");
else if (le32_to_cpu(es->s_checkinterval) &&
(le32_to_cpu(es->s_lastcheck) +
le32_to_cpu(es->s_checkinterval) <= get_seconds()))
- printk (KERN_WARNING
- "EXT3-fs warning: checktime reached, "
- "running e2fsck is recommended\n");
+ ext3_msg(sb, KERN_WARNING,
+ "warning: checktime reached, "
+ "running e2fsck is recommended");
#if 0
/* @@@ We _will_ want to clear the valid bit if we find
inconsistencies, to force a fsck at reboot. But for
@@ -1290,22 +1328,20 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
ext3_commit_super(sb, es, 1);
if (test_opt(sb, DEBUG))
- printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
- "bpg=%lu, ipg=%lu, mo=%04lx]\n",
+ ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
+ "bpg=%lu, ipg=%lu, mo=%04lx]",
sb->s_blocksize,
sbi->s_groups_count,
EXT3_BLOCKS_PER_GROUP(sb),
EXT3_INODES_PER_GROUP(sb),
sbi->s_mount_opt);
- printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
char b[BDEVNAME_SIZE];
-
- printk("external journal on %s\n",
+ ext3_msg(sb, KERN_INFO, "using external journal on %s",
bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
} else {
- printk("internal journal\n");
+ ext3_msg(sb, KERN_INFO, "using internal journal");
}
return res;
}
@@ -1399,8 +1435,8 @@ static void ext3_orphan_cleanup (struct super_block * sb,
}
if (bdev_read_only(sb->s_bdev)) {
- printk(KERN_ERR "EXT3-fs: write access "
- "unavailable, skipping orphan cleanup.\n");
+ ext3_msg(sb, KERN_ERR, "error: write access "
+ "unavailable, skipping orphan cleanup.");
return;
}
@@ -1414,8 +1450,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
}
if (s_flags & MS_RDONLY) {
- printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
- sb->s_id);
+ ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
sb->s_flags &= ~MS_RDONLY;
}
#ifdef CONFIG_QUOTA
@@ -1426,9 +1461,9 @@ static void ext3_orphan_cleanup (struct super_block * sb,
if (EXT3_SB(sb)->s_qf_names[i]) {
int ret = ext3_quota_on_mount(sb, i);
if (ret < 0)
- printk(KERN_ERR
- "EXT3-fs: Cannot turn on journaled "
- "quota: error %d\n", ret);
+ ext3_msg(sb, KERN_ERR,
+ "error: cannot turn on journaled "
+ "quota: %d", ret);
}
}
#endif
@@ -1466,11 +1501,11 @@ static void ext3_orphan_cleanup (struct super_block * sb,
#define PLURAL(x) (x), ((x)==1) ? "" : "s"
if (nr_orphans)
- printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
- sb->s_id, PLURAL(nr_orphans));
+ ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
+ PLURAL(nr_orphans));
if (nr_truncates)
- printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
- sb->s_id, PLURAL(nr_truncates));
+ ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+ PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
/* Turn quotas off */
for (i = 0; i < MAXQUOTAS; i++) {
@@ -1554,7 +1589,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
struct ext3_super_block *es = NULL;
struct ext3_sb_info *sbi;
ext3_fsblk_t block;
- ext3_fsblk_t sb_block = get_sb_block(&data);
+ ext3_fsblk_t sb_block = get_sb_block(&data, sb);
ext3_fsblk_t logic_sb_block;
unsigned long offset = 0;
unsigned int journal_inum = 0;
@@ -1590,7 +1625,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
if (!blocksize) {
- printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
+ ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
goto out_fail;
}
@@ -1606,7 +1641,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
}
if (!(bh = sb_bread(sb, logic_sb_block))) {
- printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
+ ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
goto out_fail;
}
/*
@@ -1665,9 +1700,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
(EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
- printk(KERN_WARNING
- "EXT3-fs warning: feature flags set on rev 0 fs, "
- "running e2fsck is recommended\n");
+ ext3_msg(sb, KERN_WARNING,
+ "warning: feature flags set on rev 0 fs, "
+ "running e2fsck is recommended");
/*
* Check feature flags regardless of the revision level, since we
* previously didn't change the revision level when setting the flags,
@@ -1675,25 +1710,25 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
*/
features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
if (features) {
- printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
- "unsupported optional features (%x).\n",
- sb->s_id, le32_to_cpu(features));
+ ext3_msg(sb, KERN_ERR,
+ "error: couldn't mount because of unsupported "
+ "optional features (%x)", le32_to_cpu(features));
goto failed_mount;
}
features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
if (!(sb->s_flags & MS_RDONLY) && features) {
- printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
- "unsupported optional features (%x).\n",
- sb->s_id, le32_to_cpu(features));
+ ext3_msg(sb, KERN_ERR,
+ "error: couldn't mount RDWR because of unsupported "
+ "optional features (%x)", le32_to_cpu(features));
goto failed_mount;
}
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT3_MIN_BLOCK_SIZE ||
blocksize > EXT3_MAX_BLOCK_SIZE) {
- printk(KERN_ERR
- "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
- blocksize, sb->s_id);
+ ext3_msg(sb, KERN_ERR,
+ "error: couldn't mount because of unsupported "
+ "filesystem blocksize %d", blocksize);
goto failed_mount;
}
@@ -1704,30 +1739,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
* than the hardware sectorsize for the machine.
*/
if (blocksize < hblock) {
- printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
- "device blocksize %d.\n", blocksize, hblock);
+ ext3_msg(sb, KERN_ERR,
+ "error: fsblocksize %d too small for "
+ "hardware sectorsize %d", blocksize, hblock);
goto failed_mount;
}
brelse (bh);
if (!sb_set_blocksize(sb, blocksize)) {
- printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
- blocksize);
+ ext3_msg(sb, KERN_ERR,
+ "error: bad blocksize %d", blocksize);
goto out_fail;
}
logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
if (!bh) {
- printk(KERN_ERR
- "EXT3-fs: Can't read superblock on 2nd try.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: can't read superblock on 2nd try");
goto failed_mount;
}
es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
- printk (KERN_ERR
- "EXT3-fs: Magic mismatch, very weird !\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: magic mismatch");
goto failed_mount;
}
}
@@ -1743,8 +1779,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
(!is_power_of_2(sbi->s_inode_size)) ||
(sbi->s_inode_size > blocksize)) {
- printk (KERN_ERR
- "EXT3-fs: unsupported inode size: %d\n",
+ ext3_msg(sb, KERN_ERR,
+ "error: unsupported inode size: %d",
sbi->s_inode_size);
goto failed_mount;
}
@@ -1752,8 +1788,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
if (blocksize != sbi->s_frag_size) {
- printk(KERN_ERR
- "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
+ ext3_msg(sb, KERN_ERR,
+ "error: fragsize %lu != blocksize %u (unsupported)",
sbi->s_frag_size, blocksize);
goto failed_mount;
}
@@ -1789,31 +1825,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
}
if (sbi->s_blocks_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT3-fs: #blocks per group too big: %lu\n",
+ ext3_msg(sb, KERN_ERR,
+ "error: #blocks per group too big: %lu",
sbi->s_blocks_per_group);
goto failed_mount;
}
if (sbi->s_frags_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT3-fs: #fragments per group too big: %lu\n",
+ ext3_msg(sb, KERN_ERR,
+ "error: #fragments per group too big: %lu",
sbi->s_frags_per_group);
goto failed_mount;
}
if (sbi->s_inodes_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT3-fs: #inodes per group too big: %lu\n",
+ ext3_msg(sb, KERN_ERR,
+ "error: #inodes per group too big: %lu",
sbi->s_inodes_per_group);
goto failed_mount;
}
if (le32_to_cpu(es->s_blocks_count) >
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
- printk(KERN_ERR "EXT3-fs: filesystem on %s:"
- " too large to mount safely\n", sb->s_id);
+ ext3_msg(sb, KERN_ERR,
+ "error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8)
- printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not "
- "enabled\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: CONFIG_LBDAF not enabled");
goto failed_mount;
}
@@ -1827,7 +1863,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
- printk (KERN_ERR "EXT3-fs: not enough memory\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: not enough memory");
goto failed_mount;
}
@@ -1837,14 +1874,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
if (!sbi->s_group_desc[i]) {
- printk (KERN_ERR "EXT3-fs: "
- "can't read group descriptor %d\n", i);
+ ext3_msg(sb, KERN_ERR,
+ "error: can't read group descriptor %d", i);
db_count = i;
goto failed_mount2;
}
}
if (!ext3_check_descriptors (sb)) {
- printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: group descriptors corrupted");
goto failed_mount2;
}
sbi->s_gdb_count = db_count;
@@ -1862,7 +1900,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
ext3_count_dirs(sb));
}
if (err) {
- printk(KERN_ERR "EXT3-fs: insufficient memory\n");
+ ext3_msg(sb, KERN_ERR, "error: insufficient memory");
goto failed_mount3;
}
@@ -1910,9 +1948,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount3;
} else {
if (!silent)
- printk (KERN_ERR
- "ext3: No journal on filesystem on %s\n",
- sb->s_id);
+ ext3_msg(sb, KERN_ERR,
+ "error: no journal found. "
+ "mounting ext3 over ext2?");
goto failed_mount3;
}
@@ -1934,8 +1972,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
case EXT3_MOUNT_WRITEBACK_DATA:
if (!journal_check_available_features
(sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
- printk(KERN_ERR "EXT3-fs: Journal does not support "
- "requested data journaling mode\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: journal does not support "
+ "requested data journaling mode");
goto failed_mount4;
}
default:
@@ -1944,8 +1983,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
if (test_opt(sb, NOBH)) {
if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
- printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
- "its supported only with writeback mode\n");
+ ext3_msg(sb, KERN_WARNING,
+ "warning: ignoring nobh option - "
+ "it is supported only with writeback mode");
clear_opt(sbi->s_mount_opt, NOBH);
}
}
@@ -1956,18 +1996,18 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
root = ext3_iget(sb, EXT3_ROOT_INO);
if (IS_ERR(root)) {
- printk(KERN_ERR "EXT3-fs: get root inode failed\n");
+ ext3_msg(sb, KERN_ERR, "error: get root inode failed");
ret = PTR_ERR(root);
goto failed_mount4;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
iput(root);
- printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
+ ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
goto failed_mount4;
}
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
- printk(KERN_ERR "EXT3-fs: get root dentry failed\n");
+ ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
iput(root);
ret = -ENOMEM;
goto failed_mount4;
@@ -1986,9 +2026,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
ext3_orphan_cleanup(sb, es);
EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
if (needs_recovery)
- printk (KERN_INFO "EXT3-fs: recovery complete.\n");
+ ext3_msg(sb, KERN_INFO, "recovery complete");
ext3_mark_recovery_complete(sb, es);
- printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
+ ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
"writeback");
@@ -1998,7 +2038,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
cantfind_ext3:
if (!silent)
- printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
+ ext3_msg(sb, KERN_ERR,
+ "error: can't find ext3 filesystem on dev %s.",
sb->s_id);
goto failed_mount;
@@ -2066,27 +2107,27 @@ static journal_t *ext3_get_journal(struct super_block *sb,
journal_inode = ext3_iget(sb, journal_inum);
if (IS_ERR(journal_inode)) {
- printk(KERN_ERR "EXT3-fs: no journal found.\n");
+ ext3_msg(sb, KERN_ERR, "error: no journal found");
return NULL;
}
if (!journal_inode->i_nlink) {
make_bad_inode(journal_inode);
iput(journal_inode);
- printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
+ ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
return NULL;
}
jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
journal_inode, journal_inode->i_size);
if (!S_ISREG(journal_inode->i_mode)) {
- printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
+ ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
iput(journal_inode);
return NULL;
}
journal = journal_init_inode(journal_inode);
if (!journal) {
- printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
+ ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
iput(journal_inode);
return NULL;
}
@@ -2108,13 +2149,13 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
struct ext3_super_block * es;
struct block_device *bdev;
- bdev = ext3_blkdev_get(j_dev);
+ bdev = ext3_blkdev_get(j_dev, sb);
if (bdev == NULL)
return NULL;
if (bd_claim(bdev, sb)) {
- printk(KERN_ERR
- "EXT3: failed to claim external journal device.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: failed to claim external journal device");
blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
return NULL;
}
@@ -2122,8 +2163,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
- printk(KERN_ERR
- "EXT3-fs: blocksize too small for journal device.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: blocksize too small for journal device");
goto out_bdev;
}
@@ -2131,8 +2172,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
offset = EXT3_MIN_BLOCK_SIZE % blocksize;
set_blocksize(bdev, blocksize);
if (!(bh = __bread(bdev, sb_block, blocksize))) {
- printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
- "external journal\n");
+ ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
+ "external journal");
goto out_bdev;
}
@@ -2140,14 +2181,14 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
!(le32_to_cpu(es->s_feature_incompat) &
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
- printk(KERN_ERR "EXT3-fs: external journal has "
- "bad superblock\n");
+ ext3_msg(sb, KERN_ERR, "error: external journal has "
+ "bad superblock");
brelse(bh);
goto out_bdev;
}
if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
- printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
+ ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
brelse(bh);
goto out_bdev;
}
@@ -2159,19 +2200,21 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
journal = journal_init_dev(bdev, sb->s_bdev,
start, len, blocksize);
if (!journal) {
- printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: failed to create device journal");
goto out_bdev;
}
journal->j_private = sb;
ll_rw_block(READ, 1, &journal->j_sb_buffer);
wait_on_buffer(journal->j_sb_buffer);
if (!buffer_uptodate(journal->j_sb_buffer)) {
- printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
+ ext3_msg(sb, KERN_ERR, "I/O error on journal device");
goto out_journal;
}
if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
- printk(KERN_ERR "EXT3-fs: External journal has more than one "
- "user (unsupported) - %d\n",
+ ext3_msg(sb, KERN_ERR,
+ "error: external journal has more than one "
+ "user (unsupported) - %d",
be32_to_cpu(journal->j_superblock->s_nr_users));
goto out_journal;
}
@@ -2197,8 +2240,8 @@ static int ext3_load_journal(struct super_block *sb,
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
- printk(KERN_INFO "EXT3-fs: external journal device major/minor "
- "numbers have changed\n");
+ ext3_msg(sb, KERN_INFO, "external journal device major/minor "
+ "numbers have changed");
journal_dev = new_decode_dev(journal_devnum);
} else
journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -2213,21 +2256,21 @@ static int ext3_load_journal(struct super_block *sb,
if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
if (sb->s_flags & MS_RDONLY) {
- printk(KERN_INFO "EXT3-fs: INFO: recovery "
- "required on readonly filesystem.\n");
+ ext3_msg(sb, KERN_INFO,
+ "recovery required on readonly filesystem");
if (really_read_only) {
- printk(KERN_ERR "EXT3-fs: write access "
- "unavailable, cannot proceed.\n");
+ ext3_msg(sb, KERN_ERR, "error: write access "
+ "unavailable, cannot proceed");
return -EROFS;
}
- printk (KERN_INFO "EXT3-fs: write access will "
- "be enabled during recovery.\n");
+ ext3_msg(sb, KERN_INFO,
+ "write access will be enabled during recovery");
}
}
if (journal_inum && journal_dev) {
- printk(KERN_ERR "EXT3-fs: filesystem has both journal "
- "and inode journals!\n");
+ ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
+ "and inode journals");
return -EINVAL;
}
@@ -2242,7 +2285,7 @@ static int ext3_load_journal(struct super_block *sb,
if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
err = journal_update_format(journal);
if (err) {
- printk(KERN_ERR "EXT3-fs: error updating journal.\n");
+ ext3_msg(sb, KERN_ERR, "error updating journal");
journal_destroy(journal);
return err;
}
@@ -2254,7 +2297,7 @@ static int ext3_load_journal(struct super_block *sb,
err = journal_load(journal);
if (err) {
- printk(KERN_ERR "EXT3-fs: error loading journal.\n");
+ ext3_msg(sb, KERN_ERR, "error loading journal");
journal_destroy(journal);
return err;
}
@@ -2273,16 +2316,17 @@ static int ext3_load_journal(struct super_block *sb,
return 0;
}
-static int ext3_create_journal(struct super_block * sb,
- struct ext3_super_block * es,
+static int ext3_create_journal(struct super_block *sb,
+ struct ext3_super_block *es,
unsigned int journal_inum)
{
journal_t *journal;
int err;
if (sb->s_flags & MS_RDONLY) {
- printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
- "create journal.\n");
+ ext3_msg(sb, KERN_ERR,
+ "error: readonly filesystem when trying to "
+ "create journal");
return -EROFS;
}
@@ -2290,12 +2334,12 @@ static int ext3_create_journal(struct super_block * sb,
if (!journal)
return -EINVAL;
- printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
+ ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
journal_inum);
err = journal_create(journal);
if (err) {
- printk(KERN_ERR "EXT3-fs: error creating journal.\n");
+ ext3_msg(sb, KERN_ERR, "error creating journal");
journal_destroy(journal);
return -EIO;
}
@@ -2376,8 +2420,8 @@ out:
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext3_clear_journal_err(struct super_block * sb,
- struct ext3_super_block * es)
+static void ext3_clear_journal_err(struct super_block *sb,
+ struct ext3_super_block *es)
{
journal_t *journal;
int j_errno;
@@ -2568,10 +2612,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
__le32 ret;
if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
~EXT3_FEATURE_RO_COMPAT_SUPP))) {
- printk(KERN_WARNING "EXT3-fs: %s: couldn't "
- "remount RDWR because of unsupported "
- "optional features (%x).\n",
- sb->s_id, le32_to_cpu(ret));
+ ext3_msg(sb, KERN_WARNING,
+ "warning: couldn't remount RDWR "
+ "because of unsupported optional "
+ "features (%x)", le32_to_cpu(ret));
err = -EROFS;
goto restore_opts;
}
@@ -2582,11 +2626,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
* require a full umount/remount for now.
*/
if (es->s_last_orphan) {
- printk(KERN_WARNING "EXT3-fs: %s: couldn't "
+ ext3_msg(sb, KERN_WARNING, "warning: couldn't "
"remount RDWR because of unprocessed "
"orphan inode list. Please "
- "umount/remount instead.\n",
- sb->s_id);
+ "umount/remount instead.");
err = -EINVAL;
goto restore_opts;
}
@@ -2686,13 +2729,11 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
- es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
buf->f_bavail = 0;
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
- es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
buf->f_namelen = EXT3_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -2837,9 +2878,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
if (EXT3_SB(sb)->s_qf_names[type]) {
/* Quotafile not of fs root? */
if (path.dentry->d_parent != sb->s_root)
- printk(KERN_WARNING
- "EXT3-fs: Quota file not on filesystem root. "
- "Journaled quota will not work.\n");
+ ext3_msg(sb, KERN_WARNING,
+ "warning: Quota file not on filesystem root. "
+ "Journaled quota will not work.");
}
/*
@@ -2921,8 +2962,9 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
handle_t *handle = journal_current_handle();
if (!handle) {
- printk(KERN_WARNING "EXT3-fs: Quota write (off=%Lu, len=%Lu)"
- " cancelled because transaction is not started.\n",
+ ext3_msg(sb, KERN_WARNING,
+ "warning: quota write (off=%llu, len=%llu)"
+ " cancelled because transaction is not started.",
(unsigned long long)off, (unsigned long long)len);
return -EIO;
}
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 545e37c4b91..387d92d00b9 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -960,6 +960,10 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (error)
goto cleanup;
+ error = ext3_journal_get_write_access(handle, is.iloc.bh);
+ if (error)
+ goto cleanup;
+
if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
@@ -985,9 +989,6 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (flags & XATTR_CREATE)
goto cleanup;
}
- error = ext3_journal_get_write_access(handle, is.iloc.bh);
- if (error)
- goto cleanup;
if (!value) {
if (!is.s.not_found)
error = ext3_xattr_ibody_set(handle, inode, &i, &is);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8b58a144c31..768c111a77e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -769,9 +769,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
#if defined(CONFIG_QUOTA)
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (sbi->s_jquota_fmt)
- seq_printf(seq, ",jqfmt=%s",
- (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
+ if (sbi->s_jquota_fmt) {
+ char *fmtname = "";
+
+ switch (sbi->s_jquota_fmt) {
+ case QFMT_VFS_OLD:
+ fmtname = "vfsold";
+ break;
+ case QFMT_VFS_V0:
+ fmtname = "vfsv0";
+ break;
+ case QFMT_VFS_V1:
+ fmtname = "vfsv1";
+ break;
+ }
+ seq_printf(seq, ",jqfmt=%s", fmtname);
+ }
if (sbi->s_qf_names[USRQUOTA])
seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -1084,9 +1097,9 @@ enum {
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
- Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
- Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
- Opt_usrquota, Opt_grpquota, Opt_i_version,
+ Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
+ Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
+ Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
Opt_stripe, Opt_delalloc, Opt_nodelalloc,
Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
@@ -1137,6 +1150,7 @@ static const match_table_t tokens = {
{Opt_grpjquota, "grpjquota=%s"},
{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
+ {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
{Opt_grpquota, "grpquota"},
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
@@ -1439,6 +1453,9 @@ clear_qf_name:
goto set_qf_format;
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
+ goto set_qf_format;
+ case Opt_jqfmt_vfsv1:
+ qfmt = QFMT_VFS_V1;
set_qf_format:
if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
@@ -1481,6 +1498,7 @@ set_qf_format:
case Opt_offgrpjquota:
case Opt_jqfmt_vfsold:
case Opt_jqfmt_vfsv0:
+ case Opt_jqfmt_vfsv1:
ext4_msg(sb, KERN_ERR,
"journaled quota options not supported");
break;
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index defb932eee9..0b3fa7974fa 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -36,286 +36,323 @@ static void *zisofs_zlib_workspace;
static DEFINE_MUTEX(zisofs_zlib_lock);
/*
- * When decompressing, we typically obtain more than one page
- * per reference. We inject the additional pages into the page
- * cache as a form of readahead.
+ * Read data of @inode from @block_start to @block_end and uncompress
+ * to one zisofs block. Store the data in the @pages array with @pcount
+ * entries. Start storing at offset @poffset of the first page.
*/
-static int zisofs_readpage(struct file *file, struct page *page)
+static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
+ loff_t block_end, int pcount,
+ struct page **pages, unsigned poffset,
+ int *errp)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct address_space *mapping = inode->i_mapping;
- unsigned int maxpage, xpage, fpage, blockindex;
- unsigned long offset;
- unsigned long blockptr, blockendptr, cstart, cend, csize;
- struct buffer_head *bh, *ptrbh[2];
- unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
- unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
- unsigned long bufmask = bufsize - 1;
- int err = -EIO;
- int i;
- unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
- /* unsigned long zisofs_block_size = 1UL << zisofs_block_shift; */
- unsigned int zisofs_block_page_shift = zisofs_block_shift-PAGE_CACHE_SHIFT;
- unsigned long zisofs_block_pages = 1UL << zisofs_block_page_shift;
- unsigned long zisofs_block_page_mask = zisofs_block_pages-1;
- struct page *pages[zisofs_block_pages];
- unsigned long index = page->index;
- int indexblocks;
-
- /* We have already been given one page, this is the one
- we must do. */
- xpage = index & zisofs_block_page_mask;
- pages[xpage] = page;
-
- /* The remaining pages need to be allocated and inserted */
- offset = index & ~zisofs_block_page_mask;
- blockindex = offset >> zisofs_block_page_shift;
- maxpage = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
- /*
- * If this page is wholly outside i_size we just return zero;
- * do_generic_file_read() will handle this for us
- */
- if (page->index >= maxpage) {
- SetPageUptodate(page);
- unlock_page(page);
+ unsigned int bufsize = ISOFS_BUFFER_SIZE(inode);
+ unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
+ unsigned int bufmask = bufsize - 1;
+ int i, block_size = block_end - block_start;
+ z_stream stream = { .total_out = 0,
+ .avail_in = 0,
+ .avail_out = 0, };
+ int zerr;
+ int needblocks = (block_size + (block_start & bufmask) + bufmask)
+ >> bufshift;
+ int haveblocks;
+ blkcnt_t blocknum;
+ struct buffer_head *bhs[needblocks + 1];
+ int curbh, curpage;
+
+ if (block_size > deflateBound(1UL << zisofs_block_shift)) {
+ *errp = -EIO;
return 0;
}
-
- maxpage = min(zisofs_block_pages, maxpage-offset);
-
- for ( i = 0 ; i < maxpage ; i++, offset++ ) {
- if ( i != xpage ) {
- pages[i] = grab_cache_page_nowait(mapping, offset);
- }
- page = pages[i];
- if ( page ) {
- ClearPageError(page);
- kmap(page);
+ /* Empty block? */
+ if (block_size == 0) {
+ for ( i = 0 ; i < pcount ; i++ ) {
+ if (!pages[i])
+ continue;
+ memset(page_address(pages[i]), 0, PAGE_CACHE_SIZE);
+ flush_dcache_page(pages[i]);
+ SetPageUptodate(pages[i]);
}
+ return ((loff_t)pcount) << PAGE_CACHE_SHIFT;
}
- /* This is the last page filled, plus one; used in case of abort. */
- fpage = 0;
+ /* Because zlib is not thread-safe, do all the I/O at the top. */
+ blocknum = block_start >> bufshift;
+ memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *));
+ haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
+ ll_rw_block(READ, haveblocks, bhs);
- /* Find the pointer to this specific chunk */
- /* Note: we're not using isonum_731() here because the data is known aligned */
- /* Note: header_size is in 32-bit words (4 bytes) */
- blockptr = (header_size + blockindex) << 2;
- blockendptr = blockptr + 4;
+ curbh = 0;
+ curpage = 0;
+ /*
+ * First block is special since it may be fractional. We also wait for
+ * it before grabbing the zlib mutex; odds are that the subsequent
+ * blocks are going to come in in short order so we don't hold the zlib
+ * mutex longer than necessary.
+ */
- indexblocks = ((blockptr^blockendptr) >> bufshift) ? 2 : 1;
- ptrbh[0] = ptrbh[1] = NULL;
+ if (!bhs[0])
+ goto b_eio;
- if ( isofs_get_blocks(inode, blockptr >> bufshift, ptrbh, indexblocks) != indexblocks ) {
- if ( ptrbh[0] ) brelse(ptrbh[0]);
- printk(KERN_DEBUG "zisofs: Null buffer on reading block table, inode = %lu, block = %lu\n",
- inode->i_ino, blockptr >> bufshift);
- goto eio;
- }
- ll_rw_block(READ, indexblocks, ptrbh);
-
- bh = ptrbh[0];
- if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
- printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
- inode->i_ino, blockptr >> bufshift);
- if ( ptrbh[1] )
- brelse(ptrbh[1]);
- goto eio;
- }
- cstart = le32_to_cpu(*(__le32 *)(bh->b_data + (blockptr & bufmask)));
-
- if ( indexblocks == 2 ) {
- /* We just crossed a block boundary. Switch to the next block */
- brelse(bh);
- bh = ptrbh[1];
- if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
- printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
- inode->i_ino, blockendptr >> bufshift);
- goto eio;
- }
+ wait_on_buffer(bhs[0]);
+ if (!buffer_uptodate(bhs[0])) {
+ *errp = -EIO;
+ goto b_eio;
}
- cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
- brelse(bh);
- if (cstart > cend)
- goto eio;
+ stream.workspace = zisofs_zlib_workspace;
+ mutex_lock(&zisofs_zlib_lock);
- csize = cend-cstart;
-
- if (csize > deflateBound(1UL << zisofs_block_shift))
- goto eio;
-
- /* Now page[] contains an array of pages, any of which can be NULL,
- and the locks on which we hold. We should now read the data and
- release the pages. If the pages are NULL the decompressed data
- for that particular page should be discarded. */
-
- if ( csize == 0 ) {
- /* This data block is empty. */
-
- for ( fpage = 0 ; fpage < maxpage ; fpage++ ) {
- if ( (page = pages[fpage]) != NULL ) {
- memset(page_address(page), 0, PAGE_CACHE_SIZE);
-
- flush_dcache_page(page);
- SetPageUptodate(page);
- kunmap(page);
- unlock_page(page);
- if ( fpage == xpage )
- err = 0; /* The critical page */
- else
- page_cache_release(page);
+ zerr = zlib_inflateInit(&stream);
+ if (zerr != Z_OK) {
+ if (zerr == Z_MEM_ERROR)
+ *errp = -ENOMEM;
+ else
+ *errp = -EIO;
+ printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
+ zerr);
+ goto z_eio;
+ }
+
+ while (curpage < pcount && curbh < haveblocks &&
+ zerr != Z_STREAM_END) {
+ if (!stream.avail_out) {
+ if (pages[curpage]) {
+ stream.next_out = page_address(pages[curpage])
+ + poffset;
+ stream.avail_out = PAGE_CACHE_SIZE - poffset;
+ poffset = 0;
+ } else {
+ stream.next_out = (void *)&zisofs_sink_page;
+ stream.avail_out = PAGE_CACHE_SIZE;
}
}
- } else {
- /* This data block is compressed. */
- z_stream stream;
- int bail = 0, left_out = -1;
- int zerr;
- int needblocks = (csize + (cstart & bufmask) + bufmask) >> bufshift;
- int haveblocks;
- struct buffer_head *bhs[needblocks+1];
- struct buffer_head **bhptr;
-
- /* Because zlib is not thread-safe, do all the I/O at the top. */
-
- blockptr = cstart >> bufshift;
- memset(bhs, 0, (needblocks+1)*sizeof(struct buffer_head *));
- haveblocks = isofs_get_blocks(inode, blockptr, bhs, needblocks);
- ll_rw_block(READ, haveblocks, bhs);
-
- bhptr = &bhs[0];
- bh = *bhptr++;
-
- /* First block is special since it may be fractional.
- We also wait for it before grabbing the zlib
- mutex; odds are that the subsequent blocks are
- going to come in in short order so we don't hold
- the zlib mutex longer than necessary. */
-
- if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
- printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n",
- fpage, xpage, csize);
- goto b_eio;
- }
- stream.next_in = bh->b_data + (cstart & bufmask);
- stream.avail_in = min(bufsize-(cstart & bufmask), csize);
- csize -= stream.avail_in;
-
- stream.workspace = zisofs_zlib_workspace;
- mutex_lock(&zisofs_zlib_lock);
-
- zerr = zlib_inflateInit(&stream);
- if ( zerr != Z_OK ) {
- if ( err && zerr == Z_MEM_ERROR )
- err = -ENOMEM;
- printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
- zerr);
- goto z_eio;
+ if (!stream.avail_in) {
+ wait_on_buffer(bhs[curbh]);
+ if (!buffer_uptodate(bhs[curbh])) {
+ *errp = -EIO;
+ break;
+ }
+ stream.next_in = bhs[curbh]->b_data +
+ (block_start & bufmask);
+ stream.avail_in = min_t(unsigned, bufsize -
+ (block_start & bufmask),
+ block_size);
+ block_size -= stream.avail_in;
+ block_start = 0;
}
- while ( !bail && fpage < maxpage ) {
- page = pages[fpage];
- if ( page )
- stream.next_out = page_address(page);
- else
- stream.next_out = (void *)&zisofs_sink_page;
- stream.avail_out = PAGE_CACHE_SIZE;
-
- while ( stream.avail_out ) {
- int ao, ai;
- if ( stream.avail_in == 0 && left_out ) {
- if ( !csize ) {
- printk(KERN_WARNING "zisofs: ZF read beyond end of input\n");
- bail = 1;
- break;
- } else {
- bh = *bhptr++;
- if ( !bh ||
- (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
- /* Reached an EIO */
- printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n",
- fpage, xpage, csize);
-
- bail = 1;
- break;
- }
- stream.next_in = bh->b_data;
- stream.avail_in = min(csize,bufsize);
- csize -= stream.avail_in;
- }
- }
- ao = stream.avail_out; ai = stream.avail_in;
- zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
- left_out = stream.avail_out;
- if ( zerr == Z_BUF_ERROR && stream.avail_in == 0 )
- continue;
- if ( zerr != Z_OK ) {
- /* EOF, error, or trying to read beyond end of input */
- if ( err && zerr == Z_MEM_ERROR )
- err = -ENOMEM;
- if ( zerr != Z_STREAM_END )
- printk(KERN_DEBUG "zisofs: zisofs_inflate returned %d, inode = %lu, index = %lu, fpage = %d, xpage = %d, avail_in = %d, avail_out = %d, ai = %d, ao = %d\n",
- zerr, inode->i_ino, index,
- fpage, xpage,
- stream.avail_in, stream.avail_out,
- ai, ao);
- bail = 1;
- break;
+ while (stream.avail_out && stream.avail_in) {
+ zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
+ if (zerr == Z_BUF_ERROR && stream.avail_in == 0)
+ break;
+ if (zerr == Z_STREAM_END)
+ break;
+ if (zerr != Z_OK) {
+ /* EOF, error, or trying to read beyond end of input */
+ if (zerr == Z_MEM_ERROR)
+ *errp = -ENOMEM;
+ else {
+ printk(KERN_DEBUG
+ "zisofs: zisofs_inflate returned"
+ " %d, inode = %lu,"
+ " page idx = %d, bh idx = %d,"
+ " avail_in = %d,"
+ " avail_out = %d\n",
+ zerr, inode->i_ino, curpage,
+ curbh, stream.avail_in,
+ stream.avail_out);
+ *errp = -EIO;
}
+ goto inflate_out;
}
+ }
- if ( stream.avail_out && zerr == Z_STREAM_END ) {
- /* Fractional page written before EOF. This may
- be the last page in the file. */
- memset(stream.next_out, 0, stream.avail_out);
- stream.avail_out = 0;
+ if (!stream.avail_out) {
+ /* This page completed */
+ if (pages[curpage]) {
+ flush_dcache_page(pages[curpage]);
+ SetPageUptodate(pages[curpage]);
}
+ curpage++;
+ }
+ if (!stream.avail_in)
+ curbh++;
+ }
+inflate_out:
+ zlib_inflateEnd(&stream);
- if ( !stream.avail_out ) {
- /* This page completed */
- if ( page ) {
- flush_dcache_page(page);
- SetPageUptodate(page);
- kunmap(page);
- unlock_page(page);
- if ( fpage == xpage )
- err = 0; /* The critical page */
- else
- page_cache_release(page);
- }
- fpage++;
- }
+z_eio:
+ mutex_unlock(&zisofs_zlib_lock);
+
+b_eio:
+ for (i = 0; i < haveblocks; i++)
+ brelse(bhs[i]);
+ return stream.total_out;
+}
+
+/*
+ * Uncompress data so that pages[full_page] is fully uptodate, and possibly
+ * fill in other pages if we have data for them.
+ */
+static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
+ struct page **pages)
+{
+ loff_t start_off, end_off;
+ loff_t block_start, block_end;
+ unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
+ unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
+ unsigned int blockptr;
+ loff_t poffset = 0;
+ blkcnt_t cstart_block, cend_block;
+ struct buffer_head *bh;
+ unsigned int blkbits = ISOFS_BUFFER_BITS(inode);
+ unsigned int blksize = 1 << blkbits;
+ int err;
+ loff_t ret;
+
+ BUG_ON(!pages[full_page]);
+
+ /*
+ * We want to read at least 'full_page' page. Because we have to
+ * uncompress the whole compression block anyway, fill the surrounding
+ * pages with the data we have anyway...
+ */
+ start_off = page_offset(pages[full_page]);
+ end_off = min_t(loff_t, start_off + PAGE_CACHE_SIZE, inode->i_size);
+
+ cstart_block = start_off >> zisofs_block_shift;
+ cend_block = (end_off + (1 << zisofs_block_shift) - 1)
+ >> zisofs_block_shift;
+
+ WARN_ON(start_off - (full_page << PAGE_CACHE_SHIFT) !=
+ ((cstart_block << zisofs_block_shift) & PAGE_CACHE_MASK));
+
+ /* Find the pointer to this specific chunk */
+ /* Note: we're not using isonum_731() here because the data is known aligned */
+ /* Note: header_size is in 32-bit words (4 bytes) */
+ blockptr = (header_size + cstart_block) << 2;
+ bh = isofs_bread(inode, blockptr >> blkbits);
+ if (!bh)
+ return -EIO;
+ block_start = le32_to_cpu(*(__le32 *)
+ (bh->b_data + (blockptr & (blksize - 1))));
+
+ while (cstart_block < cend_block && pcount > 0) {
+ /* Load end of the compressed block in the file */
+ blockptr += 4;
+ /* Traversed to next block? */
+ if (!(blockptr & (blksize - 1))) {
+ brelse(bh);
+
+ bh = isofs_bread(inode, blockptr >> blkbits);
+ if (!bh)
+ return -EIO;
+ }
+ block_end = le32_to_cpu(*(__le32 *)
+ (bh->b_data + (blockptr & (blksize - 1))));
+ if (block_start > block_end) {
+ brelse(bh);
+ return -EIO;
+ }
+ err = 0;
+ ret = zisofs_uncompress_block(inode, block_start, block_end,
+ pcount, pages, poffset, &err);
+ poffset += ret;
+ pages += poffset >> PAGE_CACHE_SHIFT;
+ pcount -= poffset >> PAGE_CACHE_SHIFT;
+ full_page -= poffset >> PAGE_CACHE_SHIFT;
+ poffset &= ~PAGE_CACHE_MASK;
+
+ if (err) {
+ brelse(bh);
+ /*
+ * Did we finish reading the page we really wanted
+ * to read?
+ */
+ if (full_page < 0)
+ return 0;
+ return err;
}
- zlib_inflateEnd(&stream);
- z_eio:
- mutex_unlock(&zisofs_zlib_lock);
+ block_start = block_end;
+ cstart_block++;
+ }
+
+ if (poffset && *pages) {
+ memset(page_address(*pages) + poffset, 0,
+ PAGE_CACHE_SIZE - poffset);
+ flush_dcache_page(*pages);
+ SetPageUptodate(*pages);
+ }
+ return 0;
+}
- b_eio:
- for ( i = 0 ; i < haveblocks ; i++ ) {
- if ( bhs[i] )
- brelse(bhs[i]);
+/*
+ * When decompressing, we typically obtain more than one page
+ * per reference. We inject the additional pages into the page
+ * cache as a form of readahead.
+ */
+static int zisofs_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct address_space *mapping = inode->i_mapping;
+ int err;
+ int i, pcount, full_page;
+ unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
+ unsigned int zisofs_pages_per_cblock =
+ PAGE_CACHE_SHIFT <= zisofs_block_shift ?
+ (1 << (zisofs_block_shift - PAGE_CACHE_SHIFT)) : 0;
+ struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)];
+ pgoff_t index = page->index, end_index;
+
+ end_index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ /*
+ * If this page is wholly outside i_size we just return zero;
+ * do_generic_file_read() will handle this for us
+ */
+ if (index >= end_index) {
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+ }
+
+ if (PAGE_CACHE_SHIFT <= zisofs_block_shift) {
+ /* We have already been given one page, this is the one
+ we must do. */
+ full_page = index & (zisofs_pages_per_cblock - 1);
+ pcount = min_t(int, zisofs_pages_per_cblock,
+ end_index - (index & ~(zisofs_pages_per_cblock - 1)));
+ index -= full_page;
+ } else {
+ full_page = 0;
+ pcount = 1;
+ }
+ pages[full_page] = page;
+
+ for (i = 0; i < pcount; i++, index++) {
+ if (i != full_page)
+ pages[i] = grab_cache_page_nowait(mapping, index);
+ if (pages[i]) {
+ ClearPageError(pages[i]);
+ kmap(pages[i]);
}
}
-eio:
+ err = zisofs_fill_pages(inode, full_page, pcount, pages);
/* Release any residual pages, do not SetPageUptodate */
- while ( fpage < maxpage ) {
- page = pages[fpage];
- if ( page ) {
- flush_dcache_page(page);
- if ( fpage == xpage )
- SetPageError(page);
- kunmap(page);
- unlock_page(page);
- if ( fpage != xpage )
- page_cache_release(page);
+ for (i = 0; i < pcount; i++) {
+ if (pages[i]) {
+ flush_dcache_page(pages[i]);
+ if (i == full_page && err)
+ SetPageError(pages[i]);
+ kunmap(pages[i]);
+ unlock_page(pages[i]);
+ if (i != full_page)
+ page_cache_release(pages[i]);
}
- fpage++;
}
/* At this point, err contains 0 or -EIO depending on the "critical" page */
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c2fb2dd0131..96a685c550f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -518,8 +518,7 @@ repeat:
if (algo == SIG('p', 'z')) {
int block_shift =
isonum_711(&rr->u.ZF.parms[1]);
- if (block_shift < PAGE_CACHE_SHIFT
- || block_shift > 17) {
+ if (block_shift > 17) {
printk(KERN_WARNING "isofs: "
"Can't handle ZF block "
"size of 2^%d\n",
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8896c1d4feb..6a10238d2c6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -286,7 +286,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
if (err) {
/*
* Because AS_EIO is cleared by
- * wait_on_page_writeback_range(), set it again so
+ * filemap_fdatawait_range(), set it again so
* that user process can get -EIO from fsync().
*/
set_bit(AS_EIO,
diff --git a/fs/namei.c b/fs/namei.c
index d11f404667e..87f97ba90ad 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1279,28 +1279,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
return __lookup_hash(&this, base, NULL);
}
-/**
- * lookup_one_noperm - bad hack for sysfs
- * @name: pathname component to lookup
- * @base: base directory to lookup from
- *
- * This is a variant of lookup_one_len that doesn't perform any permission
- * checks. It's a horrible hack to work around the braindead sysfs
- * architecture and should not be used anywhere else.
- *
- * DON'T USE THIS FUNCTION EVER, thanks.
- */
-struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
-{
- int err;
- struct qstr this;
-
- err = __lookup_one_len(name, &this, base, strlen(name));
- if (err)
- return ERR_PTR(err);
- return __lookup_hash(&this, base, NULL);
-}
-
int user_path_at(int dfd, const char __user *name, unsigned flags,
struct path *path)
{
@@ -1678,6 +1656,15 @@ struct file *do_filp_open(int dfd, const char *pathname,
int will_write;
int flag = open_to_namei_flags(open_flag);
+ /*
+ * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
+ * check for O_DSYNC if they need any syncing at all, we enforce that
+ * it is always set instead of having to deal with possibly weird
+ * behaviour for malicious applications setting only __O_SYNC.
+ */
+ if (open_flag & __O_SYNC)
+ open_flag |= O_DSYNC;
+
if (!acc_mode)
acc_mode = MAY_OPEN | ACC_MODE(flag);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f5fdd39e037..6b891328f33 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -581,7 +581,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
{
struct nfs_open_context *ctx;
- if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
+ if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
return 1;
ctx = nfs_file_open_context(filp);
if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
@@ -622,7 +622,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
result = generic_file_aio_write(iocb, iov, nr_segs, pos);
- /* Return error values for O_SYNC and IS_SYNC() */
+ /* Return error values for O_DSYNC and IS_SYNC() */
if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
if (err < 0)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c84b5cc1a94..b1ce2ea9b93 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -774,7 +774,7 @@ int nfs_updatepage(struct file *file, struct page *page,
*/
if (nfs_write_pageuptodate(page, inode) &&
inode->i_flock == NULL &&
- !(file->f_flags & O_SYNC)) {
+ !(file->f_flags & O_DSYNC)) {
count = max(count + offset, nfs_page_length(page));
offset = 0;
}
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index d69e6ae5925..3f959f1879d 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -142,29 +142,75 @@ static void nilfs_palloc_desc_block_init(struct inode *inode,
}
}
+static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
+ int create,
+ void (*init_block)(struct inode *,
+ struct buffer_head *,
+ void *),
+ struct buffer_head **bhp,
+ struct nilfs_bh_assoc *prev,
+ spinlock_t *lock)
+{
+ int ret;
+
+ spin_lock(lock);
+ if (prev->bh && blkoff == prev->blkoff) {
+ get_bh(prev->bh);
+ *bhp = prev->bh;
+ spin_unlock(lock);
+ return 0;
+ }
+ spin_unlock(lock);
+
+ ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp);
+ if (!ret) {
+ spin_lock(lock);
+ /*
+ * The following code must be safe for change of the
+ * cache contents during the get block call.
+ */
+ brelse(prev->bh);
+ get_bh(*bhp);
+ prev->bh = *bhp;
+ prev->blkoff = blkoff;
+ spin_unlock(lock);
+ }
+ return ret;
+}
+
static int nilfs_palloc_get_desc_block(struct inode *inode,
unsigned long group,
int create, struct buffer_head **bhp)
{
- return nilfs_mdt_get_block(inode,
- nilfs_palloc_desc_blkoff(inode, group),
- create, nilfs_palloc_desc_block_init, bhp);
+ struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
+
+ return nilfs_palloc_get_block(inode,
+ nilfs_palloc_desc_blkoff(inode, group),
+ create, nilfs_palloc_desc_block_init,
+ bhp, &cache->prev_desc, &cache->lock);
}
static int nilfs_palloc_get_bitmap_block(struct inode *inode,
unsigned long group,
int create, struct buffer_head **bhp)
{
- return nilfs_mdt_get_block(inode,
- nilfs_palloc_bitmap_blkoff(inode, group),
- create, NULL, bhp);
+ struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
+
+ return nilfs_palloc_get_block(inode,
+ nilfs_palloc_bitmap_blkoff(inode, group),
+ create, NULL, bhp,
+ &cache->prev_bitmap, &cache->lock);
}
int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
int create, struct buffer_head **bhp)
{
- return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr),
- create, NULL, bhp);
+ struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
+
+ return nilfs_palloc_get_block(inode,
+ nilfs_palloc_entry_blkoff(inode, nr),
+ create, NULL, bhp,
+ &cache->prev_entry, &cache->lock);
}
static struct nilfs_palloc_group_desc *
@@ -176,13 +222,6 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode,
group % nilfs_palloc_groups_per_desc_block(inode);
}
-static unsigned char *
-nilfs_palloc_block_get_bitmap(const struct inode *inode,
- const struct buffer_head *bh, void *kaddr)
-{
- return (unsigned char *)(kaddr + bh_offset(bh));
-}
-
void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
const struct buffer_head *bh, void *kaddr)
{
@@ -289,8 +328,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
if (ret < 0)
goto out_desc;
bitmap_kaddr = kmap(bitmap_bh->b_page);
- bitmap = nilfs_palloc_block_get_bitmap(
- inode, bitmap_bh, bitmap_kaddr);
+ bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
pos = nilfs_palloc_find_available_slot(
inode, group, group_offset, bitmap,
entries_per_group);
@@ -351,8 +389,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
desc = nilfs_palloc_block_get_group_desc(inode, group,
req->pr_desc_bh, desc_kaddr);
bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
- bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh,
- bitmap_kaddr);
+ bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
group_offset, bitmap))
@@ -385,8 +422,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
desc = nilfs_palloc_block_get_group_desc(inode, group,
req->pr_desc_bh, desc_kaddr);
bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
- bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh,
- bitmap_kaddr);
+ bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
group_offset, bitmap))
printk(KERN_WARNING "%s: entry numer %llu already freed\n",
@@ -472,8 +508,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
desc = nilfs_palloc_block_get_group_desc(
inode, group, desc_bh, desc_kaddr);
bitmap_kaddr = kmap(bitmap_bh->b_page);
- bitmap = nilfs_palloc_block_get_bitmap(
- inode, bitmap_bh, bitmap_kaddr);
+ bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
for (j = i, n = 0;
(j < nitems) && nilfs_palloc_group_is_in(inode, group,
entry_nrs[j]);
@@ -502,3 +537,30 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
}
return 0;
}
+
+/**
+ * nilfs_palloc_setup_cache - attach an allocator cache to a metadata file
+ * @inode: inode of the metadata file
+ * @cache: cache object to attach
+ *
+ * Stores @cache in the mi_palloc_cache field of the inode's mdt info and
+ * initializes the cache spinlock.  The buffer head pointers inside @cache
+ * are not initialized here.
+ * NOTE(review): presumably @cache must already be zero-filled by the
+ * caller, since nilfs_palloc_clear_cache() passes those pointers to
+ * brelse() - confirm.
+ */
+void nilfs_palloc_setup_cache(struct inode *inode,
+ struct nilfs_palloc_cache *cache)
+{
+ NILFS_MDT(inode)->mi_palloc_cache = cache;
+ spin_lock_init(&cache->lock);
+}
+
+/**
+ * nilfs_palloc_clear_cache - drop the buffers held by the allocator cache
+ * @inode: inode of the metadata file; its mi_palloc_cache pointer is
+ *         dereferenced without a NULL check
+ *
+ * With the cache lock held, releases the descriptor, bitmap and entry
+ * buffer heads with brelse() and resets the three pointers to NULL.  The
+ * cache object itself stays attached to @inode.
+ */
+void nilfs_palloc_clear_cache(struct inode *inode)
+{
+ struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
+
+ spin_lock(&cache->lock);
+ brelse(cache->prev_desc.bh);
+ brelse(cache->prev_bitmap.bh);
+ brelse(cache->prev_entry.bh);
+ cache->prev_desc.bh = NULL;
+ cache->prev_bitmap.bh = NULL;
+ cache->prev_entry.bh = NULL;
+ spin_unlock(&cache->lock);
+}
+
+/**
+ * nilfs_palloc_destroy_cache - detach the allocator cache from a metadata file
+ * @inode: inode of the metadata file
+ *
+ * Empties the cache with nilfs_palloc_clear_cache() and then sets
+ * mi_palloc_cache to NULL.  The memory of the cache object is not freed
+ * here.
+ */
+void nilfs_palloc_destroy_cache(struct inode *inode)
+{
+ nilfs_palloc_clear_cache(inode);
+ NILFS_MDT(inode)->mi_palloc_cache = NULL;
+}
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 4ace5475c2c..f4543ac4f56 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -69,4 +69,25 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
#define nilfs_find_next_zero_bit ext2_find_next_zero_bit
+/*
+ * persistent object allocator cache
+ */
+
+/**
+ * struct nilfs_bh_assoc - pairing of a block offset and a buffer head
+ * @blkoff: block offset (presumably the offset within the metadata file
+ *          of the block that @bh holds - confirm against the users)
+ * @bh: buffer head
+ */
+struct nilfs_bh_assoc {
+ unsigned long blkoff;
+ struct buffer_head *bh;
+};
+
+/**
+ * struct nilfs_palloc_cache - buffer head cache of a persistent allocator
+ * @lock: spinlock held while the cached buffer heads are released and reset
+ * @prev_desc: slot for a group descriptor block (judging by the name, the
+ *             previously used one - confirm against the users)
+ * @prev_bitmap: slot for a bitmap block (same caveat as @prev_desc)
+ * @prev_entry: slot for an entry block (same caveat as @prev_desc)
+ */
+struct nilfs_palloc_cache {
+ spinlock_t lock;
+ struct nilfs_bh_assoc prev_desc;
+ struct nilfs_bh_assoc prev_bitmap;
+ struct nilfs_bh_assoc prev_entry;
+};
+
+void nilfs_palloc_setup_cache(struct inode *inode,
+ struct nilfs_palloc_cache *cache);
+void nilfs_palloc_clear_cache(struct inode *inode);
+void nilfs_palloc_destroy_cache(struct inode *inode);
+
#endif /* _NILFS_ALLOC_H */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 08834df6ec6..f4a14ea2ed9 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -402,19 +402,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
{
inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
- if (NILFS_MDT(bmap->b_inode))
- nilfs_mdt_mark_dirty(bmap->b_inode);
- else
- mark_inode_dirty(bmap->b_inode);
}
void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
{
inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
- if (NILFS_MDT(bmap->b_inode))
- nilfs_mdt_mark_dirty(bmap->b_inode);
- else
- mark_inode_dirty(bmap->b_inode);
}
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 84c25382f8e..471e269536a 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -68,9 +68,34 @@ void nilfs_btnode_cache_clear(struct address_space *btnc)
truncate_inode_pages(btnc, 0);
}
+/**
+ * nilfs_btnode_create_block - set up a new, zero-filled B-tree node buffer
+ * @btnc: B-tree node cache
+ * @blocknr: block number passed to nilfs_grab_buffer() and stored in
+ *           b_blocknr of the new buffer
+ *
+ * Return: the buffer head on success, or NULL if nilfs_grab_buffer()
+ * returned NULL.  Hits BUG() if the grabbed buffer is already mapped,
+ * uptodate or dirty.
+ */
+struct buffer_head *
+nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
+{
+ struct inode *inode = NILFS_BTNC_I(btnc);
+ struct buffer_head *bh;
+
+ bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
+ if (unlikely(!bh))
+ return NULL;
+
+ /* a block that is being created must not carry any cached state yet */
+ if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
+ buffer_dirty(bh))) {
+ brelse(bh);
+ BUG();
+ }
+ /* zero one block worth of data and mark the buffer mapped and uptodate */
+ memset(bh->b_data, 0, 1 << inode->i_blkbits);
+ bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+ bh->b_blocknr = blocknr;
+ set_buffer_mapped(bh);
+ set_buffer_uptodate(bh);
+
+ /*
+ * NOTE(review): nilfs_grab_buffer() presumably returns with the page
+ * locked and an extra page reference held; both are dropped here while
+ * the buffer head itself is handed to the caller - confirm.
+ */
+ unlock_page(bh->b_page);
+ page_cache_release(bh->b_page);
+ return bh;
+}
+
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
- sector_t pblocknr, struct buffer_head **pbh,
- int newblk)
+ sector_t pblocknr, struct buffer_head **pbh)
{
struct buffer_head *bh;
struct inode *inode = NILFS_BTNC_I(btnc);
@@ -81,19 +106,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
return -ENOMEM;
err = -EEXIST; /* internal code */
- if (newblk) {
- if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
- buffer_dirty(bh))) {
- brelse(bh);
- BUG();
- }
- memset(bh->b_data, 0, 1 << inode->i_blkbits);
- bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
- bh->b_blocknr = blocknr;
- set_buffer_mapped(bh);
- set_buffer_uptodate(bh);
- goto found;
- }
if (buffer_uptodate(bh) || buffer_dirty(bh))
goto found;
@@ -135,27 +147,6 @@ out_locked:
return err;
}
-int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr,
- sector_t pblocknr, struct buffer_head **pbh, int newblk)
-{
- struct buffer_head *bh;
- int err;
-
- err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk);
- if (err == -EEXIST) /* internal code (cache hit) */
- return 0;
- if (unlikely(err))
- return err;
-
- bh = *pbh;
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- brelse(bh);
- return -EIO;
- }
- return 0;
-}
-
/**
* nilfs_btnode_delete - delete B-tree node buffer
* @bh: buffer to be deleted
@@ -244,12 +235,13 @@ retry:
unlock_page(obh->b_page);
}
- err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1);
- if (likely(!err)) {
- BUG_ON(nbh == obh);
- ctxt->newbh = nbh;
- }
- return err;
+ nbh = nilfs_btnode_create_block(btnc, newkey);
+ if (!nbh)
+ return -ENOMEM;
+
+ BUG_ON(nbh == obh);
+ ctxt->newbh = nbh;
+ return 0;
failed_unlock:
unlock_page(obh->b_page);
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 3e2275172ed..07da83f0771 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -40,10 +40,10 @@ struct nilfs_btnode_chkey_ctxt {
void nilfs_btnode_cache_init_once(struct address_space *);
void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
+struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
+ __u64 blocknr);
int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
- struct buffer_head **, int);
-int nilfs_btnode_get(struct address_space *, __u64, sector_t,
- struct buffer_head **, int);
+ struct buffer_head **);
void nilfs_btnode_delete(struct buffer_head *);
int nilfs_btnode_prepare_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index e25b507a474..7cdd98b8d51 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -114,7 +114,18 @@ static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
{
struct address_space *btnc =
&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
- return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
+ int err;
+
+ err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
+ if (err)
+ return err == -EEXIST ? 0 : err;
+
+ wait_on_buffer(*bhp);
+ if (!buffer_uptodate(*bhp)) {
+ brelse(*bhp);
+ return -EIO;
+ }
+ return 0;
}
static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
@@ -122,12 +133,15 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
{
struct address_space *btnc =
&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
- int ret;
+ struct buffer_head *bh;
- ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
- if (!ret)
- set_buffer_nilfs_volatile(*bhp);
- return ret;
+ bh = nilfs_btnode_create_block(btnc, ptr);
+ if (!bh)
+ return -ENOMEM;
+
+ set_buffer_nilfs_volatile(bh);
+ *bhp = bh;
+ return 0;
}
static inline int
@@ -444,6 +458,18 @@ nilfs_btree_get_node(const struct nilfs_btree *btree,
nilfs_btree_get_nonroot_node(path, level);
}
+/**
+ * nilfs_btree_bad_node - check that a B-tree node is at the expected level
+ * @node: B-tree node to check
+ * @level: level the node is expected to have
+ *
+ * Return: 1 after dumping the stack and logging a KERN_CRIT message if the
+ * level stored in @node differs from @level, 0 otherwise.
+ */
+static inline int
+nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
+{
+ if (unlikely(nilfs_btree_node_get_level(node) != level)) {
+ dump_stack();
+ printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
+ nilfs_btree_node_get_level(node), level);
+ return 1;
+ }
+ return 0;
+}
+
static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
struct nilfs_btree_path *path,
__u64 key, __u64 *ptrp, int minlevel)
@@ -467,7 +493,8 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
if (ret < 0)
return ret;
node = nilfs_btree_get_nonroot_node(path, level);
- BUG_ON(level != nilfs_btree_node_get_level(node));
+ if (nilfs_btree_bad_node(node, level))
+ return -EINVAL;
if (!found)
found = nilfs_btree_node_lookup(node, key, &index);
else
@@ -512,7 +539,8 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
if (ret < 0)
return ret;
node = nilfs_btree_get_nonroot_node(path, level);
- BUG_ON(level != nilfs_btree_node_get_level(node));
+ if (nilfs_btree_bad_node(node, level))
+ return -EINVAL;
index = nilfs_btree_node_get_nchildren(node) - 1;
ptr = nilfs_btree_node_get_ptr(btree, node, index);
path[level].bp_index = index;
@@ -638,13 +666,11 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
{
if (level < nilfs_btree_height(btree) - 1) {
do {
- lock_buffer(path[level].bp_bh);
nilfs_btree_node_set_key(
nilfs_btree_get_nonroot_node(path, level),
path[level].bp_index, key);
if (!buffer_dirty(path[level].bp_bh))
nilfs_btnode_mark_dirty(path[level].bp_bh);
- unlock_buffer(path[level].bp_bh);
} while ((path[level].bp_index == 0) &&
(++level < nilfs_btree_height(btree) - 1));
}
@@ -663,13 +689,11 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
struct nilfs_btree_node *node;
if (level < nilfs_btree_height(btree) - 1) {
- lock_buffer(path[level].bp_bh);
node = nilfs_btree_get_nonroot_node(path, level);
nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
path[level].bp_index);
if (!buffer_dirty(path[level].bp_bh))
nilfs_btnode_mark_dirty(path[level].bp_bh);
- unlock_buffer(path[level].bp_bh);
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
@@ -689,9 +713,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
struct nilfs_btree_node *node, *left;
int nchildren, lnchildren, n, move;
- lock_buffer(path[level].bp_bh);
- lock_buffer(path[level].bp_sib_bh);
-
node = nilfs_btree_get_nonroot_node(path, level);
left = nilfs_btree_get_sib_node(path, level);
nchildren = nilfs_btree_node_get_nchildren(node);
@@ -712,9 +733,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
- unlock_buffer(path[level].bp_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -740,9 +758,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
struct nilfs_btree_node *node, *right;
int nchildren, rnchildren, n, move;
- lock_buffer(path[level].bp_bh);
- lock_buffer(path[level].bp_sib_bh);
-
node = nilfs_btree_get_nonroot_node(path, level);
right = nilfs_btree_get_sib_node(path, level);
nchildren = nilfs_btree_node_get_nchildren(node);
@@ -763,9 +778,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
- unlock_buffer(path[level].bp_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(right, 0));
@@ -794,9 +806,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
__u64 newptr;
int nchildren, n, move;
- lock_buffer(path[level].bp_bh);
- lock_buffer(path[level].bp_sib_bh);
-
node = nilfs_btree_get_nonroot_node(path, level);
right = nilfs_btree_get_sib_node(path, level);
nchildren = nilfs_btree_node_get_nchildren(node);
@@ -815,9 +824,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
- unlock_buffer(path[level].bp_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
newkey = nilfs_btree_node_get_key(right, 0);
newptr = path[level].bp_newreq.bpr_ptr;
@@ -852,8 +858,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
struct nilfs_btree_node *root, *child;
int n;
- lock_buffer(path[level].bp_sib_bh);
-
root = nilfs_btree_get_root(btree);
child = nilfs_btree_get_sib_node(path, level);
@@ -865,8 +869,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
@@ -1023,11 +1025,9 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
stats->bs_nblocks++;
- lock_buffer(bh);
nilfs_btree_node_init(btree,
(struct nilfs_btree_node *)bh->b_data,
0, level, 0, NULL, NULL);
- unlock_buffer(bh);
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_split;
}
@@ -1052,10 +1052,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
if (ret < 0)
goto err_out_curr_node;
- lock_buffer(bh);
nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
0, level, 0, NULL, NULL);
- unlock_buffer(bh);
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_grow;
@@ -1154,13 +1152,11 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
struct nilfs_btree_node *node;
if (level < nilfs_btree_height(btree) - 1) {
- lock_buffer(path[level].bp_bh);
node = nilfs_btree_get_nonroot_node(path, level);
nilfs_btree_node_delete(btree, node, keyp, ptrp,
path[level].bp_index);
if (!buffer_dirty(path[level].bp_bh))
nilfs_btnode_mark_dirty(path[level].bp_bh);
- unlock_buffer(path[level].bp_bh);
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -1180,9 +1176,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
- lock_buffer(path[level].bp_bh);
- lock_buffer(path[level].bp_sib_bh);
-
node = nilfs_btree_get_nonroot_node(path, level);
left = nilfs_btree_get_sib_node(path, level);
nchildren = nilfs_btree_node_get_nchildren(node);
@@ -1197,9 +1190,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
- unlock_buffer(path[level].bp_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -1217,9 +1207,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
- lock_buffer(path[level].bp_bh);
- lock_buffer(path[level].bp_sib_bh);
-
node = nilfs_btree_get_nonroot_node(path, level);
right = nilfs_btree_get_sib_node(path, level);
nchildren = nilfs_btree_node_get_nchildren(node);
@@ -1234,9 +1221,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
- unlock_buffer(path[level].bp_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(right, 0));
@@ -1255,9 +1239,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
- lock_buffer(path[level].bp_bh);
- lock_buffer(path[level].bp_sib_bh);
-
node = nilfs_btree_get_nonroot_node(path, level);
left = nilfs_btree_get_sib_node(path, level);
@@ -1268,9 +1249,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
- unlock_buffer(path[level].bp_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
nilfs_btnode_delete(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
@@ -1286,9 +1264,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
- lock_buffer(path[level].bp_bh);
- lock_buffer(path[level].bp_sib_bh);
-
node = nilfs_btree_get_nonroot_node(path, level);
right = nilfs_btree_get_sib_node(path, level);
@@ -1299,9 +1274,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
if (!buffer_dirty(path[level].bp_bh))
nilfs_btnode_mark_dirty(path[level].bp_bh);
- unlock_buffer(path[level].bp_bh);
- unlock_buffer(path[level].bp_sib_bh);
-
nilfs_btnode_delete(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
path[level + 1].bp_index++;
@@ -1316,7 +1288,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
- lock_buffer(path[level].bp_bh);
root = nilfs_btree_get_root(btree);
child = nilfs_btree_get_nonroot_node(path, level);
@@ -1324,7 +1295,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
nilfs_btree_node_set_level(root, level);
n = nilfs_btree_node_get_nchildren(child);
nilfs_btree_node_move_left(btree, root, child, n);
- unlock_buffer(path[level].bp_bh);
nilfs_btnode_delete(path[level].bp_bh);
path[level].bp_bh = NULL;
@@ -1699,7 +1669,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
/* create child node at level 1 */
- lock_buffer(bh);
node = (struct nilfs_btree_node *)bh->b_data;
nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
nilfs_btree_node_insert(btree, node,
@@ -1709,7 +1678,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
if (!nilfs_bmap_dirty(bmap))
nilfs_bmap_set_dirty(bmap);
- unlock_buffer(bh);
brelse(bh);
/* create root node at level 2 */
@@ -2050,7 +2018,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
for (level = NILFS_BTREE_LEVEL_NODE_MIN;
level < NILFS_BTREE_LEVEL_MAX;
level++)
- list_splice(&lists[level], listp->prev);
+ list_splice_tail(&lists[level], listp);
}
static int nilfs_btree_assign_p(struct nilfs_btree *btree,
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 0e72bbbc6b6..4b82d84ade7 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -34,28 +34,6 @@ struct nilfs_btree;
struct nilfs_btree_path;
/**
- * struct nilfs_btree_node - B-tree node
- * @bn_flags: flags
- * @bn_level: level
- * @bn_nchildren: number of children
- * @bn_pad: padding
- */
-struct nilfs_btree_node {
- __u8 bn_flags;
- __u8 bn_level;
- __le16 bn_nchildren;
- __le32 bn_pad;
-};
-
-/* flags */
-#define NILFS_BTREE_NODE_ROOT 0x01
-
-/* level */
-#define NILFS_BTREE_LEVEL_DATA 0
-#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1)
-#define NILFS_BTREE_LEVEL_MAX 14
-
-/**
* struct nilfs_btree - B-tree structure
* @bt_bmap: bmap base structure
*/
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 3f5d5d06f53..d5ad54e204a 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -926,3 +926,29 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
up_read(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
+
+/**
+ * nilfs_cpfile_read - read cpfile inode
+ * @cpfile: cpfile inode
+ * @raw_inode: on-disk cpfile inode
+ *
+ * Thin wrapper that forwards both arguments to nilfs_read_inode_common().
+ *
+ * Return: the value returned by nilfs_read_inode_common().
+ */
+int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode)
+{
+ return nilfs_read_inode_common(cpfile, raw_inode);
+}
+
+/**
+ * nilfs_cpfile_new - create cpfile
+ * @nilfs: nilfs object
+ * @cpsize: size of a checkpoint entry
+ *
+ * Allocates the inode with nilfs_mdt_new(), using NILFS_CPFILE_INO as the
+ * inode number, no super block and no extra private object size, and then
+ * sets the entry size to @cpsize.
+ *
+ * Return: the new cpfile inode, or NULL if nilfs_mdt_new() returned NULL.
+ */
+struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize)
+{
+ struct inode *cpfile;
+
+ cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0);
+ if (cpfile)
+ /* third argument is presumably the first entry offset - confirm */
+ nilfs_mdt_set_entry_size(cpfile, cpsize,
+ sizeof(struct nilfs_cpfile_header));
+ return cpfile;
+}
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index debea896e70..bc0809e0ab4 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -40,4 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned,
size_t);
+int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode);
+struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize);
+
#endif /* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 1ff8e15bd36..187dd07ba86 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -33,6 +33,16 @@
#define NILFS_CNO_MIN ((__u64)1)
#define NILFS_CNO_MAX (~(__u64)0)
+/**
+ * struct nilfs_dat_info - private object attached to the DAT inode
+ * @mi: generic mdt info; kept as the first member because NILFS_DAT_I()
+ *      casts the pointer returned by NILFS_MDT() to this structure
+ * @palloc_cache: persistent object allocator cache
+ */
+struct nilfs_dat_info {
+ struct nilfs_mdt_info mi;
+ struct nilfs_palloc_cache palloc_cache;
+};
+
+/*
+ * NILFS_DAT_I - get the DAT private object of a DAT inode.
+ * Plain cast of the NILFS_MDT() result; only meaningful for inodes whose
+ * mdt info was allocated with room for struct nilfs_dat_info.
+ */
+static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
+{
+ return (struct nilfs_dat_info *)NILFS_MDT(dat);
+}
+
static int nilfs_dat_prepare_entry(struct inode *dat,
struct nilfs_palloc_req *req, int create)
{
@@ -425,3 +435,40 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
return nvi;
}
+
+/**
+ * nilfs_dat_read - read dat inode
+ * @dat: dat inode
+ * @raw_inode: on-disk dat inode
+ *
+ * Thin wrapper that forwards both arguments to nilfs_read_inode_common().
+ *
+ * Return: the value returned by nilfs_read_inode_common().
+ */
+int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode)
+{
+ return nilfs_read_inode_common(dat, raw_inode);
+}
+
+/**
+ * nilfs_dat_new - create dat file
+ * @nilfs: nilfs object
+ * @entry_size: size of a dat entry
+ *
+ * Allocates the inode with room for a struct nilfs_dat_info, initializes
+ * the allocator block group layout for @entry_size, gives mi_sem its own
+ * lockdep class and attaches the embedded allocator cache.
+ *
+ * Return: the new dat inode, or NULL if nilfs_mdt_new() returned NULL or
+ * nilfs_palloc_init_blockgroup() failed (the inode is destroyed with
+ * nilfs_mdt_destroy() in the latter case).
+ */
+struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size)
+{
+ static struct lock_class_key dat_lock_key;
+ struct inode *dat;
+ struct nilfs_dat_info *di;
+ int err;
+
+ /* sizeof(*di) is passed as the size of the private object */
+ dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di));
+ if (dat) {
+ err = nilfs_palloc_init_blockgroup(dat, entry_size);
+ if (unlikely(err)) {
+ nilfs_mdt_destroy(dat);
+ return NULL;
+ }
+
+ di = NILFS_DAT_I(dat);
+ /* assign mi_sem of the DAT to the dat_lock_key lockdep class */
+ lockdep_set_class(&di->mi.mi_sem, &dat_lock_key);
+ nilfs_palloc_setup_cache(dat, &di->palloc_cache);
+ }
+ return dat;
+}
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index 406070d3ff4..d31c3aab0ef 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -53,4 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t);
int nilfs_dat_move(struct inode *, __u64, sector_t);
ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t);
+int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode);
+struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size);
+
#endif /* _NILFS_DAT_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index e097099bfc8..76d803e060a 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -99,9 +99,9 @@ static int nilfs_prepare_chunk(struct page *page,
NULL, nilfs_get_block);
}
-static int nilfs_commit_chunk(struct page *page,
- struct address_space *mapping,
- unsigned from, unsigned to)
+static void nilfs_commit_chunk(struct page *page,
+ struct address_space *mapping,
+ unsigned from, unsigned to)
{
struct inode *dir = mapping->host;
struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
@@ -112,15 +112,13 @@ static int nilfs_commit_chunk(struct page *page,
nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
- if (pos + copied > dir->i_size) {
+ if (pos + copied > dir->i_size)
i_size_write(dir, pos + copied);
- mark_inode_dirty(dir);
- }
if (IS_DIRSYNC(dir))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
err = nilfs_set_file_dirty(sbi, dir, nr_dirty);
+ WARN_ON(err); /* do not happen */
unlock_page(page);
- return err;
}
static void nilfs_check_page(struct page *page)
@@ -455,11 +453,10 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
BUG_ON(err);
de->inode = cpu_to_le64(inode->i_ino);
nilfs_set_de_type(de, inode);
- err = nilfs_commit_chunk(page, mapping, from, to);
+ nilfs_commit_chunk(page, mapping, from, to);
nilfs_put_page(page);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
- mark_inode_dirty(dir);
}
/*
@@ -548,10 +545,10 @@ got_it:
memcpy(de->name, name, namelen);
de->inode = cpu_to_le64(inode->i_ino);
nilfs_set_de_type(de, inode);
- err = nilfs_commit_chunk(page, page->mapping, from, to);
+ nilfs_commit_chunk(page, page->mapping, from, to);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
- mark_inode_dirty(dir);
+ nilfs_mark_inode_dirty(dir);
/* OFFSET_CACHE */
out_put:
nilfs_put_page(page);
@@ -595,10 +592,9 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
if (pde)
pde->rec_len = cpu_to_le16(to - from);
dir->inode = 0;
- err = nilfs_commit_chunk(page, mapping, from, to);
+ nilfs_commit_chunk(page, mapping, from, to);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
- mark_inode_dirty(inode);
out:
nilfs_put_page(page);
return err;
@@ -640,7 +636,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
memcpy(de->name, "..\0", 4);
nilfs_set_de_type(de, inode);
kunmap_atomic(kaddr, KM_USER0);
- err = nilfs_commit_chunk(page, mapping, 0, chunk_size);
+ nilfs_commit_chunk(page, mapping, 0, chunk_size);
fail:
page_cache_release(page);
return err;
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index 93383c5cee9..dd5f7e0a95f 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -61,6 +61,8 @@ void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs)
nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap);
+ nilfs_palloc_clear_cache(dat);
+ nilfs_palloc_clear_cache(gcdat);
nilfs_clear_dirty_pages(mapping);
nilfs_copy_back_pages(mapping, gmapping);
/* note: mdt dirty flags should be cleared by segctor. */
@@ -79,6 +81,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
gcdat->i_state = I_CLEAR;
gii->i_flags = 0;
+ nilfs_palloc_clear_cache(gcdat);
truncate_inode_pages(gcdat->i_mapping, 0);
truncate_inode_pages(&gii->i_btnode_cache, 0);
}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index e6de0a27ab5..e16a6664dfa 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -149,7 +149,7 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
__u64 vbn, struct buffer_head **out_bh)
{
int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
- vbn ? : pbn, pbn, out_bh, 0);
+ vbn ? : pbn, pbn, out_bh);
if (ret == -EEXIST) /* internal code (cache hit) */
ret = 0;
return ret;
@@ -212,9 +212,10 @@ void nilfs_destroy_gccache(struct the_nilfs *nilfs)
static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino,
__u64 cno)
{
- struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS);
+ struct inode *inode;
struct nilfs_inode_info *ii;
+ inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0);
if (!inode)
return NULL;
@@ -265,7 +266,6 @@ struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno)
*/
void nilfs_clear_gcinode(struct inode *inode)
{
- nilfs_mdt_clear(inode);
nilfs_mdt_destroy(inode);
}
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index de86401f209..922d9dd42c8 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -29,6 +29,17 @@
#include "alloc.h"
#include "ifile.h"
+
+/**
+ * struct nilfs_ifile_info - private object attached to the ifile inode
+ * @mi: generic mdt info; kept as the first member because NILFS_IFILE_I()
+ *      casts the pointer returned by NILFS_MDT() to this structure
+ * @palloc_cache: persistent object allocator cache
+ */
+struct nilfs_ifile_info {
+ struct nilfs_mdt_info mi;
+ struct nilfs_palloc_cache palloc_cache;
+};
+
+/*
+ * NILFS_IFILE_I - get the ifile private object of an ifile inode.
+ * Plain cast of the NILFS_MDT() result; only meaningful for inodes whose
+ * mdt info was allocated with room for struct nilfs_ifile_info.
+ */
+static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile)
+{
+ return (struct nilfs_ifile_info *)NILFS_MDT(ifile);
+}
+
/**
* nilfs_ifile_create_inode - create a new disk inode
* @ifile: ifile inode
@@ -148,3 +159,27 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
}
return err;
}
+
+/**
+ * nilfs_ifile_new - create inode file
+ * @sbi: nilfs_sb_info struct
+ * @inode_size: size of an inode
+ *
+ * Allocates the inode with room for a struct nilfs_ifile_info, initializes
+ * the allocator block group layout for @inode_size and attaches the
+ * embedded allocator cache.
+ *
+ * Return: the new ifile inode, or NULL if nilfs_mdt_new() returned NULL or
+ * nilfs_palloc_init_blockgroup() failed (the inode is destroyed with
+ * nilfs_mdt_destroy() in the latter case).
+ */
+struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size)
+{
+ struct inode *ifile;
+ int err;
+
+ /* the private object size makes room for the embedded palloc cache */
+ ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO,
+ sizeof(struct nilfs_ifile_info));
+ if (ifile) {
+ err = nilfs_palloc_init_blockgroup(ifile, inode_size);
+ if (unlikely(err)) {
+ nilfs_mdt_destroy(ifile);
+ return NULL;
+ }
+ nilfs_palloc_setup_cache(ifile,
+ &NILFS_IFILE_I(ifile)->palloc_cache);
+ }
+ return ifile;
+}
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index ecc3ba76db4..cbca32e498f 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -49,4 +49,6 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
int nilfs_ifile_delete_inode(struct inode *, ino_t);
int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
+struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size);
+
#endif /* _NILFS_IFILE_H */
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2a0a5a3ac13..7868cc122ac 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -97,6 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
nilfs_transaction_abort(inode->i_sb);
goto out;
}
+ nilfs_mark_inode_dirty(inode);
nilfs_transaction_commit(inode->i_sb); /* never fails */
/* Error handling should be detailed */
set_buffer_new(bh_result);
@@ -322,7 +323,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
nilfs_init_acl(), proper cancellation of
above jobs should be considered */
- mark_inode_dirty(inode);
return inode;
failed_acl:
@@ -525,7 +525,6 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh);
- /* The buffer is guarded with lock_buffer() by the caller */
if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size);
set_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
@@ -599,6 +598,7 @@ void nilfs_truncate(struct inode *inode)
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
+ nilfs_mark_inode_dirty(inode);
nilfs_set_file_dirty(NILFS_SB(sb), inode, 0);
nilfs_transaction_commit(sb);
/* May construct a logical segment and may fail in sync mode.
@@ -623,6 +623,7 @@ void nilfs_delete_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
nilfs_truncate_bmap(ii, 0);
+ nilfs_mark_inode_dirty(inode);
nilfs_free_inode(inode);
/* nilfs_free_inode() marks inode buffer dirty */
if (IS_SYNC(inode))
@@ -745,9 +746,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
"failed to reget inode block.\n");
return err;
}
- lock_buffer(ibh);
nilfs_update_inode(inode, ibh);
- unlock_buffer(ibh);
nilfs_mdt_mark_buffer_dirty(ibh);
nilfs_mdt_mark_dirty(sbi->s_ifile);
brelse(ibh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index f6326112d64..06713ffcc7f 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -186,7 +186,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
}
static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
- struct buffer_head **out_bh)
+ int readahead, struct buffer_head **out_bh)
{
struct buffer_head *first_bh, *bh;
unsigned long blkoff;
@@ -200,16 +200,18 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
if (unlikely(err))
goto failed;
- blkoff = block + 1;
- for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
- err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
- if (likely(!err || err == -EEXIST))
- brelse(bh);
- else if (err != -EBUSY)
- break; /* abort readahead if bmap lookup failed */
-
- if (!buffer_locked(first_bh))
- goto out_no_wait;
+ if (readahead) {
+ blkoff = block + 1;
+ for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
+ err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
+ if (likely(!err || err == -EEXIST))
+ brelse(bh);
+ else if (err != -EBUSY)
+ break;
+ /* abort readahead if bmap lookup failed */
+ if (!buffer_locked(first_bh))
+ goto out_no_wait;
+ }
}
wait_on_buffer(first_bh);
@@ -263,7 +265,7 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
/* Should be rewritten with merging nilfs_mdt_read_block() */
retry:
- ret = nilfs_mdt_read_block(inode, blkoff, out_bh);
+ ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
if (!create || ret != -ENOENT)
return ret;
@@ -371,7 +373,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
struct buffer_head *bh;
int err;
- err = nilfs_mdt_read_block(inode, block, &bh);
+ err = nilfs_mdt_read_block(inode, block, 0, &bh);
if (unlikely(err))
return err;
nilfs_mark_buffer_dirty(bh);
@@ -445,9 +447,17 @@ static const struct file_operations def_mdt_fops;
* longer than those of the super block structs; they may continue for
* several consecutive mounts/umounts. This would need discussions.
*/
+/**
+ * nilfs_mdt_new_common - allocate a pseudo inode for metadata file
+ * @nilfs: nilfs object
+ * @sb: super block instance the metadata file belongs to
+ * @ino: inode number
+ * @gfp_mask: gfp mask for data pages
+ * @objsz: size of the private object attached to inode->i_private
+ */
struct inode *
nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
- ino_t ino, gfp_t gfp_mask)
+ ino_t ino, gfp_t gfp_mask, size_t objsz)
{
struct inode *inode = nilfs_alloc_inode_common(nilfs);
@@ -455,8 +465,9 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
return NULL;
else {
struct address_space * const mapping = &inode->i_data;
- struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS);
+ struct nilfs_mdt_info *mi;
+ mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS);
if (!mi) {
nilfs_destroy_inode(inode);
return NULL;
@@ -513,11 +524,11 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
}
struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb,
- ino_t ino)
+ ino_t ino, size_t objsz)
{
- struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino,
- NILFS_MDT_GFP);
+ struct inode *inode;
+ inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz);
if (!inode)
return NULL;
@@ -544,14 +555,15 @@ void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow)
&NILFS_I(orig)->i_btnode_cache;
}
-void nilfs_mdt_clear(struct inode *inode)
+static void nilfs_mdt_clear(struct inode *inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
truncate_inode_pages(inode->i_mapping, 0);
- nilfs_bmap_clear(ii->i_bmap);
+ if (test_bit(NILFS_I_BMAP, &ii->i_state))
+ nilfs_bmap_clear(ii->i_bmap);
nilfs_btnode_cache_clear(&ii->i_btnode_cache);
}
@@ -559,6 +571,10 @@ void nilfs_mdt_destroy(struct inode *inode)
{
struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+ if (mdi->mi_palloc_cache)
+ nilfs_palloc_destroy_cache(inode);
+ nilfs_mdt_clear(inode);
+
kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
kfree(mdi);
nilfs_destroy_inode(inode);
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index 431599733c9..6c4bbb0470f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -36,6 +36,7 @@
* @mi_entry_size: size of an entry
* @mi_first_entry_offset: offset to the first entry
* @mi_entries_per_block: number of entries in a block
+ * @mi_palloc_cache: persistent object allocator cache
* @mi_blocks_per_group: number of blocks in a group
* @mi_blocks_per_desc_block: number of blocks per descriptor block
*/
@@ -46,6 +47,7 @@ struct nilfs_mdt_info {
unsigned mi_entry_size;
unsigned mi_first_entry_offset;
unsigned long mi_entries_per_block;
+ struct nilfs_palloc_cache *mi_palloc_cache;
unsigned long mi_blocks_per_group;
unsigned long mi_blocks_per_desc_block;
};
@@ -74,11 +76,11 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long);
int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
int nilfs_mdt_fetch_dirty(struct inode *);
-struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t);
+struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t,
+ size_t);
struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
- ino_t, gfp_t);
+ ino_t, gfp_t, size_t);
void nilfs_mdt_destroy(struct inode *);
-void nilfs_mdt_clear(struct inode *);
void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned);
void nilfs_mdt_set_shadow(struct inode *, struct inode *);
@@ -104,21 +106,4 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode)
#define nilfs_mdt_bgl_lock(inode, bg) \
(&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)
-
-static inline int
-nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh,
- unsigned n)
-{
- return nilfs_read_inode_common(
- inode, (struct nilfs_inode *)(bh->b_data + n));
-}
-
-static inline void
-nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh,
- unsigned n)
-{
- nilfs_write_inode_common(
- inode, (struct nilfs_inode *)(bh->b_data + n), 1);
-}
-
#endif /* _NILFS_MDT_H */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index ed02e886fa7..07ba838ef08 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -120,7 +120,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
inode->i_op = &nilfs_file_inode_operations;
inode->i_fop = &nilfs_file_operations;
inode->i_mapping->a_ops = &nilfs_aops;
- mark_inode_dirty(inode);
+ nilfs_mark_inode_dirty(inode);
err = nilfs_add_nondir(dentry, inode);
}
if (!err)
@@ -148,7 +148,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
- mark_inode_dirty(inode);
+ nilfs_mark_inode_dirty(inode);
err = nilfs_add_nondir(dentry, inode);
}
if (!err)
@@ -188,7 +188,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_fail;
/* mark_inode_dirty(inode); */
- /* nilfs_new_inode() and page_symlink() do this */
+ /* page_symlink() do this */
err = nilfs_add_nondir(dentry, inode);
out:
@@ -200,7 +200,8 @@ out:
return err;
out_fail:
- inode_dec_link_count(inode);
+ drop_nlink(inode);
+ nilfs_mark_inode_dirty(inode);
iput(inode);
goto out;
}
@@ -245,7 +246,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (err)
return err;
- inode_inc_link_count(dir);
+ inc_nlink(dir);
inode = nilfs_new_inode(dir, S_IFDIR | mode);
err = PTR_ERR(inode);
@@ -256,7 +257,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
inode->i_fop = &nilfs_dir_operations;
inode->i_mapping->a_ops = &nilfs_aops;
- inode_inc_link_count(inode);
+ inc_nlink(inode);
err = nilfs_make_empty(inode, dir);
if (err)
@@ -266,6 +267,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (err)
goto out_fail;
+ nilfs_mark_inode_dirty(inode);
d_instantiate(dentry, inode);
out:
if (!err)
@@ -276,26 +278,23 @@ out:
return err;
out_fail:
- inode_dec_link_count(inode);
- inode_dec_link_count(inode);
+ drop_nlink(inode);
+ drop_nlink(inode);
+ nilfs_mark_inode_dirty(inode);
iput(inode);
out_dir:
- inode_dec_link_count(dir);
+ drop_nlink(dir);
+ nilfs_mark_inode_dirty(dir);
goto out;
}
-static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
+static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode;
struct nilfs_dir_entry *de;
struct page *page;
- struct nilfs_transaction_info ti;
int err;
- err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
- if (err)
- return err;
-
err = -ENOENT;
de = nilfs_find_entry(dir, dentry, &page);
if (!de)
@@ -317,12 +316,28 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
goto out;
inode->i_ctime = dir->i_ctime;
- inode_dec_link_count(inode);
+ drop_nlink(inode);
err = 0;
out:
- if (!err)
+ return err;
+}
+
+static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct nilfs_transaction_info ti;
+ int err;
+
+ err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
+ if (err)
+ return err;
+
+ err = nilfs_do_unlink(dir, dentry);
+
+ if (!err) {
+ nilfs_mark_inode_dirty(dir);
+ nilfs_mark_inode_dirty(dentry->d_inode);
err = nilfs_transaction_commit(dir->i_sb);
- else
+ } else
nilfs_transaction_abort(dir->i_sb);
return err;
@@ -340,11 +355,13 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
err = -ENOTEMPTY;
if (nilfs_empty_dir(inode)) {
- err = nilfs_unlink(dir, dentry);
+ err = nilfs_do_unlink(dir, dentry);
if (!err) {
inode->i_size = 0;
- inode_dec_link_count(inode);
- inode_dec_link_count(dir);
+ drop_nlink(inode);
+ nilfs_mark_inode_dirty(inode);
+ drop_nlink(dir);
+ nilfs_mark_inode_dirty(dir);
}
}
if (!err)
@@ -395,42 +412,48 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = nilfs_find_entry(new_dir, new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
+ inc_nlink(old_inode);
nilfs_set_link(new_dir, new_de, new_page, old_inode);
+ nilfs_mark_inode_dirty(new_dir);
new_inode->i_ctime = CURRENT_TIME;
if (dir_de)
drop_nlink(new_inode);
- inode_dec_link_count(new_inode);
+ drop_nlink(new_inode);
+ nilfs_mark_inode_dirty(new_inode);
} else {
if (dir_de) {
err = -EMLINK;
if (new_dir->i_nlink >= NILFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
+ inc_nlink(old_inode);
err = nilfs_add_link(new_dentry, old_inode);
if (err) {
- inode_dec_link_count(old_inode);
+ drop_nlink(old_inode);
+ nilfs_mark_inode_dirty(old_inode);
goto out_dir;
}
- if (dir_de)
- inode_inc_link_count(new_dir);
+ if (dir_de) {
+ inc_nlink(new_dir);
+ nilfs_mark_inode_dirty(new_dir);
+ }
}
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME;
nilfs_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ drop_nlink(old_inode);
if (dir_de) {
nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
- inode_dec_link_count(old_dir);
+ drop_nlink(old_dir);
}
+ nilfs_mark_inode_dirty(old_dir);
+ nilfs_mark_inode_dirty(old_inode);
err = nilfs_transaction_commit(old_dir->i_sb);
return err;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 6dc83591d11..c9c96c7825d 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -770,14 +770,8 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
nilfs_finish_roll_forward(nilfs, sbi, ri);
}
- nilfs_detach_checkpoint(sbi);
- return 0;
-
failed:
nilfs_detach_checkpoint(sbi);
- nilfs_mdt_clear(nilfs->ns_cpfile);
- nilfs_mdt_clear(nilfs->ns_sufile);
- nilfs_mdt_clear(nilfs->ns_dat);
return err;
}
@@ -804,6 +798,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
struct nilfs_segsum_info ssi;
sector_t pseg_start, pseg_end, sr_pseg_start = 0;
sector_t seg_start, seg_end; /* range of full segment (block number) */
+ sector_t b, end;
u64 seg_seq;
__u64 segnum, nextnum = 0;
__u64 cno;
@@ -819,6 +814,11 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
/* Calculate range of segment */
nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
+ /* Read ahead segment */
+ b = seg_start;
+ while (b <= seg_end)
+ sb_breadahead(sbi->s_super, b++);
+
for (;;) {
/* Load segment summary */
ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
@@ -841,14 +841,20 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
ri->ri_nextnum = nextnum;
empty_seg = 0;
+ if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) {
+ /* This will never happen because a superblock
+ (last_segment) always points to a pseg
+ having a super root. */
+ ret = NILFS_SEG_FAIL_CONSISTENCY;
+ goto failed;
+ }
+
+ if (pseg_start == seg_start) {
+ nilfs_get_segment_range(nilfs, nextnum, &b, &end);
+ while (b <= end)
+ sb_breadahead(sbi->s_super, b++);
+ }
if (!NILFS_SEG_HAS_SR(&ssi)) {
- if (!scan_newer) {
- /* This will never happen because a superblock
- (last_segment) always points to a pseg
- having a super root. */
- ret = NILFS_SEG_FAIL_CONSISTENCY;
- goto failed;
- }
if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
ri->ri_lsegs_start = pseg_start;
ri->ri_lsegs_start_seq = seg_seq;
@@ -919,7 +925,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
super_root_found:
/* Updating pointers relating to the latest checkpoint */
- list_splice(&segments, ri->ri_used_segments.prev);
+ list_splice_tail(&segments, &ri->ri_used_segments);
nilfs->ns_last_pseg = sr_pseg_start;
nilfs->ns_last_seq = nilfs->ns_seg_seq;
nilfs->ns_last_cno = ri->ri_cno;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index e6d9e37fa24..645c78656aa 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -24,10 +24,22 @@
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/crc32.h>
+#include <linux/backing-dev.h>
#include "page.h"
#include "segbuf.h"
+struct nilfs_write_info {
+ struct the_nilfs *nilfs;
+ struct bio *bio;
+ int start, end; /* The region to be submitted */
+ int rest_blocks;
+ int max_pages;
+ int nr_vecs;
+ sector_t blocknr;
+};
+
+
static struct kmem_cache *nilfs_segbuf_cachep;
static void nilfs_segbuf_init_once(void *obj)
@@ -63,6 +75,11 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
INIT_LIST_HEAD(&segbuf->sb_list);
INIT_LIST_HEAD(&segbuf->sb_segsum_buffers);
INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
+
+ init_completion(&segbuf->sb_bio_event);
+ atomic_set(&segbuf->sb_err, 0);
+ segbuf->sb_nbio = 0;
+
return segbuf;
}
@@ -83,6 +100,22 @@ void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum,
segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
}
+/**
+ * nilfs_segbuf_map_cont - map a new log behind a given log
+ * @segbuf: new segment buffer
+ * @prev: segment buffer containing a log to be continued
+ */
+void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
+ struct nilfs_segment_buffer *prev)
+{
+ segbuf->sb_segnum = prev->sb_segnum;
+ segbuf->sb_fseg_start = prev->sb_fseg_start;
+ segbuf->sb_fseg_end = prev->sb_fseg_end;
+ segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks;
+ segbuf->sb_rest_blocks =
+ segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
+}
+
void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf,
__u64 nextnum, struct the_nilfs *nilfs)
{
@@ -132,8 +165,6 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
segbuf->sb_sum.ctime = ctime;
-
- segbuf->sb_io_error = 0;
return 0;
}
@@ -219,7 +250,7 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
raw_sum->ss_datasum = cpu_to_le32(crc);
}
-void nilfs_release_buffers(struct list_head *list)
+static void nilfs_release_buffers(struct list_head *list)
{
struct buffer_head *bh, *n;
@@ -241,13 +272,56 @@ void nilfs_release_buffers(struct list_head *list)
}
}
+static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
+{
+ nilfs_release_buffers(&segbuf->sb_segsum_buffers);
+ nilfs_release_buffers(&segbuf->sb_payload_buffers);
+}
+
+/*
+ * Iterators for segment buffers
+ */
+void nilfs_clear_logs(struct list_head *logs)
+{
+ struct nilfs_segment_buffer *segbuf;
+
+ list_for_each_entry(segbuf, logs, sb_list)
+ nilfs_segbuf_clear(segbuf);
+}
+
+void nilfs_truncate_logs(struct list_head *logs,
+ struct nilfs_segment_buffer *last)
+{
+ struct nilfs_segment_buffer *n, *segbuf;
+
+ segbuf = list_prepare_entry(last, logs, sb_list);
+ list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) {
+ list_del_init(&segbuf->sb_list);
+ nilfs_segbuf_clear(segbuf);
+ nilfs_segbuf_free(segbuf);
+ }
+}
+
+int nilfs_wait_on_logs(struct list_head *logs)
+{
+ struct nilfs_segment_buffer *segbuf;
+ int err;
+
+ list_for_each_entry(segbuf, logs, sb_list) {
+ err = nilfs_segbuf_wait(segbuf);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
/*
* BIO operations
*/
static void nilfs_end_bio_write(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nilfs_write_info *wi = bio->bi_private;
+ struct nilfs_segment_buffer *segbuf = bio->bi_private;
if (err == -EOPNOTSUPP) {
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
@@ -256,21 +330,22 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
}
if (!uptodate)
- atomic_inc(&wi->err);
+ atomic_inc(&segbuf->sb_err);
bio_put(bio);
- complete(&wi->bio_event);
+ complete(&segbuf->sb_bio_event);
}
-static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
+static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
+ struct nilfs_write_info *wi, int mode)
{
struct bio *bio = wi->bio;
int err;
- if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) {
- wait_for_completion(&wi->bio_event);
- wi->nbio--;
- if (unlikely(atomic_read(&wi->err))) {
+ if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) {
+ wait_for_completion(&segbuf->sb_bio_event);
+ segbuf->sb_nbio--;
+ if (unlikely(atomic_read(&segbuf->sb_err))) {
bio_put(bio);
err = -EIO;
goto failed;
@@ -278,7 +353,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
}
bio->bi_end_io = nilfs_end_bio_write;
- bio->bi_private = wi;
+ bio->bi_private = segbuf;
bio_get(bio);
submit_bio(mode, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
@@ -286,7 +361,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
err = -EOPNOTSUPP;
goto failed;
}
- wi->nbio++;
+ segbuf->sb_nbio++;
bio_put(bio);
wi->bio = NULL;
@@ -301,17 +376,15 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
}
/**
- * nilfs_alloc_seg_bio - allocate a bio for writing segment.
- * @sb: super block
- * @start: beginning disk block number of this BIO.
+ * nilfs_alloc_seg_bio - allocate a new bio for writing log
+ * @nilfs: nilfs object
+ * @start: start block number of the bio
* @nr_vecs: request size of page vector.
*
- * alloc_seg_bio() allocates a new BIO structure and initialize it.
- *
* Return Value: On success, pointer to the struct bio is returned.
* On error, NULL is returned.
*/
-static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
+static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
int nr_vecs)
{
struct bio *bio;
@@ -322,36 +395,33 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
bio = bio_alloc(GFP_NOIO, nr_vecs);
}
if (likely(bio)) {
- bio->bi_bdev = sb->s_bdev;
- bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9);
+ bio->bi_bdev = nilfs->ns_bdev;
+ bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9);
}
return bio;
}
-void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
- struct nilfs_write_info *wi)
+static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
+ struct nilfs_write_info *wi)
{
wi->bio = NULL;
wi->rest_blocks = segbuf->sb_sum.nblocks;
- wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev);
+ wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev);
wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
wi->start = wi->end = 0;
- wi->nbio = 0;
wi->blocknr = segbuf->sb_pseg_start;
-
- atomic_set(&wi->err, 0);
- init_completion(&wi->bio_event);
}
-static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh,
- int mode)
+static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
+ struct nilfs_write_info *wi,
+ struct buffer_head *bh, int mode)
{
int len, err;
BUG_ON(wi->nr_vecs <= 0);
repeat:
if (!wi->bio) {
- wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end,
+ wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end,
wi->nr_vecs);
if (unlikely(!wi->bio))
return -ENOMEM;
@@ -363,76 +433,83 @@ static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh,
return 0;
}
/* bio is FULL */
- err = nilfs_submit_seg_bio(wi, mode);
+ err = nilfs_segbuf_submit_bio(segbuf, wi, mode);
/* never submit current bh */
if (likely(!err))
goto repeat;
return err;
}
+/**
+ * nilfs_segbuf_write - submit write requests of a log
+ * @segbuf: buffer storing a log to be written
+ * @nilfs: nilfs object
+ *
+ * Return Value: On Success, 0 is returned. On Error, one of the following
+ * negative error code is returned.
+ *
+ * %-EIO - I/O error
+ *
+ * %-ENOMEM - Insufficient memory available.
+ */
int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
- struct nilfs_write_info *wi)
+ struct the_nilfs *nilfs)
{
+ struct nilfs_write_info wi;
struct buffer_head *bh;
- int res, rw = WRITE;
+ int res = 0, rw = WRITE;
+
+ wi.nilfs = nilfs;
+ nilfs_segbuf_prepare_write(segbuf, &wi);
list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
- res = nilfs_submit_bh(wi, bh, rw);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
if (unlikely(res))
goto failed_bio;
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
- res = nilfs_submit_bh(wi, bh, rw);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
if (unlikely(res))
goto failed_bio;
}
- if (wi->bio) {
+ if (wi.bio) {
/*
* Last BIO is always sent through the following
* submission.
*/
rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
- res = nilfs_submit_seg_bio(wi, rw);
- if (unlikely(res))
- goto failed_bio;
+ res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
}
- res = 0;
- out:
- return res;
-
failed_bio:
- atomic_inc(&wi->err);
- goto out;
+ return res;
}
/**
* nilfs_segbuf_wait - wait for completion of requested BIOs
- * @wi: nilfs_write_info
+ * @segbuf: segment buffer
*
* Return Value: On Success, 0 is returned. On Error, one of the following
* negative error code is returned.
*
* %-EIO - I/O error
*/
-int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf,
- struct nilfs_write_info *wi)
+int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf)
{
int err = 0;
- if (!wi->nbio)
+ if (!segbuf->sb_nbio)
return 0;
do {
- wait_for_completion(&wi->bio_event);
- } while (--wi->nbio > 0);
+ wait_for_completion(&segbuf->sb_bio_event);
+ } while (--segbuf->sb_nbio > 0);
- if (unlikely(atomic_read(&wi->err) > 0)) {
+ if (unlikely(atomic_read(&segbuf->sb_err) > 0)) {
printk(KERN_ERR "NILFS: IO error writing segment\n");
err = -EIO;
- segbuf->sb_io_error = 1;
}
return err;
}
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 0c3076f4e59..6af1630fb40 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -27,7 +27,6 @@
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include <linux/completion.h>
-#include <linux/backing-dev.h>
/**
* struct nilfs_segsum_info - On-memory segment summary
@@ -77,7 +76,9 @@ struct nilfs_segsum_info {
* @sb_rest_blocks: Number of residual blocks in the current segment
* @sb_segsum_buffers: List of buffers for segment summaries
* @sb_payload_buffers: List of buffers for segment payload
- * @sb_io_error: I/O error status
+ * @sb_nbio: Number of flying bio requests
+ * @sb_err: I/O error status
+ * @sb_bio_event: Completion event of log writing
*/
struct nilfs_segment_buffer {
struct super_block *sb_super;
@@ -96,7 +97,9 @@ struct nilfs_segment_buffer {
struct list_head sb_payload_buffers; /* including super root */
/* io status */
- int sb_io_error;
+ int sb_nbio;
+ atomic_t sb_err;
+ struct completion sb_bio_event;
};
#define NILFS_LIST_SEGBUF(head) \
@@ -125,6 +128,8 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
void nilfs_segbuf_free(struct nilfs_segment_buffer *);
void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
struct the_nilfs *);
+void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
+ struct nilfs_segment_buffer *prev);
void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
struct the_nilfs *);
int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t);
@@ -161,41 +166,18 @@ nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf,
segbuf->sb_sum.nfileblk++;
}
-void nilfs_release_buffers(struct list_head *);
+int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
+ struct the_nilfs *nilfs);
+int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf);
-static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
+void nilfs_clear_logs(struct list_head *logs);
+void nilfs_truncate_logs(struct list_head *logs,
+ struct nilfs_segment_buffer *last);
+int nilfs_wait_on_logs(struct list_head *logs);
+
+static inline void nilfs_destroy_logs(struct list_head *logs)
{
- nilfs_release_buffers(&segbuf->sb_segsum_buffers);
- nilfs_release_buffers(&segbuf->sb_payload_buffers);
+ nilfs_truncate_logs(logs, NULL);
}
-struct nilfs_write_info {
- struct bio *bio;
- int start, end; /* The region to be submitted */
- int rest_blocks;
- int max_pages;
- int nr_vecs;
- sector_t blocknr;
-
- int nbio;
- atomic_t err;
- struct completion bio_event;
- /* completion event of segment write */
-
- /*
- * The following fields must be set explicitly
- */
- struct super_block *sb;
- struct backing_dev_info *bdi; /* backing dev info */
- struct buffer_head *bh_sr;
-};
-
-
-void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *,
- struct nilfs_write_info *);
-int nilfs_segbuf_write(struct nilfs_segment_buffer *,
- struct nilfs_write_info *);
-int nilfs_segbuf_wait(struct nilfs_segment_buffer *,
- struct nilfs_write_info *);
-
#endif /* _NILFS_SEGBUF_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6eff66a070d..17584c52448 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -974,12 +974,12 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
raw_sr->sr_flags = 0;
- nilfs_mdt_write_inode_direct(
- nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz));
- nilfs_mdt_write_inode_direct(
- nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz));
- nilfs_mdt_write_inode_direct(
- nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz));
+ nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr +
+ NILFS_SR_DAT_OFFSET(isz), 1);
+ nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
+ NILFS_SR_CPFILE_OFFSET(isz), 1);
+ nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
+ NILFS_SR_SUFILE_OFFSET(isz), 1);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -1273,73 +1273,75 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
return err;
}
-static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum)
-{
- struct buffer_head *bh_su;
- struct nilfs_segment_usage *raw_su;
- int err;
-
- err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su);
- if (unlikely(err))
- return err;
- nilfs_mdt_mark_buffer_dirty(bh_su);
- nilfs_mdt_mark_dirty(sufile);
- nilfs_sufile_put_segment_usage(sufile, segnum, bh_su);
- return 0;
-}
-
+/**
+ * nilfs_segctor_begin_construction - setup segment buffer to make a new log
+ * @sci: nilfs_sc_info
+ * @nilfs: nilfs object
+ */
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
- struct nilfs_segment_buffer *segbuf, *n;
+ struct nilfs_segment_buffer *segbuf, *prev;
__u64 nextnum;
- int err;
+ int err, alloc = 0;
- if (list_empty(&sci->sc_segbufs)) {
- segbuf = nilfs_segbuf_new(sci->sc_super);
- if (unlikely(!segbuf))
- return -ENOMEM;
- list_add(&segbuf->sb_list, &sci->sc_segbufs);
- } else
- segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
+ segbuf = nilfs_segbuf_new(sci->sc_super);
+ if (unlikely(!segbuf))
+ return -ENOMEM;
+
+ if (list_empty(&sci->sc_write_logs)) {
+ nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
+ nilfs->ns_pseg_offset, nilfs);
+ if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
+ nilfs_shift_to_next_segment(nilfs);
+ nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
+ }
- nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset,
- nilfs);
+ segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
+ nextnum = nilfs->ns_nextnum;
- if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
- nilfs_shift_to_next_segment(nilfs);
- nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
+ if (nilfs->ns_segnum == nilfs->ns_nextnum)
+ /* Start from the head of a new full segment */
+ alloc++;
+ } else {
+ /* Continue logs */
+ prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
+ nilfs_segbuf_map_cont(segbuf, prev);
+ segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
+ nextnum = prev->sb_nextnum;
+
+ if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
+ nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
+ segbuf->sb_sum.seg_seq++;
+ alloc++;
+ }
}
- sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
- err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum);
- if (unlikely(err))
- return err;
+ err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
+ if (err)
+ goto failed;
- if (nilfs->ns_segnum == nilfs->ns_nextnum) {
- /* Start from the head of a new full segment */
+ if (alloc) {
err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
- if (unlikely(err))
- return err;
- } else
- nextnum = nilfs->ns_nextnum;
-
- segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
+ if (err)
+ goto failed;
+ }
nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
- /* truncating segment buffers */
- list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
- sb_list) {
- list_del_init(&segbuf->sb_list);
- nilfs_segbuf_free(segbuf);
- }
+ BUG_ON(!list_empty(&sci->sc_segbufs));
+ list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
+ sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
return 0;
+
+ failed:
+ nilfs_segbuf_free(segbuf);
+ return err;
}
static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs, int nadd)
{
- struct nilfs_segment_buffer *segbuf, *prev, *n;
+ struct nilfs_segment_buffer *segbuf, *prev;
struct inode *sufile = nilfs->ns_sufile;
__u64 nextnextnum;
LIST_HEAD(list);
@@ -1352,7 +1354,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
* not be dirty. The following call ensures that the buffer is dirty
* and will pin the buffer on memory until the sufile is written.
*/
- err = nilfs_touch_segusage(sufile, prev->sb_nextnum);
+ err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
if (unlikely(err))
return err;
@@ -1378,33 +1380,33 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
list_add_tail(&segbuf->sb_list, &list);
prev = segbuf;
}
- list_splice(&list, sci->sc_segbufs.prev);
+ list_splice_tail(&list, &sci->sc_segbufs);
return 0;
failed_segbuf:
nilfs_segbuf_free(segbuf);
failed:
- list_for_each_entry_safe(segbuf, n, &list, sb_list) {
+ list_for_each_entry(segbuf, &list, sb_list) {
ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
WARN_ON(ret); /* never fails */
- list_del_init(&segbuf->sb_list);
- nilfs_segbuf_free(segbuf);
}
+ nilfs_destroy_logs(&list);
return err;
}
-static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
- struct the_nilfs *nilfs)
+static void nilfs_free_incomplete_logs(struct list_head *logs,
+ struct the_nilfs *nilfs)
{
- struct nilfs_segment_buffer *segbuf;
- int ret, done = 0;
+ struct nilfs_segment_buffer *segbuf, *prev;
+ struct inode *sufile = nilfs->ns_sufile;
+ int ret;
- segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
+ segbuf = NILFS_FIRST_SEGBUF(logs);
if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
- ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
+ ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
WARN_ON(ret); /* never fails */
}
- if (segbuf->sb_io_error) {
+ if (atomic_read(&segbuf->sb_err)) {
/* Case 1: The first segment failed */
if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
/* Case 1a: Partial segment appended into an existing
@@ -1413,106 +1415,54 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
segbuf->sb_fseg_end);
else /* Case 1b: New full segment */
set_nilfs_discontinued(nilfs);
- done++;
}
- list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
- ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
- WARN_ON(ret); /* never fails */
- if (!done && segbuf->sb_io_error) {
- if (segbuf->sb_segnum != nilfs->ns_nextnum)
- /* Case 2: extended segment (!= next) failed */
- nilfs_sufile_set_error(nilfs->ns_sufile,
- segbuf->sb_segnum);
- done++;
- }
- }
-}
-
-static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci)
-{
- struct nilfs_segment_buffer *segbuf;
-
- list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list)
- nilfs_segbuf_clear(segbuf);
- sci->sc_super_root = NULL;
-}
-
-static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci)
-{
- struct nilfs_segment_buffer *segbuf;
-
- while (!list_empty(&sci->sc_segbufs)) {
- segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
- list_del_init(&segbuf->sb_list);
- nilfs_segbuf_free(segbuf);
- }
- /* sci->sc_curseg = NULL; */
-}
-
-static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci,
- struct the_nilfs *nilfs, int err)
-{
- if (unlikely(err)) {
- nilfs_segctor_free_incomplete_segments(sci, nilfs);
- if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
- int ret;
-
- ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
- sci->sc_freesegs,
- sci->sc_nfreesegs,
- NULL);
- WARN_ON(ret); /* do not happen */
+ prev = segbuf;
+ list_for_each_entry_continue(segbuf, logs, sb_list) {
+ if (prev->sb_nextnum != segbuf->sb_nextnum) {
+ ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
+ WARN_ON(ret); /* never fails */
}
+ if (atomic_read(&segbuf->sb_err) &&
+ segbuf->sb_segnum != nilfs->ns_nextnum)
+ /* Case 2: extended segment (!= next) failed */
+ nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
+ prev = segbuf;
}
- nilfs_segctor_clear_segment_buffers(sci);
}
static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
struct inode *sufile)
{
struct nilfs_segment_buffer *segbuf;
- struct buffer_head *bh_su;
- struct nilfs_segment_usage *raw_su;
unsigned long live_blocks;
int ret;
list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
- ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
- &raw_su, &bh_su);
- WARN_ON(ret); /* always succeed because bh_su is dirty */
live_blocks = segbuf->sb_sum.nblocks +
(segbuf->sb_pseg_start - segbuf->sb_fseg_start);
- raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime);
- raw_su->su_nblocks = cpu_to_le32(live_blocks);
- nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
- bh_su);
+ ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
+ live_blocks,
+ sci->sc_seg_ctime);
+ WARN_ON(ret); /* always succeed because the segusage is dirty */
}
}
-static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci,
- struct inode *sufile)
+static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
{
struct nilfs_segment_buffer *segbuf;
- struct buffer_head *bh_su;
- struct nilfs_segment_usage *raw_su;
int ret;
- segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
- ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
- &raw_su, &bh_su);
- WARN_ON(ret); /* always succeed because bh_su is dirty */
- raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start -
- segbuf->sb_fseg_start);
- nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su);
+ segbuf = NILFS_FIRST_SEGBUF(logs);
+ ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
+ segbuf->sb_pseg_start -
+ segbuf->sb_fseg_start, 0);
+ WARN_ON(ret); /* always succeed because the segusage is dirty */
- list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
- ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
- &raw_su, &bh_su);
+ list_for_each_entry_continue(segbuf, logs, sb_list) {
+ ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
+ 0, 0);
WARN_ON(ret); /* always succeed */
- raw_su->su_nblocks = 0;
- nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
- bh_su);
}
}
@@ -1520,17 +1470,15 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
struct nilfs_segment_buffer *last,
struct inode *sufile)
{
- struct nilfs_segment_buffer *segbuf = last, *n;
+ struct nilfs_segment_buffer *segbuf = last;
int ret;
- list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
- sb_list) {
- list_del_init(&segbuf->sb_list);
+ list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
WARN_ON(ret);
- nilfs_segbuf_free(segbuf);
}
+ nilfs_truncate_logs(&sci->sc_segbufs, last);
}
@@ -1569,7 +1517,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
NULL);
WARN_ON(err); /* do not happen */
}
- nilfs_segctor_clear_segment_buffers(sci);
+ nilfs_clear_logs(&sci->sc_segbufs);
err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
if (unlikely(err))
@@ -1814,26 +1762,18 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
}
static int nilfs_segctor_write(struct nilfs_sc_info *sci,
- struct backing_dev_info *bdi)
+ struct the_nilfs *nilfs)
{
struct nilfs_segment_buffer *segbuf;
- struct nilfs_write_info wi;
- int err, res;
-
- wi.sb = sci->sc_super;
- wi.bh_sr = sci->sc_super_root;
- wi.bdi = bdi;
+ int ret = 0;
list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
- nilfs_segbuf_prepare_write(segbuf, &wi);
- err = nilfs_segbuf_write(segbuf, &wi);
-
- res = nilfs_segbuf_wait(segbuf, &wi);
- err = err ? : res;
- if (err)
- return err;
+ ret = nilfs_segbuf_write(segbuf, nilfs);
+ if (ret)
+ break;
}
- return 0;
+ list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
+ return ret;
}
static void __nilfs_end_page_io(struct page *page, int err)
@@ -1911,15 +1851,17 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
}
}
-static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
- struct page *failed_page, int err)
+static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
+ struct buffer_head *bh_sr, int err)
{
struct nilfs_segment_buffer *segbuf;
struct page *bd_page = NULL, *fs_page = NULL;
+ struct buffer_head *bh;
- list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
- struct buffer_head *bh;
+ if (list_empty(logs))
+ return;
+ list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
if (bh->b_page != bd_page) {
@@ -1931,7 +1873,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- if (bh == sci->sc_super_root) {
+ if (bh == bh_sr) {
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
@@ -1941,7 +1883,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, err);
if (fs_page && fs_page == failed_page)
- goto done;
+ return;
fs_page = bh->b_page;
}
}
@@ -1950,8 +1892,34 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
end_page_writeback(bd_page);
nilfs_end_page_io(fs_page, err);
- done:
+}
+
+static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
+ struct the_nilfs *nilfs, int err)
+{
+ LIST_HEAD(logs);
+ int ret;
+
+ list_splice_tail_init(&sci->sc_write_logs, &logs);
+ ret = nilfs_wait_on_logs(&logs);
+ if (ret)
+ nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret);
+
+ list_splice_tail_init(&sci->sc_segbufs, &logs);
+ nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
+ nilfs_free_incomplete_logs(&logs, nilfs);
nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
+
+ if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
+ ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
+ sci->sc_freesegs,
+ sci->sc_nfreesegs,
+ NULL);
+ WARN_ON(ret); /* do not happen */
+ }
+
+ nilfs_destroy_logs(&logs);
+ sci->sc_super_root = NULL;
}
static void nilfs_set_next_segment(struct the_nilfs *nilfs,
@@ -1973,7 +1941,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
struct the_nilfs *nilfs = sbi->s_nilfs;
int update_sr = (sci->sc_super_root != NULL);
- list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
+ list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
struct buffer_head *bh;
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
@@ -2046,7 +2014,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
sci->sc_nblk_inc += sci->sc_nblk_this_inc;
- segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
+ segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
nilfs_set_next_segment(nilfs, segbuf);
if (update_sr) {
@@ -2057,10 +2025,23 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
+ nilfs_segctor_clear_metadata_dirty(sci);
} else
clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
}
+static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
+{
+ int ret;
+
+ ret = nilfs_wait_on_logs(&sci->sc_write_logs);
+ if (!ret) {
+ nilfs_segctor_complete_write(sci);
+ nilfs_destroy_logs(&sci->sc_write_logs);
+ }
+ return ret;
+}
+
static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
struct nilfs_sb_info *sbi)
{
@@ -2173,7 +2154,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
/* Avoid empty segment */
if (sci->sc_stage.scnt == NILFS_ST_DONE &&
NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
- nilfs_segctor_end_construction(sci, nilfs, 1);
+ nilfs_segctor_abort_construction(sci, nilfs, 1);
goto out;
}
@@ -2187,7 +2168,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
if (has_sr) {
err = nilfs_segctor_fill_in_checkpoint(sci);
if (unlikely(err))
- goto failed_to_make_up;
+ goto failed_to_write;
nilfs_segctor_fill_in_super_root(sci, nilfs);
}
@@ -2195,42 +2176,46 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
/* Write partial segments */
err = nilfs_segctor_prepare_write(sci, &failed_page);
- if (unlikely(err))
+ if (err) {
+ nilfs_abort_logs(&sci->sc_segbufs, failed_page,
+ sci->sc_super_root, err);
goto failed_to_write;
-
+ }
nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed);
- err = nilfs_segctor_write(sci, nilfs->ns_bdi);
+ err = nilfs_segctor_write(sci, nilfs);
if (unlikely(err))
goto failed_to_write;
- nilfs_segctor_complete_write(sci);
-
- /* Commit segments */
- if (has_sr)
- nilfs_segctor_clear_metadata_dirty(sci);
-
- nilfs_segctor_end_construction(sci, nilfs, 0);
-
+ if (sci->sc_stage.scnt == NILFS_ST_DONE ||
+ nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
+ /*
+ * At this point, we avoid double buffering
+ * for blocksize < pagesize because page dirty
+ * flag is turned off during write and dirty
+ * buffers are not properly collected for
+ * pages crossing over segments.
+ */
+ err = nilfs_segctor_wait(sci);
+ if (err)
+ goto failed_to_write;
+ }
} while (sci->sc_stage.scnt != NILFS_ST_DONE);
+ sci->sc_super_root = NULL;
+
out:
- nilfs_segctor_destroy_segment_buffers(sci);
nilfs_segctor_check_out_files(sci, sbi);
return err;
failed_to_write:
- nilfs_segctor_abort_write(sci, failed_page, err);
- nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile);
-
- failed_to_make_up:
if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
nilfs_redirty_inodes(&sci->sc_dirty_files);
failed:
if (nilfs_doing_gc())
nilfs_redirty_inodes(&sci->sc_gc_inodes);
- nilfs_segctor_end_construction(sci, nilfs, err);
+ nilfs_segctor_abort_construction(sci, nilfs, err);
goto out;
}
@@ -2559,7 +2544,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
sci->sc_freesegs = kbufs[4];
sci->sc_nfreesegs = argv[4].v_nmembs;
- list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);
+ list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
for (;;) {
nilfs_segctor_accept(sci, &req);
@@ -2788,6 +2773,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
spin_lock_init(&sci->sc_state_lock);
INIT_LIST_HEAD(&sci->sc_dirty_files);
INIT_LIST_HEAD(&sci->sc_segbufs);
+ INIT_LIST_HEAD(&sci->sc_write_logs);
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_copied_buffers);
@@ -2855,6 +2841,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
}
WARN_ON(!list_empty(&sci->sc_segbufs));
+ WARN_ON(!list_empty(&sci->sc_write_logs));
down_write(&sbi->s_nilfs->ns_segctor_sem);
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 0d2a475a741..3d3ab2f9864 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -97,6 +97,7 @@ struct nilfs_segsum_pointer {
* @sc_dsync_start: start byte offset of data pages
* @sc_dsync_end: end byte offset of data pages (inclusive)
* @sc_segbufs: List of segment buffers
+ * @sc_write_logs: List of segment buffers to hold logs under writing
* @sc_segbuf_nblocks: Number of available blocks in segment buffers.
* @sc_curseg: Current segment buffer
* @sc_super_root: Pointer to the super root buffer
@@ -143,6 +144,7 @@ struct nilfs_sc_info {
/* Segment buffers */
struct list_head sc_segbufs;
+ struct list_head sc_write_logs;
unsigned long sc_segbuf_nblocks;
struct nilfs_segment_buffer *sc_curseg;
struct buffer_head *sc_super_root;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 37994d4a59c..b6c36d0cc33 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -31,6 +31,16 @@
#include "sufile.h"
+struct nilfs_sufile_info {
+ struct nilfs_mdt_info mi;
+ unsigned long ncleansegs;
+};
+
+static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
+{
+ return (struct nilfs_sufile_info *)NILFS_MDT(sufile);
+}
+
static inline unsigned long
nilfs_sufile_segment_usages_per_block(const struct inode *sufile)
{
@@ -62,14 +72,6 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr,
max - curr + 1);
}
-static inline struct nilfs_sufile_header *
-nilfs_sufile_block_get_header(const struct inode *sufile,
- struct buffer_head *bh,
- void *kaddr)
-{
- return kaddr + bh_offset(bh);
-}
-
static struct nilfs_segment_usage *
nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum,
struct buffer_head *bh, void *kaddr)
@@ -110,6 +112,15 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
}
/**
+ * nilfs_sufile_get_ncleansegs - return the number of clean segments
+ * @sufile: inode of segment usage file
+ */
+unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile)
+{
+ return NILFS_SUI(sufile)->ncleansegs;
+}
+
+/**
* nilfs_sufile_updatev - modify multiple segment usages at a time
* @sufile: inode of segment usage file
* @segnumv: array of segment numbers
@@ -270,7 +281,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
if (ret < 0)
goto out_sem;
kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
- header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
+ header = kaddr + bh_offset(header_bh);
ncleansegs = le64_to_cpu(header->sh_ncleansegs);
last_alloc = le64_to_cpu(header->sh_last_alloc);
kunmap_atomic(kaddr, KM_USER0);
@@ -302,13 +313,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
kunmap_atomic(kaddr, KM_USER0);
kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
- header = nilfs_sufile_block_get_header(
- sufile, header_bh, kaddr);
+ header = kaddr + bh_offset(header_bh);
le64_add_cpu(&header->sh_ncleansegs, -1);
le64_add_cpu(&header->sh_ndirtysegs, 1);
header->sh_last_alloc = cpu_to_le64(segnum);
kunmap_atomic(kaddr, KM_USER0);
+ NILFS_SUI(sufile)->ncleansegs--;
nilfs_mdt_mark_buffer_dirty(header_bh);
nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
@@ -351,6 +362,8 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
kunmap_atomic(kaddr, KM_USER0);
nilfs_sufile_mod_counter(header_bh, -1, 1);
+ NILFS_SUI(sufile)->ncleansegs--;
+
nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
@@ -380,6 +393,8 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
kunmap_atomic(kaddr, KM_USER0);
nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
+ NILFS_SUI(sufile)->ncleansegs -= clean;
+
nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
@@ -409,79 +424,65 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
+ NILFS_SUI(sufile)->ncleansegs++;
+
nilfs_mdt_mark_dirty(sufile);
}
/**
- * nilfs_sufile_get_segment_usage - get a segment usage
+ * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty
* @sufile: inode of segment usage file
* @segnum: segment number
- * @sup: pointer to segment usage
- * @bhp: pointer to buffer head
- *
- * Description: nilfs_sufile_get_segment_usage() acquires the segment usage
- * specified by @segnum.
- *
- * Return Value: On success, 0 is returned, and the segment usage and the
- * buffer head of the buffer on which the segment usage is located are stored
- * in the place pointed by @sup and @bhp, respectively. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid segment usage number.
*/
-int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum,
- struct nilfs_segment_usage **sup,
- struct buffer_head **bhp)
+int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
- struct nilfs_segment_usage *su;
- void *kaddr;
int ret;
- /* segnum is 0 origin */
- if (segnum >= nilfs_sufile_get_nsegments(sufile))
- return -EINVAL;
- down_write(&NILFS_MDT(sufile)->mi_sem);
- ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh);
- if (ret < 0)
- goto out_sem;
- kaddr = kmap(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
- if (nilfs_segment_usage_error(su)) {
- kunmap(bh->b_page);
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
+ if (!ret) {
+ nilfs_mdt_mark_buffer_dirty(bh);
+ nilfs_mdt_mark_dirty(sufile);
brelse(bh);
- ret = -EINVAL;
- goto out_sem;
}
-
- if (sup != NULL)
- *sup = su;
- *bhp = bh;
-
- out_sem:
- up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
/**
- * nilfs_sufile_put_segment_usage - put a segment usage
+ * nilfs_sufile_set_segment_usage - set usage of a segment
* @sufile: inode of segment usage file
* @segnum: segment number
- * @bh: buffer head
- *
- * Description: nilfs_sufile_put_segment_usage() releases the segment usage
- * specified by @segnum. @bh must be the buffer head which have been returned
- * by a previous call to nilfs_sufile_get_segment_usage() with @segnum.
+ * @nblocks: number of live blocks in the segment
+ * @modtime: modification time (option)
*/
-void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum,
- struct buffer_head *bh)
+int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
+ unsigned long nblocks, time_t modtime)
{
- kunmap(bh->b_page);
+ struct buffer_head *bh;
+ struct nilfs_segment_usage *su;
+ void *kaddr;
+ int ret;
+
+ down_write(&NILFS_MDT(sufile)->mi_sem);
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
+ if (ret < 0)
+ goto out_sem;
+
+ kaddr = kmap_atomic(bh->b_page, KM_USER0);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ WARN_ON(nilfs_segment_usage_error(su));
+ if (modtime)
+ su->su_lastmod = cpu_to_le64(modtime);
+ su->su_nblocks = cpu_to_le32(nblocks);
+ kunmap_atomic(kaddr, KM_USER0);
+
+ nilfs_mdt_mark_buffer_dirty(bh);
+ nilfs_mdt_mark_dirty(sufile);
brelse(bh);
+
+ out_sem:
+ up_write(&NILFS_MDT(sufile)->mi_sem);
+ return ret;
}
/**
@@ -515,7 +516,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
goto out_sem;
kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
- header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
+ header = kaddr + bh_offset(header_bh);
sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs);
@@ -532,33 +533,6 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
return ret;
}
-/**
- * nilfs_sufile_get_ncleansegs - get the number of clean segments
- * @sufile: inode of segment usage file
- * @nsegsp: pointer to the number of clean segments
- *
- * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean
- * segments.
- *
- * Return Value: On success, 0 is returned and the number of clean segments is
- * stored in the place pointed by @nsegsp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- */
-int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp)
-{
- struct nilfs_sustat sustat;
- int ret;
-
- ret = nilfs_sufile_get_stat(sufile, &sustat);
- if (ret == 0)
- *nsegsp = sustat.ss_ncleansegs;
- return ret;
-}
-
void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
struct buffer_head *header_bh,
struct buffer_head *su_bh)
@@ -577,8 +551,10 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
nilfs_segment_usage_set_error(su);
kunmap_atomic(kaddr, KM_USER0);
- if (suclean)
+ if (suclean) {
nilfs_sufile_mod_counter(header_bh, -1, 0);
+ NILFS_SUI(sufile)->ncleansegs--;
+ }
nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
@@ -657,3 +633,48 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
up_read(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
+
+/**
+ * nilfs_sufile_read - read sufile inode
+ * @sufile: sufile inode
+ * @raw_inode: on-disk sufile inode
+ */
+int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode)
+{
+ struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
+ struct buffer_head *header_bh;
+ struct nilfs_sufile_header *header;
+ void *kaddr;
+ int ret;
+
+ ret = nilfs_read_inode_common(sufile, raw_inode);
+ if (ret < 0)
+ return ret;
+
+ ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+ if (!ret) {
+ kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+ header = kaddr + bh_offset(header_bh);
+ sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
+ kunmap_atomic(kaddr, KM_USER0);
+ brelse(header_bh);
+ }
+ return ret;
+}
+
+/**
+ * nilfs_sufile_new - create sufile
+ * @nilfs: nilfs object
+ * @susize: size of a segment usage entry
+ */
+struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize)
+{
+ struct inode *sufile;
+
+ sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO,
+ sizeof(struct nilfs_sufile_info));
+ if (sufile)
+ nilfs_mdt_set_entry_size(sufile, susize,
+ sizeof(struct nilfs_sufile_header));
+ return sufile;
+}
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 0e99e5c0bd0..15163b8aff7 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -34,14 +34,13 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments;
}
+unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
+
int nilfs_sufile_alloc(struct inode *, __u64 *);
-int nilfs_sufile_get_segment_usage(struct inode *, __u64,
- struct nilfs_segment_usage **,
- struct buffer_head **);
-void nilfs_sufile_put_segment_usage(struct inode *, __u64,
- struct buffer_head *);
+int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
+int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
+ unsigned long nblocks, time_t modtime);
int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
-int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned,
size_t);
@@ -62,6 +61,9 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
+int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode);
+struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize);
+
/**
* nilfs_sufile_scrap - make a segment garbage
* @sufile: inode of segment usage file
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 644e66727dd..5403b3ef3a4 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -363,14 +363,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
list_add(&sbi->s_list, &nilfs->ns_supers);
up_write(&nilfs->ns_super_sem);
- sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO);
+ sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size);
if (!sbi->s_ifile)
return -ENOMEM;
- err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size);
- if (unlikely(err))
- goto failed;
-
down_read(&nilfs->ns_segctor_sem);
err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
&bh_cp);
@@ -411,7 +407,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
{
struct the_nilfs *nilfs = sbi->s_nilfs;
- nilfs_mdt_clear(sbi->s_ifile);
nilfs_mdt_destroy(sbi->s_ifile);
sbi->s_ifile = NULL;
down_write(&nilfs->ns_super_sem);
@@ -419,22 +414,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
up_write(&nilfs->ns_super_sem);
}
-static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
-{
- struct the_nilfs *nilfs = sbi->s_nilfs;
- int err = 0;
-
- down_write(&nilfs->ns_sem);
- if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
- nilfs->ns_mount_state |= NILFS_VALID_FS;
- err = nilfs_commit_super(sbi, 1);
- if (likely(!err))
- printk(KERN_INFO "NILFS: recovery complete.\n");
- }
- up_write(&nilfs->ns_sem);
- return err;
-}
-
static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
@@ -490,7 +469,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
struct nilfs_sb_info *sbi = NILFS_SB(sb);
if (!nilfs_test_opt(sbi, BARRIER))
- seq_printf(seq, ",barrier=off");
+ seq_printf(seq, ",nobarrier");
if (nilfs_test_opt(sbi, SNAPSHOT))
seq_printf(seq, ",cp=%llu",
(unsigned long long int)sbi->s_snapshot_cno);
@@ -500,6 +479,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_printf(seq, ",errors=panic");
if (nilfs_test_opt(sbi, STRICT_ORDER))
seq_printf(seq, ",order=strict");
+ if (nilfs_test_opt(sbi, NORECOVERY))
+ seq_printf(seq, ",norecovery");
return 0;
}
@@ -568,7 +549,7 @@ static const struct export_operations nilfs_export_ops = {
enum {
Opt_err_cont, Opt_err_panic, Opt_err_ro,
- Opt_barrier, Opt_snapshot, Opt_order,
+ Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
Opt_err,
};
@@ -576,25 +557,13 @@ static match_table_t tokens = {
{Opt_err_cont, "errors=continue"},
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
- {Opt_barrier, "barrier=%s"},
+ {Opt_nobarrier, "nobarrier"},
{Opt_snapshot, "cp=%u"},
{Opt_order, "order=%s"},
+ {Opt_norecovery, "norecovery"},
{Opt_err, NULL}
};
-static int match_bool(substring_t *s, int *result)
-{
- int len = s->to - s->from;
-
- if (strncmp(s->from, "on", len) == 0)
- *result = 1;
- else if (strncmp(s->from, "off", len) == 0)
- *result = 0;
- else
- return 1;
- return 0;
-}
-
static int parse_options(char *options, struct super_block *sb)
{
struct nilfs_sb_info *sbi = NILFS_SB(sb);
@@ -612,13 +581,8 @@ static int parse_options(char *options, struct super_block *sb)
token = match_token(p, tokens, args);
switch (token) {
- case Opt_barrier:
- if (match_bool(&args[0], &option))
- return 0;
- if (option)
- nilfs_set_opt(sbi, BARRIER);
- else
- nilfs_clear_opt(sbi, BARRIER);
+ case Opt_nobarrier:
+ nilfs_clear_opt(sbi, BARRIER);
break;
case Opt_order:
if (strcmp(args[0].from, "relaxed") == 0)
@@ -647,6 +611,9 @@ static int parse_options(char *options, struct super_block *sb)
sbi->s_snapshot_cno = option;
nilfs_set_opt(sbi, SNAPSHOT);
break;
+ case Opt_norecovery:
+ nilfs_set_opt(sbi, NORECOVERY);
+ break;
default:
printk(KERN_ERR
"NILFS: Unrecognized mount option \"%s\"\n", p);
@@ -672,9 +639,7 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi)
int mnt_count = le16_to_cpu(sbp->s_mnt_count);
/* nilfs->sem must be locked by the caller. */
- if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
- printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
- } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
+ if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
printk(KERN_WARNING
"NILFS warning: mounting fs with errors\n");
#if 0
@@ -782,11 +747,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
sb->s_root = NULL;
sb->s_time_gran = 1;
- if (!nilfs_loaded(nilfs)) {
- err = load_nilfs(nilfs, sbi);
- if (err)
- goto failed_sbi;
- }
+ err = load_nilfs(nilfs, sbi);
+ if (err)
+ goto failed_sbi;
+
cno = nilfs_last_cno(nilfs);
if (sb->s_flags & MS_RDONLY) {
@@ -854,12 +818,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
up_write(&nilfs->ns_sem);
}
- err = nilfs_mark_recovery_complete(sbi);
- if (unlikely(err)) {
- printk(KERN_ERR "NILFS: recovery failed.\n");
- goto failed_root;
- }
-
down_write(&nilfs->ns_super_sem);
if (!nilfs_test_opt(sbi, SNAPSHOT))
nilfs->ns_current = sbi;
@@ -867,10 +825,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
return 0;
- failed_root:
- dput(sb->s_root);
- sb->s_root = NULL;
-
failed_segctor:
nilfs_detach_segment_constructor(sbi);
@@ -915,6 +869,14 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ if (!nilfs_valid_fs(nilfs)) {
+ printk(KERN_WARNING "NILFS (device %s): couldn't "
+ "remount because the filesystem is in an "
+ "incomplete recovery state.\n", sb->s_id);
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
goto out;
if (*flags & MS_RDONLY) {
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index ad391a8c3e7..6241e1722ef 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -146,13 +146,9 @@ void put_nilfs(struct the_nilfs *nilfs)
might_sleep();
if (nilfs_loaded(nilfs)) {
- nilfs_mdt_clear(nilfs->ns_sufile);
nilfs_mdt_destroy(nilfs->ns_sufile);
- nilfs_mdt_clear(nilfs->ns_cpfile);
nilfs_mdt_destroy(nilfs->ns_cpfile);
- nilfs_mdt_clear(nilfs->ns_dat);
nilfs_mdt_destroy(nilfs->ns_dat);
- /* XXX: how and when to clear nilfs->ns_gc_dat? */
nilfs_mdt_destroy(nilfs->ns_gc_dat);
}
if (nilfs_init(nilfs)) {
@@ -166,7 +162,6 @@ void put_nilfs(struct the_nilfs *nilfs)
static int nilfs_load_super_root(struct the_nilfs *nilfs,
struct nilfs_sb_info *sbi, sector_t sr_block)
{
- static struct lock_class_key dat_lock_key;
struct buffer_head *bh_sr;
struct nilfs_super_root *raw_sr;
struct nilfs_super_block **sbp = nilfs->ns_sbp;
@@ -187,51 +182,36 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
inode_size = nilfs->ns_inode_size;
err = -ENOMEM;
- nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
+ nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size);
if (unlikely(!nilfs->ns_dat))
goto failed;
- nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
+ nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size);
if (unlikely(!nilfs->ns_gc_dat))
goto failed_dat;
- nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO);
+ nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size);
if (unlikely(!nilfs->ns_cpfile))
goto failed_gc_dat;
- nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO);
+ nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size);
if (unlikely(!nilfs->ns_sufile))
goto failed_cpfile;
- err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size);
- if (unlikely(err))
- goto failed_sufile;
-
- err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size);
- if (unlikely(err))
- goto failed_sufile;
-
- lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key);
- lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key);
-
nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
- nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
- sizeof(struct nilfs_cpfile_header));
- nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size,
- sizeof(struct nilfs_sufile_header));
- err = nilfs_mdt_read_inode_direct(
- nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size));
+ err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data +
+ NILFS_SR_DAT_OFFSET(inode_size));
if (unlikely(err))
goto failed_sufile;
- err = nilfs_mdt_read_inode_direct(
- nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size));
+ err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data +
+ NILFS_SR_CPFILE_OFFSET(inode_size));
if (unlikely(err))
goto failed_sufile;
- err = nilfs_mdt_read_inode_direct(
- nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size));
+ err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data +
+ NILFS_SR_SUFILE_OFFSET(inode_size));
if (unlikely(err))
goto failed_sufile;
@@ -281,29 +261,30 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
struct nilfs_recovery_info ri;
unsigned int s_flags = sbi->s_super->s_flags;
int really_read_only = bdev_read_only(nilfs->ns_bdev);
- unsigned valid_fs;
- int err = 0;
-
- nilfs_init_recovery_info(&ri);
+ int valid_fs = nilfs_valid_fs(nilfs);
+ int err;
- down_write(&nilfs->ns_sem);
- valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS);
- up_write(&nilfs->ns_sem);
+ if (nilfs_loaded(nilfs)) {
+ if (valid_fs ||
+ ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY)))
+ return 0;
+ printk(KERN_ERR "NILFS: the filesystem is in an incomplete "
+ "recovery state.\n");
+ return -EINVAL;
+ }
- if (!valid_fs && (s_flags & MS_RDONLY)) {
- printk(KERN_INFO "NILFS: INFO: recovery "
- "required for readonly filesystem.\n");
- if (really_read_only) {
- printk(KERN_ERR "NILFS: write access "
- "unavailable, cannot proceed.\n");
- err = -EROFS;
- goto failed;
+ if (!valid_fs) {
+ printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
+ if (s_flags & MS_RDONLY) {
+ printk(KERN_INFO "NILFS: INFO: recovery "
+ "required for readonly filesystem.\n");
+ printk(KERN_INFO "NILFS: write access will "
+ "be enabled during recovery.\n");
}
- printk(KERN_INFO "NILFS: write access will "
- "be enabled during recovery.\n");
- sbi->s_super->s_flags &= ~MS_RDONLY;
}
+ nilfs_init_recovery_info(&ri);
+
err = nilfs_search_super_root(nilfs, sbi, &ri);
if (unlikely(err)) {
printk(KERN_ERR "NILFS: error searching super root.\n");
@@ -316,19 +297,56 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
goto failed;
}
- if (!valid_fs) {
- err = nilfs_recover_logical_segments(nilfs, sbi, &ri);
- if (unlikely(err)) {
- nilfs_mdt_destroy(nilfs->ns_cpfile);
- nilfs_mdt_destroy(nilfs->ns_sufile);
- nilfs_mdt_destroy(nilfs->ns_dat);
- goto failed;
+ if (valid_fs)
+ goto skip_recovery;
+
+ if (s_flags & MS_RDONLY) {
+ if (nilfs_test_opt(sbi, NORECOVERY)) {
+ printk(KERN_INFO "NILFS: norecovery option specified. "
+ "skipping roll-forward recovery\n");
+ goto skip_recovery;
}
- if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED)
- sbi->s_super->s_dirt = 1;
+ if (really_read_only) {
+ printk(KERN_ERR "NILFS: write access "
+ "unavailable, cannot proceed.\n");
+ err = -EROFS;
+ goto failed_unload;
+ }
+ sbi->s_super->s_flags &= ~MS_RDONLY;
+ } else if (nilfs_test_opt(sbi, NORECOVERY)) {
+ printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
+ "option was specified for a read/write mount\n");
+ err = -EINVAL;
+ goto failed_unload;
}
+ err = nilfs_recover_logical_segments(nilfs, sbi, &ri);
+ if (err)
+ goto failed_unload;
+
+ down_write(&nilfs->ns_sem);
+ nilfs->ns_mount_state |= NILFS_VALID_FS;
+ nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
+ err = nilfs_commit_super(sbi, 1);
+ up_write(&nilfs->ns_sem);
+
+ if (err) {
+ printk(KERN_ERR "NILFS: failed to update super block. "
+ "recovery unfinished.\n");
+ goto failed_unload;
+ }
+ printk(KERN_INFO "NILFS: recovery complete.\n");
+
+ skip_recovery:
set_nilfs_loaded(nilfs);
+ nilfs_clear_recovery_info(&ri);
+ sbi->s_super->s_flags = s_flags;
+ return 0;
+
+ failed_unload:
+ nilfs_mdt_destroy(nilfs->ns_cpfile);
+ nilfs_mdt_destroy(nilfs->ns_sufile);
+ nilfs_mdt_destroy(nilfs->ns_dat);
failed:
nilfs_clear_recovery_info(&ri);
@@ -632,30 +650,23 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
{
struct inode *dat = nilfs_dat_inode(nilfs);
unsigned long ncleansegs;
- int err;
down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
- err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs);
+ ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
- if (likely(!err))
- *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
- return err;
+ *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
+ return 0;
}
int nilfs_near_disk_full(struct the_nilfs *nilfs)
{
- struct inode *sufile = nilfs->ns_sufile;
unsigned long ncleansegs, nincsegs;
- int ret;
- ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs);
- if (likely(!ret)) {
- nincsegs = atomic_read(&nilfs->ns_ndirtyblks) /
- nilfs->ns_blocks_per_segment + 1;
- if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs)
- ret++;
- }
- return ret;
+ ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+ nincsegs = atomic_read(&nilfs->ns_ndirtyblks) /
+ nilfs->ns_blocks_per_segment + 1;
+
+ return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs;
}
/**
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 20abd55881e..589786e3346 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -258,6 +258,16 @@ static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
kfree(sbi);
}
+/*
+ * nilfs_valid_fs - sample the NILFS_VALID_FS bit of the mount state
+ * @nilfs: nilfs object whose ns_mount_state is tested
+ *
+ * ns_mount_state is read while ns_sem is held for reading.  Returns the
+ * masked bit: non-zero when NILFS_VALID_FS is set, 0 otherwise.
+ */
+static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
+{
+	unsigned state_bit;
+
+	down_read(&nilfs->ns_sem);
+	state_bit = nilfs->ns_mount_state & NILFS_VALID_FS;
+	up_read(&nilfs->ns_sem);
+
+	return state_bit;
+}
+
static inline void
nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
sector_t *seg_start, sector_t *seg_end)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index de059f49058..3d30a1c974a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2006,7 +2006,7 @@ out_dio:
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
- if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
+ if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) {
ret = filemap_fdatawrite_range(file->f_mapping, pos,
pos + count - 1);
if (ret < 0)
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index e5df9d170b0..123bc520a2c 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -17,10 +17,6 @@
#include "ocfs2.h"
-/* Common stuff */
-/* id number of quota format */
-#define QFMT_OCFS2 3
-
/*
* In-memory structures
*/
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 1a2c50a759f..21f9e71223c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1325,7 +1325,7 @@ out:
return status;
}
-static struct quota_format_ops ocfs2_format_ops = {
+static const struct quota_format_ops ocfs2_format_ops = {
.check_quota_file = ocfs2_local_check_quota_file,
.read_file_info = ocfs2_local_read_info,
.write_file_info = ocfs2_global_write_info,
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 353e78a9ebe..efc02ebb8c7 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -46,12 +46,14 @@ config QFMT_V1
format say Y here.
config QFMT_V2
- tristate "Quota format v2 support"
+ tristate "Quota format vfsv0 and vfsv1 support"
depends on QUOTA
select QUOTA_TREE
help
- This quota format allows using quotas with 32-bit UIDs/GIDs. If you
- need this functionality say Y here.
+ This config option enables kernel support for vfsv0 and vfsv1 quota
+ formats. Both these formats support 32-bit UIDs/GIDs and vfsv1 format
+ also supports 64-bit inode and block quota limits. If you need this
+ functionality say Y here.
config QUOTACTL
bool
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index eb5a755718f..cd6bb9a33c1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2164,7 +2164,9 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
struct dentry *dentry;
int error;
+ mutex_lock(&sb->s_root->d_inode->i_mutex);
dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
+ mutex_unlock(&sb->s_root->d_inode->i_mutex);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 0edcf42b177..2ae757e9c00 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -204,7 +204,7 @@ out:
return ret;
}
-static struct quota_format_ops v1_format_ops = {
+static const struct quota_format_ops v1_format_ops = {
.check_quota_file = v1_check_quota_file,
.read_file_info = v1_read_file_info,
.write_file_info = v1_write_file_info,
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index a5475fb1ae4..3dfc23e0213 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -23,14 +23,23 @@ MODULE_LICENSE("GPL");
#define __QUOTA_V2_PARANOIA
-static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
-static void v2_disk2memdqb(struct dquot *dquot, void *dp);
-static int v2_is_id(void *dp, struct dquot *dquot);
-
-static struct qtree_fmt_operations v2_qtree_ops = {
- .mem2disk_dqblk = v2_mem2diskdqb,
- .disk2mem_dqblk = v2_disk2memdqb,
- .is_id = v2_is_id,
+static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2r0_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2r0_is_id(void *dp, struct dquot *dquot);
+static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2r1_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2r1_is_id(void *dp, struct dquot *dquot);
+
+static struct qtree_fmt_operations v2r0_qtree_ops = {
+ .mem2disk_dqblk = v2r0_mem2diskdqb,
+ .disk2mem_dqblk = v2r0_disk2memdqb,
+ .is_id = v2r0_is_id,
+};
+
+static struct qtree_fmt_operations v2r1_qtree_ops = {
+ .mem2disk_dqblk = v2r1_mem2diskdqb,
+ .disk2mem_dqblk = v2r1_disk2memdqb,
+ .is_id = v2r1_is_id,
};
#define QUOTABLOCK_BITS 10
@@ -46,23 +55,33 @@ static inline qsize_t v2_qbtos(qsize_t blocks)
return blocks << QUOTABLOCK_BITS;
}
+/*
+ * Read the generic quota file header for quota type @type into @dqhead
+ * through the filesystem's ->quota_read() method (offset argument 0).
+ *
+ * Returns 1 when exactly sizeof(struct v2_disk_dqheader) bytes were read,
+ * 0 otherwise (a warning with the expected and actual sizes is logged).
+ */
+static int v2_read_header(struct super_block *sb, int type,
+			  struct v2_disk_dqheader *dqhead)
+{
+	ssize_t size;
+
+	size = sb->s_op->quota_read(sb, type, (char *)dqhead,
+				    sizeof(struct v2_disk_dqheader), 0);
+	if (size != sizeof(struct v2_disk_dqheader)) {
+		/* sizeof yields size_t (%zu); the read result is ssize_t (%zd) */
+		printk(KERN_WARNING "quota_v2: Failed header read:"
+			" expected=%zu got=%zd\n",
+			sizeof(struct v2_disk_dqheader), size);
+		return 0;
+	}
+	return 1;
+}
+
/* Check whether given file is really vfsv0 quotafile */
static int v2_check_quota_file(struct super_block *sb, int type)
{
struct v2_disk_dqheader dqhead;
- ssize_t size;
static const uint quota_magics[] = V2_INITQMAGICS;
static const uint quota_versions[] = V2_INITQVERSIONS;
- size = sb->s_op->quota_read(sb, type, (char *)&dqhead,
- sizeof(struct v2_disk_dqheader), 0);
- if (size != sizeof(struct v2_disk_dqheader)) {
- printk("quota_v2: failed read expected=%zd got=%zd\n",
- sizeof(struct v2_disk_dqheader), size);
+ if (!v2_read_header(sb, type, &dqhead))
return 0;
- }
if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
- le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
+ le32_to_cpu(dqhead.dqh_version) > quota_versions[type])
return 0;
return 1;
}
@@ -71,14 +90,20 @@ static int v2_check_quota_file(struct super_block *sb, int type)
static int v2_read_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
+ struct v2_disk_dqheader dqhead;
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct qtree_mem_dqinfo *qinfo;
ssize_t size;
+ unsigned int version;
+
+ if (!v2_read_header(sb, type, &dqhead))
+ return 0;
+ version = le32_to_cpu(dqhead.dqh_version);
size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
if (size != sizeof(struct v2_disk_dqinfo)) {
- printk(KERN_WARNING "Can't read info structure on device %s.\n",
+ printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n",
sb->s_id);
return -1;
}
@@ -89,9 +114,15 @@ static int v2_read_file_info(struct super_block *sb, int type)
return -1;
}
qinfo = info->dqi_priv;
- /* limits are stored as unsigned 32-bit data */
- info->dqi_maxblimit = 0xffffffff;
- info->dqi_maxilimit = 0xffffffff;
+ if (version == 0) {
+ /* limits are stored as unsigned 32-bit data */
+ info->dqi_maxblimit = 0xffffffff;
+ info->dqi_maxilimit = 0xffffffff;
+ } else {
+ /* used space is stored as unsigned 64-bit value */
+ info->dqi_maxblimit = 0xffffffffffffffff; /* 2^64-1 */
+ info->dqi_maxilimit = 0xffffffffffffffff;
+ }
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
@@ -103,8 +134,13 @@ static int v2_read_file_info(struct super_block *sb, int type)
qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
qinfo->dqi_qtree_depth = qtree_depth(qinfo);
- qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
- qinfo->dqi_ops = &v2_qtree_ops;
+ if (version == 0) {
+ qinfo->dqi_entry_size = sizeof(struct v2r0_disk_dqblk);
+ qinfo->dqi_ops = &v2r0_qtree_ops;
+ } else {
+ qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
+ qinfo->dqi_ops = &v2r1_qtree_ops;
+ }
return 0;
}
@@ -135,9 +171,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
return 0;
}
-static void v2_disk2memdqb(struct dquot *dquot, void *dp)
+static void v2r0_disk2memdqb(struct dquot *dquot, void *dp)
{
- struct v2_disk_dqblk *d = dp, empty;
+ struct v2r0_disk_dqblk *d = dp, empty;
struct mem_dqblk *m = &dquot->dq_dqb;
m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
@@ -149,15 +185,15 @@ static void v2_disk2memdqb(struct dquot *dquot, void *dp)
m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
m->dqb_btime = le64_to_cpu(d->dqb_btime);
/* We need to escape back all-zero structure */
- memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+ memset(&empty, 0, sizeof(struct v2r0_disk_dqblk));
empty.dqb_itime = cpu_to_le64(1);
- if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
+ if (!memcmp(&empty, dp, sizeof(struct v2r0_disk_dqblk)))
m->dqb_itime = 0;
}
-static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
+static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
{
- struct v2_disk_dqblk *d = dp;
+ struct v2r0_disk_dqblk *d = dp;
struct mem_dqblk *m = &dquot->dq_dqb;
struct qtree_mem_dqinfo *info =
sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
@@ -175,9 +211,60 @@ static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
d->dqb_itime = cpu_to_le64(1);
}
-static int v2_is_id(void *dp, struct dquot *dquot)
+/*
+ * Tell whether the v2r0 on-disk entry at @dp belongs to @dquot.
+ *
+ * Returns 0 when qtree_entry_unused() reports the entry as unused;
+ * otherwise returns 1 if the stored (little-endian) id equals
+ * dquot->dq_id and 0 if it does not.
+ */
+static int v2r0_is_id(void *dp, struct dquot *dquot)
+{
+	struct v2r0_disk_dqblk *entry = dp;
+	struct mem_dqinfo *dqinfo = sb_dqinfo(dquot->dq_sb, dquot->dq_type);
+	struct qtree_mem_dqinfo *tree = dqinfo->dqi_priv;
+
+	return !qtree_entry_unused(tree, dp) &&
+	       le32_to_cpu(entry->dqb_id) == dquot->dq_id;
+}
+
+/*
+ * Fill dquot->dq_dqb from the v2r1 on-disk entry at @dp.
+ *
+ * All on-disk fields are little-endian 64-bit values.  Block limits are
+ * passed through v2_qbtos(); current space and the other fields are
+ * copied as stored.
+ */
+static void v2r1_disk2memdqb(struct dquot *dquot, void *dp)
+{
+	struct mem_dqblk *mem = &dquot->dq_dqb;
+	struct v2r1_disk_dqblk *disk = dp;
+	struct v2r1_disk_dqblk blank;
+
+	/* block side */
+	mem->dqb_bhardlimit = v2_qbtos(le64_to_cpu(disk->dqb_bhardlimit));
+	mem->dqb_bsoftlimit = v2_qbtos(le64_to_cpu(disk->dqb_bsoftlimit));
+	mem->dqb_curspace = le64_to_cpu(disk->dqb_curspace);
+	mem->dqb_btime = le64_to_cpu(disk->dqb_btime);
+
+	/* inode side */
+	mem->dqb_ihardlimit = le64_to_cpu(disk->dqb_ihardlimit);
+	mem->dqb_isoftlimit = le64_to_cpu(disk->dqb_isoftlimit);
+	mem->dqb_curinodes = le64_to_cpu(disk->dqb_curinodes);
+	mem->dqb_itime = le64_to_cpu(disk->dqb_itime);
+
+	/*
+	 * An entry that is zero everywhere except for dqb_itime == 1 is the
+	 * escaped form of an all-zero structure: undo the escape.
+	 */
+	memset(&blank, 0, sizeof(blank));
+	blank.dqb_itime = cpu_to_le64(1);
+	if (memcmp(&blank, dp, sizeof(blank)) == 0)
+		mem->dqb_itime = 0;
+}
+
+/*
+ * Store dquot->dq_dqb into the v2r1 on-disk entry at @dp.
+ *
+ * Fields are written little-endian; block limits go through v2_stoqb().
+ * Every member of struct v2r1_disk_dqblk is assigned, including dqb_pad,
+ * so the entry handed to qtree_entry_unused() holds no stale bytes from
+ * the caller's buffer.
+ * NOTE(review): whether callers pre-zero @dp is not visible here; the
+ * explicit clear makes this function independent of that.
+ */
+static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
+{
+	struct v2r1_disk_dqblk *d = dp;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+	struct qtree_mem_dqinfo *info =
+		sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+	d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+	d->dqb_itime = cpu_to_le64(m->dqb_itime);
+	d->dqb_bhardlimit = cpu_to_le64(v2_stoqb(m->dqb_bhardlimit));
+	d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit));
+	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+	d->dqb_btime = cpu_to_le64(m->dqb_btime);
+	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	/* padding is part of the stored entry: give it a defined value */
+	d->dqb_pad = 0;
+	/*
+	 * An entry the tree code would treat as unused is escaped by setting
+	 * itime to 1 (the disk-to-memory conversion maps that back to 0).
+	 */
+	if (qtree_entry_unused(info, dp))
+		d->dqb_itime = cpu_to_le64(1);
+}
+
+static int v2r1_is_id(void *dp, struct dquot *dquot)
{
- struct v2_disk_dqblk *d = dp;
+ struct v2r1_disk_dqblk *d = dp;
struct qtree_mem_dqinfo *info =
sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
@@ -207,7 +294,7 @@ static int v2_free_file_info(struct super_block *sb, int type)
return 0;
}
-static struct quota_format_ops v2_format_ops = {
+static const struct quota_format_ops v2_format_ops = {
.check_quota_file = v2_check_quota_file,
.read_file_info = v2_read_file_info,
.write_file_info = v2_write_file_info,
@@ -217,20 +304,32 @@ static struct quota_format_ops v2_format_ops = {
.release_dqblk = v2_release_dquot,
};
-static struct quota_format_type v2_quota_format = {
+static struct quota_format_type v2r0_quota_format = {
.qf_fmt_id = QFMT_VFS_V0,
.qf_ops = &v2_format_ops,
.qf_owner = THIS_MODULE
};
+/*
+ * Format descriptor for QFMT_VFS_V1.  It points at the same v2_format_ops
+ * table as the QFMT_VFS_V0 descriptor.
+ */
+static struct quota_format_type v2r1_quota_format = {
+ .qf_fmt_id = QFMT_VFS_V1,
+ .qf_ops = &v2_format_ops,
+ .qf_owner = THIS_MODULE
+};
+
static int __init init_v2_quota_format(void)
{
- return register_quota_format(&v2_quota_format);
+ int ret;
+
+ ret = register_quota_format(&v2r0_quota_format);
+ if (ret)
+ return ret;
+ return register_quota_format(&v2r1_quota_format);
}
static void __exit exit_v2_quota_format(void)
{
- unregister_quota_format(&v2_quota_format);
+ unregister_quota_format(&v2r0_quota_format);
+ unregister_quota_format(&v2r1_quota_format);
}
module_init(init_v2_quota_format);
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index 530fe580685..f1966b42c2f 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -17,8 +17,8 @@
}
#define V2_INITQVERSIONS {\
- 0, /* USRQUOTA */\
- 0 /* GRPQUOTA */\
+ 1, /* USRQUOTA */\
+ 1 /* GRPQUOTA */\
}
/* First generic header */
@@ -32,7 +32,7 @@ struct v2_disk_dqheader {
* (as it appears on disk) - the file is a radix tree whose leaves point
* to blocks of these structures.
*/
-struct v2_disk_dqblk {
+struct v2r0_disk_dqblk {
__le32 dqb_id; /* id this quota applies to */
__le32 dqb_ihardlimit; /* absolute limit on allocated inodes */
__le32 dqb_isoftlimit; /* preferred inode limit */
@@ -44,6 +44,19 @@ struct v2_disk_dqblk {
__le64 dqb_itime; /* time limit for excessive inode use */
};
+/*
+ * On-disk quota entry with 64-bit inode limits and inode count, where
+ * struct v2r0_disk_dqblk stores its inode limits as __le32.  Used by the
+ * v2r1_* conversion helpers in quota_v2.c.
+ */
+struct v2r1_disk_dqblk {
+ __le32 dqb_id; /* id this quota applies to */
+ __le32 dqb_pad; /* fills the 32 bits after dqb_id, before the 64-bit fields */
+ __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
+ __le64 dqb_isoftlimit; /* preferred inode limit */
+ __le64 dqb_curinodes; /* current # allocated inodes */
+ __le64 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
+ __le64 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+ __le64 dqb_curspace; /* current space occupied (in bytes) */
+ __le64 dqb_btime; /* time limit for excessive disk use */
+ __le64 dqb_itime; /* time limit for excessive inode use */
+};
+
/* Header with type and version specific information */
struct v2_disk_dqinfo {
__le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */
diff --git a/fs/sync.c b/fs/sync.c
index d104591b066..36752a68348 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -295,10 +295,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
*/
int generic_write_sync(struct file *file, loff_t pos, loff_t count)
{
- if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
+ if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
return 0;
return vfs_fsync_range(file, file->f_path.dentry, pos,
- pos + count - 1, 1);
+ pos + count - 1,
+ (file->f_flags & __O_SYNC) ? 0 : 1);
}
EXPORT_SYMBOL(generic_write_sync);
@@ -452,9 +453,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
ret = 0;
if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
- ret = wait_on_page_writeback_range(mapping,
- offset >> PAGE_CACHE_SHIFT,
- endbyte >> PAGE_CACHE_SHIFT);
+ ret = filemap_fdatawait_range(mapping, offset, endbyte);
if (ret < 0)
goto out;
}
@@ -467,9 +466,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
}
if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
- ret = wait_on_page_writeback_range(mapping,
- offset >> PAGE_CACHE_SHIFT,
- endbyte >> PAGE_CACHE_SHIFT);
+ ret = filemap_fdatawait_range(mapping, offset, endbyte);
}
out:
return ret;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e0201837d24..f05f2303a8b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -25,7 +25,6 @@
#include "sysfs.h"
DEFINE_MUTEX(sysfs_mutex);
-DEFINE_MUTEX(sysfs_rename_mutex);
DEFINE_SPINLOCK(sysfs_assoc_lock);
static DEFINE_SPINLOCK(sysfs_ino_lock);
@@ -85,46 +84,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
}
/**
- * sysfs_get_dentry - get dentry for the given sysfs_dirent
- * @sd: sysfs_dirent of interest
- *
- * Get dentry for @sd. Dentry is looked up if currently not
- * present. This function descends from the root looking up
- * dentry for each step.
- *
- * LOCKING:
- * mutex_lock(sysfs_rename_mutex)
- *
- * RETURNS:
- * Pointer to found dentry on success, ERR_PTR() value on error.
- */
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
-{
- struct dentry *dentry = dget(sysfs_sb->s_root);
-
- while (dentry->d_fsdata != sd) {
- struct sysfs_dirent *cur;
- struct dentry *parent;
-
- /* find the first ancestor which hasn't been looked up */
- cur = sd;
- while (cur->s_parent != dentry->d_fsdata)
- cur = cur->s_parent;
-
- /* look it up */
- parent = dentry;
- mutex_lock(&parent->d_inode->i_mutex);
- dentry = lookup_one_noperm(cur->s_name, parent);
- mutex_unlock(&parent->d_inode->i_mutex);
- dput(parent);
-
- if (IS_ERR(dentry))
- break;
- }
- return dentry;
-}
-
-/**
* sysfs_get_active - get an active reference to sysfs_dirent
* @sd: sysfs_dirent to get an active reference to
*
@@ -298,7 +257,61 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
goto repeat;
}
-static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
+/*
+ * ->d_delete hook of sysfs_dentry_ops.
+ *
+ * Returns 1 when the sysfs_dirent behind @dentry carries
+ * SYSFS_FLAG_REMOVED, 0 otherwise.
+ */
+static int sysfs_dentry_delete(struct dentry *dentry)
+{
+	struct sysfs_dirent *dirent = dentry->d_fsdata;
+
+	return (dirent->s_flags & SYSFS_FLAG_REMOVED) ? 1 : 0;
+}
+
+/*
+ * ->d_revalidate hook of sysfs_dentry_ops.
+ *
+ * Under sysfs_mutex, compares @dentry with its sysfs_dirent: the dirent
+ * must not be flagged removed, the parent dentry's d_fsdata must be the
+ * dirent's s_parent, and the dentry name must equal s_name.
+ *
+ * Returns 1 if all three checks pass, or if they fail but the dirent is
+ * a directory with submounts.  Otherwise the dentry is unhashed with
+ * d_drop() (after shrink_dcache_parent() for directories) and 0 is
+ * returned.  @nd is not used.
+ */
+static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct sysfs_dirent *sd = dentry->d_fsdata;
+ int is_dir;
+
+ mutex_lock(&sysfs_mutex);
+
+ /* The sysfs dirent has been deleted */
+ if (sd->s_flags & SYSFS_FLAG_REMOVED)
+ goto out_bad;
+
+ /* The sysfs dirent has been moved? */
+ if (dentry->d_parent->d_fsdata != sd->s_parent)
+ goto out_bad;
+
+ /* The sysfs dirent has been renamed */
+ if (strcmp(dentry->d_name.name, sd->s_name) != 0)
+ goto out_bad;
+
+ mutex_unlock(&sysfs_mutex);
+ /* out_valid is entered with sysfs_mutex already released (also from the submount case below) */
+out_valid:
+ return 1;
+out_bad:
+ /* Remove the dentry from the dcache hashes.
+ * If this is a deleted dentry we use d_drop instead of d_delete
+ * so sysfs doesn't need to cope with negative dentries.
+ *
+ * If this is a dentry that has simply been renamed we
+ * use d_drop to remove it from the dcache lookup on its
+ * old parent. If this dentry persists later when a lookup
+ * is performed at its new name the dentry will be readded
+ * to the dcache hashes.
+ */
+ /* sysfs_mutex is still held here: sample the type before dropping it */
+ is_dir = (sysfs_type(sd) == SYSFS_DIR);
+ mutex_unlock(&sysfs_mutex);
+ if (is_dir) {
+ /* If we have submounts we must allow the vfs caches
+ * to lie about the state of the filesystem to prevent
+ * leaks and other nasty things.
+ */
+ if (have_submounts(dentry))
+ goto out_valid;
+ shrink_dcache_parent(dentry);
+ }
+ d_drop(dentry);
+ return 0;
+}
+
+static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
{
struct sysfs_dirent * sd = dentry->d_fsdata;
@@ -307,7 +320,9 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
}
static const struct dentry_operations sysfs_dentry_ops = {
- .d_iput = sysfs_d_iput,
+ .d_revalidate = sysfs_dentry_revalidate,
+ .d_delete = sysfs_dentry_delete,
+ .d_iput = sysfs_dentry_iput,
};
struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
@@ -344,12 +359,6 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
return NULL;
}
-static int sysfs_ilookup_test(struct inode *inode, void *arg)
-{
- struct sysfs_dirent *sd = arg;
- return inode->i_ino == sd->s_ino;
-}
-
/**
* sysfs_addrm_start - prepare for sysfs_dirent add/remove
* @acxt: pointer to sysfs_addrm_cxt to be used
@@ -357,47 +366,20 @@ static int sysfs_ilookup_test(struct inode *inode, void *arg)
*
* This function is called when the caller is about to add or
* remove sysfs_dirent under @parent_sd. This function acquires
- * sysfs_mutex, grabs inode for @parent_sd if available and lock
- * i_mutex of it. @acxt is used to keep and pass context to
+ * sysfs_mutex. @acxt is used to keep and pass context to
* other addrm functions.
*
* LOCKING:
* Kernel thread context (may sleep). sysfs_mutex is locked on
- * return. i_mutex of parent inode is locked on return if
- * available.
+ * return.
*/
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
struct sysfs_dirent *parent_sd)
{
- struct inode *inode;
-
memset(acxt, 0, sizeof(*acxt));
acxt->parent_sd = parent_sd;
- /* Lookup parent inode. inode initialization is protected by
- * sysfs_mutex, so inode existence can be determined by
- * looking up inode while holding sysfs_mutex.
- */
mutex_lock(&sysfs_mutex);
-
- inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
- parent_sd);
- if (inode) {
- WARN_ON(inode->i_state & I_NEW);
-
- /* parent inode available */
- acxt->parent_inode = inode;
-
- /* sysfs_mutex is below i_mutex in lock hierarchy.
- * First, trylock i_mutex. If fails, unlock
- * sysfs_mutex and lock them in order.
- */
- if (!mutex_trylock(&inode->i_mutex)) {
- mutex_unlock(&sysfs_mutex);
- mutex_lock(&inode->i_mutex);
- mutex_lock(&sysfs_mutex);
- }
- }
}
/**
@@ -422,18 +404,22 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
*/
int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
+ struct sysfs_inode_attrs *ps_iattr;
+
if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
return -EEXIST;
sd->s_parent = sysfs_get(acxt->parent_sd);
- if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
- inc_nlink(acxt->parent_inode);
-
- acxt->cnt++;
-
sysfs_link_sibling(sd);
+ /* Update timestamps on the parent */
+ ps_iattr = acxt->parent_sd->s_iattr;
+ if (ps_iattr) {
+ struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
+ ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
+ }
+
return 0;
}
@@ -512,70 +498,22 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
*/
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
+ struct sysfs_inode_attrs *ps_iattr;
+
BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);
sysfs_unlink_sibling(sd);
+ /* Update timestamps on the parent */
+ ps_iattr = acxt->parent_sd->s_iattr;
+ if (ps_iattr) {
+ struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
+ ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
+ }
+
sd->s_flags |= SYSFS_FLAG_REMOVED;
sd->s_sibling = acxt->removed;
acxt->removed = sd;
-
- if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
- drop_nlink(acxt->parent_inode);
-
- acxt->cnt++;
-}
-
-/**
- * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
- * @sd: target sysfs_dirent
- *
- * Drop dentry for @sd. @sd must have been unlinked from its
- * parent on entry to this function such that it can't be looked
- * up anymore.
- */
-static void sysfs_drop_dentry(struct sysfs_dirent *sd)
-{
- struct inode *inode;
- struct dentry *dentry;
-
- inode = ilookup(sysfs_sb, sd->s_ino);
- if (!inode)
- return;
-
- /* Drop any existing dentries associated with sd.
- *
- * For the dentry to be properly freed we need to grab a
- * reference to the dentry under the dcache lock, unhash it,
- * and then put it. The playing with the dentry count allows
- * dput to immediately free the dentry if it is not in use.
- */
-repeat:
- spin_lock(&dcache_lock);
- list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
- if (d_unhashed(dentry))
- continue;
- dget_locked(dentry);
- spin_lock(&dentry->d_lock);
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
- dput(dentry);
- goto repeat;
- }
- spin_unlock(&dcache_lock);
-
- /* adjust nlink and update timestamp */
- mutex_lock(&inode->i_mutex);
-
- inode->i_ctime = CURRENT_TIME;
- drop_nlink(inode);
- if (sysfs_type(sd) == SYSFS_DIR)
- drop_nlink(inode);
-
- mutex_unlock(&inode->i_mutex);
-
- iput(inode);
}
/**
@@ -584,25 +522,15 @@ repeat:
*
* Finish up sysfs_dirent add/remove. Resources acquired by
* sysfs_addrm_start() are released and removed sysfs_dirents are
- * cleaned up. Timestamps on the parent inode are updated.
+ * cleaned up.
*
* LOCKING:
- * All mutexes acquired by sysfs_addrm_start() are released.
+ * sysfs_mutex is released.
*/
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
{
/* release resources acquired by sysfs_addrm_start() */
mutex_unlock(&sysfs_mutex);
- if (acxt->parent_inode) {
- struct inode *inode = acxt->parent_inode;
-
- /* if added/removed, update timestamps on the parent */
- if (acxt->cnt)
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-
- mutex_unlock(&inode->i_mutex);
- iput(inode);
- }
/* kill removed sysfs_dirents */
while (acxt->removed) {
@@ -611,7 +539,6 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
acxt->removed = sd->s_sibling;
sd->s_sibling = NULL;
- sysfs_drop_dentry(sd);
sysfs_deactivate(sd);
unmap_bin_file(sd);
sysfs_put(sd);
@@ -751,10 +678,15 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
}
/* instantiate and hash dentry */
- dentry->d_op = &sysfs_dentry_ops;
- dentry->d_fsdata = sysfs_get(sd);
- d_instantiate(dentry, inode);
- d_rehash(dentry);
+ ret = d_find_alias(inode);
+ if (!ret) {
+ dentry->d_op = &sysfs_dentry_ops;
+ dentry->d_fsdata = sysfs_get(sd);
+ d_add(dentry, inode);
+ } else {
+ d_move(ret, dentry);
+ iput(inode);
+ }
out_unlock:
mutex_unlock(&sysfs_mutex);
@@ -763,7 +695,9 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
const struct inode_operations sysfs_dir_inode_operations = {
.lookup = sysfs_lookup,
+ .permission = sysfs_permission,
.setattr = sysfs_setattr,
+ .getattr = sysfs_getattr,
.setxattr = sysfs_setxattr,
};
@@ -826,141 +760,65 @@ void sysfs_remove_dir(struct kobject * kobj)
__sysfs_remove_dir(sd);
}
-int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
+int sysfs_rename(struct sysfs_dirent *sd,
+ struct sysfs_dirent *new_parent_sd, const char *new_name)
{
- struct sysfs_dirent *sd = kobj->sd;
- struct dentry *parent = NULL;
- struct dentry *old_dentry = NULL, *new_dentry = NULL;
const char *dup_name = NULL;
int error;
- mutex_lock(&sysfs_rename_mutex);
+ mutex_lock(&sysfs_mutex);
error = 0;
- if (strcmp(sd->s_name, new_name) == 0)
+ if ((sd->s_parent == new_parent_sd) &&
+ (strcmp(sd->s_name, new_name) == 0))
goto out; /* nothing to rename */
- /* get the original dentry */
- old_dentry = sysfs_get_dentry(sd);
- if (IS_ERR(old_dentry)) {
- error = PTR_ERR(old_dentry);
- old_dentry = NULL;
- goto out;
- }
-
- parent = old_dentry->d_parent;
-
- /* lock parent and get dentry for new name */
- mutex_lock(&parent->d_inode->i_mutex);
- mutex_lock(&sysfs_mutex);
-
error = -EEXIST;
- if (sysfs_find_dirent(sd->s_parent, new_name))
- goto out_unlock;
-
- error = -ENOMEM;
- new_dentry = d_alloc_name(parent, new_name);
- if (!new_dentry)
- goto out_unlock;
+ if (sysfs_find_dirent(new_parent_sd, new_name))
+ goto out;
/* rename sysfs_dirent */
- error = -ENOMEM;
- new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
- if (!new_name)
- goto out_unlock;
-
- dup_name = sd->s_name;
- sd->s_name = new_name;
+ if (strcmp(sd->s_name, new_name) != 0) {
+ error = -ENOMEM;
+ new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
+ if (!new_name)
+ goto out;
+
+ dup_name = sd->s_name;
+ sd->s_name = new_name;
+ }
- /* rename */
- d_add(new_dentry, NULL);
- d_move(old_dentry, new_dentry);
+ /* Remove from old parent's list and insert into new parent's list. */
+ if (sd->s_parent != new_parent_sd) {
+ sysfs_unlink_sibling(sd);
+ sysfs_get(new_parent_sd);
+ sysfs_put(sd->s_parent);
+ sd->s_parent = new_parent_sd;
+ sysfs_link_sibling(sd);
+ }
error = 0;
- out_unlock:
+ out:
mutex_unlock(&sysfs_mutex);
- mutex_unlock(&parent->d_inode->i_mutex);
kfree(dup_name);
- dput(old_dentry);
- dput(new_dentry);
- out:
- mutex_unlock(&sysfs_rename_mutex);
return error;
}
+/*
+ * Rename the sysfs directory of @kobj to @new_name, keeping its current
+ * parent.  Thin wrapper that returns whatever sysfs_rename() returns.
+ */
+int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
+{
+	struct sysfs_dirent *sd = kobj->sd;
+
+	return sysfs_rename(sd, sd->s_parent, new_name);
+}
+
int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
{
struct sysfs_dirent *sd = kobj->sd;
struct sysfs_dirent *new_parent_sd;
- struct dentry *old_parent, *new_parent = NULL;
- struct dentry *old_dentry = NULL, *new_dentry = NULL;
- int error;
- mutex_lock(&sysfs_rename_mutex);
BUG_ON(!sd->s_parent);
- new_parent_sd = (new_parent_kobj && new_parent_kobj->sd) ?
+ new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
new_parent_kobj->sd : &sysfs_root;
- error = 0;
- if (sd->s_parent == new_parent_sd)
- goto out; /* nothing to move */
-
- /* get dentries */
- old_dentry = sysfs_get_dentry(sd);
- if (IS_ERR(old_dentry)) {
- error = PTR_ERR(old_dentry);
- old_dentry = NULL;
- goto out;
- }
- old_parent = old_dentry->d_parent;
-
- new_parent = sysfs_get_dentry(new_parent_sd);
- if (IS_ERR(new_parent)) {
- error = PTR_ERR(new_parent);
- new_parent = NULL;
- goto out;
- }
-
-again:
- mutex_lock(&old_parent->d_inode->i_mutex);
- if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
- mutex_unlock(&old_parent->d_inode->i_mutex);
- goto again;
- }
- mutex_lock(&sysfs_mutex);
-
- error = -EEXIST;
- if (sysfs_find_dirent(new_parent_sd, sd->s_name))
- goto out_unlock;
-
- error = -ENOMEM;
- new_dentry = d_alloc_name(new_parent, sd->s_name);
- if (!new_dentry)
- goto out_unlock;
-
- error = 0;
- d_add(new_dentry, NULL);
- d_move(old_dentry, new_dentry);
-
- /* Remove from old parent's list and insert into new parent's list. */
- sysfs_unlink_sibling(sd);
- sysfs_get(new_parent_sd);
- drop_nlink(old_parent->d_inode);
- sysfs_put(sd->s_parent);
- sd->s_parent = new_parent_sd;
- inc_nlink(new_parent->d_inode);
- sysfs_link_sibling(sd);
-
- out_unlock:
- mutex_unlock(&sysfs_mutex);
- mutex_unlock(&new_parent->d_inode->i_mutex);
- mutex_unlock(&old_parent->d_inode->i_mutex);
- out:
- dput(new_parent);
- dput(old_dentry);
- dput(new_dentry);
- mutex_unlock(&sysfs_rename_mutex);
- return error;
+ return sysfs_rename(sd, new_parent_sd, sd->s_name);
}
/* Relationship between s_mode and the DT_xxx types */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f5ea4680f15..dc30d9e3168 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -579,46 +579,23 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
*/
int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
{
- struct sysfs_dirent *victim_sd = NULL;
- struct dentry *victim = NULL;
- struct inode * inode;
+ struct sysfs_dirent *sd;
struct iattr newattrs;
int rc;
- rc = -ENOENT;
- victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
- if (!victim_sd)
- goto out;
+ mutex_lock(&sysfs_mutex);
- mutex_lock(&sysfs_rename_mutex);
- victim = sysfs_get_dentry(victim_sd);
- mutex_unlock(&sysfs_rename_mutex);
- if (IS_ERR(victim)) {
- rc = PTR_ERR(victim);
- victim = NULL;
+ rc = -ENOENT;
+ sd = sysfs_find_dirent(kobj->sd, attr->name);
+ if (!sd)
goto out;
- }
-
- inode = victim->d_inode;
-
- mutex_lock(&inode->i_mutex);
- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- newattrs.ia_ctime = current_fs_time(inode->i_sb);
- rc = sysfs_setattr(victim, &newattrs);
+ newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
+ newattrs.ia_valid = ATTR_MODE;
+ rc = sysfs_sd_setattr(sd, &newattrs);
- if (rc == 0) {
- fsnotify_change(victim, newattrs.ia_valid);
- mutex_lock(&sysfs_mutex);
- victim_sd->s_mode = newattrs.ia_mode;
- mutex_unlock(&sysfs_mutex);
- }
-
- mutex_unlock(&inode->i_mutex);
out:
- dput(victim);
- sysfs_put(victim_sd);
+ mutex_unlock(&sysfs_mutex);
return rc;
}
EXPORT_SYMBOL_GPL(sysfs_chmod_file);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e28cecf179f..220b758523a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -37,7 +37,9 @@ static struct backing_dev_info sysfs_backing_dev_info = {
};
static const struct inode_operations sysfs_inode_operations ={
+ .permission = sysfs_permission,
.setattr = sysfs_setattr,
+ .getattr = sysfs_getattr,
.setxattr = sysfs_setxattr,
};
@@ -46,7 +48,7 @@ int __init sysfs_inode_init(void)
return bdi_init(&sysfs_backing_dev_info);
}
-struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
+static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
{
struct sysfs_inode_attrs *attrs;
struct iattr *iattrs;
@@ -64,30 +66,15 @@ struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
return attrs;
}
-int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
+
+int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr)
{
- struct inode * inode = dentry->d_inode;
- struct sysfs_dirent * sd = dentry->d_fsdata;
struct sysfs_inode_attrs *sd_attrs;
struct iattr *iattrs;
unsigned int ia_valid = iattr->ia_valid;
- int error;
-
- if (!sd)
- return -EINVAL;
sd_attrs = sd->s_iattr;
- error = inode_change_ok(inode, iattr);
- if (error)
- return error;
-
- iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
-
- error = inode_setattr(inode, iattr);
- if (error)
- return error;
-
if (!sd_attrs) {
/* setting attributes for the first time, allocate now */
sd_attrs = sysfs_init_inode_attrs(sd);
@@ -103,42 +90,78 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (ia_valid & ATTR_GID)
iattrs->ia_gid = iattr->ia_gid;
if (ia_valid & ATTR_ATIME)
- iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
- inode->i_sb->s_time_gran);
+ iattrs->ia_atime = iattr->ia_atime;
if (ia_valid & ATTR_MTIME)
- iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
- inode->i_sb->s_time_gran);
+ iattrs->ia_mtime = iattr->ia_mtime;
if (ia_valid & ATTR_CTIME)
- iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
- inode->i_sb->s_time_gran);
+ iattrs->ia_ctime = iattr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = iattr->ia_mode;
-
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- mode &= ~S_ISGID;
iattrs->ia_mode = sd->s_mode = mode;
}
}
+ return 0;
+}
+
+int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ struct inode *inode = dentry->d_inode;
+ struct sysfs_dirent *sd = dentry->d_fsdata;
+ int error;
+
+ if (!sd)
+ return -EINVAL;
+
+ error = inode_change_ok(inode, iattr);
+ if (error)
+ return error;
+
+ iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
+
+ error = inode_setattr(inode, iattr);
+ if (error)
+ return error;
+
+ mutex_lock(&sysfs_mutex);
+ error = sysfs_sd_setattr(sd, iattr);
+ mutex_unlock(&sysfs_mutex);
+
return error;
}
+static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *secdata_len)
+{
+ struct sysfs_inode_attrs *iattrs;
+ void *old_secdata;
+ size_t old_secdata_len;
+
+ if (!sd->s_iattr) /* first use: allocate and publish on the dirent */
+ sd->s_iattr = sysfs_init_inode_attrs(sd);
+ iattrs = sd->s_iattr;
+ if (!iattrs)
+ return -ENOMEM;
+
+ old_secdata = iattrs->ia_secdata;
+ old_secdata_len = iattrs->ia_secdata_len;
+
+ iattrs->ia_secdata = *secdata; /* dirent now holds the new context */
+ iattrs->ia_secdata_len = *secdata_len;
+
+ *secdata = old_secdata; /* previous context goes back to the caller for release */
+ *secdata_len = old_secdata_len;
+ return 0;
+}
+
int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
struct sysfs_dirent *sd = dentry->d_fsdata;
- struct sysfs_inode_attrs *iattrs;
void *secdata;
int error;
u32 secdata_len = 0;
if (!sd)
return -EINVAL;
- if (!sd->s_iattr)
- sd->s_iattr = sysfs_init_inode_attrs(sd);
- if (!sd->s_iattr)
- return -ENOMEM;
-
- iattrs = sd->s_iattr;
if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
@@ -150,12 +173,13 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
&secdata, &secdata_len);
if (error)
goto out;
- if (iattrs->ia_secdata)
- security_release_secctx(iattrs->ia_secdata,
- iattrs->ia_secdata_len);
- iattrs->ia_secdata = secdata;
- iattrs->ia_secdata_len = secdata_len;
+ mutex_lock(&sysfs_mutex);
+ error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
+ mutex_unlock(&sysfs_mutex);
+
+ if (secdata)
+ security_release_secctx(secdata, secdata_len);
} else
return -EINVAL;
out:
@@ -170,7 +194,6 @@ static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
{
- inode->i_mode = iattr->ia_mode;
inode->i_uid = iattr->ia_uid;
inode->i_gid = iattr->ia_gid;
inode->i_atime = iattr->ia_atime;
@@ -178,17 +201,6 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
inode->i_ctime = iattr->ia_ctime;
}
-
-/*
- * sysfs has a different i_mutex lock order behavior for i_mutex than other
- * filesystems; sysfs i_mutex is called in many places with subsystem locks
- * held. At the same time, many of the VFS locking rules do not apply to
- * sysfs at all (cross directory rename for example). To untangle this mess
- * (which gives false positives in lockdep), we're giving sysfs inodes their
- * own class for i_mutex.
- */
-static struct lock_class_key sysfs_inode_imutex_key;
-
static int sysfs_count_nlink(struct sysfs_dirent *sd)
{
struct sysfs_dirent *child;
@@ -201,38 +213,55 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
return nr + 2;
}
+static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
+{
+ struct sysfs_inode_attrs *iattrs = sd->s_iattr;
+
+ inode->i_mode = sd->s_mode;
+ if (iattrs) {
+ /* sysfs_dirent has non-default attributes
+ * get them from persistent copy in sysfs_dirent
+ */
+ set_inode_attr(inode, &iattrs->ia_iattr);
+ security_inode_notifysecctx(inode,
+ iattrs->ia_secdata,
+ iattrs->ia_secdata_len);
+ }
+
+ if (sysfs_type(sd) == SYSFS_DIR)
+ inode->i_nlink = sysfs_count_nlink(sd);
+}
+
+int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+ struct sysfs_dirent *sd = dentry->d_fsdata;
+ struct inode *inode = dentry->d_inode;
+
+ mutex_lock(&sysfs_mutex);
+ sysfs_refresh_inode(sd, inode);
+ mutex_unlock(&sysfs_mutex);
+
+ generic_fillattr(inode, stat);
+ return 0;
+}
+
static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
struct bin_attribute *bin_attr;
- struct sysfs_inode_attrs *iattrs;
inode->i_private = sysfs_get(sd);
inode->i_mapping->a_ops = &sysfs_aops;
inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
inode->i_op = &sysfs_inode_operations;
- inode->i_ino = sd->s_ino;
- lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
- iattrs = sd->s_iattr;
- if (iattrs) {
- /* sysfs_dirent has non-default attributes
- * get them for the new inode from persistent copy
- * in sysfs_dirent
- */
- set_inode_attr(inode, &iattrs->ia_iattr);
- if (iattrs->ia_secdata)
- security_inode_notifysecctx(inode,
- iattrs->ia_secdata,
- iattrs->ia_secdata_len);
- } else
- set_default_inode_attr(inode, sd->s_mode);
+ set_default_inode_attr(inode, sd->s_mode);
+ sysfs_refresh_inode(sd, inode);
/* initialize inode according to type */
switch (sysfs_type(sd)) {
case SYSFS_DIR:
inode->i_op = &sysfs_dir_inode_operations;
inode->i_fop = &sysfs_dir_operations;
- inode->i_nlink = sysfs_count_nlink(sd);
break;
case SYSFS_KOBJ_ATTR:
inode->i_size = PAGE_SIZE;
@@ -315,3 +344,14 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
else
return -ENOENT;
}
+
+int sysfs_permission(struct inode *inode, int mask)
+{
+ struct sysfs_dirent *sd = inode->i_private;
+
+ mutex_lock(&sysfs_mutex);
+ sysfs_refresh_inode(sd, inode);
+ mutex_unlock(&sysfs_mutex);
+
+ return generic_permission(inode, mask, NULL);
+}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index c5081ad7702..c5eff49fa41 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -210,10 +210,13 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
}
const struct inode_operations sysfs_symlink_inode_operations = {
- .setxattr = sysfs_setxattr,
- .readlink = generic_readlink,
- .follow_link = sysfs_follow_link,
- .put_link = sysfs_put_link,
+ .setxattr = sysfs_setxattr,
+ .readlink = generic_readlink,
+ .follow_link = sysfs_follow_link,
+ .put_link = sysfs_put_link,
+ .setattr = sysfs_setattr,
+ .getattr = sysfs_getattr,
+ .permission = sysfs_permission,
};
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index af4c4e7482a..ca52e7b9d8f 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -89,9 +89,7 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
*/
struct sysfs_addrm_cxt {
struct sysfs_dirent *parent_sd;
- struct inode *parent_inode;
struct sysfs_dirent *removed;
- int cnt;
};
/*
@@ -105,7 +103,6 @@ extern struct kmem_cache *sysfs_dir_cachep;
* dir.c
*/
extern struct mutex sysfs_mutex;
-extern struct mutex sysfs_rename_mutex;
extern spinlock_t sysfs_assoc_lock;
extern const struct file_operations sysfs_dir_operations;
@@ -133,6 +130,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
struct sysfs_dirent **p_sd);
void sysfs_remove_subdir(struct sysfs_dirent *sd);
+int sysfs_rename(struct sysfs_dirent *sd,
+ struct sysfs_dirent *new_parent_sd, const char *new_name);
+
static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
{
if (sd) {
@@ -155,7 +155,10 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
*/
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
void sysfs_delete_inode(struct inode *inode);
+int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
+int sysfs_permission(struct inode *inode, int mask);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags);
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 70f989895d1..87813e405ce 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -235,71 +235,36 @@ xfs_setfilesize(
}
/*
- * Buffered IO write completion for delayed allocate extents.
+ * IO write completion.
*/
STATIC void
-xfs_end_bio_delalloc(
- struct work_struct *work)
-{
- xfs_ioend_t *ioend =
- container_of(work, xfs_ioend_t, io_work);
-
- xfs_setfilesize(ioend);
- xfs_destroy_ioend(ioend);
-}
-
-/*
- * Buffered IO write completion for regular, written extents.
- */
-STATIC void
-xfs_end_bio_written(
- struct work_struct *work)
-{
- xfs_ioend_t *ioend =
- container_of(work, xfs_ioend_t, io_work);
-
- xfs_setfilesize(ioend);
- xfs_destroy_ioend(ioend);
-}
-
-/*
- * IO write completion for unwritten extents.
- *
- * Issue transactions to convert a buffer range from unwritten
- * to written extents.
- */
-STATIC void
-xfs_end_bio_unwritten(
+xfs_end_io(
struct work_struct *work)
{
xfs_ioend_t *ioend =
container_of(work, xfs_ioend_t, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
- xfs_off_t offset = ioend->io_offset;
- size_t size = ioend->io_size;
-
- if (likely(!ioend->io_error)) {
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- int error;
- error = xfs_iomap_write_unwritten(ip, offset, size);
- if (error)
- ioend->io_error = error;
- }
- xfs_setfilesize(ioend);
- }
- xfs_destroy_ioend(ioend);
-}
-/*
- * IO read completion for regular, written extents.
- */
-STATIC void
-xfs_end_bio_read(
- struct work_struct *work)
-{
- xfs_ioend_t *ioend =
- container_of(work, xfs_ioend_t, io_work);
+ /*
+ * For unwritten extents we need to issue transactions to convert a
+ * range to normal written extents after the data I/O has finished.
+ */
+ if (ioend->io_type == IOMAP_UNWRITTEN &&
+ likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
+ int error;
+
+ error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+ ioend->io_size);
+ if (error)
+ ioend->io_error = error;
+ }
+ /*
+ * We might have to update the on-disk file size after extending
+ * writes.
+ */
+ if (ioend->io_type != IOMAP_READ)
+ xfs_setfilesize(ioend);
xfs_destroy_ioend(ioend);
}
@@ -314,10 +279,10 @@ xfs_finish_ioend(
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
- struct workqueue_struct *wq = xfsdatad_workqueue;
- if (ioend->io_work.func == xfs_end_bio_unwritten)
- wq = xfsconvertd_workqueue;
+ struct workqueue_struct *wq;
+ wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+ xfsconvertd_workqueue : xfsdatad_workqueue;
queue_work(wq, &ioend->io_work);
if (wait)
flush_workqueue(wq);
@@ -355,15 +320,7 @@ xfs_alloc_ioend(
ioend->io_offset = 0;
ioend->io_size = 0;
- if (type == IOMAP_UNWRITTEN)
- INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
- else if (type == IOMAP_DELAY)
- INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
- else if (type == IOMAP_READ)
- INIT_WORK(&ioend->io_work, xfs_end_bio_read);
- else
- INIT_WORK(&ioend->io_work, xfs_end_bio_written);
-
+ INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
}
@@ -380,7 +337,7 @@ xfs_map_blocks(
return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
}
-STATIC_INLINE int
+STATIC int
xfs_iomap_valid(
xfs_iomap_t *iomapp,
loff_t offset)
@@ -412,8 +369,9 @@ xfs_end_bio(
STATIC void
xfs_submit_ioend_bio(
- xfs_ioend_t *ioend,
- struct bio *bio)
+ struct writeback_control *wbc,
+ xfs_ioend_t *ioend,
+ struct bio *bio)
{
atomic_inc(&ioend->io_remaining);
bio->bi_private = ioend;
@@ -426,7 +384,8 @@ xfs_submit_ioend_bio(
if (xfs_ioend_new_eof(ioend))
xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
- submit_bio(WRITE, bio);
+ submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC_PLUG : WRITE, bio);
ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
bio_put(bio);
}
@@ -505,6 +464,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
*/
STATIC void
xfs_submit_ioend(
+ struct writeback_control *wbc,
xfs_ioend_t *ioend)
{
xfs_ioend_t *head = ioend;
@@ -533,19 +493,19 @@ xfs_submit_ioend(
retry:
bio = xfs_alloc_ioend_bio(bh);
} else if (bh->b_blocknr != lastblock + 1) {
- xfs_submit_ioend_bio(ioend, bio);
+ xfs_submit_ioend_bio(wbc, ioend, bio);
goto retry;
}
if (bio_add_buffer(bio, bh) != bh->b_size) {
- xfs_submit_ioend_bio(ioend, bio);
+ xfs_submit_ioend_bio(wbc, ioend, bio);
goto retry;
}
lastblock = bh->b_blocknr;
}
if (bio)
- xfs_submit_ioend_bio(ioend, bio);
+ xfs_submit_ioend_bio(wbc, ioend, bio);
xfs_finish_ioend(ioend, 0);
} while ((ioend = next) != NULL);
}
@@ -1191,7 +1151,7 @@ xfs_page_state_convert(
}
if (iohead)
- xfs_submit_ioend(iohead);
+ xfs_submit_ioend(wbc, iohead);
return page_dirty;
@@ -1528,7 +1488,7 @@ xfs_end_io_direct(
* didn't map an unwritten extent so switch it's completion
* handler.
*/
- INIT_WORK(&ioend->io_work, xfs_end_bio_written);
+ ioend->io_type = IOMAP_NEW;
xfs_finish_ioend(ioend, 0);
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 965df1227d6..4ddc973aea7 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -149,7 +149,7 @@ page_region_mask(
return mask;
}
-STATIC_INLINE void
+STATIC void
set_page_region(
struct page *page,
size_t offset,
@@ -161,7 +161,7 @@ set_page_region(
SetPageUptodate(page);
}
-STATIC_INLINE int
+STATIC int
test_page_region(
struct page *page,
size_t offset,
@@ -582,7 +582,7 @@ found:
* although backing storage may not be.
*/
xfs_buf_t *
-xfs_buf_get_flags(
+xfs_buf_get(
xfs_buftarg_t *target,/* target for buffer */
xfs_off_t ioff, /* starting offset of range */
size_t isize, /* length of range */
@@ -661,7 +661,7 @@ _xfs_buf_read(
}
xfs_buf_t *
-xfs_buf_read_flags(
+xfs_buf_read(
xfs_buftarg_t *target,
xfs_off_t ioff,
size_t isize,
@@ -671,7 +671,7 @@ xfs_buf_read_flags(
flags |= XBF_READ;
- bp = xfs_buf_get_flags(target, ioff, isize, flags);
+ bp = xfs_buf_get(target, ioff, isize, flags);
if (bp) {
if (!XFS_BUF_ISDONE(bp)) {
XB_TRACE(bp, "read", (unsigned long)flags);
@@ -718,7 +718,7 @@ xfs_buf_readahead(
return;
flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
- xfs_buf_read_flags(target, ioff, isize, flags);
+ xfs_buf_read(target, ioff, isize, flags);
}
xfs_buf_t *
@@ -1113,7 +1113,7 @@ xfs_bdwrite(
xfs_buf_delwri_queue(bp, 1);
}
-STATIC_INLINE void
+STATIC void
_xfs_buf_ioend(
xfs_buf_t *bp,
int schedule)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9b4d666ad31..5f07dd91c5f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -186,15 +186,10 @@ extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
#define xfs_incore(buftarg,blkno,len,lockit) \
_xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
-#define xfs_buf_get(target, blkno, len, flags) \
- xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-
-extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
-#define xfs_buf_read(target, blkno, len, flags) \
- xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index eff61e2732a..e4caeb28ce2 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -52,7 +52,7 @@ xfs_file_aio_read(
loff_t pos)
{
struct file *file = iocb->ki_filp;
- int ioflags = IO_ISAIO;
+ int ioflags = 0;
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
@@ -71,7 +71,7 @@ xfs_file_aio_write(
loff_t pos)
{
struct file *file = iocb->ki_filp;
- int ioflags = IO_ISAIO;
+ int ioflags = 0;
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cd42ef78f6b..1f3b4b8f7dd 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -573,8 +573,8 @@ xfs_vn_fallocate(
bf.l_len = len;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
- error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
- 0, XFS_ATTR_NOLOCK);
+ error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
+ 0, XFS_ATTR_NOLOCK);
if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode))
new_size = offset + len;
@@ -585,7 +585,7 @@ xfs_vn_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
+ error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 072050f8d34..1bf47f219c9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -255,8 +255,6 @@ xfs_read(
iocb->ki_pos = *offset;
ret = generic_file_aio_read(iocb, iovp, segs, *offset);
- if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
- ret = wait_on_sync_kiocb(iocb);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
@@ -774,9 +772,6 @@ write_retry:
current->backing_dev_info = NULL;
- if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
- ret = wait_on_sync_kiocb(iocb);
-
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
*offset = isize;
@@ -811,7 +806,7 @@ write_retry:
XFS_STATS_ADD(xs_write_bytes, ret);
/* Handle various SYNC-type writes */
- if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
+ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
int error2;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18a4b8e11df..1bfb0e98019 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -930,13 +930,39 @@ xfs_fs_alloc_inode(
*/
STATIC void
xfs_fs_destroy_inode(
- struct inode *inode)
+ struct inode *inode)
{
- xfs_inode_t *ip = XFS_I(inode);
+ struct xfs_inode *ip = XFS_I(inode);
+
+ xfs_itrace_entry(ip);
XFS_STATS_INC(vn_reclaim);
- if (xfs_reclaim(ip))
- panic("%s: cannot reclaim 0x%p\n", __func__, inode);
+
+ /* bad inode, get out here ASAP */
+ if (is_bad_inode(inode))
+ goto out_reclaim;
+
+ xfs_ioend_wait(ip);
+
+ ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+ /*
+ * We should never get here with one of the reclaim flags already set.
+ */
+ ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+ ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+ /*
+ * If we have nothing to flush with this inode then complete the
+ * teardown now, otherwise delay the flush operation.
+ */
+ if (!xfs_inode_clean(ip)) {
+ xfs_inode_set_reclaim_tag(ip);
+ return;
+ }
+
+out_reclaim:
+ xfs_ireclaim(ip);
}
/*
@@ -973,7 +999,6 @@ xfs_fs_inode_init_once(
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino);
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
}
/*
@@ -1075,6 +1100,20 @@ xfs_fs_clear_inode(
XFS_STATS_INC(vn_remove);
XFS_STATS_DEC(vn_active);
+ /*
+ * The iolock is used by the file system to coordinate reads,
+ * writes, and block truncates. Up to this point the lock
+ * protected concurrent accesses by users of the inode. But
+ * from here forward we're doing some final processing of the
+ * inode because we're done with it, and although we reuse the
+ * iolock for protection it is really a distinct lock class
+ * (in the lockdep sense) from before. To keep lockdep happy
+ * (and basically indicate what we are doing), we explicitly
+ * re-init the iolock here.
+ */
+ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+ mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+
xfs_inactive(ip);
}
@@ -1092,8 +1131,6 @@ xfs_fs_put_super(
struct super_block *sb)
{
struct xfs_mount *mp = XFS_M(sb);
- struct xfs_inode *rip = mp->m_rootip;
- int unmount_event_flags = 0;
xfs_syncd_stop(mp);
@@ -1109,20 +1146,7 @@ xfs_fs_put_super(
xfs_sync_attr(mp, 0);
}
-#ifdef HAVE_DMAPI
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- unmount_event_flags =
- (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
- 0 : DM_FLAGS_UNWANTED;
- /*
- * Ignore error from dmapi here, first unmount is not allowed
- * to fail anyway, and second we wouldn't want to fail a
- * unmount because of dmapi.
- */
- XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
- NULL, NULL, 0, 0, unmount_event_flags);
- }
-#endif
+ XFS_SEND_PREUNMOUNT(mp);
/*
* Blow away any referenced inode in the filestreams cache.
@@ -1133,10 +1157,7 @@ xfs_fs_put_super(
XFS_bflush(mp->m_ddev_targp);
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
- unmount_event_flags);
- }
+ XFS_SEND_UNMOUNT(mp);
xfs_unmountfs(mp);
xfs_freesb(mp);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 961df0a22c7..d895a3a960f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -663,10 +663,9 @@ xfs_syncd_stop(
kthread_stop(mp->m_sync_task);
}
-int
+STATIC int
xfs_reclaim_inode(
xfs_inode_t *ip,
- int locked,
int sync_mode)
{
xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
@@ -682,10 +681,6 @@ xfs_reclaim_inode(
!__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
- if (locked) {
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
return -EAGAIN;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
@@ -704,10 +699,8 @@ xfs_reclaim_inode(
* We get the flush lock regardless, though, just to make sure
* we don't free it while it is being flushed.
*/
- if (!locked) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_iflock(ip);
- }
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_iflock(ip);
/*
* In the case of a forced shutdown we rely on xfs_iflush() to
@@ -778,7 +771,7 @@ xfs_reclaim_inode_now(
}
read_unlock(&pag->pag_ici_lock);
- return xfs_reclaim_inode(ip, 0, flags);
+ return xfs_reclaim_inode(ip, flags);
}
int
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 27920eb7a82..a500b4d9183 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -44,7 +44,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
-int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index ad7fbead4c9..00cabf5354d 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -36,7 +36,6 @@ struct attrlist_cursor_kern;
/*
* Flags for read/write calls - same values as IRIX
*/
-#define IO_ISAIO 0x00001 /* don't wait for completion */
#define IO_ISDIRECT 0x00004 /* bypass page cache */
#define IO_INVIS 0x00020 /* don't update inode timestamps */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 6f4fd37c67a..d2d20462fd4 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -41,10 +41,6 @@ extern void assfail(char *expr, char *f, int l);
# define STATIC static noinline
#endif
-#ifndef STATIC_INLINE
-# define STATIC_INLINE static inline
-#endif
-
#else /* DEBUG */
#define ASSERT(expr) \
@@ -54,19 +50,5 @@ extern void assfail(char *expr, char *f, int l);
# define STATIC noinline
#endif
-/*
- * We stop inlining of inline functions in debug mode.
- * Unfortunately, this means static inline in header files
- * get multiple definitions, so they need to remain static.
- * This then gives tonnes of warnings about unused but defined
- * functions, so we need to add the unused attribute to prevent
- * these spurious warnings.
- */
-#ifndef STATIC_INLINE
-# define STATIC_INLINE static __attribute__ ((unused)) noinline
-#endif
-
#endif /* DEBUG */
-
-
#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 4ece1906bd4..8fe6f6b78a4 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -123,9 +123,13 @@ xfs_inode_hasattr(
* Overall external interface routines.
*========================================================================*/
-int
-xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
- char *value, int *valuelenp, int flags)
+STATIC int
+xfs_attr_get_int(
+ struct xfs_inode *ip,
+ struct xfs_name *name,
+ char *value,
+ int *valuelenp,
+ int flags)
{
xfs_da_args_t args;
int error;
@@ -188,7 +192,7 @@ xfs_attr_get(
return error;
xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
+ error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return(error);
}
@@ -2143,8 +2147,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
- bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
- XFS_BUF_LOCK | XBF_DONT_BLOCK);
+ bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
+ XFS_BUF_LOCK | XBF_DONT_BLOCK);
ASSERT(bp);
ASSERT(!XFS_BUF_GETERROR(bp));
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index fb3b2a68b9b..12f0be3a73d 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -131,7 +131,6 @@ typedef struct xfs_attr_list_context {
*/
int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
int xfs_attr_inactive(struct xfs_inode *dp);
-int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_list_int(struct xfs_attr_list_context *);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index afdc8911637..0b687351293 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -98,7 +98,7 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
* If namespace bits don't match return 0.
* If all match then return 1.
*/
-STATIC_INLINE int
+STATIC int
xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
{
return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index eb7b702d069..6f5ccede63f 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -98,8 +98,7 @@ xfs_bmdr_to_bmbt(
* This code must be in sync with the routines xfs_bmbt_get_startoff,
* xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
*/
-
-STATIC_INLINE void
+STATIC void
__xfs_bmbt_get_all(
__uint64_t l0,
__uint64_t l1,
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index f655f7dc334..4aba67c5f64 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -79,7 +79,7 @@ extern ktrace_t *xfs_filestreams_trace_buf;
* the cache that reference per-ag array elements that have since been
* reallocated.
*/
-STATIC_INLINE int
+static inline int
xfs_filestream_peek_ag(
xfs_mount_t *mp,
xfs_agnumber_t agno)
@@ -87,7 +87,7 @@ xfs_filestream_peek_ag(
return atomic_read(&mp->m_perag[agno].pagf_fstrms);
}
-STATIC_INLINE int
+static inline int
xfs_filestream_get_ag(
xfs_mount_t *mp,
xfs_agnumber_t agno)
@@ -95,7 +95,7 @@ xfs_filestream_get_ag(
return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
}
-STATIC_INLINE int
+static inline int
xfs_filestream_put_ag(
xfs_mount_t *mp,
xfs_agnumber_t agno)
@@ -122,7 +122,7 @@ int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
/* filestreams for the inode? */
-STATIC_INLINE int
+static inline int
xfs_inode_is_filestream(
struct xfs_inode *ip)
{
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2d0b3e1da9e..36079aa9134 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -201,8 +201,8 @@ xfs_growfs_data_private(
* AG freelist header block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
- XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0);
+ XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
+ XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
agf = XFS_BUF_TO_AGF(bp);
memset(agf, 0, mp->m_sb.sb_sectsize);
agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
@@ -233,8 +233,8 @@ xfs_growfs_data_private(
* AG inode header block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
- XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0);
+ XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
+ XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
agi = XFS_BUF_TO_AGI(bp);
memset(agi, 0, mp->m_sb.sb_sectsize);
agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
@@ -257,8 +257,9 @@ xfs_growfs_data_private(
* BNO btree root block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
- BTOBB(mp->m_sb.sb_blocksize), 0);
+ XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
+ BTOBB(mp->m_sb.sb_blocksize),
+ XBF_LOCK | XBF_MAPPED);
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
@@ -278,8 +279,9 @@ xfs_growfs_data_private(
* CNT btree root block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
- BTOBB(mp->m_sb.sb_blocksize), 0);
+ XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
+ BTOBB(mp->m_sb.sb_blocksize),
+ XBF_LOCK | XBF_MAPPED);
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
@@ -300,8 +302,9 @@ xfs_growfs_data_private(
* INO btree root block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
- BTOBB(mp->m_sb.sb_blocksize), 0);
+ XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
+ BTOBB(mp->m_sb.sb_blocksize),
+ XBF_LOCK | XBF_MAPPED);
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
@@ -611,7 +614,7 @@ xfs_fs_log_dummy(
xfs_inode_t *ip;
int error;
- tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+ tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0785797db82..cb907ba69c4 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -425,7 +425,7 @@ xfs_ialloc_ag_alloc(
return 0;
}
-STATIC_INLINE xfs_agnumber_t
+STATIC xfs_agnumber_t
xfs_ialloc_next_ag(
xfs_mount_t *mp)
{
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 80e526489be..073bb4a26b1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,6 +73,9 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
+ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+
+ mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
/* initialise the xfs inode */
ip->i_ino = ino;
@@ -290,7 +293,7 @@ xfs_iget_cache_miss(
struct xfs_inode **ipp,
xfs_daddr_t bno,
int flags,
- int lock_flags) __releases(pag->pag_ici_lock)
+ int lock_flags)
{
struct xfs_inode *ip;
int error;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 67ae5555a30..7294abce6ef 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -860,8 +860,15 @@ xfs_iomap_write_unwritten(
* set up a transaction to convert the range of extents
* from unwritten to real. Do allocations in a loop until
* we have covered the range passed in.
+ *
+ * Note that we open code the transaction allocation here
+ * to pass KM_NOFS--we can't risk recursing back into
+ * the filesystem here as we might be asked to write out
+ * the same inode that we complete here and might deadlock
+ * on the iolock.
*/
- tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+ xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+ tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, resblks,
XFS_WRITE_LOG_RES(mp), 0,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fb17f8226b0..1ec98ed914d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2206,6 +2206,7 @@ xlog_recover_do_buffer_trans(
xfs_daddr_t blkno;
int len;
ushort flags;
+ uint buf_flags;
buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
@@ -2246,12 +2247,11 @@ xlog_recover_do_buffer_trans(
}
mp = log->l_mp;
- if (flags & XFS_BLI_INODE_BUF) {
- bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len,
- XFS_BUF_LOCK);
- } else {
- bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0);
- }
+ buf_flags = XFS_BUF_LOCK;
+ if (!(flags & XFS_BLI_INODE_BUF))
+ buf_flags |= XFS_BUF_MAPPED;
+
+ bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
if (XFS_BUF_ISERROR(bp)) {
xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
bp, blkno);
@@ -2350,8 +2350,8 @@ xlog_recover_do_inode_trans(
goto error;
}
- bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno,
- in_f->ilf_len, XFS_BUF_LOCK);
+ bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
+ XFS_BUF_LOCK);
if (XFS_BUF_ISERROR(bp)) {
xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
bp, in_f->ilf_blkno);
@@ -3517,7 +3517,7 @@ xlog_do_recovery_pass(
{
xlog_rec_header_t *rhead;
xfs_daddr_t blk_no;
- xfs_caddr_t bufaddr, offset;
+ xfs_caddr_t offset;
xfs_buf_t *hbp, *dbp;
int error = 0, h_size;
int bblks, split_bblks;
@@ -3610,7 +3610,7 @@ xlog_do_recovery_pass(
/*
* Check for header wrapping around physical end-of-log
*/
- offset = NULL;
+ offset = XFS_BUF_PTR(hbp);
split_hblks = 0;
wrapped_hblks = 0;
if (blk_no + hblks <= log->l_logBBsize) {
@@ -3646,9 +3646,8 @@ xlog_do_recovery_pass(
* - order is important.
*/
wrapped_hblks = hblks - split_hblks;
- bufaddr = XFS_BUF_PTR(hbp);
error = XFS_BUF_SET_PTR(hbp,
- bufaddr + BBTOB(split_hblks),
+ offset + BBTOB(split_hblks),
BBTOB(hblks - split_hblks));
if (error)
goto bread_err2;
@@ -3658,14 +3657,10 @@ xlog_do_recovery_pass(
if (error)
goto bread_err2;
- error = XFS_BUF_SET_PTR(hbp, bufaddr,
+ error = XFS_BUF_SET_PTR(hbp, offset,
BBTOB(hblks));
if (error)
goto bread_err2;
-
- if (!offset)
- offset = xlog_align(log, 0,
- wrapped_hblks, hbp);
}
rhead = (xlog_rec_header_t *)offset;
error = xlog_valid_rec_header(log, rhead,
@@ -3685,7 +3680,7 @@ xlog_do_recovery_pass(
} else {
/* This log record is split across the
* physical end of log */
- offset = NULL;
+ offset = XFS_BUF_PTR(dbp);
split_bblks = 0;
if (blk_no != log->l_logBBsize) {
/* some data is before the physical
@@ -3714,9 +3709,8 @@ xlog_do_recovery_pass(
* _first_, then the log start (LR header end)
* - order is important.
*/
- bufaddr = XFS_BUF_PTR(dbp);
error = XFS_BUF_SET_PTR(dbp,
- bufaddr + BBTOB(split_bblks),
+ offset + BBTOB(split_bblks),
BBTOB(bblks - split_bblks));
if (error)
goto bread_err2;
@@ -3727,13 +3721,9 @@ xlog_do_recovery_pass(
if (error)
goto bread_err2;
- error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
+ error = XFS_BUF_SET_PTR(dbp, offset, h_size);
if (error)
goto bread_err2;
-
- if (!offset)
- offset = xlog_align(log, wrapped_hblks,
- bblks - split_bblks, dbp);
}
xlog_unpack_data(rhead, offset, log);
if ((error = xlog_recover_process_data(log, rhash,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 8b6c9e807ef..66a888a9ad6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -583,8 +583,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
- bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
- BTOBB(sector_size), extra_flags);
+ bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
+ extra_flags);
if (!bp || XFS_BUF_ISERROR(bp)) {
xfs_fs_mount_cmn_err(flags, "SB read failed");
error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -624,8 +624,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
XFS_BUF_UNMANAGE(bp);
xfs_buf_relse(bp);
sector_size = mp->m_sb.sb_sectsize;
- bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
- BTOBB(sector_size), extra_flags);
+ bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
+ BTOBB(sector_size), extra_flags);
if (!bp || XFS_BUF_ISERROR(bp)) {
xfs_fs_mount_cmn_err(flags, "SB re-read failed");
error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -1471,7 +1471,7 @@ xfs_log_sbcount(
if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
return 0;
- tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
+ tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
XFS_DEFAULT_LOG_COUNT);
if (error) {
@@ -2123,7 +2123,7 @@ xfs_icsb_destroy_counters(
mutex_destroy(&mp->m_icsb_mutex);
}
-STATIC_INLINE void
+STATIC void
xfs_icsb_lock_cntr(
xfs_icsb_cnts_t *icsbp)
{
@@ -2132,7 +2132,7 @@ xfs_icsb_lock_cntr(
}
}
-STATIC_INLINE void
+STATIC void
xfs_icsb_unlock_cntr(
xfs_icsb_cnts_t *icsbp)
{
@@ -2140,7 +2140,7 @@ xfs_icsb_unlock_cntr(
}
-STATIC_INLINE void
+STATIC void
xfs_icsb_lock_all_counters(
xfs_mount_t *mp)
{
@@ -2153,7 +2153,7 @@ xfs_icsb_lock_all_counters(
}
}
-STATIC_INLINE void
+STATIC void
xfs_icsb_unlock_all_counters(
xfs_mount_t *mp)
{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a6c023bc0fb..1df7e450296 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -93,6 +93,9 @@ typedef struct xfs_dmops {
xfs_send_unmount_t xfs_send_unmount;
} xfs_dmops_t;
+#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \
+ (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED)
+
#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
(*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
#define XFS_SEND_MMAP(mp, vma,fl) \
@@ -101,12 +104,24 @@ typedef struct xfs_dmops {
(*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
(*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
-#define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
- (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
#define XFS_SEND_MOUNT(mp,right,path,name) \
(*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
-#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \
- (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
+#define XFS_SEND_PREUNMOUNT(mp) \
+do { \
+ if (mp->m_flags & XFS_MOUNT_DMAPI) { \
+ (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \
+ (mp)->m_rootip, DM_RIGHT_NULL, \
+ (mp)->m_rootip, DM_RIGHT_NULL, \
+ NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
+ } \
+} while (0)
+#define XFS_SEND_UNMOUNT(mp) \
+do { \
+ if (mp->m_flags & XFS_MOUNT_DMAPI) { \
+ (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \
+ DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
+ } \
+} while (0)
#ifdef HAVE_PERCPU_SB
@@ -387,13 +402,13 @@ xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
* Per-cpu superblock locking functions
*/
#ifdef HAVE_PERCPU_SB
-STATIC_INLINE void
+static inline void
xfs_icsb_lock(xfs_mount_t *mp)
{
mutex_lock(&mp->m_icsb_mutex);
}
-STATIC_INLINE void
+static inline void
xfs_icsb_unlock(xfs_mount_t *mp)
{
mutex_unlock(&mp->m_icsb_mutex);
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 3f816ad7ff1..4c199d18f85 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -277,10 +277,10 @@ xfs_read_buf(
xfs_buf_t *bp;
int error;
- if (flags)
- bp = xfs_buf_read_flags(target, blkno, len, flags);
- else
- bp = xfs_buf_read(target, blkno, len, flags);
+ if (!flags)
+ flags = XBF_LOCK | XBF_MAPPED;
+
+ bp = xfs_buf_read(target, blkno, len, flags);
if (!bp)
return XFS_ERROR(EIO);
error = XFS_BUF_GETERROR(bp);
@@ -336,3 +336,25 @@ xfs_bwrite(
}
return (error);
}
+
+/*
+ * helper function to extract extent size hint from inode
+ */
+xfs_extlen_t
+xfs_get_extsz_hint(
+ struct xfs_inode *ip)
+{
+ xfs_extlen_t extsz;
+
+ if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
+ extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+ ? ip->i_d.di_extsize
+ : ip->i_mount->m_sb.sb_rextsize;
+ ASSERT(extsz);
+ } else {
+ extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+ ? ip->i_d.di_extsize : 0;
+ }
+
+ return extsz;
+}
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f5e4874c37d..571f2174435 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -37,34 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
}
/*
- * Flags for xfs_free_eofblocks
- */
-#define XFS_FREE_EOF_LOCK (1<<0)
-#define XFS_FREE_EOF_NOLOCK (1<<1)
-
-
-/*
- * helper function to extract extent size hint from inode
- */
-STATIC_INLINE xfs_extlen_t
-xfs_get_extsz_hint(
- xfs_inode_t *ip)
-{
- xfs_extlen_t extsz;
-
- if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
- extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
- ? ip->i_d.di_extsize
- : ip->i_mount->m_sb.sb_rextsize;
- ASSERT(extsz);
- } else {
- extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
- ? ip->i_d.di_extsize : 0;
- }
- return extsz;
-}
-
-/*
* Prototypes for functions in xfs_rw.c.
*/
extern int xfs_write_clear_setuid(struct xfs_inode *ip);
@@ -76,5 +48,6 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
struct xfs_buf **bpp);
extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
xfs_buf_t *bp, xfs_daddr_t blkno);
+extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 66b849358e6..237badcbac3 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -236,19 +236,20 @@ xfs_trans_alloc(
uint type)
{
xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
- return _xfs_trans_alloc(mp, type);
+ return _xfs_trans_alloc(mp, type, KM_SLEEP);
}
xfs_trans_t *
_xfs_trans_alloc(
xfs_mount_t *mp,
- uint type)
+ uint type,
+ uint memflags)
{
xfs_trans_t *tp;
atomic_inc(&mp->m_active_trans);
- tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
+ tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
tp->t_magic = XFS_TRANS_MAGIC;
tp->t_type = type;
tp->t_mountp = mp;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ed47fc77759..a0574f593f5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -924,7 +924,7 @@ typedef struct xfs_trans {
* XFS transaction mechanism exported interfaces.
*/
xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
-xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint);
+xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
uint, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 218829e6a15..03a1f701fea 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -79,11 +79,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
/*
* Default to a normal get_buf() call if the tp is NULL.
*/
- if (tp == NULL) {
- bp = xfs_buf_get_flags(target_dev, blkno, len,
- flags | BUF_BUSY);
- return(bp);
- }
+ if (tp == NULL)
+ return xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
/*
* If we find the buffer in the cache with this transaction
@@ -129,7 +126,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
* easily deadlock with our current transaction as well as cause
* us to run out of stack space.
*/
- bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY);
+ bp = xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
if (bp == NULL) {
return NULL;
}
@@ -302,7 +299,7 @@ xfs_trans_read_buf(
* Default to a normal get_buf() call if the tp is NULL.
*/
if (tp == NULL) {
- bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
+ bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
if (!bp)
return (flags & XFS_BUF_TRYLOCK) ?
EAGAIN : XFS_ERROR(ENOMEM);
@@ -398,7 +395,7 @@ xfs_trans_read_buf(
* easily deadlock with our current transaction as well as cause
* us to run out of stack space.
*/
- bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
+ bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
if (bp == NULL) {
*bpp = NULL;
return 0;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b572f7e840e..578f3f59b78 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,9 +538,8 @@ xfs_readlink_bmap(
d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
- bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
- XBF_LOCK | XBF_MAPPED |
- XBF_DONT_BLOCK);
+ bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
+ XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
error = XFS_BUF_GETERROR(bp);
if (error) {
xfs_ioerror_alert("xfs_readlink",
@@ -709,6 +708,11 @@ xfs_fsync(
}
/*
+ * Flags for xfs_free_eofblocks
+ */
+#define XFS_FREE_EOF_TRYLOCK (1<<0)
+
+/*
* This is called by xfs_inactive to free any blocks beyond eof
* when the link count isn't zero and by xfs_dm_punch_hole() when
* punching a hole to EOF.
@@ -726,7 +730,6 @@ xfs_free_eofblocks(
xfs_filblks_t map_len;
int nimaps;
xfs_bmbt_irec_t imap;
- int use_iolock = (flags & XFS_FREE_EOF_LOCK);
/*
* Figure out if there are any blocks beyond the end
@@ -768,14 +771,19 @@ xfs_free_eofblocks(
* cache and we can't
* do that within a transaction.
*/
- if (use_iolock)
+ if (flags & XFS_FREE_EOF_TRYLOCK) {
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ xfs_trans_cancel(tp, 0);
+ return 0;
+ }
+ } else {
xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ }
error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
ip->i_size);
if (error) {
xfs_trans_cancel(tp, 0);
- if (use_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}
@@ -812,8 +820,7 @@ xfs_free_eofblocks(
error = xfs_trans_commit(tp,
XFS_TRANS_RELEASE_LOG_RES);
}
- xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
- : XFS_ILOCK_EXCL));
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
}
return error;
}
@@ -1113,7 +1120,17 @@ xfs_release(
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
(!(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
- error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+
+ /*
+ * If we can't get the iolock just skip truncating
+ * the blocks past EOF because we could deadlock
+ * with the mmap_sem otherwise. We'll get another
+ * chance to drop them once the last reference to
+ * the inode is dropped, so we'll never leak blocks
+ * permanently.
+ */
+ error = xfs_free_eofblocks(mp, ip,
+ XFS_FREE_EOF_TRYLOCK);
if (error)
return error;
}
@@ -1184,7 +1201,7 @@ xfs_inactive(
(!(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
(ip->i_delayed_blks != 0)))) {
- error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+ error = xfs_free_eofblocks(mp, ip, 0);
if (error)
return VN_INACTIVE_CACHE;
}
@@ -2456,46 +2473,6 @@ xfs_set_dmattrs(
return error;
}
-int
-xfs_reclaim(
- xfs_inode_t *ip)
-{
-
- xfs_itrace_entry(ip);
-
- ASSERT(!VN_MAPPED(VFS_I(ip)));
-
- /* bad inode, get out here ASAP */
- if (is_bad_inode(VFS_I(ip))) {
- xfs_ireclaim(ip);
- return 0;
- }
-
- xfs_ioend_wait(ip);
-
- ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
- /*
- * If we have nothing to flush with this inode then complete the
- * teardown now, otherwise break the link between the xfs inode and the
- * linux inode and clean up the xfs inode later. This avoids flushing
- * the inode to disk during the delete operation itself.
- *
- * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
- * first to ensure that xfs_iunpin() will never see an xfs inode
- * that has a linux inode being reclaimed. Synchronisation is provided
- * by the i_flags_lock.
- */
- if (!ip->i_update_core && (ip->i_itemp == NULL)) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_iflock(ip);
- xfs_iflags_set(ip, XFS_IRECLAIMABLE);
- return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
- }
- xfs_inode_set_reclaim_tag(ip);
- return 0;
-}
-
/*
* xfs_alloc_file_space()
* This routine allocates disk space for the given file.
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index a9e102de71a..167a467403a 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -38,7 +38,6 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
const char *target_path, mode_t mode, struct xfs_inode **ipp,
cred_t *credp);
int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
-int xfs_reclaim(struct xfs_inode *ip);
int xfs_change_file_space(struct xfs_inode *ip, int cmd,
xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,