Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/error.c | 1
-rw-r--r--  fs/9p/fid.c | 69
-rw-r--r--  fs/9p/fid.h | 5
-rw-r--r--  fs/9p/mux.c | 4
-rw-r--r--  fs/9p/v9fs.c | 11
-rw-r--r--  fs/9p/vfs_file.c | 47
-rw-r--r--  fs/9p/vfs_inode.c | 206
-rw-r--r--  fs/aio.c | 20
-rw-r--r--  fs/binfmt_elf.c | 51
-rw-r--r--  fs/binfmt_elf_fdpic.c | 8
-rw-r--r--  fs/block_dev.c | 53
-rw-r--r--  fs/buffer.c | 25
-rw-r--r--  fs/cifs/CHANGES | 6
-rw-r--r--  fs/cifs/cifs_debug.c | 4
-rw-r--r--  fs/cifs/cifsfs.c | 10
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/file.c | 12
-rw-r--r--  fs/cifs/misc.c | 8
-rw-r--r--  fs/cifs/readdir.c | 6
-rw-r--r--  fs/cifs/sess.c | 13
-rw-r--r--  fs/cifs/smbdes.c | 10
-rw-r--r--  fs/dlm/Kconfig | 18
-rw-r--r--  fs/dlm/config.c | 154
-rw-r--r--  fs/dlm/config.h | 17
-rw-r--r--  fs/dlm/dlm_internal.h | 20
-rw-r--r--  fs/dlm/lock.c | 87
-rw-r--r--  fs/dlm/lockspace.c | 10
-rw-r--r--  fs/dlm/lowcomms-sctp.c | 151
-rw-r--r--  fs/dlm/lowcomms-tcp.c | 361
-rw-r--r--  fs/dlm/midcomms.c | 4
-rw-r--r--  fs/dlm/rcom.c | 85
-rw-r--r--  fs/dlm/recover.c | 8
-rw-r--r--  fs/dlm/recoverd.c | 22
-rw-r--r--  fs/dlm/user.c | 9
-rw-r--r--  fs/dlm/util.c | 4
-rw-r--r--  fs/fs-writeback.c | 13
-rw-r--r--  fs/fuse/control.c | 4
-rw-r--r--  fs/gfs2/Kconfig | 47
-rw-r--r--  fs/gfs2/bmap.c | 10
-rw-r--r--  fs/gfs2/dir.c | 25
-rw-r--r--  fs/gfs2/dir.h | 21
-rw-r--r--  fs/gfs2/eattr.c | 8
-rw-r--r--  fs/gfs2/glock.c | 316
-rw-r--r--  fs/gfs2/glock.h | 11
-rw-r--r--  fs/gfs2/glops.c | 136
-rw-r--r--  fs/gfs2/incore.h | 18
-rw-r--r--  fs/gfs2/inode.c | 61
-rw-r--r--  fs/gfs2/lm.c | 8
-rw-r--r--  fs/gfs2/locking/dlm/lock_dlm.h | 2
-rw-r--r--  fs/gfs2/locking/dlm/main.c | 6
-rw-r--r--  fs/gfs2/locking/dlm/mount.c | 6
-rw-r--r--  fs/gfs2/locking/dlm/sysfs.c | 13
-rw-r--r--  fs/gfs2/lops.c | 14
-rw-r--r--  fs/gfs2/ops_address.c | 134
-rw-r--r--  fs/gfs2/ops_dentry.c | 16
-rw-r--r--  fs/gfs2/ops_export.c | 15
-rw-r--r--  fs/gfs2/ops_file.c | 52
-rw-r--r--  fs/gfs2/ops_fstype.c | 4
-rw-r--r--  fs/gfs2/ops_inode.c | 55
-rw-r--r--  fs/gfs2/ops_super.c | 11
-rw-r--r--  fs/gfs2/ops_vm.c | 24
-rw-r--r--  fs/gfs2/super.c | 16
-rw-r--r--  fs/gfs2/sys.c | 10
-rw-r--r--  fs/hostfs/hostfs.h | 2
-rw-r--r--  fs/hostfs/hostfs_kern.c | 2
-rw-r--r--  fs/hostfs/hostfs_user.c | 4
-rw-r--r--  fs/jffs/jffs_fm.c | 3
-rw-r--r--  fs/jffs2/debug.c | 4
-rw-r--r--  fs/jffs2/debug.h | 1
-rw-r--r--  fs/jffs2/fs.c | 3
-rw-r--r--  fs/jffs2/gc.c | 2
-rw-r--r--  fs/jffs2/nodelist.h | 10
-rw-r--r--  fs/jffs2/readinode.c | 3
-rw-r--r--  fs/jffs2/scan.c | 6
-rw-r--r--  fs/jffs2/summary.c | 6
-rw-r--r--  fs/jffs2/super.c | 7
-rw-r--r--  fs/jffs2/symlink.c | 2
-rw-r--r--  fs/jffs2/wbuf.c | 21
-rw-r--r--  fs/jffs2/xattr.c | 5
-rw-r--r--  fs/lockd/clntlock.c | 4
-rw-r--r--  fs/nfs/dir.c | 2
-rw-r--r--  fs/nfs/file.c | 21
-rw-r--r--  fs/nfs/inode.c | 97
-rw-r--r--  fs/nfs/symlink.c | 4
-rw-r--r--  fs/nfsd/export.c | 1
-rw-r--r--  fs/nfsd/nfs3xdr.c | 9
-rw-r--r--  fs/nfsd/nfs4xdr.c | 5
-rw-r--r--  fs/nfsd/nfsfh.c | 14
-rw-r--r--  fs/nfsd/nfssvc.c | 8
-rw-r--r--  fs/nfsd/nfsxdr.c | 5
-rw-r--r--  fs/nfsd/vfs.c | 29
-rw-r--r--  fs/ntfs/ChangeLog | 7
-rw-r--r--  fs/ntfs/Makefile | 2
-rw-r--r--  fs/ntfs/aops.c | 4
-rw-r--r--  fs/ntfs/dir.c | 45
-rw-r--r--  fs/ntfs/inode.c | 69
-rw-r--r--  fs/ntfs/inode.h | 6
-rw-r--r--  fs/ntfs/super.c | 7
-rw-r--r--  fs/ocfs2/export.c | 5
-rw-r--r--  fs/ocfs2/inode.c | 11
-rw-r--r--  fs/ocfs2/journal.h | 4
-rw-r--r--  fs/ocfs2/namei.c | 69
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 43
-rw-r--r--  fs/ocfs2/symlink.c | 3
-rw-r--r--  fs/proc/base.c | 20
-rw-r--r--  fs/proc/proc_misc.c | 8
-rw-r--r--  fs/reiserfs/file.c | 20
-rw-r--r--  fs/reiserfs/inode.c | 2
-rw-r--r--  fs/super.c | 4
-rw-r--r--  fs/ufs/balloc.c | 46
-rw-r--r--  fs/ufs/inode.c | 14
-rw-r--r--  fs/ufs/truncate.c | 4
112 files changed, 1655 insertions, 1595 deletions
diff --git a/fs/9p/error.c b/fs/9p/error.c
index ae91555c155..0d7fa4e0881 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -83,6 +83,7 @@ int v9fs_errstr2errno(char *errstr, int len)
if (errno == 0) {
/* TODO: if error isn't found, add it dynamically */
+ errstr[len] = 0;
printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
errstr);
errno = 1;
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 27507201f9e..a9b6301a04f 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -25,6 +25,7 @@
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/idr.h>
+#include <asm/semaphore.h>
#include "debug.h"
#include "v9fs.h"
@@ -84,6 +85,7 @@ struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid)
new->iounit = 0;
new->rdir_pos = 0;
new->rdir_fcall = NULL;
+ init_MUTEX(&new->lock);
INIT_LIST_HEAD(&new->list);
return new;
@@ -102,11 +104,11 @@ void v9fs_fid_destroy(struct v9fs_fid *fid)
}
/**
- * v9fs_fid_lookup - retrieve the right fid from a particular dentry
+ * v9fs_fid_lookup - return a locked fid from a dentry
* @dentry: dentry to look for fid in
- * @type: intent of lookup (operation or traversal)
*
- * find a fid in the dentry
+ * find a fid in the dentry, obtain its semaphore and return a reference to it.
+ * code calling lookup is responsible for releasing lock
*
* TODO: only match fids that have the same uid as current user
*
@@ -124,7 +126,68 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
if (!return_fid) {
dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n");
+ return_fid = ERR_PTR(-EBADF);
}
+ if(down_interruptible(&return_fid->lock))
+ return ERR_PTR(-EINTR);
+
return return_fid;
}
+
+/**
+ * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and release it
+ * @dentry: dentry to look for fid in
+ *
+ * find a fid in the dentry and then clone to a new private fid
+ *
+ * TODO: only match fids that have the same uid as current user
+ *
+ */
+
+struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry)
+{
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF);
+ struct v9fs_fcall *fcall = NULL;
+ int fid, err;
+
+ base_fid = v9fs_fid_lookup(dentry);
+
+ if(IS_ERR(base_fid))
+ return base_fid;
+
+ if(base_fid) { /* clone fid */
+ fid = v9fs_get_idpool(&v9ses->fidpool);
+ if (fid < 0) {
+ eprintk(KERN_WARNING, "newfid fails!\n");
+ new_fid = ERR_PTR(-ENOSPC);
+ goto Release_Fid;
+ }
+
+ err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall);
+ if (err < 0) {
+ dprintk(DEBUG_ERROR, "clone walk didn't work\n");
+ v9fs_put_idpool(fid, &v9ses->fidpool);
+ new_fid = ERR_PTR(err);
+ goto Free_Fcall;
+ }
+ new_fid = v9fs_fid_create(v9ses, fid);
+ if (new_fid == NULL) {
+ dprintk(DEBUG_ERROR, "out of memory\n");
+ new_fid = ERR_PTR(-ENOMEM);
+ }
+Free_Fcall:
+ kfree(fcall);
+ }
+
+Release_Fid:
+ up(&base_fid->lock);
+ return new_fid;
+}
+
+void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid)
+{
+ v9fs_t_clunk(v9ses, fid->fid);
+ v9fs_fid_destroy(fid);
+}
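
The new clone/clunk pair above is meant to bracket any operation that needs a private fid: v9fs_fid_clone() takes the dentry fid's semaphore internally, walks a fresh fid, and releases the lock before returning, so the caller only has to clunk the clone on every exit path. A minimal sketch of that calling pattern, using only the signatures introduced in this patch (the Tstat in the middle is just a placeholder operation):

static int v9fs_stat_example(struct dentry *dentry)
{
	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
	struct v9fs_fcall *fcall = NULL;
	struct v9fs_fid *fid;
	int err;

	/* get a private copy of the dentry's fid */
	fid = v9fs_fid_clone(dentry);
	if (IS_ERR(fid))
		return PTR_ERR(fid);

	/* use fid->fid for one request */
	err = v9fs_t_stat(v9ses, fid->fid, &fcall);
	kfree(fcall);

	/* clunk the private fid on every exit path */
	v9fs_fid_clunk(v9ses, fid);
	return err;
}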
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index aa974d6875c..48fc170c26c 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -30,6 +30,8 @@ struct v9fs_fid {
struct list_head list; /* list of fids associated with a dentry */
struct list_head active; /* XXX - debug */
+ struct semaphore lock;
+
u32 fid;
unsigned char fidopen; /* set when fid is opened */
unsigned char fidclunked; /* set when fid has already been clunked */
@@ -55,3 +57,6 @@ struct v9fs_fid *v9fs_fid_get_created(struct dentry *);
void v9fs_fid_destroy(struct v9fs_fid *fid);
struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid);
int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry);
+struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry);
+void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid);
+
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 944273c3dbf..147ceef8e53 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -132,8 +132,10 @@ int v9fs_mux_global_init(void)
v9fs_mux_poll_tasks[i].task = NULL;
v9fs_mux_wq = create_workqueue("v9fs");
- if (!v9fs_mux_wq)
+ if (!v9fs_mux_wq) {
+ printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
return -ENOMEM;
+ }
return 0;
}
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 0b96fae8b47..d9b561ba5e5 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -457,14 +457,19 @@ static int __init init_v9fs(void)
v9fs_error_init();
- printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
+ printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
ret = v9fs_mux_global_init();
- if (!ret)
+ if (ret) {
+ printk(KERN_WARNING "v9fs: starting mux failed\n");
return ret;
+ }
ret = register_filesystem(&v9fs_fs_type);
- if (!ret)
+ if (ret) {
+ printk(KERN_WARNING "v9fs: registering file system failed\n");
v9fs_mux_global_exit();
+ }
+
return ret;
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index e86a0715128..9f17b0cacdd 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -55,53 +55,22 @@ int v9fs_file_open(struct inode *inode, struct file *file)
struct v9fs_fid *vfid;
struct v9fs_fcall *fcall = NULL;
int omode;
- int fid = V9FS_NOFID;
int err;
dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file);
- vfid = v9fs_fid_lookup(file->f_path.dentry);
- if (!vfid) {
- dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
- return -EBADF;
- }
-
- fid = v9fs_get_idpool(&v9ses->fidpool);
- if (fid < 0) {
- eprintk(KERN_WARNING, "newfid fails!\n");
- return -ENOSPC;
- }
+ vfid = v9fs_fid_clone(file->f_path.dentry);
+ if (IS_ERR(vfid))
+ return PTR_ERR(vfid);
- err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, &fcall);
- if (err < 0) {
- dprintk(DEBUG_ERROR, "rewalk didn't work\n");
- if (fcall && fcall->id == RWALK)
- goto clunk_fid;
- else {
- v9fs_put_idpool(fid, &v9ses->fidpool);
- goto free_fcall;
- }
- }
- kfree(fcall);
-
- /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
- /* translate open mode appropriately */
omode = v9fs_uflags2omode(file->f_flags);
- err = v9fs_t_open(v9ses, fid, omode, &fcall);
+ err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall);
if (err < 0) {
PRINT_FCALL_ERROR("open failed", fcall);
- goto clunk_fid;
- }
-
- vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
- if (vfid == NULL) {
- dprintk(DEBUG_ERROR, "out of memory\n");
- err = -ENOMEM;
- goto clunk_fid;
+ goto Clunk_Fid;
}
file->private_data = vfid;
- vfid->fid = fid;
vfid->fidopen = 1;
vfid->fidclunked = 0;
vfid->iounit = fcall->params.ropen.iounit;
@@ -112,10 +81,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
return 0;
-clunk_fid:
- v9fs_t_clunk(v9ses, fid);
-
-free_fcall:
+Clunk_Fid:
+ v9fs_fid_clunk(v9ses, vfid);
kfree(fcall);
return err;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f26cdfd88..9109ba1d696 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -416,12 +416,8 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
sb = file_inode->i_sb;
v9ses = v9fs_inode2v9ses(file_inode);
v9fid = v9fs_fid_lookup(file);
-
- if (!v9fid) {
- dprintk(DEBUG_ERROR,
- "no v9fs_fid\n");
- return -EBADF;
- }
+ if(IS_ERR(v9fid))
+ return PTR_ERR(v9fid);
fid = v9fid->fid;
if (fid < 0) {
@@ -433,11 +429,13 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
result = v9fs_t_remove(v9ses, fid, &fcall);
if (result < 0) {
PRINT_FCALL_ERROR("remove fails", fcall);
+ goto Error;
}
v9fs_put_idpool(fid, &v9ses->fidpool);
v9fs_fid_destroy(v9fid);
+Error:
kfree(fcall);
return result;
}
@@ -473,9 +471,13 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
inode = NULL;
vfid = NULL;
v9ses = v9fs_inode2v9ses(dir);
- dfid = v9fs_fid_lookup(dentry->d_parent);
- perm = unixmode2p9mode(v9ses, mode);
+ dfid = v9fs_fid_clone(dentry->d_parent);
+ if(IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ goto error;
+ }
+ perm = unixmode2p9mode(v9ses, mode);
if (nd && nd->flags & LOOKUP_OPEN)
flags = nd->intent.open.flags - 1;
else
@@ -485,9 +487,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit);
if (err)
- goto error;
+ goto clunk_dfid;
vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
+ v9fs_fid_clunk(v9ses, dfid);
if (IS_ERR(vfid)) {
err = PTR_ERR(vfid);
vfid = NULL;
@@ -525,6 +528,9 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
return 0;
+clunk_dfid:
+ v9fs_fid_clunk(v9ses, dfid);
+
error:
if (vfid)
v9fs_fid_destroy(vfid);
@@ -551,7 +557,12 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
inode = NULL;
vfid = NULL;
v9ses = v9fs_inode2v9ses(dir);
- dfid = v9fs_fid_lookup(dentry->d_parent);
+ dfid = v9fs_fid_clone(dentry->d_parent);
+ if(IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ goto error;
+ }
+
perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
@@ -559,37 +570,36 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (err) {
dprintk(DEBUG_ERROR, "create error %d\n", err);
- goto error;
- }
-
- err = v9fs_t_clunk(v9ses, fid);
- if (err) {
- dprintk(DEBUG_ERROR, "clunk error %d\n", err);
- goto error;
+ goto clean_up_dfid;
}
vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
if (IS_ERR(vfid)) {
err = PTR_ERR(vfid);
vfid = NULL;
- goto error;
+ goto clean_up_dfid;
}
+ v9fs_fid_clunk(v9ses, dfid);
inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
- goto error;
+ goto clean_up_fids;
}
dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
return 0;
-error:
+clean_up_fids:
if (vfid)
v9fs_fid_destroy(vfid);
+clean_up_dfid:
+ v9fs_fid_clunk(v9ses, dfid);
+
+error:
return err;
}
@@ -622,28 +632,23 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_op = &v9fs_dentry_operations;
dirfid = v9fs_fid_lookup(dentry->d_parent);
- if (!dirfid) {
- dprintk(DEBUG_ERROR, "no dirfid\n");
- return ERR_PTR(-EINVAL);
- }
+ if(IS_ERR(dirfid))
+ return ERR_PTR(PTR_ERR(dirfid));
dirfidnum = dirfid->fid;
- if (dirfidnum < 0) {
- dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n",
- dir, dir->i_ino);
- return ERR_PTR(-EBADF);
- }
-
newfid = v9fs_get_idpool(&v9ses->fidpool);
if (newfid < 0) {
eprintk(KERN_WARNING, "newfid fails!\n");
- return ERR_PTR(-ENOSPC);
+ result = -ENOSPC;
+ goto Release_Dirfid;
}
result = v9fs_t_walk(v9ses, dirfidnum, newfid,
(char *)dentry->d_name.name, &fcall);
+ up(&dirfid->lock);
+
if (result < 0) {
if (fcall && fcall->id == RWALK)
v9fs_t_clunk(v9ses, newfid);
@@ -701,8 +706,12 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
return NULL;
- FreeFcall:
+Release_Dirfid:
+ up(&dirfid->lock);
+
+FreeFcall:
kfree(fcall);
+
return ERR_PTR(result);
}
@@ -746,10 +755,8 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode = old_dentry->d_inode;
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
- struct v9fs_fid *olddirfid =
- v9fs_fid_lookup(old_dentry->d_parent);
- struct v9fs_fid *newdirfid =
- v9fs_fid_lookup(new_dentry->d_parent);
+ struct v9fs_fid *olddirfid;
+ struct v9fs_fid *newdirfid;
struct v9fs_wstat wstat;
struct v9fs_fcall *fcall = NULL;
int fid = -1;
@@ -759,16 +766,26 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
dprintk(DEBUG_VFS, "\n");
- if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
- dprintk(DEBUG_ERROR, "problem with arguments\n");
- return -EBADF;
+ if(IS_ERR(oldfid))
+ return PTR_ERR(oldfid);
+
+ olddirfid = v9fs_fid_clone(old_dentry->d_parent);
+ if(IS_ERR(olddirfid)) {
+ retval = PTR_ERR(olddirfid);
+ goto Release_lock;
+ }
+
+ newdirfid = v9fs_fid_clone(new_dentry->d_parent);
+ if(IS_ERR(newdirfid)) {
+ retval = PTR_ERR(newdirfid);
+ goto Clunk_olddir;
}
/* 9P can only handle file rename in the same directory */
if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
- retval = -EPERM;
- goto FreeFcallnBail;
+ retval = -EXDEV;
+ goto Clunk_newdir;
}
fid = oldfid->fid;
@@ -779,7 +796,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
old_inode->i_ino);
retval = -EBADF;
- goto FreeFcallnBail;
+ goto Clunk_newdir;
}
v9fs_blank_wstat(&wstat);
@@ -788,11 +805,20 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
- FreeFcallnBail:
if (retval < 0)
PRINT_FCALL_ERROR("wstat error", fcall);
kfree(fcall);
+
+Clunk_newdir:
+ v9fs_fid_clunk(v9ses, newdirfid);
+
+Clunk_olddir:
+ v9fs_fid_clunk(v9ses, olddirfid);
+
+Release_lock:
+ up(&oldfid->lock);
+
return retval;
}
@@ -810,15 +836,12 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
{
struct v9fs_fcall *fcall = NULL;
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
- struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
+ struct v9fs_fid *fid = v9fs_fid_clone(dentry);
int err = -EPERM;
dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
- if (!fid) {
- dprintk(DEBUG_ERROR,
- "couldn't find fid associated with dentry\n");
- return -EBADF;
- }
+ if(IS_ERR(fid))
+ return PTR_ERR(fid);
err = v9fs_t_stat(v9ses, fid->fid, &fcall);
@@ -831,6 +854,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
}
kfree(fcall);
+ v9fs_fid_clunk(v9ses, fid);
return err;
}
@@ -844,18 +868,14 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
- struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
+ struct v9fs_fid *fid = v9fs_fid_clone(dentry);
struct v9fs_fcall *fcall = NULL;
struct v9fs_wstat wstat;
int res = -EPERM;
dprintk(DEBUG_VFS, "\n");
-
- if (!fid) {
- dprintk(DEBUG_ERROR,
- "Couldn't find fid associated with dentry\n");
- return -EBADF;
- }
+ if(IS_ERR(fid))
+ return PTR_ERR(fid);
v9fs_blank_wstat(&wstat);
if (iattr->ia_valid & ATTR_MODE)
@@ -887,6 +907,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (res >= 0)
res = inode_setattr(dentry->d_inode, iattr);
+ v9fs_fid_clunk(v9ses, fid);
return res;
}
@@ -987,18 +1008,15 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
struct v9fs_fcall *fcall = NULL;
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
- struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
+ struct v9fs_fid *fid = v9fs_fid_clone(dentry);
- if (!fid) {
- dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
- retval = -EBADF;
- goto FreeFcall;
- }
+ if(IS_ERR(fid))
+ return PTR_ERR(fid);
if (!v9ses->extended) {
retval = -EBADF;
dprintk(DEBUG_ERROR, "not extended\n");
- goto FreeFcall;
+ goto ClunkFid;
}
dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
@@ -1009,8 +1027,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
goto FreeFcall;
}
- if (!fcall)
- return -EIO;
+ if (!fcall) {
+ retval = -EIO;
+ goto ClunkFid;
+ }
if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
retval = -EINVAL;
@@ -1028,9 +1048,12 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
fcall->params.rstat.stat.extension.str, buffer);
retval = buflen;
- FreeFcall:
+FreeFcall:
kfree(fcall);
+ClunkFid:
+ v9fs_fid_clunk(v9ses, fid);
+
return retval;
}
@@ -1123,52 +1146,58 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
int err;
u32 fid, perm;
struct v9fs_session_info *v9ses;
- struct v9fs_fid *dfid, *vfid;
- struct inode *inode;
+ struct v9fs_fid *dfid, *vfid = NULL;
+ struct inode *inode = NULL;
- inode = NULL;
- vfid = NULL;
v9ses = v9fs_inode2v9ses(dir);
- dfid = v9fs_fid_lookup(dentry->d_parent);
- perm = unixmode2p9mode(v9ses, mode);
-
if (!v9ses->extended) {
dprintk(DEBUG_ERROR, "not extended\n");
return -EPERM;
}
+ dfid = v9fs_fid_clone(dentry->d_parent);
+ if(IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ goto error;
+ }
+
+ perm = unixmode2p9mode(v9ses, mode);
+
err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL);
if (err)
- goto error;
+ goto clunk_dfid;
err = v9fs_t_clunk(v9ses, fid);
if (err)
- goto error;
+ goto clunk_dfid;
vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
if (IS_ERR(vfid)) {
err = PTR_ERR(vfid);
vfid = NULL;
- goto error;
+ goto clunk_dfid;
}
inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
- goto error;
+ goto free_vfid;
}
dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
return 0;
-error:
- if (vfid)
- v9fs_fid_destroy(vfid);
+free_vfid:
+ v9fs_fid_destroy(vfid);
+
+clunk_dfid:
+ v9fs_fid_clunk(v9ses, dfid);
+error:
return err;
}
@@ -1209,26 +1238,29 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int retval;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
struct v9fs_fid *oldfid;
char *name;
dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
old_dentry->d_name.name);
- oldfid = v9fs_fid_lookup(old_dentry);
- if (!oldfid) {
- dprintk(DEBUG_ERROR, "can't find oldfid\n");
- return -EPERM;
- }
+ oldfid = v9fs_fid_clone(old_dentry);
+ if(IS_ERR(oldfid))
+ return PTR_ERR(oldfid);
name = __getname();
- if (unlikely(!name))
- return -ENOMEM;
+ if (unlikely(!name)) {
+ retval = -ENOMEM;
+ goto clunk_fid;
+ }
sprintf(name, "%d\n", oldfid->fid);
retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
__putname(name);
+clunk_fid:
+ v9fs_fid_clunk(v9ses, oldfid);
return retval;
}
diff --git a/fs/aio.c b/fs/aio.c
index ee20fc4240e..55991e4132a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -298,17 +298,23 @@ static void wait_for_all_aios(struct kioctx *ctx)
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
+ spin_lock_irq(&ctx->ctx_lock);
if (!ctx->reqs_active)
- return;
+ goto out;
add_wait_queue(&ctx->wait, &wait);
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
while (ctx->reqs_active) {
+ spin_unlock_irq(&ctx->ctx_lock);
schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ spin_lock_irq(&ctx->ctx_lock);
}
__set_task_state(tsk, TASK_RUNNING);
remove_wait_queue(&ctx->wait, &wait);
+
+out:
+ spin_unlock_irq(&ctx->ctx_lock);
}
/* wait_on_sync_kiocb:
@@ -424,7 +430,6 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
list_add(&req->ki_list, &ctx->active_reqs);
- get_ioctx(ctx);
ctx->reqs_active++;
okay = 1;
}
@@ -536,8 +541,6 @@ int fastcall aio_put_req(struct kiocb *req)
spin_lock_irq(&ctx->ctx_lock);
ret = __aio_put_req(ctx, req);
spin_unlock_irq(&ctx->ctx_lock);
- if (ret)
- put_ioctx(ctx);
return ret;
}
@@ -779,8 +782,7 @@ static int __aio_run_iocbs(struct kioctx *ctx)
*/
iocb->ki_users++; /* grab extra reference */
aio_run_iocb(iocb);
- if (__aio_put_req(ctx, iocb)) /* drop extra ref */
- put_ioctx(ctx);
+ __aio_put_req(ctx, iocb);
}
if (!list_empty(&ctx->run_list))
return 1;
@@ -997,14 +999,10 @@ put_rq:
/* everything turned out well, dispose of the aiocb. */
ret = __aio_put_req(ctx, iocb);
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- if (ret)
- put_ioctx(ctx);
-
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
return ret;
}
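
The wait_for_all_aios() hunk above switches to the standard pattern for sleeping on a condition that a spinlock protects: take the lock, register on the waitqueue, and drop/reacquire the lock around schedule() so the condition is always re-checked under the lock. A generic sketch of that idiom, with hypothetical names (my_lock, my_count, my_wait) standing in for ctx->ctx_lock, ctx->reqs_active and ctx->wait:

static void wait_for_count_zero(void)
{
	spin_lock_irq(&my_lock);
	if (my_count) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue(&my_wait, &wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		while (my_count) {
			spin_unlock_irq(&my_lock);	/* never sleep holding a spinlock */
			schedule();
			set_current_state(TASK_UNINTERRUPTIBLE);
			spin_lock_irq(&my_lock);	/* re-check the condition under the lock */
		}
		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&my_wait, &wait);
	}
	spin_unlock_irq(&my_lock);
}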
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7cb28720f90..669dbe5b031 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -682,6 +682,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_interp;
+
+ /*
+ * If the binary is not readable then enforce
+ * mm->dumpable = 0 regardless of the interpreter's
+ * permissions.
+ */
+ if (file_permission(interpreter, MAY_READ) < 0)
+ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
retval = kernel_read(interpreter, 0, bprm->buf,
BINPRM_BUF_SIZE);
if (retval != BINPRM_BUF_SIZE) {
@@ -1178,6 +1187,10 @@ static int dump_seek(struct file *file, loff_t off)
*/
static int maydump(struct vm_area_struct *vma)
{
+ /* The vma can be set up to tell us the answer directly. */
+ if (vma->vm_flags & VM_ALWAYSDUMP)
+ return 1;
+
/* Do not dump I/O mapped devices or special mappings */
if (vma->vm_flags & (VM_IO | VM_RESERVED))
return 0;
@@ -1424,6 +1437,32 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
return sz;
}
+static struct vm_area_struct *first_vma(struct task_struct *tsk,
+ struct vm_area_struct *gate_vma)
+{
+ struct vm_area_struct *ret = tsk->mm->mmap;
+
+ if (ret)
+ return ret;
+ return gate_vma;
+}
+/*
+ * Helper function for iterating across a vma list. It ensures that the caller
+ * will visit `gate_vma' prior to terminating the search.
+ */
+static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+ struct vm_area_struct *gate_vma)
+{
+ struct vm_area_struct *ret;
+
+ ret = this_vma->vm_next;
+ if (ret)
+ return ret;
+ if (this_vma == gate_vma)
+ return NULL;
+ return gate_vma;
+}
+
/*
* Actual dumper
*
@@ -1439,7 +1478,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
int segs;
size_t size = 0;
int i;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff, foffset;
unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
@@ -1525,6 +1564,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
segs += ELF_CORE_EXTRA_PHDRS;
#endif
+ gate_vma = get_gate_vma(current);
+ if (gate_vma != NULL)
+ segs++;
+
/* Set up header */
fill_elf_header(elf, segs + 1); /* including notes section */
@@ -1592,7 +1635,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
/* Write program headers for segments dump */
- for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (vma = first_vma(current, gate_vma); vma != NULL;
+ vma = next_vma(vma, gate_vma)) {
struct elf_phdr phdr;
size_t sz;
@@ -1641,7 +1685,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
/* Align to page */
DUMP_SEEK(dataoff - foffset);
- for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (vma = first_vma(current, gate_vma); vma != NULL;
+ vma = next_vma(vma, gate_vma)) {
unsigned long addr;
if (!maydump(vma))
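
The first_vma()/next_vma() helpers exist so the core dumper visits the gate VMA (e.g. the x86 vsyscall page, which is not linked into mm->mmap) exactly once. A short sketch of the resulting iteration, reusing the same loop shape as the two hunks above to count the segments a dump would cover:

static int count_dumpable_vmas(void)
{
	struct vm_area_struct *gate_vma = get_gate_vma(current);	/* may be NULL */
	struct vm_area_struct *vma;
	int segs = 0;

	for (vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma))
		if (maydump(vma))
			segs++;

	return segs;
}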
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 6e6d4568d54..a4d933a5120 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -234,6 +234,14 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
goto error;
}
+ /*
+ * If the binary is not readable then enforce
+ * mm->dumpable = 0 regardless of the interpreter's
+ * permissions.
+ */
+ if (file_permission(interpreter, MAY_READ) < 0)
+ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
retval = kernel_read(interpreter, 0, bprm->buf,
BINPRM_BUF_SIZE);
if (retval < 0)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1715d6b5f41..fc7028b685f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -129,6 +129,46 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
return 0;
}
+static int
+blkdev_get_blocks(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ sector_t end_block = max_block(I_BDEV(inode));
+ unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
+
+ if ((iblock + max_blocks) > end_block) {
+ max_blocks = end_block - iblock;
+ if ((long)max_blocks <= 0) {
+ if (create)
+ return -EIO; /* write fully beyond EOF */
+ /*
+ * It is a read which is fully beyond EOF. We return
+ * a !buffer_mapped buffer
+ */
+ max_blocks = 0;
+ }
+ }
+
+ bh->b_bdev = I_BDEV(inode);
+ bh->b_blocknr = iblock;
+ bh->b_size = max_blocks << inode->i_blkbits;
+ if (max_blocks)
+ set_buffer_mapped(bh);
+ return 0;
+}
+
+static ssize_t
+blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+
+ return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
+ iov, offset, nr_segs, blkdev_get_blocks, NULL);
+}
+
+#if 0
static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error)
{
struct kiocb *iocb = bio->bi_private;
@@ -146,7 +186,7 @@ static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error)
iocb->ki_nbytes = -EIO;
if (atomic_dec_and_test(bio_count)) {
- if (iocb->ki_nbytes < 0)
+ if ((long)iocb->ki_nbytes < 0)
aio_complete(iocb, iocb->ki_nbytes, 0);
else
aio_complete(iocb, iocb->ki_left, 0);
@@ -190,6 +230,12 @@ static struct page *blk_get_page(unsigned long addr, size_t count, int rw,
return pvec->page[pvec->idx++];
}
+/* return a page back to pvec array */
+static void blk_unget_page(struct page *page, struct pvec *pvec)
+{
+ pvec->page[--pvec->idx] = page;
+}
+
static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t pos, unsigned long nr_segs)
@@ -278,6 +324,8 @@ same_bio:
count = min(count, nbytes);
goto same_bio;
}
+ } else {
+ blk_unget_page(page, &pvec);
}
/* bio is ready, submit it */
@@ -315,6 +363,7 @@ backout:
return PTR_ERR(page);
goto completion;
}
+#endif
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
@@ -411,7 +460,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
{
memset(bdev, 0, sizeof(*bdev));
mutex_init(&bdev->bd_mutex);
- mutex_init(&bdev->bd_mount_mutex);
+ sema_init(&bdev->bd_mount_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
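
blkdev_get_blocks() above clamps a multi-block mapping request at the end of the device instead of failing the whole transfer, and only returns -EIO for a write that starts entirely past EOF. A small standalone illustration of that clamping arithmetic; the numbers (a 100-block device, requests of 8 blocks) are invented for the example and not taken from the patch:

#include <assert.h>
#include <errno.h>

/* mirrors the clamp in blkdev_get_blocks(): returns how many blocks get mapped */
static long clamp_blocks(long iblock, long max_blocks, long end_block, int create)
{
	if (iblock + max_blocks > end_block) {
		max_blocks = end_block - iblock;
		if (max_blocks <= 0) {
			if (create)
				return -EIO;	/* write fully beyond EOF */
			max_blocks = 0;		/* read beyond EOF: unmapped buffer */
		}
	}
	return max_blocks;
}

int main(void)
{
	assert(clamp_blocks(90, 8, 100, 0) == 8);	/* fits entirely */
	assert(clamp_blocks(96, 8, 100, 0) == 4);	/* clamped at end of device */
	assert(clamp_blocks(104, 8, 100, 0) == 0);	/* read past EOF: not mapped */
	assert(clamp_blocks(104, 8, 100, 1) == -EIO);	/* write past EOF: error */
	return 0;
}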
diff --git a/fs/buffer.c b/fs/buffer.c
index 263f88e4dff..1ad674fd348 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -180,7 +180,7 @@ int fsync_bdev(struct block_device *bdev)
* freeze_bdev -- lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
*
- * This takes the block device bd_mount_mutex to make sure no new mounts
+ * This takes the block device bd_mount_sem to make sure no new mounts
* happen on bdev until thaw_bdev() is called.
* If a superblock is found on this device, we take the s_umount semaphore
* on it to make sure nobody unmounts until the snapshot creation is done.
@@ -189,7 +189,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
{
struct super_block *sb;
- mutex_lock(&bdev->bd_mount_mutex);
+ down(&bdev->bd_mount_sem);
sb = get_super(bdev);
if (sb && !(sb->s_flags & MS_RDONLY)) {
sb->s_frozen = SB_FREEZE_WRITE;
@@ -231,7 +231,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
drop_super(sb);
}
- mutex_unlock(&bdev->bd_mount_mutex);
+ up(&bdev->bd_mount_sem);
}
EXPORT_SYMBOL(thaw_bdev);
@@ -2834,7 +2834,7 @@ int try_to_free_buffers(struct page *page)
int ret = 0;
BUG_ON(!PageLocked(page));
- if (PageDirty(page) || PageWriteback(page))
+ if (PageWriteback(page))
return 0;
if (mapping == NULL) { /* can this still happen? */
@@ -2844,6 +2844,23 @@ int try_to_free_buffers(struct page *page)
spin_lock(&mapping->private_lock);
ret = drop_buffers(page, &buffers_to_free);
+
+ /*
+ * If the filesystem writes its buffers by hand (eg ext3)
+ * then we can have clean buffers against a dirty page. We
+ * clean the page here; otherwise the VM will never notice
+ * that the filesystem did any IO at all.
+ *
+ * Also, during truncate, discard_buffer will have marked all
+ * the page's buffers clean. We discover that here and clean
+ * the page also.
+ *
+ * private_lock must be held over this entire operation in order
+ * to synchronise against __set_page_dirty_buffers and prevent the
+ * dirty bit from being lost.
+ */
+ if (ret)
+ cancel_dirty_page(page, PAGE_CACHE_SIZE);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 3539d6ef961..85e3850bf2c 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,9 @@
+Version 1.47
+------------
+Fix oops in list_del during mount caused by unaligned string.
+Seek to SEEK_END forces check for update of file size for non-cached
+files.
+
Version 1.46
------------
Support deep tree mounts. Better support OS/2, Win9x (DOS) time stamps.
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 96abeb73897..6017c465440 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -143,8 +143,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
if((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- buf += sprintf("\nentry for %s not fully displayed\n\t",
- ses->serverName);
+ buf += sprintf(buf, "\nentry for %s not fully "
+ "displayed\n\t", ses->serverName);
} else {
length =
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 10c90294cd1..93ef09971d2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -511,7 +511,15 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
{
/* origin == SEEK_END => we must revalidate the cached file length */
if (origin == SEEK_END) {
- int retval = cifs_revalidate(file->f_path.dentry);
+ int retval;
+
+ /* some applications poll for the file length in this strange
+ way so we must seek to end on non-oplocked files by
+ setting the revalidate time to zero */
+ if(file->f_path.dentry->d_inode)
+ CIFS_I(file->f_path.dentry->d_inode)->time = 0;
+
+ retval = cifs_revalidate(file->f_path.dentry);
if (retval < 0)
return (loff_t)retval;
}
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a243f779b36..8aa66dcf13b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
extern int cifs_ioctl (struct inode * inode, struct file * filep,
unsigned int command, unsigned long arg);
-#define CIFS_VERSION "1.46"
+#define CIFS_VERSION "1.47"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8a49b2e77d3..e9dcf5ee29a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1146,7 +1146,7 @@ static int cifs_writepages(struct address_space *mapping,
pgoff_t end;
pgoff_t index;
int range_whole = 0;
- struct kvec iov[32];
+ struct kvec * iov;
int len;
int n_iov = 0;
pgoff_t next;
@@ -1171,15 +1171,21 @@ static int cifs_writepages(struct address_space *mapping,
if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
if(cifs_sb->tcon->ses->server->secMode &
(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
- if(!experimEnabled)
+ if(!experimEnabled)
return generic_writepages(mapping, wbc);
+ iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
+ if(iov == NULL)
+ return generic_writepages(mapping, wbc);
+
+
/*
* BB: Is this meaningful for a non-block-device file system?
* If it is, we should test it again after we do I/O
*/
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
+ kfree(iov);
return 0;
}
@@ -1345,7 +1351,7 @@ retry:
mapping->writeback_index = index;
FreeXid(xid);
-
+ kfree(iov);
return rc;
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index aedf683f011..19cc294c7c7 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -71,9 +71,7 @@ sesInfoAlloc(void)
{
struct cifsSesInfo *ret_buf;
- ret_buf =
- (struct cifsSesInfo *) kzalloc(sizeof (struct cifsSesInfo),
- GFP_KERNEL);
+ ret_buf = kzalloc(sizeof (struct cifsSesInfo), GFP_KERNEL);
if (ret_buf) {
write_lock(&GlobalSMBSeslock);
atomic_inc(&sesInfoAllocCount);
@@ -109,9 +107,7 @@ struct cifsTconInfo *
tconInfoAlloc(void)
{
struct cifsTconInfo *ret_buf;
- ret_buf =
- (struct cifsTconInfo *) kzalloc(sizeof (struct cifsTconInfo),
- GFP_KERNEL);
+ ret_buf = kzalloc(sizeof (struct cifsTconInfo), GFP_KERNEL);
if (ret_buf) {
write_lock(&GlobalSMBSeslock);
atomic_inc(&tconInfoAllocCount);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 99dfb5337e3..782940be550 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -156,9 +156,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
tmp_inode->i_atime = cnvrtDosUnixTm(
le16_to_cpu(pfindData->LastAccessDate),
le16_to_cpu(pfindData->LastAccessTime));
- tmp_inode->i_ctime = cnvrtDosUnixTm(
- le16_to_cpu(pfindData->LastWriteDate),
- le16_to_cpu(pfindData->LastWriteTime));
+ tmp_inode->i_ctime = cnvrtDosUnixTm(
+ le16_to_cpu(pfindData->LastWriteDate),
+ le16_to_cpu(pfindData->LastWriteTime));
AdjustForTZ(cifs_sb->tcon, tmp_inode);
attr = le16_to_cpu(pfindData->Attributes);
allocation_size = le32_to_cpu(pfindData->AllocationSize);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index bbdda99dce6..75846463089 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -182,11 +182,14 @@ static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInf
cFYI(1,("bleft %d",bleft));
- /* word align, if bytes remaining is not even */
- if(bleft % 2) {
- bleft--;
- data++;
- }
+ /* SMB header is unaligned, so cifs servers word align start of
+ Unicode strings */
+ data++;
+ bleft--; /* Windows servers do not always double null terminate
+ their final Unicode string - in which case we
+ now will not attempt to decode the byte of junk
+ which follows it */
+
words_left = bleft / 2;
/* save off server operating system */
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 7a1b2b961ec..1b1daf63f06 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -196,7 +196,7 @@ dohash(char *out, char *in, char *key, int forw)
char c[28];
char d[28];
char *cd;
- char ki[16][48];
+ char (*ki)[48];
char *pd1;
char l[32], r[32];
char *rl;
@@ -206,6 +206,12 @@ dohash(char *out, char *in, char *key, int forw)
if(pk1 == NULL)
return;
+ ki = kmalloc(16*48, GFP_KERNEL);
+ if(ki == NULL) {
+ kfree(pk1);
+ return;
+ }
+
cd = pk1 + 56;
pd1= cd + 56;
rl = pd1 + 64;
@@ -243,6 +249,7 @@ dohash(char *out, char *in, char *key, int forw)
er = kmalloc(48+48+32+32+32, GFP_KERNEL);
if(er == NULL) {
kfree(pk1);
+ kfree(ki);
return;
}
erk = er+48;
@@ -290,6 +297,7 @@ dohash(char *out, char *in, char *key, int forw)
permute(out, rl, perm6, 64);
kfree(pk1);
+ kfree(ki);
}
static void
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index b5654a284fe..6fa7b0d5c04 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
config DLM
tristate "Distributed Lock Manager (DLM)"
- depends on IPV6 || IPV6=n
+ depends on SYSFS && (IPV6 || IPV6=n)
select CONFIGFS_FS
select IP_SCTP if DLM_SCTP
help
- A general purpose distributed lock manager for kernel or userspace
- applications.
+ A general purpose distributed lock manager for kernel or userspace
+ applications.
choice
prompt "Select DLM communications protocol"
depends on DLM
default DLM_TCP
help
- The DLM Can use TCP or SCTP for it's network communications.
- SCTP supports multi-homed operations whereas TCP doesn't.
- However, SCTP seems to have stability problems at the moment.
+ The DLM Can use TCP or SCTP for it's network communications.
+ SCTP supports multi-homed operations whereas TCP doesn't.
+ However, SCTP seems to have stability problems at the moment.
config DLM_TCP
bool "TCP/IP"
@@ -31,8 +31,8 @@ config DLM_DEBUG
bool "DLM debugging"
depends on DLM
help
- Under the debugfs mount point, the name of each lockspace will
- appear as a file in the "dlm" directory. The output is the
- list of resource and locks the local node knows about.
+ Under the debugfs mount point, the name of each lockspace will
+ appear as a file in the "dlm" directory. The output is the
+ list of resource and locks the local node knows about.
endmenu
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88553054bbf..8665c88e5af 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
static void drop_node(struct config_group *, struct config_item *);
static void release_node(struct config_item *);
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+ char *buf);
+static ssize_t store_cluster(struct config_item *i,
+ struct configfs_attribute *a,
+ const char *buf, size_t len);
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
static ssize_t node_weight_read(struct node *nd, char *buf);
static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
+struct cluster {
+ struct config_group group;
+ unsigned int cl_tcp_port;
+ unsigned int cl_buffer_size;
+ unsigned int cl_rsbtbl_size;
+ unsigned int cl_lkbtbl_size;
+ unsigned int cl_dirtbl_size;
+ unsigned int cl_recover_timer;
+ unsigned int cl_toss_secs;
+ unsigned int cl_scan_secs;
+ unsigned int cl_log_debug;
+};
+
+enum {
+ CLUSTER_ATTR_TCP_PORT = 0,
+ CLUSTER_ATTR_BUFFER_SIZE,
+ CLUSTER_ATTR_RSBTBL_SIZE,
+ CLUSTER_ATTR_LKBTBL_SIZE,
+ CLUSTER_ATTR_DIRTBL_SIZE,
+ CLUSTER_ATTR_RECOVER_TIMER,
+ CLUSTER_ATTR_TOSS_SECS,
+ CLUSTER_ATTR_SCAN_SECS,
+ CLUSTER_ATTR_LOG_DEBUG,
+};
+
+struct cluster_attribute {
+ struct configfs_attribute attr;
+ ssize_t (*show)(struct cluster *, char *);
+ ssize_t (*store)(struct cluster *, const char *, size_t);
+};
+
+static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
+ unsigned int *info_field, int check_zero,
+ const char *buf, size_t len)
+{
+ unsigned int x;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ x = simple_strtoul(buf, NULL, 0);
+
+ if (check_zero && !x)
+ return -EINVAL;
+
+ *cl_field = x;
+ *info_field = x;
+
+ return len;
+}
+
+#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
+ .attr = { .ca_name = __stringify(_name), \
+ .ca_mode = _mode, \
+ .ca_owner = THIS_MODULE }, \
+ .show = _read, \
+ .store = _write, \
+}
+
+#define CLUSTER_ATTR(name, check_zero) \
+static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
+{ \
+ return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
+ check_zero, buf, len); \
+} \
+static ssize_t name##_read(struct cluster *cl, char *buf) \
+{ \
+ return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
+} \
+static struct cluster_attribute cluster_attr_##name = \
+__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
+
+CLUSTER_ATTR(tcp_port, 1);
+CLUSTER_ATTR(buffer_size, 1);
+CLUSTER_ATTR(rsbtbl_size, 1);
+CLUSTER_ATTR(lkbtbl_size, 1);
+CLUSTER_ATTR(dirtbl_size, 1);
+CLUSTER_ATTR(recover_timer, 1);
+CLUSTER_ATTR(toss_secs, 1);
+CLUSTER_ATTR(scan_secs, 1);
+CLUSTER_ATTR(log_debug, 0);
+
+static struct configfs_attribute *cluster_attrs[] = {
+ [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
+ [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
+ [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
+ [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
+ [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
+ [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
+ [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
+ [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
+ [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
+ NULL,
+};
+
enum {
COMM_ATTR_NODEID = 0,
COMM_ATTR_LOCAL,
@@ -152,10 +252,6 @@ struct clusters {
struct configfs_subsystem subsys;
};
-struct cluster {
- struct config_group group;
-};
-
struct spaces {
struct config_group ss_group;
};
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
static struct configfs_item_operations cluster_ops = {
.release = release_cluster,
+ .show_attribute = show_cluster,
+ .store_attribute = store_cluster,
};
static struct configfs_group_operations spaces_ops = {
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
static struct config_item_type cluster_type = {
.ct_item_ops = &cluster_ops,
+ .ct_attrs = cluster_attrs,
.ct_owner = THIS_MODULE,
};
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
cl->group.default_groups[1] = &cms->cs_group;
cl->group.default_groups[2] = NULL;
+ cl->cl_tcp_port = dlm_config.ci_tcp_port;
+ cl->cl_buffer_size = dlm_config.ci_buffer_size;
+ cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
+ cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
+ cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
+ cl->cl_recover_timer = dlm_config.ci_recover_timer;
+ cl->cl_toss_secs = dlm_config.ci_toss_secs;
+ cl->cl_scan_secs = dlm_config.ci_scan_secs;
+ cl->cl_log_debug = dlm_config.ci_log_debug;
+
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
return &cl->group;
@@ -509,6 +618,25 @@ void dlm_config_exit(void)
* Functions for user space to read/write attributes
*/
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+ char *buf)
+{
+ struct cluster *cl = to_cluster(i);
+ struct cluster_attribute *cla =
+ container_of(a, struct cluster_attribute, attr);
+ return cla->show ? cla->show(cl, buf) : 0;
+}
+
+static ssize_t store_cluster(struct config_item *i,
+ struct configfs_attribute *a,
+ const char *buf, size_t len)
+{
+ struct cluster *cl = to_cluster(i);
+ struct cluster_attribute *cla =
+ container_of(a, struct cluster_attribute, attr);
+ return cla->store ? cla->store(cl, buf, len) : -EINVAL;
+}
+
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
#define DEFAULT_SCAN_SECS 5
+#define DEFAULT_LOG_DEBUG 0
struct dlm_config_info dlm_config = {
- .tcp_port = DEFAULT_TCP_PORT,
- .buffer_size = DEFAULT_BUFFER_SIZE,
- .rsbtbl_size = DEFAULT_RSBTBL_SIZE,
- .lkbtbl_size = DEFAULT_LKBTBL_SIZE,
- .dirtbl_size = DEFAULT_DIRTBL_SIZE,
- .recover_timer = DEFAULT_RECOVER_TIMER,
- .toss_secs = DEFAULT_TOSS_SECS,
- .scan_secs = DEFAULT_SCAN_SECS
+ .ci_tcp_port = DEFAULT_TCP_PORT,
+ .ci_buffer_size = DEFAULT_BUFFER_SIZE,
+ .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
+ .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
+ .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
+ .ci_recover_timer = DEFAULT_RECOVER_TIMER,
+ .ci_toss_secs = DEFAULT_TOSS_SECS,
+ .ci_scan_secs = DEFAULT_SCAN_SECS,
+ .ci_log_debug = DEFAULT_LOG_DEBUG
};
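
The CLUSTER_ATTR() macro added above stamps out a read handler, a write handler and a cluster_attribute for each tunable, so the per-cluster values mirror into dlm_config when written. For reference, this is roughly what CLUSTER_ATTR(tcp_port, 1) expands to (hand-expanded from the macro in this patch, not separate code):

static ssize_t tcp_port_write(struct cluster *cl, const char *buf, size_t len)
{
	return cluster_set(cl, &cl->cl_tcp_port, &dlm_config.ci_tcp_port,
			   1 /* check_zero */, buf, len);
}

static ssize_t tcp_port_read(struct cluster *cl, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_tcp_port);
}

static struct cluster_attribute cluster_attr_tcp_port =
	__CONFIGFS_ATTR(tcp_port, 0644, tcp_port_read, tcp_port_write);

Each such attribute then appears as a writable file (tcp_port, buffer_size, log_debug, ...) in the cluster directory that configfs exposes, which is how the new runtime tunables are set from user space.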
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9da7839958a..1e978611a96 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -17,14 +17,15 @@
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
- int tcp_port;
- int buffer_size;
- int rsbtbl_size;
- int lkbtbl_size;
- int dirtbl_size;
- int recover_timer;
- int toss_secs;
- int scan_secs;
+ int ci_tcp_port;
+ int ci_buffer_size;
+ int ci_rsbtbl_size;
+ int ci_lkbtbl_size;
+ int ci_dirtbl_size;
+ int ci_recover_timer;
+ int ci_toss_secs;
+ int ci_scan_secs;
+ int ci_log_debug;
};
extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1ee8195e6fc..61d93201e1b 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,6 +41,7 @@
#include <asm/uaccess.h>
#include <linux/dlm.h>
+#include "config.h"
#define DLM_LOCKSPACE_LEN 64
@@ -69,12 +70,12 @@ struct dlm_mhandle;
#define log_error(ls, fmt, args...) \
printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
-#define DLM_LOG_DEBUG
-#ifdef DLM_LOG_DEBUG
-#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args)
-#else
-#define log_debug(ls, fmt, args...)
-#endif
+#define log_debug(ls, fmt, args...) \
+do { \
+ if (dlm_config.ci_log_debug) \
+ printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
+ (ls)->ls_name , ##args); \
+} while (0)
#define DLM_ASSERT(x, do) \
{ \
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
/* dlm_header is first element of all structs sent between nodes */
-#define DLM_HEADER_MAJOR 0x00020000
-#define DLM_HEADER_MINOR 0x00000001
+#define DLM_HEADER_MAJOR 0x00030000
+#define DLM_HEADER_MINOR 0x00000000
#define DLM_MSG 1
#define DLM_RCOM 2
@@ -386,6 +387,8 @@ struct dlm_rcom {
uint32_t rc_type; /* DLM_RCOM_ */
int rc_result; /* multi-purpose */
uint64_t rc_id; /* match reply with request */
+ uint64_t rc_seq; /* sender's ls_recover_seq */
+ uint64_t rc_seq_reply; /* remote ls_recover_seq */
char rc_buf[0];
};
@@ -523,6 +526,7 @@ struct dlm_user_proc {
spinlock_t asts_spin;
struct list_head locks;
spinlock_t locks_spin;
+ struct list_head unlocking;
wait_queue_head_t wait;
};
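
With the hunk above, log_debug() is no longer compiled in or out at build time; every call site is gated at run time by dlm_config.ci_log_debug, the new log_debug attribute from fs/dlm/config.c. For example, a call already present in this patch, such as the one in dlm_recover_process_copy():

log_debug(ls, "master copy exists %x", lkb->lkb_id);

/* with the new macro this expands to roughly: */
do {
	if (dlm_config.ci_log_debug)
		printk(KERN_DEBUG "dlm: %s: master copy exists %x\n",
		       ls->ls_name, lkb->lkb_id);
} while (0);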
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb..e725005fafd 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
mutex_unlock(&ls->ls_waiters_mutex);
}
+/* We clear the RESEND flag because we might be taking an lkb off the waiters
+ list as part of process_requestqueue (e.g. a lookup that has an optimized
+ request reply on the requestqueue) between dlm_recover_waiters_pre() which
+ set RESEND and dlm_recover_waiters_post() */
+
static int _remove_from_waiters(struct dlm_lkb *lkb)
{
int error = 0;
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
goto out;
}
lkb->lkb_wait_type = 0;
+ lkb->lkb_flags &= ~DLM_IFL_RESEND;
list_del(&lkb->lkb_wait_reply);
unhold_lkb(lkb);
out:
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
res_hashchain) {
if (!time_after_eq(jiffies, r->res_toss_time +
- dlm_config.toss_secs * HZ))
+ dlm_config.ci_toss_secs * HZ))
continue;
found = 1;
break;
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
if (lkb->lkb_astaddr)
ms->m_asts |= AST_COMP;
- if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
- memcpy(ms->m_extra, r->res_name, r->res_length);
+ /* compare with switch in create_message; send_remove() doesn't
+ use send_args() */
- else if (lkb->lkb_lvbptr)
+ switch (ms->m_type) {
+ case DLM_MSG_REQUEST:
+ case DLM_MSG_LOOKUP:
+ memcpy(ms->m_extra, r->res_name, r->res_length);
+ break;
+ case DLM_MSG_CONVERT:
+ case DLM_MSG_UNLOCK:
+ case DLM_MSG_REQUEST_REPLY:
+ case DLM_MSG_CONVERT_REPLY:
+ case DLM_MSG_GRANT:
+ if (!lkb->lkb_lvbptr)
+ break;
memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
-
+ break;
+ }
}
static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
- if (receive_lvb(ls, lkb, ms))
- return -ENOMEM;
+ if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
+ /* lkb was just created so there won't be an lvb yet */
+ lkb->lkb_lvbptr = allocate_lvb(ls);
+ if (!lkb->lkb_lvbptr)
+ return -ENOMEM;
+ }
return 0;
}
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
{
struct dlm_message *ms = (struct dlm_message *) hd;
struct dlm_ls *ls;
- int error;
+ int error = 0;
if (!recovery)
dlm_message_in(ms);
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
out:
dlm_put_lockspace(ls);
dlm_astd_wake();
- return 0;
+ return error;
}
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
if (middle_conversion(lkb)) {
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -EINPROGRESS;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_convert_reply(lkb, &ls->ls_stub_ms);
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_UNLOCK:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_unlock_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_CANCEL:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_ECANCEL;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_cancel_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
lock_rsb(r);
switch (error) {
+ case -EBADR:
+ /* There's a chance the new master received our lock before
+ dlm_recover_master_reply(), this wouldn't happen if we did
+ a barrier between recover_masters and recover_locks. */
+ log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
+ (unsigned long)r, r->res_name);
+ dlm_send_rcom_lock(r, lkb);
+ goto out;
case -EEXIST:
log_debug(ls, "master copy exists %x", lkb->lkb_id);
/* fall through */
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
/* an ack for dlm_recover_locks() which waits for replies from
all the locks it sends to new masters */
dlm_recovered_lock(r);
-
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
}
if (flags & DLM_LKF_VALBLK) {
- ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
kfree(ua);
__put_lkb(ls, lkb);
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua = (struct dlm_user_args *)lkb->lkb_astparam;
if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
- ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
error = -ENOMEM;
goto out_put;
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
goto out_put;
spin_lock(&ua->proc->locks_spin);
- list_del_init(&lkb->lkb_ownqueue);
+ /* dlm_user_add_ast() may have already taken lkb off the proc list */
+ if (!list_empty(&lkb->lkb_ownqueue))
+ list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
-
- /* this removes the reference for the proc->locks list added by
- dlm_user_request */
- unhold_lkb(lkb);
out_put:
dlm_put_lkb(lkb);
out:
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
/* this lkb was removed from the WAITING queue */
if (lkb->lkb_grmode == DLM_LOCK_IV) {
spin_lock(&ua->proc->locks_spin);
- list_del_init(&lkb->lkb_ownqueue);
+ list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
- unhold_lkb(lkb);
}
out_put:
dlm_put_lkb(lkb);
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
mutex_lock(&ls->ls_clear_proc_locks);
list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
- if (lkb->lkb_ast_type) {
- list_del(&lkb->lkb_astqueue);
- unhold_lkb(lkb);
- }
-
list_del_init(&lkb->lkb_ownqueue);
if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
+
+ /* in-progress unlocks */
+ list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
+ list_del_init(&lkb->lkb_ownqueue);
+ lkb->lkb_flags |= DLM_IFL_DEAD;
+ dlm_put_lkb(lkb);
+ }
+
+ list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ list_del(&lkb->lkb_astqueue);
+ dlm_put_lkb(lkb);
+ }
+
mutex_unlock(&ls->ls_clear_proc_locks);
unlock_recovery(ls);
}
+
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 59012b089e8..f40817b53c6 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
while (!kthread_should_stop()) {
list_for_each_entry(ls, &lslist, ls_list)
dlm_scan_rsbs(ls);
- schedule_timeout_interruptible(dlm_config.scan_secs * HZ);
+ schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_count = 0;
ls->ls_flags = 0;
- size = dlm_config.rsbtbl_size;
+ size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
rwlock_init(&ls->ls_rsbtbl[i].lock);
}
- size = dlm_config.lkbtbl_size;
+ size = dlm_config.ci_lkbtbl_size;
ls->ls_lkbtbl_size = size;
ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_lkbtbl[i].counter = 1;
}
- size = dlm_config.dirtbl_size;
+ size = dlm_config.ci_dirtbl_size;
ls->ls_dirtbl_size = size;
ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);
- ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+ ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (!ls->ls_recover_buf)
goto out_dirfree;
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index fe158d7a928..dc83a9d979b 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -72,6 +72,8 @@ struct nodeinfo {
struct list_head writequeue; /* outgoing writequeue_entries */
spinlock_t writequeue_lock;
int nodeid;
+ struct work_struct swork; /* Send workqueue */
+ struct work_struct lwork; /* Locking workqueue */
};
static DEFINE_IDR(nodeinfo_idr);
@@ -96,6 +98,7 @@ struct connection {
atomic_t waiting_requests;
struct cbuf cb;
int eagain_flag;
+ struct work_struct work; /* Send workqueue */
};
/* An entry waiting to be sent */
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
static LIST_HEAD(write_nodes);
static DEFINE_SPINLOCK(write_nodes_lock);
+
/* Maximum number of incoming messages to process before
* doing a schedule()
*/
#define MAX_RX_MSG_COUNT 25
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
+static struct workqueue_struct *lock_workqueue;
/* The SCTP connection */
static struct connection sctp_con;
+static void process_send_sockets(struct work_struct *work);
+static void process_recv_sockets(struct work_struct *work);
+static void process_lock_request(struct work_struct *work);
static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
{
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
spin_lock_init(&ni->lock);
INIT_LIST_HEAD(&ni->writequeue);
spin_lock_init(&ni->writequeue_lock);
+ INIT_WORK(&ni->lwork, process_lock_request);
+ INIT_WORK(&ni->swork, process_send_sockets);
ni->nodeid = nodeid;
if (nodeid > max_nodeid)
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
/* Data or notification available on socket */
static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
- atomic_inc(&sctp_con.waiting_requests);
if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
- return;
-
- wake_up_interruptible(&lowcomms_recv_wait);
+ queue_work(recv_workqueue, &sctp_con.work);
}
@@ -361,10 +367,10 @@ static void init_failed(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
}
}
- wake_up_process(send_task);
}
/* Something happened to an association */
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
- wake_up_process(send_task);
}
break;
@@ -580,8 +586,8 @@ static int receive_from_sock(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
- wake_up_process(send_task);
}
}
@@ -590,6 +596,7 @@ static int receive_from_sock(void)
return 0;
cbuf_add(&sctp_con.cb, ret);
+ // PJC: TODO: Add to node's workqueue... can we?
ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
page_address(sctp_con.rx_page),
sctp_con.cb.base, sctp_con.cb.len,
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
if (result < 0)
log_print("Can't bind to port %d addr number %d",
- dlm_config.tcp_port, num);
+ dlm_config.ci_tcp_port, num);
return result;
}
@@ -711,7 +718,7 @@ static int init_sock(void)
/* Bind to all interfaces. */
for (i = 0; i < dlm_local_count; i++) {
memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
- make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
result = add_bind_addr(&localaddr, addr_len, num);
if (result)
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
- wake_up_process(send_task);
+
+ queue_work(send_workqueue, &ni->swork);
}
return;
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
return;
}
- make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
+ make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
outmessage.msg_name = &rem_addr;
outmessage.msg_namelen = addrlen;
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
return 0;
}
-static int write_list_empty(void)
+// PJC: The work queue function for receiving.
+static void process_recv_sockets(struct work_struct *work)
{
- int status;
-
- spin_lock_bh(&write_nodes_lock);
- status = list_empty(&write_nodes);
- spin_unlock_bh(&write_nodes_lock);
-
- return status;
-}
-
-static int dlm_recvd(void *data)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- while (!kthread_should_stop()) {
+ if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
+ int ret;
int count = 0;
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&lowcomms_recv_wait, &wait);
- if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
- cond_resched();
- remove_wait_queue(&lowcomms_recv_wait, &wait);
- set_current_state(TASK_RUNNING);
-
- if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
- int ret;
-
- do {
- ret = receive_from_sock();
+ do {
+ ret = receive_from_sock();
- /* Don't starve out everyone else */
- if (++count >= MAX_RX_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- } while (!kthread_should_stop() && ret >=0);
- }
- cond_resched();
+ /* Don't starve out everyone else */
+ if (++count >= MAX_RX_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ } while (!kthread_should_stop() && ret >=0);
}
-
- return 0;
+ cond_resched();
}
-static int dlm_sendd(void *data)
+// PJC: the work queue function for sending
+static void process_send_sockets(struct work_struct *work)
{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (write_list_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- if (sctp_con.eagain_flag) {
- sctp_con.eagain_flag = 0;
- refill_write_queue();
- }
- process_output_queue();
+ if (sctp_con.eagain_flag) {
+ sctp_con.eagain_flag = 0;
+ refill_write_queue();
}
+ process_output_queue();
+}
- remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
- return 0;
+// PJC: Process lock requests from a particular node.
+// TODO: can we optimise this out on UP?
+static void process_lock_request(struct work_struct *work)
+{
}
static void daemons_stop(void)
{
- kthread_stop(recv_task);
- kthread_stop(send_task);
+ destroy_workqueue(recv_workqueue);
+ destroy_workqueue(send_workqueue);
+ destroy_workqueue(lock_workqueue);
}
static int daemons_start(void)
{
- struct task_struct *p;
int error;
+ recv_workqueue = create_workqueue("dlm_recv");
+ error = IS_ERR(recv_workqueue);
+ if (error) {
+ log_print("can't start dlm_recv %d", error);
+ return error;
+ }
- p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
- error = IS_ERR(p);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
+ error = IS_ERR(send_workqueue);
if (error) {
- log_print("can't start dlm_recvd %d", error);
+ log_print("can't start dlm_send %d", error);
+ destroy_workqueue(recv_workqueue);
return error;
}
- recv_task = p;
- p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
- error = IS_ERR(p);
+ lock_workqueue = create_workqueue("dlm_rlock");
+ error = IS_ERR(lock_workqueue);
if (error) {
- log_print("can't start dlm_sendd %d", error);
- kthread_stop(recv_task);
+ log_print("can't start dlm_rlock %d", error);
+ destroy_workqueue(send_workqueue);
+ destroy_workqueue(recv_workqueue);
return error;
}
- send_task = p;
return 0;
}
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
{
int error;
+ INIT_WORK(&sctp_con.work, process_recv_sockets);
+
error = init_sock();
if (error)
goto fail_sock;
@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void)
for (i = 0; i < dlm_local_count; i++)
kfree(dlm_local_addr[i]);
}
-
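The lowcomms-sctp.c hunks above replace the dlm_recvd/dlm_sendd kernel threads and their waitqueues with workqueues: each node or connection carries a struct work_struct, the socket callbacks queue that work item, and the old daemon loops become plain work functions. Below is a minimal sketch of that conversion pattern as a stand-alone module; demo_conn, demo_rx and the "demo_recv" queue name are invented for illustration and are not the real lowcomms symbols.

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/workqueue.h>

/* Illustrative per-connection state; the real code embeds the work item
 * in struct connection (TCP) or struct nodeinfo (SCTP). */
struct demo_conn {
	struct work_struct rwork;
};

static struct workqueue_struct *demo_wq;
static struct demo_conn demo;

/* Work function: runs in process context on a workqueue thread and
 * replaces the explicit kthread + waitqueue loop. */
static void demo_rx(struct work_struct *work)
{
	struct demo_conn *conn = container_of(work, struct demo_conn, rwork);

	/* ... receive and process data for this connection ... */
	(void)conn;
}

static int __init demo_init(void)
{
	/* create_workqueue() returns NULL on failure */
	demo_wq = create_workqueue("demo_recv");
	if (!demo_wq)
		return -ENOMEM;

	INIT_WORK(&demo.rwork, demo_rx);
	/* where the old code did wake_up_process(recv_task): */
	queue_work(demo_wq, &demo.rwork);
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);	/* flushes any pending work first */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The per-object work item is what lets the TCP hunks further down drop the global read_sockets/write_sockets/state_sockets lists entirely.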
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9be3a440c42..f1efd17b261 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb)
struct connection {
struct socket *sock; /* NULL if not connected */
uint32_t nodeid; /* So we know who we are in the list */
- struct rw_semaphore sock_sem; /* Stop connect races */
- struct list_head read_list; /* On this list when ready for reading */
- struct list_head write_list; /* On this list when ready for writing */
- struct list_head state_list; /* On this list when ready to connect */
+ struct mutex sock_mutex;
unsigned long flags; /* bit 1,2 = We are on the read/write lists */
#define CF_READ_PENDING 1
#define CF_WRITE_PENDING 2
@@ -112,9 +109,10 @@ struct connection {
struct page *rx_page;
struct cbuf cb;
int retries;
- atomic_t waiting_requests;
#define MAX_CONNECT_RETRIES 3
struct connection *othercon;
+ struct work_struct rwork; /* Receive workqueue */
+ struct work_struct swork; /* Send workqueue */
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
@@ -131,14 +129,9 @@ struct writequeue_entry {
static struct sockaddr_storage dlm_local_addr;
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-
-static wait_queue_t lowcomms_send_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
-static wait_queue_t lowcomms_recv_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
/* An array of pointers to connections, indexed by NODEID */
static struct connection **connections;
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock);
static struct kmem_cache *con_cache;
static int conn_array_size;
-/* List of sockets that have reads pending */
-static LIST_HEAD(read_sockets);
-static DEFINE_SPINLOCK(read_sockets_lock);
-
-/* List of sockets which have writes pending */
-static LIST_HEAD(write_sockets);
-static DEFINE_SPINLOCK(write_sockets_lock);
-
-/* List of sockets which have connects pending */
-static LIST_HEAD(state_sockets);
-static DEFINE_SPINLOCK(state_sockets_lock);
+static void process_recv_sockets(struct work_struct *work);
+static void process_send_sockets(struct work_struct *work);
static struct connection *nodeid2con(int nodeid, gfp_t allocation)
{
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
goto finish;
con->nodeid = nodeid;
- init_rwsem(&con->sock_sem);
+ mutex_init(&con->sock_mutex);
INIT_LIST_HEAD(&con->writequeue);
spin_lock_init(&con->writequeue_lock);
+ INIT_WORK(&con->swork, process_send_sockets);
+ INIT_WORK(&con->rwork, process_recv_sockets);
connections[nodeid] = con;
}
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
struct connection *con = sock2con(sk);
- atomic_inc(&con->waiting_requests);
- if (test_and_set_bit(CF_READ_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&read_sockets_lock);
- list_add_tail(&con->read_list, &read_sockets);
- spin_unlock_bh(&read_sockets_lock);
-
- wake_up_interruptible(&lowcomms_recv_waitq);
+ if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+ queue_work(recv_workqueue, &con->rwork);
}
static void lowcomms_write_space(struct sock *sk)
{
struct connection *con = sock2con(sk);
- if (test_and_set_bit(CF_WRITE_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&write_sockets_lock);
- list_add_tail(&con->write_list, &write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+ queue_work(send_workqueue, &con->swork);
}
static inline void lowcomms_connect_sock(struct connection *con)
{
- if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&state_sockets_lock);
- list_add_tail(&con->state_list, &state_sockets);
- spin_unlock_bh(&state_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
+ queue_work(send_workqueue, &con->swork);
}
static void lowcomms_state_change(struct sock *sk)
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other)
{
- down_write(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->sock) {
sock_release(con->sock);
@@ -294,7 +261,7 @@ static void close_connection(struct connection *con, bool and_other)
con->rx_page = NULL;
}
con->retries = 0;
- up_write(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
}
/* Data received from remote end */
@@ -308,10 +275,13 @@ static int receive_from_sock(struct connection *con)
int r;
int call_again_soon = 0;
- down_read(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
+
+ if (con->sock == NULL) {
+ ret = -EAGAIN;
+ goto out_close;
+ }
- if (con->sock == NULL)
- goto out;
if (con->rx_page == NULL) {
/*
* This doesn't need to be atomic, but I think it should
@@ -359,6 +329,9 @@ static int receive_from_sock(struct connection *con)
if (ret <= 0)
goto out_close;
+ if (ret == -EAGAIN)
+ goto out_resched;
+
if (ret == len)
call_again_soon = 1;
cbuf_add(&con->cb, ret);
@@ -381,24 +354,26 @@ static int receive_from_sock(struct connection *con)
con->rx_page = NULL;
}
-out:
if (call_again_soon)
goto out_resched;
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return 0;
out_resched:
- lowcomms_data_ready(con->sock->sk, 0);
- up_read(&con->sock_sem);
- cond_resched();
- return 0;
+ if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+ queue_work(recv_workqueue, &con->rwork);
+ mutex_unlock(&con->sock_mutex);
+ return -EAGAIN;
out_close:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
close_connection(con, false);
/* Reconnect when there is something to send */
}
+ /* Don't return success if we really got EOF */
+ if (ret == 0)
+ ret = -EAGAIN;
return ret;
}
@@ -412,6 +387,7 @@ static int accept_from_sock(struct connection *con)
int len;
int nodeid;
struct connection *newcon;
+ struct connection *addcon;
memset(&peeraddr, 0, sizeof(peeraddr));
result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
@@ -419,7 +395,7 @@ static int accept_from_sock(struct connection *con)
if (result < 0)
return -ENOMEM;
- down_read(&con->sock_sem);
+ mutex_lock_nested(&con->sock_mutex, 0);
result = -ENOTCONN;
if (con->sock == NULL)
@@ -445,7 +421,7 @@ static int accept_from_sock(struct connection *con)
if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
printk("dlm: connect from non cluster node\n");
sock_release(newsock);
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return -1;
}
@@ -462,7 +438,7 @@ static int accept_from_sock(struct connection *con)
result = -ENOMEM;
goto accept_err;
}
- down_write(&newcon->sock_sem);
+ mutex_lock_nested(&newcon->sock_mutex, 1);
if (newcon->sock) {
struct connection *othercon = newcon->othercon;
@@ -470,41 +446,45 @@ static int accept_from_sock(struct connection *con)
othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
if (!othercon) {
printk("dlm: failed to allocate incoming socket\n");
- up_write(&newcon->sock_sem);
+ mutex_unlock(&newcon->sock_mutex);
result = -ENOMEM;
goto accept_err;
}
othercon->nodeid = nodeid;
othercon->rx_action = receive_from_sock;
- init_rwsem(&othercon->sock_sem);
+ mutex_init(&othercon->sock_mutex);
+ INIT_WORK(&othercon->swork, process_send_sockets);
+ INIT_WORK(&othercon->rwork, process_recv_sockets);
set_bit(CF_IS_OTHERCON, &othercon->flags);
newcon->othercon = othercon;
}
othercon->sock = newsock;
newsock->sk->sk_user_data = othercon;
add_sock(newsock, othercon);
+ addcon = othercon;
}
else {
newsock->sk->sk_user_data = newcon;
newcon->rx_action = receive_from_sock;
add_sock(newsock, newcon);
-
+ addcon = newcon;
}
- up_write(&newcon->sock_sem);
+ mutex_unlock(&newcon->sock_mutex);
/*
* Add it to the active queue in case we got data
* between processing the accept and adding the socket
* to the read_sockets list
*/
- lowcomms_data_ready(newsock->sk, 0);
- up_read(&con->sock_sem);
+ if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
+ queue_work(recv_workqueue, &addcon->rwork);
+ mutex_unlock(&con->sock_mutex);
return 0;
accept_err:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
sock_release(newsock);
if (result != -EAGAIN)
@@ -525,7 +505,7 @@ static void connect_to_sock(struct connection *con)
return;
}
- down_write(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->retries++ > MAX_CONNECT_RETRIES)
goto out;
@@ -548,7 +528,7 @@ static void connect_to_sock(struct connection *con)
sock->sk->sk_user_data = con;
con->rx_action = receive_from_sock;
- make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
add_sock(sock, con);
@@ -577,7 +557,7 @@ out_err:
result = 0;
}
out:
- up_write(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return;
}
@@ -616,10 +596,10 @@ static struct socket *create_listen_sock(struct connection *con,
con->sock = sock;
/* Bind to our port */
- make_sockaddr(saddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
if (result < 0) {
- printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port);
+ printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port);
sock_release(sock);
sock = NULL;
con->sock = NULL;
@@ -638,7 +618,7 @@ static struct socket *create_listen_sock(struct connection *con,
result = sock->ops->listen(sock, 5);
if (result < 0) {
- printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port);
+ printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port);
sock_release(sock);
sock = NULL;
goto create_out;
@@ -709,6 +689,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
if (!con)
return NULL;
+ spin_lock(&con->writequeue_lock);
e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
if ((&e->list == &con->writequeue) ||
(PAGE_CACHE_SIZE - e->end < len)) {
@@ -747,6 +728,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
struct connection *con = e->con;
int users;
+ spin_lock(&con->writequeue_lock);
users = --e->users;
if (users)
goto out;
@@ -754,12 +736,8 @@ void dlm_lowcomms_commit_buffer(void *mh)
kunmap(e->page);
spin_unlock(&con->writequeue_lock);
- if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) {
- spin_lock_bh(&write_sockets_lock);
- list_add_tail(&con->write_list, &write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
+ queue_work(send_workqueue, &con->swork);
}
return;
@@ -783,7 +761,7 @@ static void send_to_sock(struct connection *con)
struct writequeue_entry *e;
int len, offset;
- down_read(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->sock == NULL)
goto out_connect;
@@ -800,6 +778,7 @@ static void send_to_sock(struct connection *con)
offset = e->offset;
BUG_ON(len == 0 && e->users == 0);
spin_unlock(&con->writequeue_lock);
+ kmap(e->page);
ret = 0;
if (len) {
@@ -828,18 +807,18 @@ static void send_to_sock(struct connection *con)
}
spin_unlock(&con->writequeue_lock);
out:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return;
send_error:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
close_connection(con, false);
lowcomms_connect_sock(con);
return;
out_connect:
- up_read(&con->sock_sem);
- lowcomms_connect_sock(con);
+ mutex_unlock(&con->sock_mutex);
+ connect_to_sock(con);
return;
}
@@ -872,7 +851,6 @@ int dlm_lowcomms_close(int nodeid)
if (con) {
clean_one_writequeue(con);
close_connection(con, true);
- atomic_set(&con->waiting_requests, 0);
}
return 0;
@@ -880,102 +858,29 @@ out:
return -1;
}
-/* API send message call, may queue the request */
-/* N.B. This is the old interface - use the new one for new calls */
-int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
-{
- struct writequeue_entry *e;
- char *b;
-
- e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
- if (e) {
- memcpy(b, buf, len);
- dlm_lowcomms_commit_buffer(e);
- return 0;
- }
- return -ENOBUFS;
-}
-
/* Look for activity on active sockets */
-static void process_sockets(void)
+static void process_recv_sockets(struct work_struct *work)
{
- struct list_head *list;
- struct list_head *temp;
- int count = 0;
-
- spin_lock_bh(&read_sockets_lock);
- list_for_each_safe(list, temp, &read_sockets) {
+ struct connection *con = container_of(work, struct connection, rwork);
+ int err;
- struct connection *con =
- list_entry(list, struct connection, read_list);
- list_del(&con->read_list);
- clear_bit(CF_READ_PENDING, &con->flags);
-
- spin_unlock_bh(&read_sockets_lock);
-
- /* This can reach zero if we are processing requests
- * as they come in.
- */
- if (atomic_read(&con->waiting_requests) == 0) {
- spin_lock_bh(&read_sockets_lock);
- continue;
- }
-
- do {
- con->rx_action(con);
-
- /* Don't starve out everyone else */
- if (++count >= MAX_RX_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
-
- } while (!atomic_dec_and_test(&con->waiting_requests) &&
- !kthread_should_stop());
-
- spin_lock_bh(&read_sockets_lock);
- }
- spin_unlock_bh(&read_sockets_lock);
+ clear_bit(CF_READ_PENDING, &con->flags);
+ do {
+ err = con->rx_action(con);
+ } while (!err);
}
-/* Try to send any messages that are pending
- */
-static void process_output_queue(void)
-{
- struct list_head *list;
- struct list_head *temp;
-
- spin_lock_bh(&write_sockets_lock);
- list_for_each_safe(list, temp, &write_sockets) {
- struct connection *con =
- list_entry(list, struct connection, write_list);
- clear_bit(CF_WRITE_PENDING, &con->flags);
- list_del(&con->write_list);
-
- spin_unlock_bh(&write_sockets_lock);
- send_to_sock(con);
- spin_lock_bh(&write_sockets_lock);
- }
- spin_unlock_bh(&write_sockets_lock);
-}
-static void process_state_queue(void)
+static void process_send_sockets(struct work_struct *work)
{
- struct list_head *list;
- struct list_head *temp;
-
- spin_lock_bh(&state_sockets_lock);
- list_for_each_safe(list, temp, &state_sockets) {
- struct connection *con =
- list_entry(list, struct connection, state_list);
- list_del(&con->state_list);
- clear_bit(CF_CONNECT_PENDING, &con->flags);
- spin_unlock_bh(&state_sockets_lock);
+ struct connection *con = container_of(work, struct connection, swork);
+ if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
connect_to_sock(con);
- spin_lock_bh(&state_sockets_lock);
}
- spin_unlock_bh(&state_sockets_lock);
+
+ clear_bit(CF_WRITE_PENDING, &con->flags);
+ send_to_sock(con);
}
@@ -992,109 +897,33 @@ static void clean_writequeues(void)
}
}
-static int read_list_empty(void)
+static void work_stop(void)
{
- int status;
-
- spin_lock_bh(&read_sockets_lock);
- status = list_empty(&read_sockets);
- spin_unlock_bh(&read_sockets_lock);
-
- return status;
-}
-
-/* DLM Transport comms receive daemon */
-static int dlm_recvd(void *data)
-{
- init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
- add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (read_list_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- process_sockets();
- }
-
- return 0;
+ destroy_workqueue(recv_workqueue);
+ destroy_workqueue(send_workqueue);
}
-static int write_and_state_lists_empty(void)
+static int work_start(void)
{
- int status;
-
- spin_lock_bh(&write_sockets_lock);
- status = list_empty(&write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- spin_lock_bh(&state_sockets_lock);
- if (list_empty(&state_sockets) == 0)
- status = 0;
- spin_unlock_bh(&state_sockets_lock);
-
- return status;
-}
-
-/* DLM Transport send daemon */
-static int dlm_sendd(void *data)
-{
- init_waitqueue_entry(&lowcomms_send_waitq_head, current);
- add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (write_and_state_lists_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- process_state_queue();
- process_output_queue();
- }
-
- return 0;
-}
-
-static void daemons_stop(void)
-{
- kthread_stop(recv_task);
- kthread_stop(send_task);
-}
-
-static int daemons_start(void)
-{
- struct task_struct *p;
int error;
-
- p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
- error = IS_ERR(p);
+ recv_workqueue = create_workqueue("dlm_recv");
+ error = IS_ERR(recv_workqueue);
if (error) {
- log_print("can't start dlm_recvd %d", error);
+ log_print("can't start dlm_recv %d", error);
return error;
}
- recv_task = p;
- p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
- error = IS_ERR(p);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
+ error = IS_ERR(send_workqueue);
if (error) {
- log_print("can't start dlm_sendd %d", error);
- kthread_stop(recv_task);
+ log_print("can't start dlm_send %d", error);
+ destroy_workqueue(recv_workqueue);
return error;
}
- send_task = p;
return 0;
}
-/*
- * Return the largest buffer size we can cope with.
- */
-int lowcomms_max_buffer_size(void)
-{
- return PAGE_CACHE_SIZE;
-}
-
void dlm_lowcomms_stop(void)
{
int i;
@@ -1107,7 +936,7 @@ void dlm_lowcomms_stop(void)
connections[i]->flags |= 0xFF;
}
- daemons_stop();
+ work_stop();
clean_writequeues();
for (i = 0; i < conn_array_size; i++) {
@@ -1159,7 +988,7 @@ int dlm_lowcomms_start(void)
if (error)
goto fail_unlisten;
- error = daemons_start();
+ error = work_start();
if (error)
goto fail_unlisten;
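Across the TCP conversion the queueing idiom is uniform: a per-connection flag bit (CF_READ_PENDING, CF_WRITE_PENDING, CF_CONNECT_PENDING) is set with test_and_set_bit() so the work item is queued at most once, and the work function clears the flag before draining. A compact sketch of that idiom, using invented demo_* names and a stubbed receive action:

#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/errno.h>

#define DEMO_READ_PENDING 1	/* plays the role of CF_READ_PENDING */

struct demo_conn {
	unsigned long flags;
	struct work_struct rwork;
};

/* Stand-in for receive_from_sock(); returning -EAGAIN means
 * "nothing more to do for now". */
static int demo_rx_action(struct demo_conn *con)
{
	return -EAGAIN;
}

/* Producer side (e.g. ->sk_data_ready): queue only if not already queued. */
static void demo_data_ready(struct demo_conn *con, struct workqueue_struct *wq)
{
	if (!test_and_set_bit(DEMO_READ_PENDING, &con->flags))
		queue_work(wq, &con->rwork);
}

/* Consumer side, mirroring process_recv_sockets(): clear the flag first,
 * so new data arriving during the drain re-queues the work, then loop
 * until the action reports an error. */
static void demo_recv_work(struct work_struct *work)
{
	struct demo_conn *con = container_of(work, struct demo_conn, rwork);
	int err;

	clear_bit(DEMO_READ_PENDING, &con->flags);
	do {
		err = demo_rx_action(con);
	} while (!err);
}

The old code tracked the same race with the waiting_requests counter; with the flag cleared up front, anything that arrives mid-drain simply queues the work again.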
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index c9b1c3d535f..a5126e0c68a 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen < sizeof(struct dlm_header))
break;
err = -E2BIG;
- if (msglen > dlm_config.buffer_size) {
+ if (msglen > dlm_config.ci_buffer_size) {
log_print("message size %d from %d too big, buf len %d",
msglen, nodeid, len);
break;
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen > sizeof(__tmp) &&
msg == (struct dlm_header *) __tmp) {
- msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+ msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (msg == NULL)
return ret;
}
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 4cc31be9cd9..6bfbd615380 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
rc->rc_type = type;
+ spin_lock(&ls->ls_recover_lock);
+ rc->rc_seq = ls->ls_recover_seq;
+ spin_unlock(&ls->ls_recover_lock);
+
*mh_ret = mh;
*rc_ret = rc;
return 0;
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
rf->rf_lsflags = ls->ls_exflags;
}
-static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
+static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
+ struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
+
+ if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
+ log_error(ls, "version mismatch: %x nodeid %d: %x",
+ DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
+ rc->rc_header.h_version);
+ return -EINVAL;
+ }
+
if (rf->rf_lvblen != ls->ls_lvblen ||
rf->rf_lsflags != ls->ls_exflags) {
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
goto out;
allow_sync_reply(ls, &rc->rc_id);
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
+ memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
log_debug(ls, "remote node %d not ready", nodeid);
rc->rc_result = 0;
} else
- error = check_config(ls, (struct rcom_config *) rc->rc_buf,
- nodeid);
+ error = check_config(ls, rc, nodeid);
/* the caller looks at rc_result for the remote recovery status */
out:
return error;
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
if (error)
return;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = dlm_recover_status(ls);
make_config(ls, (struct rcom_config *) rc->rc_buf);
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
if (nodeid == dlm_our_nodeid()) {
dlm_copy_master_names(ls, last_name, last_len,
ls->ls_recover_buf + len,
- dlm_config.buffer_size - len, nodeid);
+ dlm_config.ci_buffer_size - len, nodeid);
goto out;
}
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
memcpy(rc->rc_buf, last_name, last_len);
allow_sync_reply(ls, &rc->rc_id);
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
+ memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
- int error, inlen, outlen;
- int nodeid = rc_in->rc_header.h_nodeid;
- uint32_t status = dlm_recover_status(ls);
-
- /*
- * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
- * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
- * It could only happen in rare cases where we get a late NAMES
- * message from a previous instance of recovery.
- */
-
- if (!(status & DLM_RS_NODES)) {
- log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
- return;
- }
+ int error, inlen, outlen, nodeid;
nodeid = rc_in->rc_header.h_nodeid;
inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
- outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom);
+ outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
nodeid);
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
ret_nodeid = error;
rc->rc_result = ret_nodeid;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
- uint32_t status = dlm_recover_status(ls);
-
- if (!(status & DLM_RS_DIR)) {
- log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
- rc_in->rc_header.h_nodeid);
- return;
- }
-
dlm_recover_process_copy(ls, rc_in);
}
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
rc->rc_type = DLM_RCOM_STATUS_REPLY;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = -ESRCH;
rf = (struct rcom_config *) rc->rc_buf;
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
return 0;
}
+static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+ uint64_t seq;
+ int rv = 0;
+
+ switch (rc->rc_type) {
+ case DLM_RCOM_STATUS_REPLY:
+ case DLM_RCOM_NAMES_REPLY:
+ case DLM_RCOM_LOOKUP_REPLY:
+ case DLM_RCOM_LOCK_REPLY:
+ spin_lock(&ls->ls_recover_lock);
+ seq = ls->ls_recover_seq;
+ spin_unlock(&ls->ls_recover_lock);
+ if (rc->rc_seq_reply != seq) {
+ log_debug(ls, "ignoring old reply %x from %d "
+ "seq_reply %llx expect %llx",
+ rc->rc_type, rc->rc_header.h_nodeid,
+ (unsigned long long)rc->rc_seq_reply,
+ (unsigned long long)seq);
+ rv = 1;
+ }
+ }
+ return rv;
+}
+
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
}
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
- log_error(ls, "ignoring recovery message %x from %d",
+ log_debug(ls, "ignoring recovery message %x from %d",
rc->rc_type, nodeid);
goto out;
}
+ if (is_old_reply(ls, rc))
+ goto out;
+
if (nodeid != rc->rc_header.h_nodeid) {
log_error(ls, "bad rcom nodeid %d from %d",
rc->rc_header.h_nodeid, nodeid);
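The rcom.c changes stamp every recovery request with the lockspace's ls_recover_seq, echo it back in rc_seq_reply, and drop any reply whose sequence number no longer matches, so replies left over from an aborted recovery pass cannot confuse the current one. A reduced sketch of that filter with hypothetical structure names:

#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_ls {
	spinlock_t recover_lock;
	u64 recover_seq;	/* bumped whenever recovery restarts */
};

struct demo_reply {
	u64 seq_reply;		/* copied by the peer from the request's seq */
};

/* Nonzero means "stale reply from an earlier recovery, ignore it",
 * mirroring is_old_reply() above. */
static int demo_is_old_reply(struct demo_ls *ls, const struct demo_reply *rc)
{
	u64 seq;

	spin_lock(&ls->recover_lock);
	seq = ls->recover_seq;
	spin_unlock(&ls->recover_lock);

	return rc->seq_reply != seq;
}

The same spinlock guards the read in create_rcom(), so a request and the replies it provokes always agree on which recovery generation they belong to.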
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index cf9f6831bab..c2cc7694cd1 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -44,7 +44,7 @@
static void dlm_wait_timer_fn(unsigned long data)
{
struct dlm_ls *ls = (struct dlm_ls *) data;
- mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ));
+ mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
wake_up(&ls->ls_wait_general);
}
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
init_timer(&ls->ls_timer);
ls->ls_timer.function = dlm_wait_timer_fn;
ls->ls_timer.data = (long) ls;
- ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ);
+ ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
add_timer(&ls->ls_timer);
wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls)
if (dlm_no_directory(ls))
count += recover_master_static(r);
- else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) {
+ else if (!is_master(r) &&
+ (dlm_is_removed(ls, r->res_nodeid) ||
+ rsb_flag(r, RSB_NEW_MASTER))) {
recover_master(r);
count++;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 650536aa513..3cb636d6024 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_members(ls, rv, &neg);
if (error) {
- log_error(ls, "recover_members failed %d", error);
+ log_debug(ls, "recover_members failed %d", error);
goto fail;
}
start = jiffies;
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory(ls);
if (error) {
- log_error(ls, "recover_directory failed %d", error);
+ log_debug(ls, "recover_directory failed %d", error);
goto fail;
}
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory_wait(ls);
if (error) {
- log_error(ls, "recover_directory_wait failed %d", error);
+ log_debug(ls, "recover_directory_wait failed %d", error);
goto fail;
}
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_masters(ls);
if (error) {
- log_error(ls, "recover_masters failed %d", error);
+ log_debug(ls, "recover_masters failed %d", error);
goto fail;
}
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks(ls);
if (error) {
- log_error(ls, "recover_locks failed %d", error);
+ log_debug(ls, "recover_locks failed %d", error);
goto fail;
}
error = dlm_recover_locks_wait(ls);
if (error) {
- log_error(ls, "recover_locks_wait failed %d", error);
+ log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks_wait(ls);
if (error) {
- log_error(ls, "recover_locks_wait failed %d", error);
+ log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
}
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_set_recover_status(ls, DLM_RS_DONE);
error = dlm_recover_done_wait(ls);
if (error) {
- log_error(ls, "recover_done_wait failed %d", error);
+ log_debug(ls, "recover_done_wait failed %d", error);
goto fail;
}
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = enable_locking(ls, rv->seq);
if (error) {
- log_error(ls, "enable_locking failed %d", error);
+ log_debug(ls, "enable_locking failed %d", error);
goto fail;
}
error = dlm_process_requestqueue(ls);
if (error) {
- log_error(ls, "process_requestqueue failed %d", error);
+ log_debug(ls, "process_requestqueue failed %d", error);
goto fail;
}
error = dlm_recover_waiters_post(ls);
if (error) {
- log_error(ls, "recover_waiters_post failed %d", error);
+ log_debug(ls, "recover_waiters_post failed %d", error);
goto fail;
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index c37e93e4f2d..d378b7fe2a1 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
remove_ownqueue = 1;
+ /* unlocks or cancels of waiting requests need to be removed from the
+ proc's unlocking list; again, there must be a better way... */
+
+ if (ua->lksb.sb_status == -DLM_EUNLOCK ||
+ (ua->lksb.sb_status == -DLM_ECANCEL &&
+ lkb->lkb_grmode == DLM_LOCK_IV))
+ remove_ownqueue = 1;
+
/* We want to copy the lvb to userspace when the completion
ast is read if the status is 0, the lock has an lvb and
lvb_ops says we should. We could probably have set_lvb_lock()
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file)
proc->lockspace = ls->ls_local_handle;
INIT_LIST_HEAD(&proc->asts);
INIT_LIST_HEAD(&proc->locks);
+ INIT_LIST_HEAD(&proc->unlocking);
spin_lock_init(&proc->asts_spin);
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 767197db994..963889cf674 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
rc->rc_type = cpu_to_le32(rc->rc_type);
rc->rc_result = cpu_to_le32(rc->rc_result);
rc->rc_id = cpu_to_le64(rc->rc_id);
+ rc->rc_seq = cpu_to_le64(rc->rc_seq);
+ rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
if (type == DLM_RCOM_LOCK)
rcom_lock_out((struct rcom_lock *) rc->rc_buf);
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
rc->rc_type = le32_to_cpu(rc->rc_type);
rc->rc_result = le32_to_cpu(rc->rc_result);
rc->rc_id = le64_to_cpu(rc->rc_id);
+ rc->rc_seq = le64_to_cpu(rc->rc_seq);
+ rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
if (rc->rc_type == DLM_RCOM_LOCK)
rcom_lock_in((struct rcom_lock *) rc->rc_buf);
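The util.c hunks extend dlm_rcom_out()/dlm_rcom_in() so the two new sequence fields are byte-swapped along with the rest of the header, keeping the wire format little-endian on any host. A tiny sketch of that in-place swap, with a made-up two-field header:

#include <linux/types.h>
#include <asm/byteorder.h>

struct demo_hdr {
	__u64 seq;
	__u64 seq_reply;
};

/* CPU order -> on-wire little-endian, as dlm_rcom_out() does just
 * before the buffer is handed to lowcomms. Fields are swapped in place,
 * as in the original. */
static void demo_hdr_out(struct demo_hdr *h)
{
	h->seq = cpu_to_le64(h->seq);
	h->seq_reply = cpu_to_le64(h->seq_reply);
}

/* on-wire little-endian -> CPU order, as dlm_rcom_in() does on receive. */
static void demo_hdr_in(struct demo_hdr *h)
{
	h->seq = le64_to_cpu(h->seq);
	h->seq_reply = le64_to_cpu(h->seq_reply);
}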
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c403b66ec83..a4b142a6a2c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -251,8 +251,19 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
WARN_ON(inode->i_state & I_WILL_FREE);
if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) {
+ struct address_space *mapping = inode->i_mapping;
+ int ret;
+
list_move(&inode->i_list, &inode->i_sb->s_dirty);
- return 0;
+
+ /*
+ * Even if we don't actually write the inode itself here,
+ * we can at least start some of the data writeout..
+ */
+ spin_unlock(&inode_lock);
+ ret = do_writepages(mapping, wbc);
+ spin_lock(&inode_lock);
+ return ret;
}
/*
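The fs-writeback.c change means a WB_SYNC_NONE pass no longer returns empty-handed when it finds the inode locked: it drops inode_lock (still a global spinlock in this kernel), starts the data writeout with do_writepages(), and retakes the lock before returning. The shape worth noting is the unlock/call/relock sandwich around a sleeping call made from under a spinlock; a stripped-down sketch with invented demo_* names:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_inode_lock);

struct demo_mapping;		/* stand-in for struct address_space */

/* Stand-in for do_writepages(): may sleep, so it must never be called
 * with demo_inode_lock held. */
static int demo_writepages(struct demo_mapping *mapping)
{
	return 0;
}

/* Called with demo_inode_lock held, like __writeback_single_inode():
 * drop the lock across the sleeping call, retake it before returning
 * so the caller's locking assumptions still hold. */
static int demo_push_data(struct demo_mapping *mapping)
{
	int ret;

	spin_unlock(&demo_inode_lock);
	ret = demo_writepages(mapping);
	spin_lock(&demo_inode_lock);
	return ret;
}

Anything the caller cached about the inode's state before the call has to be revalidated afterwards, since other CPUs may have changed it while the lock was dropped.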
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 8c58bd45399..1794305f9ed 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -193,8 +193,12 @@ static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags,
static void fuse_ctl_kill_sb(struct super_block *sb)
{
+ struct fuse_conn *fc;
+
mutex_lock(&fuse_mutex);
fuse_control_sb = NULL;
+ list_for_each_entry(fc, &fuse_conn_list, entry)
+ fc->ctl_ndents = 0;
mutex_unlock(&fuse_mutex);
kill_litter_super(sb);
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 6a2ffa2db14..de8e64c03f7 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,44 +4,43 @@ config GFS2_FS
select FS_POSIX_ACL
select CRC32
help
- A cluster filesystem.
+ A cluster filesystem.
- Allows a cluster of computers to simultaneously use a block device
- that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
- and writes to the block device like a local filesystem, but also uses
- a lock module to allow the computers coordinate their I/O so
- filesystem consistency is maintained. One of the nifty features of
- GFS is perfect consistency -- changes made to the filesystem on one
- machine show up immediately on all other machines in the cluster.
+ Allows a cluster of computers to simultaneously use a block device
+ that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
+ and writes to the block device like a local filesystem, but also uses
+ a lock module to allow the computers to coordinate their I/O so
+ filesystem consistency is maintained. One of the nifty features of
+ GFS is perfect consistency -- changes made to the filesystem on one
+ machine show up immediately on all other machines in the cluster.
- To use the GFS2 filesystem, you will need to enable one or more of
- the below locking modules. Documentation and utilities for GFS2 can
- be found here: http://sources.redhat.com/cluster
+ To use the GFS2 filesystem, you will need to enable one or more of
+ the below locking modules. Documentation and utilities for GFS2 can
+ be found here: http://sources.redhat.com/cluster
config GFS2_FS_LOCKING_NOLOCK
tristate "GFS2 \"nolock\" locking module"
depends on GFS2_FS
help
- Single node locking module for GFS2.
+ Single node locking module for GFS2.
- Use this module if you want to use GFS2 on a single node without
- its clustering features. You can still take advantage of the
- large file support, and upgrade to running a full cluster later on
- if required.
+ Use this module if you want to use GFS2 on a single node without
+ its clustering features. You can still take advantage of the
+ large file support, and upgrade to running a full cluster later on
+ if required.
- If you will only be using GFS2 in cluster mode, you do not need this
- module.
+ If you will only be using GFS2 in cluster mode, you do not need this
+ module.
config GFS2_FS_LOCKING_DLM
tristate "GFS2 DLM locking module"
- depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n)
+ depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
select IP_SCTP if DLM_SCTP
select CONFIGFS_FS
select DLM
help
- Multiple node locking module for GFS2
-
- Most users of GFS2 will require this module. It provides the locking
- interface between GFS2 and the DLM, which is required to use GFS2
- in a cluster environment.
+ Multiple node locking module for GFS2
+ Most users of GFS2 will require this module. It provides the locking
+ interface between GFS2 and the DLM, which is required to use GFS2
+ in a cluster environment.
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8240c1ff94f..113f6c9110c 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
gfs2_free_data(ip, bstart, blen);
}
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, dibh->b_data);
@@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
}
ip->i_di.di_size = size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
@@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (gfs2_is_stuffed(ip)) {
ip->i_di.di_size = size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (!error) {
ip->i_di.di_size = size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip)
ip->i_num.no_addr;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
}
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 0fdcb7713cd..c93ca8f361b 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
if (ip->i_di.di_size < offset + size)
ip->i_di.di_size = offset + size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -229,7 +229,7 @@ out:
if (ip->i_di.di_size < offset + copied)
ip->i_di.di_size = offset + copied;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b)
*/
static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
- void *opaque, gfs2_filldir_t filldir,
+ void *opaque, filldir_t filldir,
const struct gfs2_dirent **darr, u32 entries,
int *copied)
{
const struct gfs2_dirent *dent, *dent_next;
- struct gfs2_inum_host inum;
u64 off, off_next;
unsigned int x, y;
int run = 0;
@@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
*offset = off;
}
- gfs2_inum_in(&inum, (char *)&dent->de_inum);
-
error = filldir(opaque, (const char *)(dent + 1),
be16_to_cpu(dent->de_name_len),
- off, &inum,
+ off, be64_to_cpu(dent->de_inum.no_addr),
be16_to_cpu(dent->de_type));
if (error)
return 1;
@@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
}
static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
- gfs2_filldir_t filldir, int *copied,
- unsigned *depth, u64 leaf_no)
+ filldir_t filldir, int *copied, unsigned *depth,
+ u64 leaf_no)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *bh;
@@ -1343,7 +1340,7 @@ out:
*/
static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
- gfs2_filldir_t filldir)
+ filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1402,7 +1399,7 @@ out:
}
int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
- gfs2_filldir_t filldir)
+ filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
struct dirent_gather g;
@@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
break;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_entries++;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
error = 0;
@@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
gfs2_consist_inode(dip);
gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_di.di_entries--;
- dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
+ dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
mark_inode_dirty(&dip->i_inode);
@@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
gfs2_trans_add_bh(dip->i_gl, bh, 1);
}
- dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
+ dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
return 0;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index b21b33668a5..48fe89046bb 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,30 +16,13 @@ struct inode;
struct gfs2_inode;
struct gfs2_inum;
-/**
- * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
- * @opaque: opaque data used by the function
- * @name: the name of the directory entry
- * @length: the length of the name
- * @offset: the entry's offset in the directory
- * @inum: the inode number the entry points to
- * @type: the type of inode the entry points to
- *
- * Returns: 0 on success, 1 if buffer full
- */
-
-typedef int (*gfs2_filldir_t) (void *opaque,
- const char *name, unsigned int length,
- u64 offset,
- struct gfs2_inum_host *inum, unsigned int type);
-
int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
struct gfs2_inum_host *inum, unsigned int *type);
int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
const struct gfs2_inum_host *inum, unsigned int type);
int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
-int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
- gfs2_filldir_t filldir);
+int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
+ filldir_t filldir);
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
struct gfs2_inum_host *new_inum, unsigned int new_type);
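With these dir.c/dir.h hunks gfs2_dir_read() takes the VFS filldir_t callback directly instead of the private gfs2_filldir_t, so the directory code now passes the inode number as a plain u64 (de_inum.no_addr) rather than a struct gfs2_inum_host. Below is a minimal callback with the argument order the diff uses at the call site; the prototype is assumed to match the in-tree filldir_t of this kernel (see linux/fs.h) rather than restated from it.

#include <linux/types.h>

/* opaque cookie, name + length, directory offset, inode number, DT_* type */
static int demo_filldir(void *opaque, const char *name, int namelen,
			loff_t offset, u64 ino, unsigned int dtype)
{
	/* Returning nonzero stops the walk: do_filldir_main() treats it as
	 * "buffer full" and records the offset to resume from. */
	return 0;
}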
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index ebebbdcd705..0c83c7f4dda 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
(er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 438146904b5..6618c119025 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -19,6 +19,8 @@
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/lm_interface.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -33,11 +35,6 @@
#include "super.h"
#include "util.h"
-struct greedy {
- struct gfs2_holder gr_gh;
- struct delayed_work gr_work;
-};
-
struct gfs2_gl_hash_bucket {
struct hlist_head hb_list;
};
@@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl);
static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int dump_glock(struct gfs2_glock *gl);
static int dump_inode(struct gfs2_inode *ip);
+static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
+static void gfs2_glock_drop_th(struct gfs2_glock *gl);
+static DECLARE_RWSEM(gfs2_umount_flush_sem);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -213,30 +213,6 @@ out:
}
/**
- * queue_empty - check to see if a glock's queue is empty
- * @gl: the glock
- * @head: the head of the queue to check
- *
- * This function protects the list in the event that a process already
- * has a holder on the list and is adding a second holder for itself.
- * The glmutex lock is what generally prevents processes from working
- * on the same glock at once, but the special case of adding a second
- * holder for yourself ("recursive" locking) doesn't involve locking
- * glmutex, making the spin lock necessary.
- *
- * Returns: 1 if the queue is empty
- */
-
-static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
-{
- int empty;
- spin_lock(&gl->gl_spin);
- empty = list_empty(head);
- spin_unlock(&gl->gl_spin);
- return empty;
-}
-
-/**
* search_bucket() - Find struct gfs2_glock by lock number
* @bucket: the bucket to search
* @name: The lock name
@@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
gh->gh_flags = flags;
gh->gh_error = 0;
gh->gh_iflags = 0;
- init_completion(&gh->gh_wait);
-
- if (gh->gh_state == LM_ST_EXCLUSIVE)
- gh->gh_flags |= GL_LOCAL_EXCL;
-
gfs2_glock_hold(gl);
}
@@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
{
gh->gh_state = state;
gh->gh_flags = flags;
- if (gh->gh_state == LM_ST_EXCLUSIVE)
- gh->gh_flags |= GL_LOCAL_EXCL;
-
gh->gh_iflags &= 1 << HIF_ALLOCED;
gh->gh_ip = (unsigned long)__builtin_return_address(0);
}
@@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh)
kfree(gh);
}
+static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh)
+{
+ if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) {
+ gfs2_holder_put(gh);
+ return;
+ }
+ clear_bit(HIF_WAIT, &gh->gh_iflags);
+ smp_mb();
+ wake_up_bit(&gh->gh_iflags, HIF_WAIT);
+}
+
+static int holder_wait(void *word)
+{
+ schedule();
+ return 0;
+}
+
+static void wait_on_holder(struct gfs2_holder *gh)
+{
+ might_sleep();
+ wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+}
+
/**
* rq_mutex - process a mutex request in the queue
* @gh: the glock holder
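The hunk above replaces the per-holder struct completion with a HIF_WAIT flag bit: the waiter sets the bit before handing off its request and sleeps in wait_on_bit(), while the completer clears the bit, issues a full memory barrier and calls wake_up_bit(). A generic sketch of that pairing follows; the bit number and demo_* names are invented, and it uses the wait_on_bit() form of this kernel generation, which still takes an action callback.

#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/sched.h>

#define DEMO_WAIT_BIT 0

static unsigned long demo_flags;

static int demo_wait_action(void *word)
{
	schedule();
	return 0;
}

/* Waiter: arm the bit before the request becomes visible to the
 * completer, then sleep until it is cleared (cf. wait_on_holder()). */
static void demo_wait(void)
{
	set_bit(DEMO_WAIT_BIT, &demo_flags);
	/* ... make the request visible to whoever will complete it ... */
	might_sleep();
	wait_on_bit(&demo_flags, DEMO_WAIT_BIT, demo_wait_action,
		    TASK_UNINTERRUPTIBLE);
}

/* Completer: the barrier orders the clear against the waitqueue check
 * inside wake_up_bit(), as gfs2_holder_dispose_or_wake() does above. */
static void demo_wake(void)
{
	clear_bit(DEMO_WAIT_BIT, &demo_flags);
	smp_mb();
	wake_up_bit(&demo_flags, DEMO_WAIT_BIT);
}

This also lets the DEALLOC case ("nobody is waiting, just free the holder") and the normal wake share one helper, gfs2_holder_dispose_or_wake().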
@@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh)
list_del_init(&gh->gh_list);
/* gh->gh_error never examined. */
set_bit(GLF_LOCK, &gl->gl_flags);
- complete(&gh->gh_wait);
+ clear_bit(HIF_WAIT, &gh->gh_iflags);
+ smp_mb();
+ wake_up_bit(&gh->gh_iflags, HIF_WAIT);
return 1;
}
@@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
if (list_empty(&gl->gl_holders)) {
@@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh)
gfs2_reclaim_glock(sdp);
}
- glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
+ gfs2_glock_xmote_th(gh);
spin_lock(&gl->gl_spin);
}
return 1;
@@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh)
set_bit(GLF_LOCK, &gl->gl_flags);
} else {
struct gfs2_holder *next_gh;
- if (gh->gh_flags & GL_LOCAL_EXCL)
+ if (gh->gh_state == LM_ST_EXCLUSIVE)
return 1;
next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
gh_list);
- if (next_gh->gh_flags & GL_LOCAL_EXCL)
+ if (next_gh->gh_state == LM_ST_EXCLUSIVE)
return 1;
}
@@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh)
gh->gh_error = 0;
set_bit(HIF_HOLDER, &gh->gh_iflags);
- complete(&gh->gh_wait);
+ gfs2_holder_dispose_or_wake(gh);
return 0;
}
@@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh)
static int rq_demote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
if (!list_empty(&gl->gl_holders))
return 1;
@@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh)
list_del_init(&gh->gh_list);
gh->gh_error = 0;
spin_unlock(&gl->gl_spin);
- if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
- gfs2_holder_put(gh);
- else
- complete(&gh->gh_wait);
+ gfs2_holder_dispose_or_wake(gh);
spin_lock(&gl->gl_spin);
} else {
gl->gl_req_gh = gh;
@@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh)
if (gh->gh_state == LM_ST_UNLOCKED ||
gl->gl_state != LM_ST_EXCLUSIVE)
- glops->go_drop_th(gl);
+ gfs2_glock_drop_th(gl);
else
- glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
+ gfs2_glock_xmote_th(gh);
spin_lock(&gl->gl_spin);
}
@@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh)
}
/**
- * rq_greedy - process a queued request to drop greedy status
- * @gh: the glock holder
- *
- * Returns: 1 if the queue is blocked
- */
-
-static int rq_greedy(struct gfs2_holder *gh)
-{
- struct gfs2_glock *gl = gh->gh_gl;
-
- list_del_init(&gh->gh_list);
- /* gh->gh_error never examined. */
- clear_bit(GLF_GREEDY, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
-
- gfs2_holder_uninit(gh);
- kfree(container_of(gh, struct greedy, gr_gh));
-
- spin_lock(&gl->gl_spin);
-
- return 0;
-}
-
-/**
* run_queue - process holder structures on a glock
* @gl: the glock
*
@@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl)
if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
blocked = rq_demote(gh);
- else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
- blocked = rq_greedy(gh);
else
gfs2_assert_warn(gl->gl_sbd, 0);
@@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
gfs2_holder_init(gl, 0, 0, &gh);
set_bit(HIF_MUTEX, &gh.gh_iflags);
+ if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags))
+ BUG();
spin_lock(&gl->gl_spin);
if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
@@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
} else {
gl->gl_owner = current;
gl->gl_ip = (unsigned long)__builtin_return_address(0);
- complete(&gh.gh_wait);
+ clear_bit(HIF_WAIT, &gh.gh_iflags);
+ smp_mb();
+ wake_up_bit(&gh.gh_iflags, HIF_WAIT);
}
spin_unlock(&gl->gl_spin);
- wait_for_completion(&gh.gh_wait);
+ wait_on_holder(&gh);
gfs2_holder_uninit(&gh);
}
@@ -774,6 +740,7 @@ restart:
return;
set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
+ set_bit(HIF_WAIT, &new_gh->gh_iflags);
goto restart;
}
@@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
int op_done = 1;
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
state_change(gl, ret & LM_OUT_ST_MASK);
@@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
gfs2_glock_put(gl);
- if (gh) {
- if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
- gfs2_holder_put(gh);
- else
- complete(&gh->gh_wait);
- }
+ if (gh)
+ gfs2_holder_dispose_or_wake(gh);
}
/**
@@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
*
*/
-void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
+void gfs2_glock_xmote_th(struct gfs2_holder *gh)
{
+ struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
+ int flags = gh->gh_flags;
+ unsigned state = gh->gh_state;
const struct gfs2_glock_operations *glops = gl->gl_ops;
int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
LM_FLAG_NOEXP | LM_FLAG_ANY |
LM_FLAG_PRIORITY);
unsigned int lck_ret;
+ if (glops->go_xmote_th)
+ glops->go_xmote_th(gl);
+
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
gfs2_assert_warn(sdp, state != gl->gl_state);
- if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
- glops->go_sync(gl);
-
gfs2_glock_hold(gl);
gl->gl_req_bh = xmote_bh;
@@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh = gl->gl_req_gh;
- clear_bit(GLF_PREFETCH, &gl->gl_flags);
-
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, !ret);
state_change(gl, LM_ST_UNLOCKED);
@@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
gfs2_glock_put(gl);
- if (gh) {
- if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
- gfs2_holder_put(gh);
- else
- complete(&gh->gh_wait);
- }
+ if (gh)
+ gfs2_holder_dispose_or_wake(gh);
}
/**
@@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
*
*/
-void gfs2_glock_drop_th(struct gfs2_glock *gl)
+static void gfs2_glock_drop_th(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned int ret;
+ if (glops->go_drop_th)
+ glops->go_drop_th(gl);
+
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
- if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
- glops->go_sync(gl);
-
gfs2_glock_hold(gl);
gl->gl_req_bh = drop_bh;
@@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh)
if (gh->gh_flags & LM_FLAG_PRIORITY)
do_cancels(gh);
- wait_for_completion(&gh->gh_wait);
-
+ wait_on_holder(gh);
if (gh->gh_error)
return gh->gh_error;
@@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh)
struct gfs2_holder *existing;
BUG_ON(!gh->gh_owner);
+ if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
+ BUG();
existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
if (existing) {
@@ -1227,8 +1188,6 @@ restart:
}
}
- clear_bit(GLF_PREFETCH, &gl->gl_flags);
-
return error;
}
@@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
}
/**
- * gfs2_glock_prefetch - Try to prefetch a glock
- * @gl: the glock
- * @state: the state to prefetch in
- * @flags: flags passed to go_xmote_th()
- *
- */
-
-static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
- int flags)
-{
- const struct gfs2_glock_operations *glops = gl->gl_ops;
-
- spin_lock(&gl->gl_spin);
-
- if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) ||
- !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) ||
- !list_empty(&gl->gl_waiters3) ||
- relaxed_state_ok(gl->gl_state, state, flags)) {
- spin_unlock(&gl->gl_spin);
- return;
- }
-
- set_bit(GLF_PREFETCH, &gl->gl_flags);
- set_bit(GLF_LOCK, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
-
- glops->go_xmote_th(gl, state, flags);
-}
-
-static void greedy_work(struct work_struct *work)
-{
- struct greedy *gr = container_of(work, struct greedy, gr_work.work);
- struct gfs2_holder *gh = &gr->gr_gh;
- struct gfs2_glock *gl = gh->gh_gl;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
-
- clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
-
- if (glops->go_greedy)
- glops->go_greedy(gl);
-
- spin_lock(&gl->gl_spin);
-
- if (list_empty(&gl->gl_waiters2)) {
- clear_bit(GLF_GREEDY, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
- gfs2_holder_uninit(gh);
- kfree(gr);
- } else {
- gfs2_glock_hold(gl);
- list_add_tail(&gh->gh_list, &gl->gl_waiters2);
- run_queue(gl);
- spin_unlock(&gl->gl_spin);
- gfs2_glock_put(gl);
- }
-}
-
-/**
- * gfs2_glock_be_greedy -
- * @gl:
- * @time:
- *
- * Returns: 0 if go_greedy will be called, 1 otherwise
- */
-
-int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
-{
- struct greedy *gr;
- struct gfs2_holder *gh;
-
- if (!time || gl->gl_sbd->sd_args.ar_localcaching ||
- test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
- return 1;
-
- gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
- if (!gr) {
- clear_bit(GLF_GREEDY, &gl->gl_flags);
- return 1;
- }
- gh = &gr->gr_gh;
-
- gfs2_holder_init(gl, 0, 0, gh);
- set_bit(HIF_GREEDY, &gh->gh_iflags);
- INIT_DELAYED_WORK(&gr->gr_work, greedy_work);
-
- set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
- schedule_delayed_work(&gr->gr_work, time);
-
- return 0;
-}
-
-/**
* gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
* @gh: the holder structure
*
@@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b)
return 1;
if (a->ln_number < b->ln_number)
return -1;
- if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
- return 1;
- if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL))
- return 1;
+ BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
return 0;
}
@@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
}
/**
- * gfs2_glock_prefetch_num - prefetch a glock based on lock number
- * @sdp: the filesystem
- * @number: the lock number
- * @glops: the glock operations for the type of glock
- * @state: the state to acquire the glock in
- * @flags: modifier flags for the aquisition
- *
- * Returns: errno
- */
-
-void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
- const struct gfs2_glock_operations *glops,
- unsigned int state, int flags)
-{
- struct gfs2_glock *gl;
- int error;
-
- if (atomic_read(&sdp->sd_reclaim_count) <
- gfs2_tune_get(sdp, gt_reclaim_limit)) {
- error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
- if (!error) {
- gfs2_glock_prefetch(gl, state, flags);
- gfs2_glock_put(gl);
- }
- }
-}
-
-/**
* gfs2_lvb_hold - attach a LVB from a glock
* @gl: The glock in question
*
@@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
if (!gl)
return;
- if (gl->gl_ops->go_callback)
- gl->gl_ops->go_callback(gl, state);
handle_callback(gl, state);
spin_lock(&gl->gl_spin);
@@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
struct lm_async_cb *async = data;
struct gfs2_glock *gl;
+ down_read(&gfs2_umount_flush_sem);
gl = gfs2_glock_find(sdp, &async->lc_name);
if (gfs2_assert_warn(sdp, gl))
return;
if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
gl->gl_req_bh(gl, async->lc_ret);
gfs2_glock_put(gl);
+ up_read(&gfs2_umount_flush_sem);
return;
}
@@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
static int demote_ok(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
const struct gfs2_glock_operations *glops = gl->gl_ops;
int demote = 1;
if (test_bit(GLF_STICKY, &gl->gl_flags))
demote = 0;
- else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
- demote = time_after_eq(jiffies, gl->gl_stamp +
- gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
else if (glops->go_demote_ok)
demote = glops->go_demote_ok(gl);
@@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
atomic_inc(&sdp->sd_reclaimed);
if (gfs2_glmutex_trylock(gl)) {
- if (queue_empty(gl, &gl->gl_holders) &&
+ if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED);
gfs2_glmutex_unlock(gl);
@@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl)
return;
if (gfs2_glmutex_trylock(gl)) {
- if (queue_empty(gl, &gl->gl_holders) &&
+ if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
goto out_schedule;
gfs2_glmutex_unlock(gl);
@@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl)
}
if (gfs2_glmutex_trylock(gl)) {
- if (queue_empty(gl, &gl->gl_holders) &&
+ if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED)
handle_callback(gl, LM_ST_UNLOCKED);
gfs2_glmutex_unlock(gl);
@@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
t = jiffies;
}
+ down_write(&gfs2_umount_flush_sem);
invalidate_inodes(sdp->sd_vfs);
+ up_write(&gfs2_umount_flush_sem);
msleep(10);
}
}
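The glock.c changes above retire the per-holder struct completion: a holder now carries a HIF_WAIT flag bit, the waiter sleeps on it with wait_on_bit(), and whoever grants or disposes of the holder clears the bit and calls wake_up_bit(). A condensed sketch of that pairing follows; holder_block() and holder_release() are illustrative names, not functions from the patch.

static int holder_wait(void *word)
{
        schedule();                     /* sleep until woken by the bit waker */
        return 0;
}

/* Waiter side: block until HIF_WAIT is cleared. */
static void holder_block(struct gfs2_holder *gh)
{
        might_sleep();
        wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait,
                    TASK_UNINTERRUPTIBLE);
}

/* Waker side: the barrier orders the clear before the wake-up so a
 * concurrent waiter cannot miss it. */
static void holder_release(struct gfs2_holder *gh)
{
        clear_bit(HIF_WAIT, &gh->gh_iflags);
        smp_mb();
        wake_up_bit(&gh->gh_iflags, HIF_WAIT);
}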
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index fb39108fc05..f50e40ceca4 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -20,7 +20,6 @@
#define LM_FLAG_ANY 0x00000008
#define LM_FLAG_PRIORITY 0x00000010 */
-#define GL_LOCAL_EXCL 0x00000020
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
@@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
void gfs2_holder_reinit(unsigned int state, unsigned flags,
struct gfs2_holder *gh);
void gfs2_holder_uninit(struct gfs2_holder *gh);
-
-void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
-void gfs2_glock_drop_th(struct gfs2_glock *gl);
-
int gfs2_glock_nq(struct gfs2_holder *gh);
int gfs2_glock_poll(struct gfs2_holder *gh);
int gfs2_glock_wait(struct gfs2_holder *gh);
void gfs2_glock_dq(struct gfs2_holder *gh);
-int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
-
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
u64 number, const struct gfs2_glock_operations *glops,
@@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
-void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
- const struct gfs2_glock_operations *glops,
- unsigned int state, int flags);
-
/**
* gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
* @gl: the glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index b068d10bcb6..c4b0391b7aa 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
static void meta_go_sync(struct gfs2_glock *gl)
{
+ if (gl->gl_state != LM_ST_EXCLUSIVE)
+ return;
+
if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
gfs2_meta_sync(gl);
gfs2_ail_empty_gl(gl);
}
-
}
/**
@@ -142,6 +144,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
}
/**
+ * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
+ * @gl: the glock protecting the inode
+ *
+ */
+
+static void inode_go_sync(struct gfs2_glock *gl)
+{
+ struct gfs2_inode *ip = gl->gl_object;
+
+ if (ip && !S_ISREG(ip->i_inode.i_mode))
+ ip = NULL;
+
+ if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
+ gfs2_log_flush(gl->gl_sbd, gl);
+ if (ip)
+ filemap_fdatawrite(ip->i_inode.i_mapping);
+ gfs2_meta_sync(gl);
+ if (ip) {
+ struct address_space *mapping = ip->i_inode.i_mapping;
+ int error = filemap_fdatawait(mapping);
+ if (error == -ENOSPC)
+ set_bit(AS_ENOSPC, &mapping->flags);
+ else if (error)
+ set_bit(AS_EIO, &mapping->flags);
+ }
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
+ gfs2_ail_empty_gl(gl);
+ }
+}
+
+/**
* inode_go_xmote_th - promote/demote a glock
* @gl: the glock
* @state: the requested state
@@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
*
*/
-static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
- int flags)
+static void inode_go_xmote_th(struct gfs2_glock *gl)
{
if (gl->gl_state != LM_ST_UNLOCKED)
gfs2_pte_inval(gl);
- gfs2_glock_xmote_th(gl, state, flags);
+ if (gl->gl_state == LM_ST_EXCLUSIVE)
+ inode_go_sync(gl);
}
/**
@@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl)
static void inode_go_drop_th(struct gfs2_glock *gl)
{
gfs2_pte_inval(gl);
- gfs2_glock_drop_th(gl);
-}
-
-/**
- * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
- * @gl: the glock protecting the inode
- *
- */
-
-static void inode_go_sync(struct gfs2_glock *gl)
-{
- struct gfs2_inode *ip = gl->gl_object;
-
- if (ip && !S_ISREG(ip->i_inode.i_mode))
- ip = NULL;
-
- if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
- gfs2_log_flush(gl->gl_sbd, gl);
- if (ip)
- filemap_fdatawrite(ip->i_inode.i_mapping);
- gfs2_meta_sync(gl);
- if (ip) {
- struct address_space *mapping = ip->i_inode.i_mapping;
- int error = filemap_fdatawait(mapping);
- if (error == -ENOSPC)
- set_bit(AS_ENOSPC, &mapping->flags);
- else if (error)
- set_bit(AS_EIO, &mapping->flags);
- }
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- gfs2_ail_empty_gl(gl);
- }
+ if (gl->gl_state == LM_ST_EXCLUSIVE)
+ inode_go_sync(gl);
}
/**
@@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
- (gh->gh_flags & GL_LOCAL_EXCL))
+ (gh->gh_state == LM_ST_EXCLUSIVE))
error = gfs2_truncatei_resume(ip);
return error;
@@ -319,39 +322,6 @@ static void inode_go_unlock(struct gfs2_holder *gh)
}
/**
- * inode_greedy -
- * @gl: the glock
- *
- */
-
-static void inode_greedy(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- struct gfs2_inode *ip = gl->gl_object;
- unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
- unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
- unsigned int new_time;
-
- spin_lock(&ip->i_spin);
-
- if (time_after(ip->i_last_pfault + quantum, jiffies)) {
- new_time = ip->i_greedy + quantum;
- if (new_time > max)
- new_time = max;
- } else {
- new_time = ip->i_greedy - quantum;
- if (!new_time || new_time > max)
- new_time = 1;
- }
-
- ip->i_greedy = new_time;
-
- spin_unlock(&ip->i_spin);
-
- iput(&ip->i_inode);
-}
-
-/**
* rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
* @gl: the glock
*
@@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
*
*/
-static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
- int flags)
+static void trans_go_xmote_th(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
@@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
-
- gfs2_glock_xmote_th(gl, state, flags);
}
/**
@@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl)
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
-
- gfs2_glock_drop_th(gl);
}
/**
@@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl)
}
const struct gfs2_glock_operations gfs2_meta_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
+ .go_xmote_th = meta_go_sync,
+ .go_drop_th = meta_go_sync,
.go_type = LM_TYPE_META,
};
@@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_xmote_th = inode_go_xmote_th,
.go_xmote_bh = inode_go_xmote_bh,
.go_drop_th = inode_go_drop_th,
- .go_sync = inode_go_sync,
.go_inval = inode_go_inval,
.go_demote_ok = inode_go_demote_ok,
.go_lock = inode_go_lock,
.go_unlock = inode_go_unlock,
- .go_greedy = inode_greedy,
.go_type = LM_TYPE_INODE,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
- .go_sync = meta_go_sync,
.go_inval = meta_go_inval,
.go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
@@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = {
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_IOPEN,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_FLOCK,
};
const struct gfs2_glock_operations gfs2_nondisk_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_quota_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_demote_ok = quota_go_demote_ok,
.go_type = LM_TYPE_QUOTA,
};
const struct gfs2_glock_operations gfs2_journal_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_JOURNAL,
};
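With go_sync, go_greedy and go_callback removed, each glock type now fills in only the hooks it needs and the core checks every hook for NULL before calling it. A reduced, self-contained sketch of that optional-hook dispatch; the lk/lk_ops names are illustrative, not GFS2 types.

struct lk;

struct lk_ops {
        void (*pre_promote)(struct lk *lk);     /* optional, may be NULL */
        void (*pre_drop)(struct lk *lk);        /* optional, may be NULL */
};

struct lk {
        const struct lk_ops *ops;
};

static void promote(struct lk *lk)
{
        if (lk->ops->pre_promote)               /* core guards the hook */
                lk->ops->pre_promote(lk);
        /* generic promotion work would follow here */
}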
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 734421edae8..12c80fd28db 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -101,17 +101,14 @@ struct gfs2_bufdata {
};
struct gfs2_glock_operations {
- void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags);
+ void (*go_xmote_th) (struct gfs2_glock *gl);
void (*go_xmote_bh) (struct gfs2_glock *gl);
void (*go_drop_th) (struct gfs2_glock *gl);
void (*go_drop_bh) (struct gfs2_glock *gl);
- void (*go_sync) (struct gfs2_glock *gl);
void (*go_inval) (struct gfs2_glock *gl, int flags);
int (*go_demote_ok) (struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
- void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
- void (*go_greedy) (struct gfs2_glock *gl);
const int go_type;
};
@@ -120,7 +117,6 @@ enum {
HIF_MUTEX = 0,
HIF_PROMOTE = 1,
HIF_DEMOTE = 2,
- HIF_GREEDY = 3,
/* States */
HIF_ALLOCED = 4,
@@ -128,6 +124,7 @@ enum {
HIF_HOLDER = 6,
HIF_FIRST = 7,
HIF_ABORTED = 9,
+ HIF_WAIT = 10,
};
struct gfs2_holder {
@@ -140,17 +137,14 @@ struct gfs2_holder {
int gh_error;
unsigned long gh_iflags;
- struct completion gh_wait;
unsigned long gh_ip;
};
enum {
GLF_LOCK = 1,
GLF_STICKY = 2,
- GLF_PREFETCH = 3,
GLF_DIRTY = 5,
GLF_SKIP_WAITERS2 = 6,
- GLF_GREEDY = 7,
};
struct gfs2_glock {
@@ -167,7 +161,7 @@ struct gfs2_glock {
unsigned long gl_ip;
struct list_head gl_holders;
struct list_head gl_waiters1; /* HIF_MUTEX */
- struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
+ struct list_head gl_waiters2; /* HIF_DEMOTE */
struct list_head gl_waiters3; /* HIF_PROMOTE */
const struct gfs2_glock_operations *gl_ops;
@@ -236,7 +230,6 @@ struct gfs2_inode {
spinlock_t i_spin;
struct rw_semaphore i_rw_mutex;
- unsigned int i_greedy;
unsigned long i_last_pfault;
struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
@@ -418,17 +411,12 @@ struct gfs2_tune {
unsigned int gt_atime_quantum; /* Min secs between atime updates */
unsigned int gt_new_files_jdata;
unsigned int gt_new_files_directio;
- unsigned int gt_max_atomic_write; /* Split big writes into this size */
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
unsigned int gt_lockdump_size;
unsigned int gt_stall_secs; /* Detects trouble! */
unsigned int gt_complain_secs;
unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
unsigned int gt_entries_per_readdir;
- unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
- unsigned int gt_greedy_default;
- unsigned int gt_greedy_quantum;
- unsigned int gt_greedy_max;
unsigned int gt_statfs_quantum;
unsigned int gt_statfs_slow;
};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d122074c45e..0d6831a4056 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -287,10 +287,8 @@ out:
*
* Returns: errno
*/
-
int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
{
- struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
struct buffer_head *dibh;
u32 nlink;
int error;
@@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
else
drop_nlink(&ip->i_inode);
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
mark_inode_dirty(&ip->i_inode);
- if (ip->i_inode.i_nlink == 0) {
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder ri_gh, rg_gh;
-
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- goto out;
- error = -EIO;
- rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
- if (!rgd)
- goto out_norgrp;
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
- if (error)
- goto out_norgrp;
-
+ if (ip->i_inode.i_nlink == 0)
gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
- gfs2_glock_dq_uninit(&rg_gh);
-out_norgrp:
- gfs2_glock_dq_uninit(&ri_gh);
- }
-out:
+
return error;
}
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
{
struct qstr qstr;
+ struct inode *inode;
gfs2_str2qstr(&qstr, name);
- return gfs2_lookupi(dip, &qstr, 1, NULL);
+ inode = gfs2_lookupi(dip, &qstr, 1, NULL);
+ /* gfs2_lookupi has inconsistent callers: VFS-related
+ * routines expect NULL when no entry is found, while
+ * gfs2_lookup_simple callers expect -ENOENT
+ * and do not check for NULL.
+ */
+ if (inode == NULL)
+ return ERR_PTR(-ENOENT);
+ else
+ return inode;
}
@@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
* @is_root: If 1, ignore the caller's permissions
* @i_gh: An uninitialized holder for the new inode glock
*
- * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
- * @is_root is true.
+ * This can be called via the VFS filldir function when NFS is doing
+ * a readdirplus and the inode which it's intending to stat isn't
+ * already in cache. In this case we must not take the directory glock
+ * again, since the readdir call will have already taken that lock.
*
* Returns: errno
*/
@@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
struct gfs2_holder d_gh;
struct gfs2_inum_host inum;
unsigned int type;
- int error = 0;
+ int error;
struct inode *inode = NULL;
+ int unlock = 0;
if (!name->len || name->len > GFS2_FNAMESIZE)
return ERR_PTR(-ENAMETOOLONG);
@@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
return dir;
}
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
- if (error)
- return ERR_PTR(error);
+ if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) {
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+ if (error)
+ return ERR_PTR(error);
+ unlock = 1;
+ }
if (!is_root) {
error = permission(dir, MAY_EXEC, NULL);
@@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
inode = gfs2_inode_lookup(sb, &inum, type);
out:
- gfs2_glock_dq_uninit(&d_gh);
+ if (unlock)
+ gfs2_glock_dq_uninit(&d_gh);
if (error == -ENOENT)
return NULL;
- return inode;
+ return inode ? inode : ERR_PTR(error);
}
static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
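Since gfs2_lookup_simple() now maps the no-entry case to ERR_PTR(-ENOENT) instead of NULL, its callers only need the usual IS_ERR()/PTR_ERR() checks. A hypothetical caller is sketched below; the "jindex" name is only an example of a system file looked up this way.

static int example_lookup(struct inode *master_dir)
{
        struct inode *inode = gfs2_lookup_simple(master_dir, "jindex");

        if (IS_ERR(inode))
                return PTR_ERR(inode);  /* -ENOENT when the entry is absent */
        iput(inode);
        return 0;
}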
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
index effe4a337c1..e30673dd37e 100644
--- a/fs/gfs2/lm.c
+++ b/fs/gfs2/lm.c
@@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
vprintk(fmt, args);
va_end(args);
- fs_err(sdp, "about to withdraw from the cluster\n");
+ fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
-
- fs_err(sdp, "waiting for outstanding I/O\n");
-
- /* FIXME: suspend dm device so oustanding bio's complete
- and all further io requests fail */
-
fs_err(sdp, "telling LM to withdraw\n");
gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
fs_err(sdp, "withdrawn\n");
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index 33af707a4d3..a87c7bf3c56 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -36,7 +36,7 @@
#define GDLM_STRNAME_BYTES 24
#define GDLM_LVB_SIZE 32
-#define GDLM_DROP_COUNT 50000
+#define GDLM_DROP_COUNT 200000
#define GDLM_DROP_PERIOD 60
#define GDLM_NAME_LEN 128
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
index 2194b1d5b5e..a0e7eda643e 100644
--- a/fs/gfs2/locking/dlm/main.c
+++ b/fs/gfs2/locking/dlm/main.c
@@ -11,9 +11,6 @@
#include "lock_dlm.h"
-extern int gdlm_drop_count;
-extern int gdlm_drop_period;
-
extern struct lm_lockops gdlm_ops;
static int __init init_lock_dlm(void)
@@ -40,9 +37,6 @@ static int __init init_lock_dlm(void)
return error;
}
- gdlm_drop_count = GDLM_DROP_COUNT;
- gdlm_drop_period = GDLM_DROP_PERIOD;
-
printk(KERN_INFO
"Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index cdd1694e889..1d8faa3da8a 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -9,8 +9,6 @@
#include "lock_dlm.h"
-int gdlm_drop_count;
-int gdlm_drop_period;
const struct lm_lockops gdlm_ops;
@@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
if (!ls)
return NULL;
- ls->drop_locks_count = gdlm_drop_count;
- ls->drop_locks_period = gdlm_drop_period;
+ ls->drop_locks_count = GDLM_DROP_COUNT;
+ ls->drop_locks_period = GDLM_DROP_PERIOD;
ls->fscb = cb;
ls->sdp = sdp;
ls->fsflags = flags;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 29ae06f9494..4746b884662 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
return sprintf(buf, "%d\n", ls->recover_jid_status);
}
+static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
+{
+ return sprintf(buf, "%d\n", ls->drop_locks_count);
+}
+
+static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+ ls->drop_locks_count = simple_strtol(buf, NULL, 0);
+ return len;
+}
+
struct gdlm_attr {
struct attribute attr;
ssize_t (*show)(struct gdlm_ls *, char *);
@@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0644, recover_show, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
+GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store);
static struct attribute *gdlm_attrs[] = {
&gdlm_attr_proto_name.attr,
@@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = {
&gdlm_attr_recover.attr,
&gdlm_attr_recover_done.attr,
&gdlm_attr_recover_status.attr,
+ &gdlm_attr_drop_count.attr,
NULL,
};
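The new drop_count attribute exposes the per-lockspace ls->drop_locks_count, replacing the module-wide gdlm_drop_count removed above, so the threshold can be tuned per mount through sysfs. A variant of the store hook with input validation is sketched here; the range check is an assumption and not part of the patch.

static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf,
                                size_t len)
{
        long v = simple_strtol(buf, NULL, 0);

        if (v < 0)                      /* assumed: reject negative input */
                return -EINVAL;
        ls->drop_locks_count = v;
        return len;
}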
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4d7f94d8c7b..16bb4b4561a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_trans *tr;
- if (!list_empty(&bd->bd_list_tr))
+ gfs2_log_lock(sdp);
+ if (!list_empty(&bd->bd_list_tr)) {
+ gfs2_log_unlock(sdp);
return;
-
+ }
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+ gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
@@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
gfs2_meta_check(sdp, bd->bd_bh);
gfs2_pin(sdp, bd->bd_bh);
-
gfs2_log_lock(sdp);
sdp->sd_log_num_buf++;
list_add(&le->le_list, &sdp->sd_log_le_buf);
@@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
struct list_head *head = &tr->tr_list_buf;
struct gfs2_bufdata *bd;
+ gfs2_log_lock(sdp);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
list_del_init(&bd->bd_list_tr);
tr->tr_num_buf--;
}
+ gfs2_log_unlock(sdp);
gfs2_assert_warn(sdp, !tr->tr_num_buf);
}
@@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
+ gfs2_log_lock(sdp);
tr->tr_touched = 1;
if (list_empty(&bd->bd_list_tr) &&
(ip->i_di.di_flags & GFS2_DIF_JDATA)) {
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+ gfs2_log_unlock(sdp);
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_buf_new++;
+ } else {
+ gfs2_log_unlock(sdp);
}
gfs2_trans_add_gl(bd->bd_gl);
gfs2_log_lock(sdp);
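The lops.c hunks widen the gfs2_log_lock coverage so that the list_empty() test and the matching list_add() happen in one critical section; without that, a second CPU could observe an empty bd_list_tr and queue the same buffer twice. In condensed form, the pattern being enforced is:

        /* the emptiness test and the insertion share one critical section */
        gfs2_log_lock(sdp);
        if (list_empty(&bd->bd_list_tr)) {
                tr->tr_num_buf++;
                list_add(&bd->bd_list_tr, &tr->tr_list_buf);
        }
        gfs2_log_unlock(sdp);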
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d8d69a72a10..56e33590b65 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -16,6 +16,7 @@
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
+#include <linux/writeback.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
@@ -157,6 +158,32 @@ out_ignore:
}
/**
+ * gfs2_writepages - Write a bunch of dirty pages back to disk
+ * @mapping: The mapping to write
+ * @wbc: Write-back control
+ *
+ * For journaled files and/or ordered writes this just falls back to the
+ * kernel's default writepages path for now. We will probably want to change
+ * that eventually (i.e. when we look at allocate on flush).
+ *
+ * For the data=writeback case though we can already ignore buffer heads
+ * and write whole extents at once. This is a big reduction in the
+ * number of I/O requests we send and the bmap calls we make in this case.
+ */
+static int gfs2_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+ if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
+ return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+
+ return generic_writepages(mapping, wbc);
+}
+
+/**
* stuffed_readpage - Fill in a Linux page with stuffed file data
* @ip: the inode
* @page: the page
@@ -256,7 +283,7 @@ out_unlock:
* the page lock and the glock) and return having done no I/O. Its
* obviously not something we'd want to do on too regular a basis.
* Any I/O we ignore at this time will be done via readpage later.
- * 2. We have to handle stuffed files here too.
+ * 2. We don't handle stuffed files here; we let readpage do the honours.
* 3. mpage_readpages() does most of the heavy lifting in the common case.
* 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
* 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder gh;
- unsigned page_idx;
- int ret;
+ int ret = 0;
int do_unlock = 0;
if (likely(file != &gfs2_internal_file_sentinel)) {
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
goto out_unlock;
}
skip_lock:
- if (gfs2_is_stuffed(ip)) {
- struct pagevec lru_pvec;
- pagevec_init(&lru_pvec, 0);
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (!add_to_page_cache(page, mapping,
- page->index, GFP_KERNEL)) {
- ret = stuffed_readpage(ip, page);
- unlock_page(page);
- if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
- } else {
- page_cache_release(page);
- }
- }
- pagevec_lru_add(&lru_pvec);
- ret = 0;
- } else {
- /* What we really want to do .... */
+ if (!gfs2_is_stuffed(ip))
ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
- }
if (do_unlock) {
gfs2_glock_dq_m(1, &gh);
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
error = gfs2_glock_nq_atime(&ip->i_gh);
if (unlikely(error)) {
- if (error == GLR_TRYFAILED)
+ if (error == GLR_TRYFAILED) {
+ unlock_page(page);
error = AOP_TRUNCATED_PAGE;
+ }
goto out_uninit;
}
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
return;
}
+/**
+ * gfs2_ok_for_dio - check that dio is valid on this file
+ * @ip: The inode
+ * @rw: READ or WRITE
+ * @offset: The offset at which we are reading or writing
+ *
+ * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
+ * 1 (to accept the i/o request)
+ */
+static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
+{
+ /*
+ * Should we return an error here? I can't see that O_DIRECT for
+ * a journaled file makes any sense. For now we'll silently fall
+ * back to buffered I/O, likewise we do the same for stuffed
+ * files since they are (a) small and (b) unaligned.
+ */
+ if (gfs2_is_jdata(ip))
+ return 0;
+
+ if (gfs2_is_stuffed(ip))
+ return 0;
+
+ if (offset > i_size_read(&ip->i_inode))
+ return 0;
+ return 1;
+}
+
+
+
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
struct gfs2_holder gh;
int rv;
- if (rw == READ)
- mutex_lock(&inode->i_mutex);
/*
- * Shared lock, even if its a write, since we do no allocation
- * on this path. All we need change is atime.
+ * Deferred lock, even if it's a write, since we do no allocation
+ * on this path. All we need change is atime, and this lock mode
+ * ensures that other nodes have flushed their buffered read caches
+ * (i.e. their page cache entries for this inode). We do not,
+ * unfortunately have the option of only flushing a range like
+ * the VFS does.
*/
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
rv = gfs2_glock_nq_atime(&gh);
if (rv)
- goto out;
-
- if (offset > i_size_read(inode))
- goto out;
-
- /*
- * Should we return an error here? I can't see that O_DIRECT for
- * a journaled file makes any sense. For now we'll silently fall
- * back to buffered I/O, likewise we do the same for stuffed
- * files since they are (a) small and (b) unaligned.
- */
- if (gfs2_is_jdata(ip))
- goto out;
-
- if (gfs2_is_stuffed(ip))
- goto out;
-
- rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
- inode->i_sb->s_bdev,
- iov, offset, nr_segs,
- gfs2_get_block_direct, NULL);
+ return rv;
+ rv = gfs2_ok_for_dio(ip, rw, offset);
+ if (rv != 1)
+ goto out; /* dio not valid, fall back to buffered i/o */
+
+ rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
+ iov, offset, nr_segs,
+ gfs2_get_block_direct, NULL);
out:
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
- if (rw == READ)
- mutex_unlock(&inode->i_mutex);
-
return rv;
}
@@ -763,6 +786,7 @@ out:
const struct address_space_operations gfs2_file_aops = {
.writepage = gfs2_writepage,
+ .writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
.sync_page = block_sync_page,
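The gfs2_writepages() comment above describes the split: data=writeback files that are not journaled go through mpage_writepages(), which builds large bios per extent instead of writing buffer by buffer, while everything else keeps the default generic_writepages() path. A minimal sketch of that contract, assuming a filesystem whose blocks are already mapped at writeback time; example_get_block and can_write_extents are hypothetical helpers.

extern int example_get_block(struct inode *inode, sector_t lblock,
                             struct buffer_head *bh_result, int create);
extern int can_write_extents(struct inode *inode);   /* hypothetical */

static int example_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
{
        if (can_write_extents(mapping->host))
                return mpage_writepages(mapping, wbc, example_get_block);
        return generic_writepages(mapping, wbc);
}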
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index d355899585d..9187eb174b4 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
struct gfs2_inum_host inum;
unsigned int type;
int error;
+ int had_lock=0;
if (inode && is_bad_inode(inode))
goto invalid;
@@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
if (sdp->sd_args.ar_localcaching)
goto valid;
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
- if (error)
- goto fail;
+ had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
+ if (!had_lock) {
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+ if (error)
+ goto fail;
+ }
error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
switch (error) {
@@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
}
valid_gunlock:
- gfs2_glock_dq_uninit(&d_gh);
+ if (!had_lock)
+ gfs2_glock_dq_uninit(&d_gh);
valid:
dput(parent);
return 1;
invalid_gunlock:
- gfs2_glock_dq_uninit(&d_gh);
+ if (!had_lock)
+ gfs2_glock_dq_uninit(&d_gh);
invalid:
if (inode && S_ISDIR(inode->i_mode)) {
if (have_submounts(dentry))
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index b4e7b877531..4855e8cca62 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,6 +22,7 @@
#include "glock.h"
#include "glops.h"
#include "inode.h"
+#include "ops_dentry.h"
#include "ops_export.h"
#include "rgrp.h"
#include "util.h"
@@ -112,13 +113,12 @@ struct get_name_filldir {
char *name;
};
-static int get_name_filldir(void *opaque, const char *name, unsigned int length,
- u64 offset, struct gfs2_inum_host *inum,
- unsigned int type)
+static int get_name_filldir(void *opaque, const char *name, int length,
+ loff_t offset, u64 inum, unsigned int type)
{
- struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
+ struct get_name_filldir *gnfd = opaque;
- if (!gfs2_inum_equal(inum, &gnfd->inum))
+ if (inum != gnfd->inum.no_addr)
return 0;
memcpy(gnfd->name, name, length);
@@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
return ERR_PTR(-ENOMEM);
}
+ dentry->d_op = &gfs2_dops;
return dentry;
}
@@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
}
error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
- LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
- &i_gh);
+ LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return ERR_PTR(error);
@@ -269,6 +269,7 @@ out_inode:
return ERR_PTR(-ENOMEM);
}
+ dentry->d_op = &gfs2_dops;
return dentry;
fail_rgd:
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index faa07e4b97d..c996aa739a0 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -43,15 +43,6 @@
#include "util.h"
#include "eaops.h"
-/* For regular, non-NFS */
-struct filldir_reg {
- struct gfs2_sbd *fdr_sbd;
- int fdr_prefetch;
-
- filldir_t fdr_filldir;
- void *fdr_opaque;
-};
-
/*
* Most fields left uninitialised to catch anybody who tries to
* use them. f_flags set to prevent file_accessed() from touching
@@ -128,41 +119,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
}
/**
- * filldir_func - Report a directory entry to the caller of gfs2_dir_read()
- * @opaque: opaque data used by the function
- * @name: the name of the directory entry
- * @length: the length of the name
- * @offset: the entry's offset in the directory
- * @inum: the inode number the entry points to
- * @type: the type of inode the entry points to
- *
- * Returns: 0 on success, 1 if buffer full
- */
-
-static int filldir_func(void *opaque, const char *name, unsigned int length,
- u64 offset, struct gfs2_inum_host *inum,
- unsigned int type)
-{
- struct filldir_reg *fdr = (struct filldir_reg *)opaque;
- struct gfs2_sbd *sdp = fdr->fdr_sbd;
- int error;
-
- error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
- inum->no_addr, type);
- if (error)
- return 1;
-
- if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
- gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops,
- LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
- gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops,
- LM_ST_SHARED, LM_FLAG_TRY);
- }
-
- return 0;
-}
-
-/**
* gfs2_readdir - Read directory entries from a directory
* @file: The directory to read from
* @dirent: Buffer for dirents
@@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
{
struct inode *dir = file->f_mapping->host;
struct gfs2_inode *dip = GFS2_I(dir);
- struct filldir_reg fdr;
struct gfs2_holder d_gh;
u64 offset = file->f_pos;
int error;
- fdr.fdr_sbd = GFS2_SB(dir);
- fdr.fdr_prefetch = 1;
- fdr.fdr_filldir = filldir;
- fdr.fdr_opaque = dirent;
-
gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
error = gfs2_glock_nq_atime(&d_gh);
if (error) {
@@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
return error;
}
- error = gfs2_dir_read(dir, &offset, &fdr, filldir_func);
+ error = gfs2_dir_read(dir, &offset, dirent, filldir);
gfs2_glock_dq_uninit(&d_gh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d14e139d267..ee80b8a5e7b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -867,9 +867,9 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
error = -EBUSY;
goto error;
}
- mutex_lock(&sb->s_bdev->bd_mount_mutex);
+ down(&sb->s_bdev->bd_mount_sem);
new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
- mutex_unlock(&sb->s_bdev->bd_mount_mutex);
+ up(&sb->s_bdev->bd_mount_sem);
if (IS_ERR(new)) {
error = PTR_ERR(new);
goto error;
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 636dda4c7d3..f40a84807d7 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[2];
+ struct gfs2_holder ghs[3];
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder ri_gh;
int error;
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ return error;
+
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+
+ error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
@@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
+ gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
+ gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[2];
+ struct gfs2_holder ghs[3];
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder ri_gh;
int error;
+
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ return error;
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+ error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
@@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
+ gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
+ gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[4], r_gh;
+ struct gfs2_holder ghs[5], r_gh;
+ struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
int alloc_required;
@@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (nip) {
gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
num_gh++;
+ /* Grab the resource group lock for unlink flag twiddling;
+ * this is the case of the target file already existing,
+ * so we unlink it before doing the rename.
+ */
+ nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+ if (nrgd)
+ gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
error = gfs2_glock_nq_m(num_gh, ghs);
@@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
al->al_rgd->rd_ri.ri_length +
4 * RES_DINODE + 4 * RES_LEAF +
- RES_STATFS + RES_QUOTA, 0);
+ RES_STATFS + RES_QUOTA + 4, 0);
if (error)
goto out_ipreserv;
} else {
error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
- 5 * RES_LEAF, 0);
+ 5 * RES_LEAF + 4, 0);
if (error)
goto out_gunlock;
}
@@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto out_end_trans;
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
}
generic_fillattr(inode, stat);
- if (unlock);
+ if (unlock)
gfs2_glock_dq_uninit(&gh);
return 0;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 7685b46f934..47369d01121 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb)
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
+ if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ return;
+
for (;;) {
error = gfs2_freeze_fs(sdp);
if (!error)
@@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode)
}
error = gfs2_dinode_dealloc(ip);
+ /*
+ * Must do this before unlock to avoid trying to write back
+ * potentially dirty data now that the inode no longer exists
+ * on disk.
+ */
+ truncate_inode_pages(&inode->i_data, 0);
out_unlock:
gfs2_glock_dq(&ip->i_iopen_gh);
@@ -443,14 +452,12 @@ out:
static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
- struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_inode *ip;
ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
if (ip) {
ip->i_flags = 0;
ip->i_gl = NULL;
- ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
ip->i_last_pfault = jiffies;
}
return &ip->i_inode;
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index 45a5f11fc39..14b380fb060 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -28,34 +28,13 @@
#include "trans.h"
#include "util.h"
-static void pfault_be_greedy(struct gfs2_inode *ip)
-{
- unsigned int time;
-
- spin_lock(&ip->i_spin);
- time = ip->i_greedy;
- ip->i_last_pfault = jiffies;
- spin_unlock(&ip->i_spin);
-
- igrab(&ip->i_inode);
- if (gfs2_glock_be_greedy(ip->i_gl, time))
- iput(&ip->i_inode);
-}
-
static struct page *gfs2_private_nopage(struct vm_area_struct *area,
unsigned long address, int *type)
{
struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
- struct page *result;
set_bit(GIF_PAGED, &ip->i_flags);
-
- result = filemap_nopage(area, address, type);
-
- if (result && result != NOPAGE_OOM)
- pfault_be_greedy(ip);
-
- return result;
+ return filemap_nopage(area, address, type);
}
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
@@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
set_page_dirty(result);
}
- pfault_be_greedy(ip);
out:
gfs2_glock_dq_uninit(&i_gh);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 43a24f2e590..70f424fcf1c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_atime_quantum = 3600;
gt->gt_new_files_jdata = 0;
gt->gt_new_files_directio = 0;
- gt->gt_max_atomic_write = 4 << 20;
gt->gt_max_readahead = 1 << 18;
gt->gt_lockdump_size = 131072;
gt->gt_stall_secs = 600;
gt->gt_complain_secs = 10;
gt->gt_reclaim_limit = 5000;
gt->gt_entries_per_readdir = 32;
- gt->gt_prefetch_secs = 10;
- gt->gt_greedy_default = HZ / 10;
- gt->gt_greedy_quantum = HZ / 40;
- gt->gt_greedy_max = HZ / 4;
gt->gt_statfs_quantum = 30;
gt->gt_statfs_slow = 0;
}
@@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
mutex_lock(&sdp->sd_jindex_mutex);
for (;;) {
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
- GL_LOCAL_EXCL, ji_gh);
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
if (error)
break;
@@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
struct gfs2_log_header_host head;
int error;
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
- GL_LOCAL_EXCL, &t_gh);
+ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
if (error)
return error;
@@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
gfs2_quota_sync(sdp);
gfs2_statfs_sync(sdp);
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
- GL_LOCAL_EXCL | GL_NOCACHE,
- &t_gh);
+ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+ &t_gh);
if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return error;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 983eaf1e06b..d01f9f0fda2 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(reclaim_limit, 0);
-TUNE_ATTR(prefetch_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(new_files_directio, 0);
TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
-TUNE_ATTR(max_atomic_write, 1);
TUNE_ATTR(stall_secs, 1);
-TUNE_ATTR(greedy_default, 1);
-TUNE_ATTR(greedy_quantum, 1);
-TUNE_ATTR(greedy_max, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = {
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_reclaim_limit.attr,
- &tune_attr_prefetch_secs.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_quota_simul_sync.attr,
&tune_attr_quota_cache_secs.attr,
- &tune_attr_max_atomic_write.attr,
&tune_attr_stall_secs.attr,
- &tune_attr_greedy_default.attr,
- &tune_attr_greedy_quantum.attr,
- &tune_attr_greedy_max.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_scand_secs.attr,
&tune_attr_recoverd_secs.attr,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index cca3fb693f9..70543b17e4c 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -76,7 +76,7 @@ extern int make_symlink(const char *from, const char *to);
extern int unlink_file(const char *file);
extern int do_mkdir(const char *file, int mode);
extern int do_rmdir(const char *file);
-extern int do_mknod(const char *file, int mode, int dev);
+extern int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor);
extern int link_file(const char *from, const char *to);
extern int do_readlink(char *file, char *buf, int size);
extern int rename_file(char *from, char *to);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 1e6fc379987..69a376f35a6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -755,7 +755,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
goto out_put;
init_special_inode(inode, mode, dev);
- err = do_mknod(name, mode, dev);
+ err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
if(err)
goto out_free;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 23b7cee7212..1ed5ea389f1 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -295,11 +295,11 @@ int do_rmdir(const char *file)
return(0);
}
-int do_mknod(const char *file, int mode, int dev)
+int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
{
int err;
- err = mknod(file, mode, dev);
+ err = mknod(file, mode, makedev(major, minor));
if(err) return(-errno);
return(0);
}
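The hostfs change stops passing a raw dev_t across the kernel/host boundary: the kernel half splits it with MAJOR()/MINOR() and the host half rebuilds it with makedev(), so the two sides no longer need to agree on a dev_t encoding. A standalone userspace equivalent of the new do_mknod() is sketched below; host_mknod is an illustrative name.

#include <errno.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>      /* makedev() */
#include <sys/types.h>

static int host_mknod(const char *path, mode_t mode,
                      unsigned int maj, unsigned int min)
{
        /* rebuild the device number on the host side */
        if (mknod(path, mode, makedev(maj, min)) != 0)
                return -errno;  /* mirror the kernel-style negative errno */
        return 0;
}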
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 077258b2103..5a95fbdd6fd 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -17,6 +17,7 @@
*
*/
#include <linux/slab.h>
+#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/jffs.h>
#include "jffs_fm.h"
@@ -104,7 +105,7 @@ jffs_build_begin(struct jffs_control *c, int unit)
mtd = get_mtd_device(NULL, unit);
- if (!mtd) {
+ if (IS_ERR(mtd)) {
kfree(fmc);
DJM(no_jffs_fmcontrol--);
return NULL;
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index 72b4fc13a10..4189e4a3605 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -178,8 +178,8 @@ __jffs2_dbg_acct_paranoia_check_nolock(struct jffs2_sb_info *c,
while (ref2) {
uint32_t totlen = ref_totlen(c, jeb, ref2);
- if (ref2->flash_offset < jeb->offset ||
- ref2->flash_offset > jeb->offset + c->sector_size) {
+ if (ref_offset(ref2) < jeb->offset ||
+ ref_offset(ref2) > jeb->offset + c->sector_size) {
JFFS2_ERROR("node_ref %#08x shouldn't be in block at %#08x.\n",
ref_offset(ref2), jeb->offset);
goto error;
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 3daf3bca037..f89c85d5a3f 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -13,6 +13,7 @@
#ifndef _JFFS2_DEBUG_H_
#define _JFFS2_DEBUG_H_
+#include <linux/sched.h>
#ifndef CONFIG_JFFS2_FS_DEBUG
#define CONFIG_JFFS2_FS_DEBUG 0
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 7bc1a4201c0..abb90c0c09c 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -502,12 +502,11 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
if (ret)
return ret;
- c->inocache_list = kmalloc(INOCACHE_HASHSIZE * sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
+ c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
if (!c->inocache_list) {
ret = -ENOMEM;
goto out_wbuf;
}
- memset(c->inocache_list, 0, INOCACHE_HASHSIZE * sizeof(struct jffs2_inode_cache *));
jffs2_init_xattr_subsystem(c);
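The kcalloc() conversion above folds the memset() into the allocator, and kcalloc() additionally guards the n * size multiplication against overflow. Side by side, using the same names as the hunk:

        /* before: allocate, then zero by hand */
        c->inocache_list = kmalloc(INOCACHE_HASHSIZE *
                                   sizeof(struct jffs2_inode_cache *),
                                   GFP_KERNEL);
        if (c->inocache_list)
                memset(c->inocache_list, 0, INOCACHE_HASHSIZE *
                       sizeof(struct jffs2_inode_cache *));

        /* after: one call that zeroes and checks the multiplication */
        c->inocache_list = kcalloc(INOCACHE_HASHSIZE,
                                   sizeof(struct jffs2_inode_cache *),
                                   GFP_KERNEL);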
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index daff3341ff9..3a3cf225981 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -838,6 +838,8 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
+ cond_resched();
+
/* We only care about obsolete ones */
if (!(ref_obsolete(raw)))
continue;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 0ddfd70307f..4178b4b5594 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -294,23 +294,21 @@ static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev)
static inline struct jffs2_node_frag *frag_first(struct rb_root *root)
{
- struct rb_node *node = root->rb_node;
+ struct rb_node *node = rb_first(root);
if (!node)
return NULL;
- while(node->rb_left)
- node = node->rb_left;
+
return rb_entry(node, struct jffs2_node_frag, rb);
}
static inline struct jffs2_node_frag *frag_last(struct rb_root *root)
{
- struct rb_node *node = root->rb_node;
+ struct rb_node *node = rb_last(root);
if (!node)
return NULL;
- while(node->rb_right)
- node = node->rb_right;
+
return rb_entry(node, struct jffs2_node_frag, rb);
}
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 266423b2709..58a0b912e9d 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -944,13 +944,12 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
{
struct jffs2_raw_inode n;
- struct jffs2_inode_info *f = kmalloc(sizeof(*f), GFP_KERNEL);
+ struct jffs2_inode_info *f = kzalloc(sizeof(*f), GFP_KERNEL);
int ret;
if (!f)
return -ENOMEM;
- memset(f, 0, sizeof(*f));
init_MUTEX_LOCKED(&f->sem);
f->inocache = ic;
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index e2413466ddd..3af746eaff0 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -128,17 +128,19 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
}
if (jffs2_sum_active()) {
- s = kmalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
+ s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
if (!s) {
+ kfree(flashbuf);
JFFS2_WARNING("Can't allocate memory for summary\n");
return -ENOMEM;
}
- memset(s, 0, sizeof(struct jffs2_summary));
}
for (i=0; i<c->nr_blocks; i++) {
struct jffs2_eraseblock *jeb = &c->blocks[i];
+ cond_resched();
+
/* reset summary info for next eraseblock scan */
jffs2_sum_reset_collected(s);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index e52cef526d9..25265965bdc 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -26,15 +26,13 @@
int jffs2_sum_init(struct jffs2_sb_info *c)
{
- c->summary = kmalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
+ c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
if (!c->summary) {
JFFS2_WARNING("Can't allocate memory for summary information!\n");
return -ENOMEM;
}
- memset(c->summary, 0, sizeof(struct jffs2_summary));
-
c->summary->sum_buf = vmalloc(c->sector_size);
if (!c->summary->sum_buf) {
@@ -398,6 +396,8 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
for (i=0; i<je32_to_cpu(summary->sum_num); i++) {
dbg_summary("processing summary index %d\n", i);
+ cond_resched();
+
/* Make sure there's a spare ref for dirty space */
err = jffs2_prealloc_raw_node_refs(c, jeb, 2);
if (err)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 7deb7825402..08a0e6c49e6 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -17,6 +17,7 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/fs.h>
+#include <linux/err.h>
#include <linux/mount.h>
#include <linux/jffs2.h>
#include <linux/pagemap.h>
@@ -184,9 +185,9 @@ static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
struct mtd_info *mtd;
mtd = get_mtd_device(NULL, mtdnr);
- if (!mtd) {
+ if (IS_ERR(mtd)) {
D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr));
- return -EINVAL;
+ return PTR_ERR(mtd);
}
return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
@@ -221,7 +222,7 @@ static int jffs2_get_sb(struct file_system_type *fs_type,
D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd:%%s, name \"%s\"\n", dev_name+4));
for (mtdnr = 0; mtdnr < MAX_MTD_DEVICES; mtdnr++) {
mtd = get_mtd_device(NULL, mtdnr);
- if (mtd) {
+ if (!IS_ERR(mtd)) {
if (!strcmp(mtd->name, dev_name+4))
return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
put_mtd_device(mtd);
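
Both super.c hunks above track get_mtd_device() moving from a NULL-on-failure to an ERR_PTR-on-failure return convention, so callers now test with IS_ERR() and propagate PTR_ERR() instead of returning a blanket -EINVAL. The standalone sketch below re-creates that idiom in simplified form (the real macros live in include/linux/err.h); lookup_device() is a hypothetical stand-in for get_mtd_device().

#include <stdio.h>

#define MAX_ERRNO 4095

/* Simplified stand-ins for the kernel's ERR_PTR/IS_ERR/PTR_ERR macros:
 * small negative errno values are smuggled through the very top of the
 * pointer range, so ordinary object addresses never collide with them. */
static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical lookup: device 0 exists, everything else reports -ENODEV. */
static void *lookup_device(int nr)
{
	static int dev = 42;
	return nr == 0 ? (void *)&dev : ERR_PTR(-19 /* -ENODEV */);
}

int main(void)
{
	for (int nr = 0; nr < 2; nr++) {
		void *d = lookup_device(nr);
		if (IS_ERR(d))
			printf("device %d: error %ld\n", nr, PTR_ERR(d));
		else
			printf("device %d: found at %p\n", nr, d);
	}
	return 0;
}
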
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index fc211b6e9b0..b90d5aa3d96 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -51,7 +51,7 @@ static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
*/
if (!p) {
- printk(KERN_ERR "jffs2_follow_link(): can't find symlink taerget\n");
+ printk(KERN_ERR "jffs2_follow_link(): can't find symlink target\n");
p = ERR_PTR(-EIO);
}
D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->target));
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 70707309dfa..9c99859f5ed 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -969,8 +969,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
int oobsize = c->mtd->oobsize;
struct mtd_oob_ops ops;
- ops.len = NR_OOB_SCAN_PAGES * oobsize;
- ops.ooblen = oobsize;
+ ops.ooblen = NR_OOB_SCAN_PAGES * oobsize;
ops.oobbuf = c->oobbuf;
ops.ooboffs = 0;
ops.datbuf = NULL;
@@ -983,10 +982,10 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
return ret;
}
- if (ops.retlen < ops.len) {
+ if (ops.oobretlen < ops.ooblen) {
D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB "
"returned short read (%zd bytes not %d) for block "
- "at %08x\n", ops.retlen, ops.len, jeb->offset));
+ "at %08x\n", ops.oobretlen, ops.ooblen, jeb->offset));
return -EIO;
}
@@ -1005,7 +1004,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
}
/* we know, we are aligned :) */
- for (page = oobsize; page < ops.len; page += sizeof(long)) {
+ for (page = oobsize; page < ops.ooblen; page += sizeof(long)) {
long dat = *(long *)(&ops.oobbuf[page]);
if(dat != -1)
return 1;
@@ -1033,7 +1032,6 @@ int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c,
return 2;
}
- ops.len = oobsize;
ops.ooblen = oobsize;
ops.oobbuf = c->oobbuf;
ops.ooboffs = 0;
@@ -1048,10 +1046,10 @@ int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c,
return ret;
}
- if (ops.retlen < ops.len) {
+ if (ops.oobretlen < ops.ooblen) {
D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): "
"Read OOB return short read (%zd bytes not %d) "
- "for block at %08x\n", ops.retlen, ops.len,
+ "for block at %08x\n", ops.oobretlen, ops.ooblen,
jeb->offset));
return -EIO;
}
@@ -1090,8 +1088,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER);
n.totlen = cpu_to_je32(8);
- ops.len = c->fsdata_len;
- ops.ooblen = c->fsdata_len;;
+ ops.ooblen = c->fsdata_len;
ops.oobbuf = (uint8_t *)&n;
ops.ooboffs = c->fsdata_pos;
ops.datbuf = NULL;
@@ -1105,10 +1102,10 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
jeb->offset, ret));
return ret;
}
- if (ops.retlen != ops.len) {
+ if (ops.oobretlen != ops.ooblen) {
D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): "
"Short write for block at %08x: %zd not %d\n",
- jeb->offset, ops.retlen, ops.len));
+ jeb->offset, ops.oobretlen, ops.ooblen));
return -EIO;
}
return 0;
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4da09ce1d1f..4bb3f189733 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -399,8 +399,6 @@ static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datu
{
/* must be called under down_write(xattr_sem) */
if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) {
- uint32_t xid = xd->xid, version = xd->version;
-
unload_xattr_datum(c, xd);
xd->flags |= JFFS2_XFLAGS_DEAD;
if (xd->node == (void *)xd) {
@@ -411,7 +409,8 @@ static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datu
}
spin_unlock(&c->erase_completion_lock);
- dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version);
+ dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n",
+ xd->xid, xd->version);
}
}
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 06270774516..f4d45d4d835 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -176,7 +176,7 @@ reclaimer(void *ptr)
lock_kernel();
lockd_up(0); /* note: this cannot fail as lockd is already running */
- dprintk("lockd: reclaiming locks for host %s", host->h_name);
+ dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
restart:
nsmstate = host->h_nsmstate;
@@ -206,7 +206,7 @@ restart:
host->h_reclaiming = 0;
up_write(&host->h_rwsem);
- dprintk("NLM: done reclaiming locks for host %s", host->h_name);
+ dprintk("NLM: done reclaiming locks for host %s\n", host->h_name);
/* Now, wake up all processes that sleep on a blocked lock */
list_for_each_entry(block, &nlm_blocked, b_list) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index dee3d6c0f19..d9ba8cb0ee7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -532,7 +532,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
lock_kernel();
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
+ res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
if (res < 0) {
unlock_kernel();
return res;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0dd6be346aa..9e4a2b70995 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -315,14 +315,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
static int nfs_release_page(struct page *page, gfp_t gfp)
{
- /*
- * Avoid deadlock on nfs_wait_on_request().
- */
- if (!(gfp & __GFP_FS))
- return 0;
- /* Hack... Force nfs_wb_page() to write out the page */
- SetPageDirty(page);
- return !nfs_wb_page(page->mapping->host, page);
+ /* If PagePrivate() is set, then the page is not freeable */
+ return 0;
+}
+
+static int nfs_launder_page(struct page *page)
+{
+ return nfs_wb_page(page->mapping->host, page);
}
const struct address_space_operations nfs_file_aops = {
@@ -338,6 +337,7 @@ const struct address_space_operations nfs_file_aops = {
#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
#endif
+ .launder_page = nfs_launder_page,
};
static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
@@ -434,8 +434,9 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
BUG();
}
if (res < 0)
- printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
- __FUNCTION__);
+ dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
+ " - error %d!\n",
+ __FUNCTION__, res);
return res;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 63e47027930..d8349828283 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -665,49 +665,86 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
return __nfs_revalidate_inode(server, inode);
}
+static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ if (mapping->nrpages != 0) {
+ int ret = invalidate_inode_pages2(mapping);
+ if (ret < 0)
+ return ret;
+ }
+ spin_lock(&inode->i_lock);
+ nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
+ if (S_ISDIR(inode->i_mode)) {
+ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ /* This ensures we revalidate child dentries */
+ nfsi->cache_change_attribute = jiffies;
+ }
+ spin_unlock(&inode->i_lock);
+ nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
+ dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
+ inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+ return 0;
+}
+
+static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+ int ret = 0;
+
+ mutex_lock(&inode->i_mutex);
+ if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
+ ret = nfs_sync_mapping(mapping);
+ if (ret == 0)
+ ret = nfs_invalidate_mapping_nolock(inode, mapping);
+ }
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
/**
- * nfs_revalidate_mapping - Revalidate the pagecache
+ * nfs_revalidate_mapping_nolock - Revalidate the pagecache
* @inode - pointer to host inode
* @mapping - pointer to mapping
*/
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
int ret = 0;
- if (NFS_STALE(inode))
- ret = -ESTALE;
if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
- || nfs_attribute_timeout(inode))
+ || nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- if (ret < 0)
- goto out;
+ if (ret < 0)
+ goto out;
+ }
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ ret = nfs_invalidate_mapping_nolock(inode, mapping);
+out:
+ return ret;
+}
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
- if (mapping->nrpages != 0) {
- if (S_ISREG(inode->i_mode)) {
- ret = nfs_sync_mapping(mapping);
- if (ret < 0)
- goto out;
- }
- ret = invalidate_inode_pages2(mapping);
- if (ret < 0)
- goto out;
- }
- spin_lock(&inode->i_lock);
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- if (S_ISDIR(inode->i_mode)) {
- memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- /* This ensures we revalidate child dentries */
- nfsi->cache_change_attribute = jiffies;
- }
- spin_unlock(&inode->i_lock);
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * This version of the function will take the inode->i_mutex and attempt to
+ * flush out all dirty data if it needs to invalidate the page cache.
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int ret = 0;
- nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
- dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode));
+ if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ || nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
+ ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (ret < 0)
+ goto out;
}
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ ret = nfs_invalidate_mapping(inode, mapping);
out:
return ret;
}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 6c686112cc0..525c136c7d8 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,9 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct page *page;
- void *err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
+ void *err;
+
+ err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping));
if (err)
goto read_failed;
page = read_cache_page(&inode->i_data, 0,
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 248dd92e6a5..49c310b8492 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -35,7 +35,6 @@
#include <linux/lockd/bind.h>
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
-#define NFSD_PARANOIA 1
typedef struct auth_domain svc_client;
typedef struct svc_export svc_export;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 277df40f098..e695660921e 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -990,15 +990,16 @@ encode_entry(struct readdir_cd *ccd, const char *name,
}
int
-nfs3svc_encode_entry(struct readdir_cd *cd, const char *name,
- int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+nfs3svc_encode_entry(void *cd, const char *name,
+ int namlen, loff_t offset, u64 ino, unsigned int d_type)
{
return encode_entry(cd, name, namlen, offset, ino, d_type, 0);
}
int
-nfs3svc_encode_entry_plus(struct readdir_cd *cd, const char *name,
- int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+nfs3svc_encode_entry_plus(void *cd, const char *name,
+ int namlen, loff_t offset, u64 ino,
+ unsigned int d_type)
{
return encode_entry(cd, name, namlen, offset, ino, d_type, 1);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fea46368afb..18aa9440df1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1880,9 +1880,10 @@ nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
}
static int
-nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
- loff_t offset, ino_t ino, unsigned int d_type)
+nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
{
+ struct readdir_cd *ccd = ccdv;
struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
int buflen;
__be32 *p = cd->buffer;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index b06bf9f70ef..c59d6fbb7a6 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -24,8 +24,6 @@
#include <linux/nfsd/nfsd.h>
#define NFSDDBG_FACILITY NFSDDBG_FH
-#define NFSD_PARANOIA 1
-/* #define NFSD_DEBUG_VERBOSE 1 */
static int nfsd_nr_verified;
@@ -230,13 +228,12 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
error = nfserrno(PTR_ERR(dentry));
goto out;
}
-#ifdef NFSD_PARANOIA
+
if (S_ISDIR(dentry->d_inode->i_mode) &&
(dentry->d_flags & DCACHE_DISCONNECTED)) {
printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
}
-#endif
fhp->fh_dentry = dentry;
fhp->fh_export = exp;
@@ -267,12 +264,13 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
/* Finally, check access permissions. */
error = nfsd_permission(exp, dentry, access);
-#ifdef NFSD_PARANOIA_EXTREME
if (error) {
- printk("fh_verify: %s/%s permission failure, acc=%x, error=%d\n",
- dentry->d_parent->d_name.name, dentry->d_name.name, access, (error >> 24));
+ dprintk("fh_verify: %s/%s permission failure, "
+ "acc=%x, error=%d\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
+ access, ntohl(error));
}
-#endif
out:
if (exp && !IS_ERR(exp))
exp_put(exp);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 0aaccb03bf7..fbf5d51947e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -72,7 +72,7 @@ static struct svc_program nfsd_acl_program = {
.pg_prog = NFS_ACL_PROGRAM,
.pg_nvers = NFSD_ACL_NRVERS,
.pg_vers = nfsd_acl_versions,
- .pg_name = "nfsd",
+ .pg_name = "nfsacl",
.pg_class = "nfsd",
.pg_stats = &nfsd_acl_svcstats,
.pg_authenticate = &svc_set_client,
@@ -118,16 +118,16 @@ int nfsd_vers(int vers, enum vers_op change)
switch(change) {
case NFSD_SET:
nfsd_versions[vers] = nfsd_version[vers];
- break;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_version[vers] = nfsd_acl_version[vers];
+ nfsd_acl_versions[vers] = nfsd_acl_version[vers];
#endif
+ break;
case NFSD_CLEAR:
nfsd_versions[vers] = NULL;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_version[vers] = NULL;
+ nfsd_acl_versions[vers] = NULL;
#endif
break;
case NFSD_TEST:
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index f5243f94399..6555c50d900 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -462,9 +462,10 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p,
}
int
-nfssvc_encode_entry(struct readdir_cd *ccd, const char *name,
- int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+nfssvc_encode_entry(void *ccdv, const char *name,
+ int namlen, loff_t offset, u64 ino, unsigned int d_type)
{
+ struct readdir_cd *ccd = ccdv;
struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common);
__be32 *p = cd->buffer;
int buflen, slen;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7a79c23aa6d..8283236c6a0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -59,7 +59,6 @@
#include <asm/uaccess.h>
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
-#define NFSD_PARANOIA
/* We must ignore files (but only files) which might have mandatory
@@ -822,7 +821,8 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
rqstp->rq_res.page_len = size;
} else if (page != pp[-1]) {
get_page(page);
- put_page(*pp);
+ if (*pp)
+ put_page(*pp);
*pp = page;
rqstp->rq_resused++;
rqstp->rq_res.page_len += size;
@@ -1244,7 +1244,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 err;
int host_err;
__u32 v_mtime=0, v_atime=0;
- int v_mode=0;
err = nfserr_perm;
if (!flen)
@@ -1281,16 +1280,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
if (createmode == NFS3_CREATE_EXCLUSIVE) {
- /* while the verifier would fit in mtime+atime,
- * solaris7 gets confused (bugid 4218508) if these have
- * the high bit set, so we use the mode as well
+ /* solaris7 gets confused (bugid 4218508) if these have
+ * the high bit set, so just clear the high bits.
*/
v_mtime = verifier[0]&0x7fffffff;
v_atime = verifier[1]&0x7fffffff;
- v_mode = S_IFREG
- | ((verifier[0]&0x80000000) >> (32-7)) /* u+x */
- | ((verifier[1]&0x80000000) >> (32-9)) /* u+r */
- ;
}
if (dchild->d_inode) {
@@ -1318,7 +1312,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
case NFS3_CREATE_EXCLUSIVE:
if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
&& dchild->d_inode->i_atime.tv_sec == v_atime
- && dchild->d_inode->i_mode == v_mode
&& dchild->d_inode->i_size == 0 )
break;
/* fallthru */
@@ -1340,26 +1333,22 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
if (createmode == NFS3_CREATE_EXCLUSIVE) {
- /* Cram the verifier into atime/mtime/mode */
+ /* Cram the verifier into atime/mtime */
iap->ia_valid = ATTR_MTIME|ATTR_ATIME
- | ATTR_MTIME_SET|ATTR_ATIME_SET
- | ATTR_MODE;
+ | ATTR_MTIME_SET|ATTR_ATIME_SET;
/* XXX someone who knows this better please fix it for nsec */
iap->ia_mtime.tv_sec = v_mtime;
iap->ia_atime.tv_sec = v_atime;
iap->ia_mtime.tv_nsec = 0;
iap->ia_atime.tv_nsec = 0;
- iap->ia_mode = v_mode;
}
/* Set file attributes.
- * Mode has already been set but we might need to reset it
- * for CREATE_EXCLUSIVE
* Irix appears to send along the gid when it tries to
* implement setgid directories via NFS. Clear out all that cruft.
*/
set_attr:
- if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) {
+ if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
if (err2)
err = err2;
@@ -1726,7 +1715,7 @@ out:
*/
__be32
nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
- struct readdir_cd *cdp, encode_dent_fn func)
+ struct readdir_cd *cdp, filldir_t func)
{
__be32 err;
int host_err;
@@ -1751,7 +1740,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
do {
cdp->err = nfserr_eof; /* will be cleared on successful read */
- host_err = vfs_readdir(file, (filldir_t) func, cdp);
+ host_err = vfs_readdir(file, func, cdp);
} while (host_err >=0 && cdp->err == nfs_ok);
if (host_err)
err = nfserrno(host_err);
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 35cc4b1d60f..af4ef808fa9 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -17,6 +17,13 @@ ToDo/Notes:
happen is unclear however so it is worth waiting until someone hits
the problem.
+2.1.28 - Fix a deadlock.
+
+ - Fix deadlock in fs/ntfs/inode.c::ntfs_put_inode(). Thanks to Sergey
+ Vlasov for the report and detailed analysis of the deadlock. The fix
+ involved getting rid of ntfs_put_inode() altogether and hence NTFS no
+ longer has a ->put_inode super operation.
+
2.1.27 - Various bug fixes and cleanups.
- Fix two compiler warnings on Alpha. Thanks to Andrew Morton for
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index e27b4eacffb..82550838556 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
unistr.o upcase.o
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.27\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.28\"
ifeq ($(CONFIG_NTFS_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 7b2c8f4f6a6..629e7abdd84 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -92,10 +92,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
ofs = 0;
if (file_ofs < init_size)
ofs = init_size - file_ofs;
+ local_irq_save(flags);
kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
memset(kaddr + bh_offset(bh) + ofs, 0,
bh->b_size - ofs);
kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
+ local_irq_restore(flags);
flush_dcache_page(page);
}
} else {
@@ -143,11 +145,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
recs = PAGE_CACHE_SIZE / rec_size;
/* Should have been verified before we got here... */
BUG_ON(!recs);
+ local_irq_save(flags);
kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
for (i = 0; i < recs; i++)
post_read_mst_fixup((NTFS_RECORD*)(kaddr +
i * rec_size), rec_size);
kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
+ local_irq_restore(flags);
flush_dcache_page(page);
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 8296c29ae3b..74f99a6a369 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1,7 +1,7 @@
/**
* dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -1249,16 +1249,12 @@ skip_index_root:
/* Get the offset into the index allocation attribute. */
ia_pos = (s64)fpos - vol->mft_record_size;
ia_mapping = vdir->i_mapping;
- bmp_vi = ndir->itype.index.bmp_ino;
- if (unlikely(!bmp_vi)) {
- ntfs_debug("Inode 0x%lx, regetting index bitmap.", vdir->i_ino);
- bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
- if (IS_ERR(bmp_vi)) {
- ntfs_error(sb, "Failed to get bitmap attribute.");
- err = PTR_ERR(bmp_vi);
- goto err_out;
- }
- ndir->itype.index.bmp_ino = bmp_vi;
+ ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
+ bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
+ if (IS_ERR(bmp_vi)) {
+ ntfs_error(sb, "Failed to get bitmap attribute.");
+ err = PTR_ERR(bmp_vi);
+ goto err_out;
}
bmp_mapping = bmp_vi->i_mapping;
/* Get the starting bitmap bit position and sanity check it. */
@@ -1266,7 +1262,7 @@ skip_index_root:
if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
ntfs_error(sb, "Current index allocation position exceeds "
"index bitmap size.");
- goto err_out;
+ goto iput_err_out;
}
/* Get the starting bit position in the current bitmap page. */
cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1);
@@ -1282,7 +1278,7 @@ get_next_bmp_page:
ntfs_error(sb, "Reading index bitmap failed.");
err = PTR_ERR(bmp_page);
bmp_page = NULL;
- goto err_out;
+ goto iput_err_out;
}
bmp = (u8*)page_address(bmp_page);
/* Find next index block in use. */
@@ -1429,6 +1425,7 @@ find_next_index_buffer:
/* @ia_page is already unlocked in this case. */
ntfs_unmap_page(ia_page);
ntfs_unmap_page(bmp_page);
+ iput(bmp_vi);
goto abort;
}
}
@@ -1439,6 +1436,7 @@ unm_EOD:
ntfs_unmap_page(ia_page);
}
ntfs_unmap_page(bmp_page);
+ iput(bmp_vi);
EOD:
/* We are finished, set fpos to EOD. */
fpos = i_size + vol->mft_record_size;
@@ -1455,8 +1453,11 @@ done:
filp->f_pos = fpos;
return 0;
err_out:
- if (bmp_page)
+ if (bmp_page) {
ntfs_unmap_page(bmp_page);
+iput_err_out:
+ iput(bmp_vi);
+ }
if (ia_page) {
unlock_page(ia_page);
ntfs_unmap_page(ia_page);
@@ -1529,14 +1530,22 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
int datasync)
{
- struct inode *vi = dentry->d_inode;
- ntfs_inode *ni = NTFS_I(vi);
+ struct inode *bmp_vi, *vi = dentry->d_inode;
int err, ret;
+ ntfs_attr na;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
BUG_ON(!S_ISDIR(vi->i_mode));
- if (NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino)
- write_inode_now(ni->itype.index.bmp_ino, !datasync);
+ /* If the bitmap attribute inode is in memory sync it, too. */
+ na.mft_no = vi->i_ino;
+ na.type = AT_BITMAP;
+ na.name = I30;
+ na.name_len = 4;
+ bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na);
+ if (bmp_vi) {
+ write_inode_now(bmp_vi, !datasync);
+ iput(bmp_vi);
+ }
ret = ntfs_write_inode(vi, 1);
write_inode_now(vi, !datasync);
err = sync_blockdev(vi->i_sb->s_bdev);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 247989891b4..f8bf8da67ee 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1,7 +1,7 @@
/**
* inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2006 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -95,7 +95,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
* If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
* In that case, @na->name and @na->name_len should be set to NULL and 0,
* respectively. Although that is not strictly necessary as
- * ntfs_read_inode_locked() will fill them in later.
+ * ntfs_read_locked_inode() will fill them in later.
*
* Return 0 on success and -errno on error.
*
@@ -171,8 +171,8 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi,
struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
{
struct inode *vi;
- ntfs_attr na;
int err;
+ ntfs_attr na;
na.mft_no = mft_no;
na.type = AT_UNUSED;
@@ -229,8 +229,8 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
ntfschar *name, u32 name_len)
{
struct inode *vi;
- ntfs_attr na;
int err;
+ ntfs_attr na;
/* Make sure no one calls ntfs_attr_iget() for indices. */
BUG_ON(type == AT_INDEX_ALLOCATION);
@@ -287,8 +287,8 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
u32 name_len)
{
struct inode *vi;
- ntfs_attr na;
int err;
+ ntfs_attr na;
na.mft_no = base_vi->i_ino;
na.type = AT_INDEX_ALLOCATION;
@@ -402,7 +402,6 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
ntfs_init_runlist(&ni->attr_list_rl);
lockdep_set_class(&ni->attr_list_rl.lock,
&attr_list_rl_lock_class);
- ni->itype.index.bmp_ino = NULL;
ni->itype.index.block_size = 0;
ni->itype.index.vcn_size = 0;
ni->itype.index.collation_rule = 0;
@@ -546,6 +545,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
{
ntfs_volume *vol = NTFS_SB(vi->i_sb);
ntfs_inode *ni;
+ struct inode *bvi;
MFT_RECORD *m;
ATTR_RECORD *a;
STANDARD_INFORMATION *si;
@@ -780,7 +780,6 @@ skip_attr_list_load:
*/
if (S_ISDIR(vi->i_mode)) {
loff_t bvi_size;
- struct inode *bvi;
ntfs_inode *bni;
INDEX_ROOT *ir;
u8 *ir_end, *index_end;
@@ -985,13 +984,12 @@ skip_attr_list_load:
err = PTR_ERR(bvi);
goto unm_err_out;
}
- ni->itype.index.bmp_ino = bvi;
bni = NTFS_I(bvi);
if (NInoCompressed(bni) || NInoEncrypted(bni) ||
NInoSparse(bni)) {
ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
"and/or encrypted and/or sparse.");
- goto unm_err_out;
+ goto iput_unm_err_out;
}
/* Consistency check bitmap size vs. index allocation size. */
bvi_size = i_size_read(bvi);
@@ -1000,8 +998,10 @@ skip_attr_list_load:
ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
"for index allocation (0x%llx).",
bvi_size << 3, vi->i_size);
- goto unm_err_out;
+ goto iput_unm_err_out;
}
+ /* No longer need the bitmap attribute inode. */
+ iput(bvi);
skip_large_dir_stuff:
/* Setup the operations for this inode. */
vi->i_op = &ntfs_dir_inode_ops;
@@ -1176,7 +1176,8 @@ no_data_attr_special_case:
vi->i_blocks = ni->allocated_size >> 9;
ntfs_debug("Done.");
return 0;
-
+iput_unm_err_out:
+ iput(bvi);
unm_err_out:
if (!err)
err = -EIO;
@@ -1697,7 +1698,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
vi->i_size);
goto iput_unm_err_out;
}
- ni->itype.index.bmp_ino = bvi;
+ iput(bvi);
skip_large_index_stuff:
/* Setup the operations for this index inode. */
vi->i_op = NULL;
@@ -1714,7 +1715,6 @@ skip_large_index_stuff:
ntfs_debug("Done.");
return 0;
-
iput_unm_err_out:
iput(bvi);
unm_err_out:
@@ -2191,37 +2191,6 @@ err_out:
return -1;
}
-/**
- * ntfs_put_inode - handler for when the inode reference count is decremented
- * @vi: vfs inode
- *
- * The VFS calls ntfs_put_inode() every time the inode reference count (i_count)
- * is about to be decremented (but before the decrement itself.
- *
- * If the inode @vi is a directory with two references, one of which is being
- * dropped, we need to put the attribute inode for the directory index bitmap,
- * if it is present, otherwise the directory inode would remain pinned for
- * ever.
- */
-void ntfs_put_inode(struct inode *vi)
-{
- if (S_ISDIR(vi->i_mode) && atomic_read(&vi->i_count) == 2) {
- ntfs_inode *ni = NTFS_I(vi);
- if (NInoIndexAllocPresent(ni)) {
- struct inode *bvi = NULL;
- mutex_lock(&vi->i_mutex);
- if (atomic_read(&vi->i_count) == 2) {
- bvi = ni->itype.index.bmp_ino;
- if (bvi)
- ni->itype.index.bmp_ino = NULL;
- }
- mutex_unlock(&vi->i_mutex);
- if (bvi)
- iput(bvi);
- }
- }
-}
-
static void __ntfs_clear_inode(ntfs_inode *ni)
{
/* Free all alocated memory. */
@@ -2287,18 +2256,6 @@ void ntfs_clear_big_inode(struct inode *vi)
{
ntfs_inode *ni = NTFS_I(vi);
- /*
- * If the inode @vi is an index inode we need to put the attribute
- * inode for the index bitmap, if it is present, otherwise the index
- * inode would disappear and the attribute inode for the index bitmap
- * would no longer be referenced from anywhere and thus it would remain
- * pinned for ever.
- */
- if (NInoAttr(ni) && (ni->type == AT_INDEX_ALLOCATION) &&
- NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) {
- iput(ni->itype.index.bmp_ino);
- ni->itype.index.bmp_ino = NULL;
- }
#ifdef NTFS_RW
if (NInoDirty(ni)) {
bool was_bad = (is_bad_inode(vi));
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index f088291e017..117eaf8032a 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -2,7 +2,7 @@
* inode.h - Defines for inode structures NTFS Linux kernel driver. Part of
* the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -101,8 +101,6 @@ struct _ntfs_inode {
runlist attr_list_rl; /* Run list for the attribute list value. */
union {
struct { /* It is a directory, $MFT, or an index inode. */
- struct inode *bmp_ino; /* Attribute inode for the
- index $BITMAP. */
u32 block_size; /* Size of an index block. */
u32 vcn_size; /* Size of a vcn in this
index. */
@@ -300,8 +298,6 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni);
extern int ntfs_read_inode_mount(struct inode *vi);
-extern void ntfs_put_inode(struct inode *vi);
-
extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt);
#ifdef NTFS_RW
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 03a391ac714..babf94d90de 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
/*
* super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2006 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2001,2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -2702,9 +2702,6 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
static struct super_operations ntfs_sops = {
.alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */
.destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */
- .put_inode = ntfs_put_inode, /* VFS: Called just before
- the inode reference count
- is decreased. */
#ifdef NTFS_RW
//.dirty_inode = NULL, /* VFS: Called from
// __mark_inode_dirty(). */
@@ -3261,7 +3258,7 @@ static void __exit exit_ntfs_fs(void)
}
MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>");
-MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2006 Anton Altaparmakov");
+MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2007 Anton Altaparmakov");
MODULE_VERSION(NTFS_VERSION);
MODULE_LICENSE("GPL");
#ifdef DEBUG
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 06be6e774cf..56e1fefc120 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -60,14 +60,11 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
- if (IS_ERR(inode)) {
- mlog_errno(PTR_ERR(inode));
+ if (IS_ERR(inode))
return (void *)inode;
- }
if (handle->ih_generation != inode->i_generation) {
iput(inode);
- mlog_errno(-ESTALE);
return ERR_PTR(-ESTALE);
}
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index e4d91493d7d..28ab56f2b98 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -146,7 +146,6 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
if (is_bad_inode(inode)) {
iput(inode);
inode = ERR_PTR(-ESTALE);
- mlog_errno(PTR_ERR(inode));
goto bail;
}
@@ -155,8 +154,7 @@ bail:
mlog(0, "returning inode with number %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
mlog_exit_ptr(inode);
- } else
- mlog_errno(PTR_ERR(inode));
+ }
return inode;
}
@@ -247,7 +245,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
* today. change if needed. */
if (!OCFS2_IS_VALID_DINODE(fe) ||
!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
- mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
+ mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
"signature = %.*s, flags = 0x%x\n",
inode->i_ino,
(unsigned long long)le64_to_cpu(fe->i_blkno), 7,
@@ -478,11 +476,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
S_ISBLK(le16_to_cpu(fe->i_mode)))
inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
- if (ocfs2_populate_inode(inode, fe, 0) < 0) {
- mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
- (unsigned long long)fe->i_blkno, inode->i_ino);
+ if (ocfs2_populate_inode(inode, fe, 0) < 0)
goto bail;
- }
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index e1216364d19..d026b4f2775 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -306,8 +306,8 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* for the dinode, one for the new block. */
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
-/* file update (nlink, etc) + dir entry block */
-#define OCFS2_LINK_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
+#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1)
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 9637039c263..f3d7803b4b4 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -932,14 +932,15 @@ static int ocfs2_unlink(struct inode *dir,
goto leave;
}
- if (S_ISDIR(inode->i_mode)) {
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ if (S_ISDIR(inode->i_mode))
drop_nlink(dir);
- status = ocfs2_mark_inode_dirty(handle, dir,
- parent_node_bh);
- if (status < 0) {
- mlog_errno(status);
+
+ status = ocfs2_mark_inode_dirty(handle, dir, parent_node_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ if (S_ISDIR(inode->i_mode))
inc_nlink(dir);
- }
}
leave:
@@ -1068,6 +1069,7 @@ static int ocfs2_rename(struct inode *old_dir,
char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
struct buffer_head *orphan_entry_bh = NULL;
struct buffer_head *newfe_bh = NULL;
+ struct buffer_head *old_inode_bh = NULL;
struct buffer_head *insert_entry_bh = NULL;
struct ocfs2_super *osb = NULL;
u64 newfe_blkno;
@@ -1079,7 +1081,7 @@ static int ocfs2_rename(struct inode *old_dir,
struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh
- nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
+ nlink_t old_dir_nlink = old_dir->i_nlink;
/* At some point it might be nice to break this function up a
* bit. */
@@ -1139,12 +1141,11 @@ static int ocfs2_rename(struct inode *old_dir,
}
/*
- * Though we don't require an inode meta data update if
- * old_inode is not a directory, we lock anyway here to ensure
- * the vote thread on other nodes won't have to concurrently
- * downconvert the inode and the dentry locks.
+ * Aside from allowing a meta data update, the locking here
+ * also ensures that the vote thread on other nodes won't have
+ * to concurrently downconvert the inode and the dentry locks.
*/
- status = ocfs2_meta_lock(old_inode, NULL, 1);
+ status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
@@ -1355,6 +1356,7 @@ static int ocfs2_rename(struct inode *old_dir,
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
+ ocfs2_mark_inode_dirty(handle, old_inode, old_inode_bh);
/* now that the name has been added to new_dir, remove the old name */
status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
@@ -1384,27 +1386,22 @@ static int ocfs2_rename(struct inode *old_dir,
}
}
mark_inode_dirty(old_dir);
- if (new_inode)
+ ocfs2_mark_inode_dirty(handle, old_dir, old_dir_bh);
+ if (new_inode) {
mark_inode_dirty(new_inode);
+ ocfs2_mark_inode_dirty(handle, new_inode, newfe_bh);
+ }
- if (old_dir != new_dir)
- if (new_dir_nlink != new_dir->i_nlink) {
- if (!new_dir_bh) {
- mlog(ML_ERROR, "need to change nlink for new "
- "dir %llu from %d to %d but bh is NULL\n",
- (unsigned long long)OCFS2_I(new_dir)->ip_blkno,
- (int)new_dir_nlink, new_dir->i_nlink);
- } else {
- struct ocfs2_dinode *fe;
- status = ocfs2_journal_access(handle,
- new_dir,
- new_dir_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- fe = (struct ocfs2_dinode *) new_dir_bh->b_data;
- fe->i_links_count = cpu_to_le16(new_dir->i_nlink);
- status = ocfs2_journal_dirty(handle, new_dir_bh);
- }
- }
+ if (old_dir != new_dir) {
+ /* Keep the same times on both directories.*/
+ new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime;
+
+ /*
+ * This will also pick up the i_nlink change from the
+ * block above.
+ */
+ ocfs2_mark_inode_dirty(handle, new_dir, new_dir_bh);
+ }
if (old_dir_nlink != old_dir->i_nlink) {
if (!old_dir_bh) {
@@ -1455,6 +1452,8 @@ bail:
iput(new_inode);
if (newfe_bh)
brelse(newfe_bh);
+ if (old_inode_bh)
+ brelse(old_inode_bh);
if (old_dir_bh)
brelse(old_dir_bh);
if (new_dir_bh)
@@ -1826,6 +1825,13 @@ static int __ocfs2_add_entry(handle_t *handle,
(le16_to_cpu(de->rec_len) >= rec_len)) ||
(le16_to_cpu(de->rec_len) >=
(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
+ if (retval < 0) {
+ mlog_errno(retval);
+ goto bail;
+ }
+
status = ocfs2_journal_access(handle, dir, insert_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
/* By now the buffer is marked for journaling */
@@ -1848,7 +1854,6 @@ static int __ocfs2_add_entry(handle_t *handle,
de->name_len = namelen;
memcpy(de->name, name, namelen);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
dir->i_version++;
status = ocfs2_journal_dirty(handle, insert_bh);
retval = 0;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index b5c68567077..e61e218f5e0 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -85,7 +85,7 @@
#define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \
OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
-#define OCFS2_FEATURE_COMPAT_SUPP 0
+#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
#define OCFS2_FEATURE_INCOMPAT_SUPP OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT
#define OCFS2_FEATURE_RO_COMPAT_SUPP 0
@@ -110,6 +110,20 @@
#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010
/*
+ * backup superblock flag is used to indicate that this volume
+ * has backup superblocks.
+ */
+#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001
+
+/* The byte offset of the first backup block will be 1G.
+ * The following will be 4G, 16G, 64G, 256G and 1T.
+ */
+#define OCFS2_BACKUP_SB_START 1 << 30
+
+/* the max backup superblock nums */
+#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6
+
+/*
* Flags on ocfs2_dinode.i_flags
*/
#define OCFS2_VALID_FL (0x00000001) /* Inode is valid */
@@ -566,6 +580,20 @@ static inline int ocfs2_truncate_recs_per_inode(struct super_block *sb)
return size / sizeof(struct ocfs2_truncate_rec);
}
+
+static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index)
+{
+ u64 offset = OCFS2_BACKUP_SB_START;
+
+ if (index >= 0 && index < OCFS2_MAX_BACKUP_SUPERBLOCKS) {
+ offset <<= (2 * index);
+ offset >>= sb->s_blocksize_bits;
+ return offset;
+ }
+
+ return 0;
+
+}
#else
static inline int ocfs2_fast_symlink_chars(int blocksize)
{
@@ -631,6 +659,19 @@ static inline int ocfs2_truncate_recs_per_inode(int blocksize)
return size / sizeof(struct ocfs2_truncate_rec);
}
+
+static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index)
+{
+ uint64_t offset = OCFS2_BACKUP_SB_START;
+
+ if (index >= 0 && index < OCFS2_MAX_BACKUP_SUPERBLOCKS) {
+ offset <<= (2 * index);
+ offset /= blocksize;
+ return offset;
+ }
+
+ return 0;
+}
#endif /* __KERNEL__ */
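
The ocfs2_backup_super_blkno() helpers added above encode a fixed geometry: the first backup superblock sits at a byte offset of 1 GB and each subsequent one is four times further out (4 GB, 16 GB, 64 GB, 256 GB, 1 TB), converted to a block number via the filesystem block size. The standalone calculation below just illustrates that arithmetic, assuming a 4 KB block size.

#include <stdio.h>
#include <stdint.h>

#define BACKUP_SB_START (1ULL << 30)	/* first backup superblock at 1 GB */
#define MAX_BACKUP_SUPERBLOCKS 6

int main(void)
{
	const unsigned blocksize = 4096;	/* assumed block size */

	for (int index = 0; index < MAX_BACKUP_SUPERBLOCKS; index++) {
		/* Each backup is 4x (i.e. << 2) further out than the last. */
		uint64_t offset = BACKUP_SB_START << (2 * index);
		printf("backup sb %d: byte offset %llu (block %llu)\n",
		       index,
		       (unsigned long long)offset,
		       (unsigned long long)(offset / blocksize));
	}
	return 0;
}
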
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 957d6878b03..03b0191534d 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -158,8 +158,7 @@ static void *ocfs2_follow_link(struct dentry *dentry,
}
status = vfs_follow_link(nd, link);
- if (status && status != -ENOENT)
- mlog_errno(status);
+
bail:
if (page) {
kunmap(page);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 77a57b5799c..1a979ea3b37 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -371,9 +371,11 @@ static int mounts_open(struct inode *inode, struct file *file)
if (task) {
task_lock(task);
- ns = task->nsproxy->mnt_ns;
- if (ns)
- get_mnt_ns(ns);
+ if (task->nsproxy) {
+ ns = task->nsproxy->mnt_ns;
+ if (ns)
+ get_mnt_ns(ns);
+ }
task_unlock(task);
put_task_struct(task);
}
@@ -2326,13 +2328,23 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
{
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
- struct task_struct *leader = get_proc_task(inode);
+ struct task_struct *leader = NULL;
struct task_struct *task;
int retval = -ENOENT;
ino_t ino;
int tid;
unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
+ task = get_proc_task(inode);
+ if (!task)
+ goto out_no_task;
+ rcu_read_lock();
+ if (pid_alive(task)) {
+ leader = task->group_leader;
+ get_task_struct(leader);
+ }
+ rcu_read_unlock();
+ put_task_struct(task);
if (!leader)
goto out_no_task;
retval = 0;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 92ea7743fe8..b37ce33f67e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -47,7 +47,6 @@
#include <linux/vmalloc.h>
#include <linux/crash_dump.h>
#include <linux/pid_namespace.h>
-#include <linux/compile.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -254,12 +253,7 @@ static int version_read_proc(char *page, char **start, off_t off,
{
int len;
- /* FIXED STRING! Don't touch! */
- len = snprintf(page, PAGE_SIZE,
- "%s version %s"
- " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
- " (" LINUX_COMPILER ")"
- " %s\n",
+ len = snprintf(page, PAGE_SIZE, linux_proc_banner,
utsname()->sysname,
utsname()->release,
utsname()->version);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 99b6f329ba2..5109f1d5e7f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -48,6 +48,11 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
}
mutex_lock(&inode->i_mutex);
+
+ mutex_lock(&(REISERFS_I(inode)->i_mmap));
+ if (REISERFS_I(inode)->i_flags & i_ever_mapped)
+ REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
+
reiserfs_write_lock(inode->i_sb);
/* freeing preallocation only involves relogging blocks that
* are already in the current transaction. preallocation gets
@@ -100,11 +105,24 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
err = reiserfs_truncate_file(inode, 0);
}
out:
+ mutex_unlock(&(REISERFS_I(inode)->i_mmap));
mutex_unlock(&inode->i_mutex);
reiserfs_write_unlock(inode->i_sb);
return err;
}
+static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *inode;
+
+ inode = file->f_path.dentry->d_inode;
+ mutex_lock(&(REISERFS_I(inode)->i_mmap));
+ REISERFS_I(inode)->i_flags |= i_ever_mapped;
+ mutex_unlock(&(REISERFS_I(inode)->i_mmap));
+
+ return generic_file_mmap(file, vma);
+}
+
static void reiserfs_vfs_truncate_file(struct inode *inode)
{
reiserfs_truncate_file(inode, 1);
@@ -1527,7 +1545,7 @@ const struct file_operations reiserfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = reiserfs_file_mmap,
.open = generic_file_open,
.release = reiserfs_file_release,
.fsync = reiserfs_sync_file,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f3d1c4a7797..9fcbfe31697 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1125,6 +1125,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
REISERFS_I(inode)->i_prealloc_count = 0;
REISERFS_I(inode)->i_trans_id = 0;
REISERFS_I(inode)->i_jl = NULL;
+ mutex_init(&(REISERFS_I(inode)->i_mmap));
reiserfs_init_acl_access(inode);
reiserfs_init_acl_default(inode);
reiserfs_init_xattr_rwsem(inode);
@@ -1832,6 +1833,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
REISERFS_I(inode)->i_attrs =
REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
+ mutex_init(&(REISERFS_I(inode)->i_mmap));
reiserfs_init_acl_access(inode);
reiserfs_init_acl_default(inode);
reiserfs_init_xattr_rwsem(inode);
diff --git a/fs/super.c b/fs/super.c
index f961e030799..3e7458c2bb7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -753,9 +753,9 @@ int get_sb_bdev(struct file_system_type *fs_type,
* will protect the lockfs code from trying to start a snapshot
* while we are mounting
*/
- mutex_lock(&bdev->bd_mount_mutex);
+ down(&bdev->bd_mount_sem);
s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
- mutex_unlock(&bdev->bd_mount_mutex);
+ up(&bdev->bd_mount_sem);
if (IS_ERR(s))
goto error_s;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 2e0021e8f36..638f4c585e8 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -227,24 +227,27 @@ failed:
* We can come here from ufs_writepage or ufs_prepare_write,
* locked_page is argument of these functions, so we already lock it.
*/
-static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk,
+static void ufs_change_blocknr(struct inode *inode, unsigned int beg,
unsigned int count, unsigned int oldb,
unsigned int newb, struct page *locked_page)
{
- unsigned int blk_per_page = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- struct address_space *mapping = inode->i_mapping;
- pgoff_t index, cur_index = locked_page->index;
- unsigned int i, j;
+ const unsigned mask = (1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1;
+ struct address_space * const mapping = inode->i_mapping;
+ pgoff_t index, cur_index;
+ unsigned end, pos, j;
struct page *page;
struct buffer_head *head, *bh;
UFSD("ENTER, ino %lu, count %u, oldb %u, newb %u\n",
inode->i_ino, count, oldb, newb);
+ BUG_ON(!locked_page);
BUG_ON(!PageLocked(locked_page));
- for (i = 0; i < count; i += blk_per_page) {
- index = (baseblk+i) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ cur_index = locked_page->index;
+
+ for (end = count + beg; beg < end; beg = (beg | mask) + 1) {
+ index = beg >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
if (likely(cur_index != index)) {
page = ufs_get_locked_page(mapping, index);
@@ -253,21 +256,32 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk,
} else
page = locked_page;
- j = i;
head = page_buffers(page);
bh = head;
+ pos = beg & mask;
+ for (j = 0; j < pos; ++j)
+ bh = bh->b_this_page;
+ j = 0;
do {
- if (likely(bh->b_blocknr == j + oldb && j < count)) {
- unmap_underlying_metadata(bh->b_bdev,
- bh->b_blocknr);
- bh->b_blocknr = newb + j++;
- mark_buffer_dirty(bh);
+ if (buffer_mapped(bh)) {
+ pos = bh->b_blocknr - oldb;
+ if (pos < count) {
+ UFSD(" change from %llu to %llu\n",
+ (unsigned long long)pos + oldb,
+ (unsigned long long)pos + newb);
+ bh->b_blocknr = newb + pos;
+ unmap_underlying_metadata(bh->b_bdev,
+ bh->b_blocknr);
+ mark_buffer_dirty(bh);
+ ++j;
+ }
}
bh = bh->b_this_page;
} while (bh != head);
- set_page_dirty(page);
+ if (j)
+ set_page_dirty(page);
if (likely(cur_index != index))
ufs_put_locked_page(page);
@@ -415,14 +429,14 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
}
result = ufs_alloc_fragments (inode, cgno, goal, request, err);
if (result) {
+ ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
+ locked_page != NULL);
ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp,
result, locked_page);
*p = cpu_to_fs32(sb, result);
*err = 0;
UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
- ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
- locked_page != NULL);
unlock_super(sb);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
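
The rewritten ufs_change_blocknr() loop above advances one page at a time with beg = (beg | mask) + 1, where mask is blocks-per-page minus one; each iteration then walks only that page's buffers and skips unmapped ones. The fragment below demonstrates just the page-stepping arithmetic in isolation, assuming four blocks per page and a made-up block range.

#include <stdio.h>

int main(void)
{
	const unsigned mask = 4 - 1;	/* assumed 4 blocks per page */
	unsigned beg = 6, count = 9;	/* hypothetical block range to remap */
	unsigned end = beg + count;

	/* (beg | mask) + 1 rounds beg up to the first block of the next
	 * page, so each page in the range is visited exactly once. */
	for (; beg < end; beg = (beg | mask) + 1)
		printf("visit page %u starting at block %u\n",
		       beg >> 2, beg);
	return 0;
}
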
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 2fbab0aab68..4295ca91cf8 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -242,7 +242,8 @@ repeat:
goal = tmp + uspi->s_fpb;
tmp = ufs_new_fragments (inode, p, fragment - blockoff,
goal, required + blockoff,
- err, locked_page);
+ err,
+ phys != NULL ? locked_page : NULL);
}
/*
* We will extend last allocated block
@@ -250,7 +251,7 @@ repeat:
else if (lastblock == block) {
tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff),
- err, locked_page);
+ err, phys != NULL ? locked_page : NULL);
} else /* (lastblock > block) */ {
/*
* We will allocate new block before last allocated block
@@ -261,7 +262,8 @@ repeat:
goal = tmp + uspi->s_fpb;
}
tmp = ufs_new_fragments(inode, p, fragment - blockoff,
- goal, uspi->s_fpb, err, locked_page);
+ goal, uspi->s_fpb, err,
+ phys != NULL ? locked_page : NULL);
}
if (!tmp) {
if ((!blockoff && *p) ||
@@ -438,9 +440,11 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
* it much more readable:
*/
#define GET_INODE_DATABLOCK(x) \
- ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page)
+ ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new,\
+ bh_result->b_page)
#define GET_INODE_PTR(x) \
- ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, NULL)
+ ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL,\
+ bh_result->b_page)
#define GET_INDIRECT_DATABLOCK(x) \
ufs_inode_getblock(inode, bh, x, fragment, \
&err, &phys, &new, bh_result->b_page)
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index ea11d04c41a..0437b0a6fe9 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -109,10 +109,10 @@ static int ufs_trunc_direct (struct inode * inode)
tmp = fs32_to_cpu(sb, *p);
if (!tmp )
ufs_panic (sb, "ufs_trunc_direct", "internal error");
+ frag2 -= frag1;
frag1 = ufs_fragnum (frag1);
- frag2 = ufs_fragnum (frag2);
- ufs_free_fragments (inode, tmp + frag1, frag2 - frag1);
+ ufs_free_fragments(inode, tmp + frag1, frag2);
mark_inode_dirty(inode);
frag_to_free = tmp + frag1;