Merge branch 'master' into for-next

Conflicts: fs/exofs/inode.c
author: Jiri Kosina <jkosina@suse.cz> 2010-08-11 09:36:51 +0200
committer: Jiri Kosina <jkosina@suse.cz> 2010-08-11 09:36:51 +0200
commit: 6396fc3b3ff3f6b942992b653a62df11dcef9bea (patch)
tree: db3c7cbe833b43c653adc99f70941431c5ff7c4e /fs
parent: 4785879e4d340e24e54f6de2ccfc42728b912808 (diff)
parent: 3d30701b58970425e1d45994d6cb82f828924fdd (diff)
239 files changed, 5297 insertions, 4337 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index f47c6bbb01b..88418c419ea 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -52,7 +52,7 @@ void v9fs_destroy_inode(struct inode *inode);
 #endif
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
-void v9fs_clear_inode(struct inode *inode);
+void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
 void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 6e94f3247ce..d97c34a24f7 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -430,8 +430,10 @@ error:
  * @inode: inode to release
  *
  */
-void v9fs_clear_inode(struct inode *inode)
+void v9fs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(inode->i_mapping, 0);
+	end_writeback(inode);
 	filemap_fdatawrite(inode->i_mapping);
 
 #ifdef CONFIG_9P_FSCACHE
@@ -1209,10 +1211,19 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	}
 
 	retval = p9_client_wstat(fid, &wstat);
-	if (retval >= 0)
-		retval = inode_setattr(dentry->d_inode, iattr);
+	if (retval < 0)
+		return retval;
 
-	return retval;
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(dentry->d_inode)) {
+		retval = vmtruncate(dentry->d_inode, iattr->ia_size);
+		if (retval)
+			return retval;
+	}
+
+	setattr_copy(dentry->d_inode, iattr);
+	mark_inode_dirty(dentry->d_inode);
+	return 0;
 }
 
 /**
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 4b9ede0b41b..f9311077de6 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -266,7 +266,7 @@ static const struct super_operations v9fs_super_ops = {
 	.destroy_inode = v9fs_destroy_inode,
 #endif
 	.statfs = simple_statfs,
-	.clear_inode = v9fs_clear_inode,
+	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
 };
@@ -277,7 +277,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
 	.destroy_inode = v9fs_destroy_inode,
 #endif
 	.statfs = v9fs_statfs,
-	.clear_inode = v9fs_clear_inode,
+	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
 };
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 6f850b06ab6..65794b8fe79 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				adfs_get_block,
 				&ADFS_I(mapping->host)->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
@@ -324,10 +333,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
 
 	/* XXX: this is missing some actual on-disk truncation.. */
 	if (ia_valid & ATTR_SIZE)
-		error = simple_setsize(inode, attr->ia_size);
-
-	if (error)
-		goto out;
+		truncate_setsize(inode, attr->ia_size);
 
 	if (ia_valid & ATTR_MTIME) {
 		inode->i_mtime = attr->ia_mtime;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index f05b6155ccc..a8cbdeb3402 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -171,8 +171,7 @@ extern int	affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 extern unsigned long		 affs_parent_ino(struct inode *dir);
 extern struct inode		*affs_new_inode(struct inode *dir);
 extern int			 affs_notify_change(struct dentry *dentry, struct iattr *attr);
-extern void			 affs_delete_inode(struct inode *inode);
-extern void			 affs_clear_inode(struct inode *inode);
+extern void			 affs_evict_inode(struct inode *inode);
 extern struct inode		*affs_iget(struct super_block *sb,
 					unsigned long ino);
 extern int			 affs_write_inode(struct inode *inode,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 322710c3eed..c4a9875bd1a 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				affs_get_block,
 				&AFFS_I(mapping->host)->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index f4b2a4ee4f9..3a0fdec175b 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -235,31 +235,36 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
 		goto out;
 	}
 
-	error = inode_setattr(inode, attr);
-	if (!error && (attr->ia_valid & ATTR_MODE))
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
+	if (attr->ia_valid & ATTR_MODE)
 		mode_to_prot(inode);
 out:
 	return error;
 }
 
 void
-affs_delete_inode(struct inode *inode)
-{
-	pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
-	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	affs_truncate(inode);
-	clear_inode(inode);
-	affs_free_block(inode->i_sb, inode->i_ino);
-}
-
-void
-affs_clear_inode(struct inode *inode)
+affs_evict_inode(struct inode *inode)
 {
 	unsigned long cache_page;
+	pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+	truncate_inode_pages(&inode->i_data, 0);
 
-	pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		affs_truncate(inode);
+	}
 
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	affs_free_prealloc(inode);
 	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
 	if (cache_page) {
@@ -271,6 +276,9 @@ affs_clear_inode(struct inode *inode)
 	affs_brelse(AFFS_I(inode)->i_ext_bh);
 	AFFS_I(inode)->i_ext_last = ~1;
 	AFFS_I(inode)->i_ext_bh = NULL;
+
+	if (!inode->i_nlink)
+		affs_free_block(inode->i_sb, inode->i_ino);
 }
 
 struct inode *
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 16a3e4765f6..33c4e7eef47 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -26,7 +26,7 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
-affs_commit_super(struct super_block *sb, int clean)
+affs_commit_super(struct super_block *sb, int wait, int clean)
 {
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	struct buffer_head *bh = sbi->s_root_bh;
@@ -36,6 +36,8 @@ affs_commit_super(struct super_block *sb, int clean)
 	secs_to_datestamp(get_seconds(), &tail->disk_change);
 	affs_fix_checksum(sb, bh);
 	mark_buffer_dirty(bh);
+	if (wait)
+		sync_dirty_buffer(bh);
 }
 
 static void
@@ -46,8 +48,8 @@ affs_put_super(struct super_block *sb)
 
 	lock_kernel();
 
-	if (!(sb->s_flags & MS_RDONLY))
-		affs_commit_super(sb, 1);
+	if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
+		affs_commit_super(sb, 1, 1);
 
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
@@ -61,27 +63,20 @@ affs_put_super(struct super_block *sb)
 static void
 affs_write_super(struct super_block *sb)
 {
-	int clean = 2;
-
 	lock_super(sb);
-	if (!(sb->s_flags & MS_RDONLY)) {
-		//	if (sbi->s_bitmap[i].bm_bh) {
-		//		if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
-		//			clean = 0;
-		affs_commit_super(sb, clean);
-		sb->s_dirt = !clean;	/* redo until bitmap synced */
-	} else
-		sb->s_dirt = 0;
+	if (!(sb->s_flags & MS_RDONLY))
+		affs_commit_super(sb, 1, 2);
+	sb->s_dirt = 0;
 	unlock_super(sb);
 
-	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
+	pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
 }
 
 static int
 affs_sync_fs(struct super_block *sb, int wait)
 {
 	lock_super(sb);
-	affs_commit_super(sb, 2);
+	affs_commit_super(sb, wait, 2);
 	sb->s_dirt = 0;
 	unlock_super(sb);
 	return 0;
@@ -140,8 +135,7 @@ static const struct super_operations affs_sops = {
 	.alloc_inode	= affs_alloc_inode,
 	.destroy_inode	= affs_destroy_inode,
 	.write_inode	= affs_write_inode,
-	.delete_inode	= affs_delete_inode,
-	.clear_inode	= affs_clear_inode,
+	.evict_inode	= affs_evict_inode,
 	.put_super	= affs_put_super,
 	.write_super	= affs_write_super,
 	.sync_fs	= affs_sync_fs,
@@ -554,9 +548,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		return 0;
 	}
 	if (*flags & MS_RDONLY) {
-		sb->s_dirt = 1;
-		while (sb->s_dirt)
-			affs_write_super(sb);
+		affs_write_super(sb);
 		affs_free_bitmap(sb);
 	} else
 		res = affs_init_bitmap(sb, flags);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d00b312e311..320ffef1157 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -316,7 +316,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 /*
  * clear an AFS inode
  */
-void afs_clear_inode(struct inode *inode)
+void afs_evict_inode(struct inode *inode)
 {
 	struct afs_permits *permits;
 	struct afs_vnode *vnode;
@@ -335,6 +335,9 @@ void afs_clear_inode(struct inode *inode)
 
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+
 	afs_give_up_callback(vnode);
 
 	if (vnode->server) {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5f679b77ce2..8679089ce9a 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -565,7 +565,7 @@ extern void afs_zap_data(struct afs_vnode *);
 extern int afs_validate(struct afs_vnode *, struct key *);
 extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int afs_setattr(struct dentry *, struct iattr *);
-extern void afs_clear_inode(struct inode *);
+extern void afs_evict_inode(struct inode *);
 
 /*
  * main.c
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e932e5a3a0c..9cf80f02da1 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -49,7 +49,7 @@ static const struct super_operations afs_super_ops = {
 	.statfs		= afs_statfs,
 	.alloc_inode	= afs_alloc_inode,
 	.destroy_inode	= afs_destroy_inode,
-	.clear_inode	= afs_clear_inode,
+	.evict_inode	= afs_evict_inode,
 	.put_super	= afs_put_super,
 	.show_options	= generic_show_options,
 };
diff --git a/fs/attr.c b/fs/attr.c
index b4fa3b0aa59..7ca41811afa 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,35 +14,53 @@
 #include <linux/fcntl.h>
 #include <linux/security.h>
 
-/* Taken over from the old code... */
-
-/* POSIX UID/GID verification for setting inode attributes. */
+/**
+ * inode_change_ok - check if attribute changes to an inode are allowed
+ * @inode:	inode to check
+ * @attr:	attributes to change
+ *
+ * Check if we are allowed to change the attributes contained in @attr
+ * in the given inode.  This includes the normal unix access permission
+ * checks, as well as checks for rlimits and others.
+ *
+ * Should be called as the first thing in ->setattr implementations,
+ * possibly after taking additional locks.
+ */
 int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
-	int retval = -EPERM;
 	unsigned int ia_valid = attr->ia_valid;
 
+	/*
+	 * First check size constraints.  These can't be overridden using
+	 * ATTR_FORCE.
+	 */
+	if (ia_valid & ATTR_SIZE) {
+		int error = inode_newsize_ok(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
 	/* If force is set do it anyway. */
 	if (ia_valid & ATTR_FORCE)
-		goto fine;
+		return 0;
 
 	/* Make sure a caller can chown. */
 	if ((ia_valid & ATTR_UID) &&
 	    (current_fsuid() != inode->i_uid ||
 	     attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
-		goto error;
+		return -EPERM;
 
 	/* Make sure caller can chgrp. */
 	if ((ia_valid & ATTR_GID) &&
 	    (current_fsuid() != inode->i_uid ||
 	    (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
 	    !capable(CAP_CHOWN))
-		goto error;
+		return -EPERM;
 
 	/* Make sure a caller can chmod. */
 	if (ia_valid & ATTR_MODE) {
 		if (!is_owner_or_cap(inode))
-			goto error;
+			return -EPERM;
 		/* Also check the setgid bit! */
 		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
 				inode->i_gid) && !capable(CAP_FSETID))
@@ -52,12 +70,10 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 	/* Check for setting the inode time. */
 	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
 		if (!is_owner_or_cap(inode))
-			goto error;
+			return -EPERM;
 	}
-fine:
-	retval = 0;
-error:
-	return retval;
+
+	return 0;
 }
 EXPORT_SYMBOL(inode_change_ok);
 
@@ -105,21 +121,21 @@ out_big:
 EXPORT_SYMBOL(inode_newsize_ok);
 
 /**
- * generic_setattr - copy simple metadata updates into the generic inode
+ * setattr_copy - copy simple metadata updates into the generic inode
  * @inode:	the inode to be updated
  * @attr:	the new attributes
  *
- * generic_setattr must be called with i_mutex held.
+ * setattr_copy must be called with i_mutex held.
  *
- * generic_setattr updates the inode's metadata with that specified
+ * setattr_copy updates the inode's metadata with that specified
  * in attr. Noticably missing is inode size update, which is more complex
- * as it requires pagecache updates. See simple_setsize.
+ * as it requires pagecache updates.
  *
  * The inode is not marked as dirty after this operation. The rationale is
  * that for "simple" filesystems, the struct inode is the inode storage.
  * The caller is free to mark the inode dirty afterwards if needed.
  */
-void generic_setattr(struct inode *inode, const struct iattr *attr)
+void setattr_copy(struct inode *inode, const struct iattr *attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
 
@@ -144,32 +160,7 @@ void generic_setattr(struct inode *inode, const struct iattr *attr)
 		inode->i_mode = mode;
 	}
 }
-EXPORT_SYMBOL(generic_setattr);
-
-/*
- * note this function is deprecated, the new truncate sequence should be
- * used instead -- see eg. simple_setsize, generic_setattr.
- */
-int inode_setattr(struct inode *inode, const struct iattr *attr)
-{
-	unsigned int ia_valid = attr->ia_valid;
-
-	if (ia_valid & ATTR_SIZE &&
-	    attr->ia_size != i_size_read(inode)) {
-		int error;
-
-		error = vmtruncate(inode, attr->ia_size);
-		if (error)
-			return error;
-	}
-
-	generic_setattr(inode, attr);
-
-	mark_inode_dirty(inode);
-
-	return 0;
-}
-EXPORT_SYMBOL(inode_setattr);
+EXPORT_SYMBOL(setattr_copy);
 
 int notify_change(struct dentry * dentry, struct iattr * attr)
 {
@@ -237,13 +228,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	if (ia_valid & ATTR_SIZE)
 		down_write(&dentry->d_inode->i_alloc_sem);
 
-	if (inode->i_op && inode->i_op->setattr) {
+	if (inode->i_op->setattr)
 		error = inode->i_op->setattr(dentry, attr);
-	} else {
-		error = inode_change_ok(inode, attr);
-		if (!error)
-			error = inode_setattr(inode, attr);
-	}
+	else
+		error = simple_setattr(dentry, attr);
 
 	if (ia_valid & ATTR_SIZE)
 		up_write(&dentry->d_inode->i_alloc_sem);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 9a0520b5066..11b1ea786d0 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
+#include <linux/compat.h>
 #include <linux/smp_lock.h>
 #include "autofs_i.h"
 
@@ -25,13 +26,17 @@ static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
 static int autofs_root_unlink(struct inode *,struct dentry *);
 static int autofs_root_rmdir(struct inode *,struct dentry *);
 static int autofs_root_mkdir(struct inode *,struct dentry *,int);
-static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
+static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
+static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
 
 const struct file_operations autofs_root_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= autofs_root_readdir,
-	.ioctl		= autofs_root_ioctl,
+	.unlocked_ioctl	= autofs_root_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= autofs_root_compat_ioctl,
+#endif
 };
 
 const struct inode_operations autofs_root_inode_operations = {
@@ -492,6 +497,25 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 }
 
 /* Get/set timeout ioctl() operation */
+#ifdef CONFIG_COMPAT
+static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
+					 unsigned int __user *p)
+{
+	unsigned long ntimeout;
+
+	if (get_user(ntimeout, p) ||
+	    put_user(sbi->exp_timeout / HZ, p))
+		return -EFAULT;
+
+	if (ntimeout > UINT_MAX/HZ)
+		sbi->exp_timeout = 0;
+	else
+		sbi->exp_timeout = ntimeout * HZ;
+
+	return 0;
+}
+#endif
+
 static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
 					 unsigned long __user *p)
 {
@@ -546,7 +570,7 @@ static inline int autofs_expire_run(struct super_block *sb,
  * ioctl()'s on the root directory is the chief method for the daemon to
  * generate kernel reactions
  */
-static int autofs_root_ioctl(struct inode *inode, struct file *filp,
+static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
 			     unsigned int cmd, unsigned long arg)
 {
 	struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
@@ -571,6 +595,10 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
 		return 0;
 	case AUTOFS_IOC_PROTOVER: /* Get protocol version */
 		return autofs_get_protover(argp);
+#ifdef CONFIG_COMPAT
+	case AUTOFS_IOC_SETTIMEOUT32:
+		return autofs_compat_get_set_timeout(sbi, argp);
+#endif
 	case AUTOFS_IOC_SETTIMEOUT:
 		return autofs_get_set_timeout(sbi, argp);
 	case AUTOFS_IOC_EXPIRE:
@@ -579,4 +607,37 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
 	default:
 		return -ENOSYS;
 	}
+
+}
+
+static long autofs_root_ioctl(struct file *filp,
+			     unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	lock_kernel();
+	ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
+				   filp, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long autofs_root_compat_ioctl(struct file *filp,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int ret;
+
+	lock_kernel();
+	if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
+		ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
+	else
+		ret = autofs_do_root_ioctl(inode, filp, cmd,
+			(unsigned long)compat_ptr(arg));
+	unlock_kernel();
+
+	return ret;
 }
+#endif
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index db4117ed780..48e056e70fd 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -18,7 +18,9 @@
 #include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
+#include <linux/compat.h>
 #include <linux/smp_lock.h>
+
 #include "autofs_i.h"
 
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
@@ -26,6 +28,7 @@ static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
 static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
+static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -40,6 +43,9 @@ const struct file_operations autofs4_root_operations = {
 	.readdir	= dcache_readdir,
 	.llseek		= dcache_dir_lseek,
 	.unlocked_ioctl	= autofs4_root_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= autofs4_root_compat_ioctl,
+#endif
 };
 
 const struct file_operations autofs4_dir_operations = {
@@ -840,6 +846,26 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 }
 
 /* Get/set timeout ioctl() operation */
+#ifdef CONFIG_COMPAT
+static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi,
+					 compat_ulong_t __user *p)
+{
+	int rv;
+	unsigned long ntimeout;
+
+	if ((rv = get_user(ntimeout, p)) ||
+	     (rv = put_user(sbi->exp_timeout/HZ, p)))
+		return rv;
+
+	if (ntimeout > UINT_MAX/HZ)
+		sbi->exp_timeout = 0;
+	else
+		sbi->exp_timeout = ntimeout * HZ;
+
+	return 0;
+}
+#endif
+
 static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi,
 					 unsigned long __user *p)
 {
@@ -933,6 +959,10 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
 		return autofs4_get_protosubver(sbi, p);
 	case AUTOFS_IOC_SETTIMEOUT:
 		return autofs4_get_set_timeout(sbi, p);
+#ifdef CONFIG_COMPAT
+	case AUTOFS_IOC_SETTIMEOUT32:
+		return autofs4_compat_get_set_timeout(sbi, p);
+#endif
 
 	case AUTOFS_IOC_ASKUMOUNT:
 		return autofs4_ask_umount(filp->f_path.mnt, p);
@@ -961,3 +991,22 @@ static long autofs4_root_ioctl(struct file *filp,
 
 	return ret;
 }
+
+#ifdef CONFIG_COMPAT
+static long autofs4_root_compat_ioctl(struct file *filp,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int ret;
+
+	lock_kernel();
+	if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
+		ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
+	else
+		ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
+			(unsigned long)compat_ptr(arg));
+	unlock_kernel();
+
+	return ret;
+}
+#endif
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 7109e451abf..f7f87e233dd 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -17,7 +17,6 @@ struct bfs_sb_info {
 	unsigned long si_lf_eblk;
 	unsigned long si_lasti;
 	unsigned long *si_imap;
-	struct buffer_head *si_sbh;		/* buffer header w/superblock */
 	struct mutex bfs_lock;
 };
 
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 88b9a3ff44e..eb67edd0f8e 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -70,7 +70,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
 	struct super_block *sb = inode->i_sb;
 	struct bfs_sb_info *info = BFS_SB(sb);
 	struct bfs_inode_info *bi = BFS_I(inode);
-	struct buffer_head *sbh = info->si_sbh;
 
 	phys = bi->i_sblock + block;
 	if (!create) {
@@ -112,7 +111,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
 		info->si_freeb -= phys - bi->i_eblock;
 		info->si_lf_eblk = bi->i_eblock = phys;
 		mark_inode_dirty(inode);
-		mark_buffer_dirty(sbh);
 		err = 0;
 		goto out;
 	}
@@ -147,7 +145,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
 	 */
 	info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks;
 	mark_inode_dirty(inode);
-	mark_buffer_dirty(sbh);
 	map_bh(bh_result, sb, phys);
 out:
 	mutex_unlock(&info->bfs_lock);
@@ -168,9 +165,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags,
-					pagep, fsdata, bfs_get_block);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				bfs_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f22a7d3dc36..c4daf0f5fc0 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -31,7 +31,6 @@ MODULE_LICENSE("GPL");
 #define dprintf(x...)
 #endif
 
-static void bfs_write_super(struct super_block *s);
 void dump_imap(const char *prefix, struct super_block *s);
 
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -99,6 +98,24 @@ error:
 	return ERR_PTR(-EIO);
 }
 
+static struct bfs_inode *find_inode(struct super_block *sb, u16 ino, struct buffer_head **p)
+{
+	if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(sb)->si_lasti)) {
+		printf("Bad inode number %s:%08x\n", sb->s_id, ino);
+		return ERR_PTR(-EIO);
+	}
+
+	ino -= BFS_ROOT_INO;
+
+	*p = sb_bread(sb, 1 + ino / BFS_INODES_PER_BLOCK);
+	if (!*p) {
+		printf("Unable to read inode %s:%08x\n", sb->s_id, ino);
+		return ERR_PTR(-EIO);
+	}
+
+	return (struct bfs_inode *)(*p)->b_data +  ino % BFS_INODES_PER_BLOCK;
+}
+
 static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -106,28 +123,15 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
         unsigned long i_sblock;
 	struct bfs_inode *di;
 	struct buffer_head *bh;
-	int block, off;
 	int err = 0;
 
         dprintf("ino=%08x\n", ino);
 
-	if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) {
-		printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
-		return -EIO;
-	}
+	di = find_inode(inode->i_sb, ino, &bh);
+	if (IS_ERR(di))
+		return PTR_ERR(di);
 
 	mutex_lock(&info->bfs_lock);
-	block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
-	bh = sb_bread(inode->i_sb, block);
-	if (!bh) {
-		printf("Unable to read inode %s:%08x\n",
-				inode->i_sb->s_id, ino);
-		mutex_unlock(&info->bfs_lock);
-		return -EIO;
-	}
-
-	off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
-	di = (struct bfs_inode *)bh->b_data + off;
 
 	if (ino == BFS_ROOT_INO)
 		di->i_vtype = cpu_to_le32(BFS_VDIR);
@@ -158,12 +162,11 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return err;
 }
 
-static void bfs_delete_inode(struct inode *inode)
+static void bfs_evict_inode(struct inode *inode)
 {
 	unsigned long ino = inode->i_ino;
 	struct bfs_inode *di;
 	struct buffer_head *bh;
-	int block, off;
 	struct super_block *s = inode->i_sb;
 	struct bfs_sb_info *info = BFS_SB(s);
 	struct bfs_inode_info *bi = BFS_I(inode);
@@ -171,28 +174,19 @@ static void bfs_delete_inode(struct inode *inode)
 	dprintf("ino=%08lx\n", ino);
 
 	truncate_inode_pages(&inode->i_data, 0);
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 
-	if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) {
-		printf("invalid ino=%08lx\n", ino);
+	if (inode->i_nlink)
 		return;
-	}
-	
-	inode->i_size = 0;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-	mutex_lock(&info->bfs_lock);
-	mark_inode_dirty(inode);
 
-	block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
-	bh = sb_bread(s, block);
-	if (!bh) {
-		printf("Unable to read inode %s:%08lx\n",
-					inode->i_sb->s_id, ino);
-		mutex_unlock(&info->bfs_lock);
+	di = find_inode(s, inode->i_ino, &bh);
+	if (IS_ERR(di))
 		return;
-	}
-	off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
-	di = (struct bfs_inode *)bh->b_data + off;
-	memset((void *)di, 0, sizeof(struct bfs_inode));
+
+	mutex_lock(&info->bfs_lock);
+	/* clear on-disk inode */
+	memset(di, 0, sizeof(struct bfs_inode));
 	mark_buffer_dirty(bh);
 	brelse(bh);
 
@@ -209,32 +203,9 @@ static void bfs_delete_inode(struct inode *inode)
 	 * "last block of the last file" even if there is no
 	 * real file there, saves us 1 gap.
 	 */
-	if (info->si_lf_eblk == bi->i_eblock) {
+	if (info->si_lf_eblk == bi->i_eblock)
 		info->si_lf_eblk = bi->i_sblock - 1;
-		mark_buffer_dirty(info->si_sbh);
-	}
 	mutex_unlock(&info->bfs_lock);
-	clear_inode(inode);
-}
-
-static int bfs_sync_fs(struct super_block *sb, int wait)
-{
-	struct bfs_sb_info *info = BFS_SB(sb);
-
-	mutex_lock(&info->bfs_lock);
-	mark_buffer_dirty(info->si_sbh);
-	sb->s_dirt = 0;
-	mutex_unlock(&info->bfs_lock);
-
-	return 0;
-}
-
-static void bfs_write_super(struct super_block *sb)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		bfs_sync_fs(sb, 1);
-	else
-		sb->s_dirt = 0;
 }
 
 static void bfs_put_super(struct super_block *s)
@@ -246,10 +217,6 @@ static void bfs_put_super(struct super_block *s)
 
 	lock_kernel();
 
-	if (s->s_dirt)
-		bfs_write_super(s);
-
-	brelse(info->si_sbh);
 	mutex_destroy(&info->bfs_lock);
 	kfree(info->si_imap);
 	kfree(info);
@@ -319,10 +286,8 @@ static const struct super_operations bfs_sops = {
 	.alloc_inode	= bfs_alloc_inode,
 	.destroy_inode	= bfs_destroy_inode,
 	.write_inode	= bfs_write_inode,
-	.delete_inode	= bfs_delete_inode,
+	.evict_inode	= bfs_evict_inode,
 	.put_super	= bfs_put_super,
-	.write_super	= bfs_write_super,
-	.sync_fs	= bfs_sync_fs,
 	.statfs		= bfs_statfs,
 };
 
@@ -349,7 +314,7 @@ void dump_imap(const char *prefix, struct super_block *s)
 
 static int bfs_fill_super(struct super_block *s, void *data, int silent)
 {
-	struct buffer_head *bh;
+	struct buffer_head *bh, *sbh;
 	struct bfs_super_block *bfs_sb;
 	struct inode *inode;
 	unsigned i, imap_len;
@@ -365,10 +330,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 
 	sb_set_blocksize(s, BFS_BSIZE);
 
-	info->si_sbh = sb_bread(s, 0);
-	if (!info->si_sbh)
+	sbh = sb_bread(s, 0);
+	if (!sbh)
 		goto out;
-	bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
+	bfs_sb = (struct bfs_super_block *)sbh->b_data;
 	if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
 		if (!silent)
 			printf("No BFS filesystem on %s (magic=%08x)\n", 
@@ -472,10 +437,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 			info->si_lf_eblk = eblock;
 	}
 	brelse(bh);
-	if (!(s->s_flags & MS_RDONLY)) {
-		mark_buffer_dirty(info->si_sbh);
-		s->s_dirt = 1;
-	} 
+	brelse(sbh);
 	dump_imap("read_super", s);
 	return 0;
 
@@ -485,7 +447,7 @@ out3:
 out2:
 	kfree(info->si_imap);
 out1:
-	brelse(info->si_sbh);
+	brelse(sbh);
 out:
 	mutex_destroy(&info->bfs_lock);
 	kfree(info);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c4e83537ead..9e60fd20171 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -502,8 +502,9 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 	return inode;
 }
 
-static void bm_clear_inode(struct inode *inode)
+static void bm_evict_inode(struct inode *inode)
 {
+	end_writeback(inode);
 	kfree(inode->i_private);
 }
 
@@ -685,7 +686,7 @@ static const struct file_operations bm_status_operations = {
 
 static const struct super_operations s_ops = {
 	.statfs		= simple_statfs,
-	.clear_inode	= bm_clear_inode,
+	.evict_inode	= bm_evict_inode,
 };
 
 static int bm_fill_super(struct super_block * sb, void * data, int silent)
diff --git a/fs/bio.c b/fs/bio.c
index e7bf6ca64dc..8abb2dfb2e7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	if (!bio)
 		goto out_bmd;
 
-	bio->bi_rw |= (!write_to_vm << BIO_RW);
+	if (!write_to_vm)
+		bio->bi_rw |= REQ_WRITE;
 
 	ret = 0;
 
@@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
 	 * set data direction, and check if mapped pages need bouncing
 	 */
 	if (!write_to_vm)
-		bio->bi_rw |= (1 << BIO_RW);
+		bio->bi_rw |= REQ_WRITE;
 
 	bio->bi_bdev = bdev;
 	bio->bi_flags |= (1 << BIO_USER_MAPPED);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b3171fb0dc9..66411463b73 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,9 +172,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
-				I_BDEV(inode), iov, offset, nr_segs,
-				blkdev_get_blocks, NULL);
+	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
+				    nr_segs, blkdev_get_blocks, NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -309,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin_newtrunc(file, mapping, pos, len, flags,
-				pagep, fsdata, blkdev_get_block);
+	return block_write_begin(mapping, pos, len, flags, pagep,
+				 blkdev_get_block);
 }
 
 static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -428,10 +426,13 @@ static inline void __bd_forget(struct inode *inode)
 	inode->i_mapping = &inode->i_data;
 }
 
-static void bdev_clear_inode(struct inode *inode)
+static void bdev_evict_inode(struct inode *inode)
 {
 	struct block_device *bdev = &BDEV_I(inode)->bdev;
 	struct list_head *p;
+	truncate_inode_pages(&inode->i_data, 0);
+	invalidate_inode_buffers(inode); /* is it needed here? */
+	end_writeback(inode);
 	spin_lock(&bdev_lock);
 	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
 		__bd_forget(list_entry(p, struct inode, i_devices));
@@ -445,7 +446,7 @@ static const struct super_operations bdev_sops = {
 	.alloc_inode = bdev_alloc_inode,
 	.destroy_inode = bdev_destroy_inode,
 	.drop_inode = generic_delete_inode,
-	.clear_inode = bdev_clear_inode,
+	.evict_inode = bdev_evict_inode,
 };
 
 static int bd_get_sb(struct file_system_type *fs_type,
@@ -1345,13 +1346,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		return ret;
 	}
 
-	lock_kernel();
  restart:
 
 	ret = -ENXIO;
 	disk = get_gendisk(bdev->bd_dev, &partno);
 	if (!disk)
-		goto out_unlock_kernel;
+		goto out;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
@@ -1431,7 +1431,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	if (for_part)
 		bdev->bd_part_count++;
 	mutex_unlock(&bdev->bd_mutex);
-	unlock_kernel();
 	return 0;
 
  out_clear:
@@ -1444,9 +1443,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	bdev->bd_contains = NULL;
  out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
- out_unlock_kernel:
-	unlock_kernel();
-
+ out:
 	if (disk)
 		module_put(disk->fops->owner);
 	put_disk(disk);
@@ -1515,7 +1512,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	struct block_device *victim = NULL;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
-	lock_kernel();
 	if (for_part)
 		bdev->bd_part_count--;
 
@@ -1540,7 +1536,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
 	}
-	unlock_kernel();
 	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
 	if (victim)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 29c20092847..eaf286abad1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2389,13 +2389,13 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
 			      pgoff_t offset, pgoff_t last_index);
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
-void btrfs_delete_inode(struct inode *inode);
+void btrfs_evict_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
-void btrfs_drop_inode(struct inode *inode);
+int btrfs_drop_inode(struct inode *inode);
 int btrfs_init_cachep(void);
 void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 34f7c375567..64f10082f04 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
 	end_io_wq->work.func = end_workqueue_fn;
 	end_io_wq->work.flags = 0;
 
-	if (bio->bi_rw & (1 << BIO_RW)) {
+	if (bio->bi_rw & REQ_WRITE) {
 		if (end_io_wq->metadata)
 			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
 					   &end_io_wq->work);
@@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 
 	atomic_inc(&fs_info->nr_async_submits);
 
-	if (rw & (1 << BIO_RW_SYNCIO))
+	if (rw & REQ_SYNC)
 		btrfs_set_work_high_prio(&async->work);
 
 	btrfs_queue_worker(&fs_info->workers, &async->work);
@@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 					  bio, 1);
 	BUG_ON(ret);
 
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		/*
 		 * called for a read, do the setup so that checksum validation
 		 * can happen in the async kernel threads
@@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 	 * ram and up to date before trying to verify things.  For
 	 * blocksize <= pagesize, it is basically a noop
 	 */
-	if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
+	if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
 	    !bio_ready_for_csum(bio)) {
 		btrfs_queue_worker(&fs_info->endio_meta_workers,
 				   &end_io_wq->work);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1bff92ad474..c03864406af 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
 
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		if (bio_flags & EXTENT_BIO_COMPRESSED) {
 			return btrfs_submit_compressed_read(inode, bio,
 						    mirror_num, bio_flags);
@@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
 	bio->bi_size = 0;
 
 	bio_add_page(bio, page, failrec->len, start - page_offset(page));
-	if (failed_bio->bi_rw & (1 << BIO_RW))
+	if (failed_bio->bi_rw & REQ_WRITE)
 		rw = WRITE;
 	else
 		rw = READ;
@@ -2938,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
 	ret = btrfs_update_inode(trans, root, dir);
 	BUG_ON(ret);
-	dir->i_sb->s_dirt = 1;
 
 	btrfs_free_path(path);
 	return 0;
@@ -3656,17 +3655,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 		if (err)
 			return err;
 	}
-	attr->ia_valid &= ~ATTR_SIZE;
 
-	if (attr->ia_valid)
-		err = inode_setattr(inode, attr);
+	if (attr->ia_valid) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
+
+		if (attr->ia_valid & ATTR_MODE)
+			err = btrfs_acl_chmod(inode);
+	}
 
-	if (!err && ((attr->ia_valid & ATTR_MODE)))
-		err = btrfs_acl_chmod(inode);
 	return err;
 }
 
-void btrfs_delete_inode(struct inode *inode)
+void btrfs_evict_inode(struct inode *inode)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3674,10 +3675,14 @@ void btrfs_delete_inode(struct inode *inode)
 	int ret;
 
 	truncate_inode_pages(&inode->i_data, 0);
+	if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+		goto no_delete;
+
 	if (is_bad_inode(inode)) {
 		btrfs_orphan_del(NULL, inode);
 		goto no_delete;
 	}
+	/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
 	if (root->fs_info->log_root_recovering) {
@@ -3727,7 +3732,7 @@ void btrfs_delete_inode(struct inode *inode)
 	btrfs_end_transaction(trans, root);
 	btrfs_btree_balance_dirty(root, nr);
 no_delete:
-	clear_inode(inode);
+	end_writeback(inode);
 	return;
 }
 
@@ -3858,7 +3863,7 @@ again:
 			p = &parent->rb_right;
 		else {
 			WARN_ON(!(entry->vfs_inode.i_state &
-				  (I_WILL_FREE | I_FREEING | I_CLEAR)));
+				  (I_WILL_FREE | I_FREEING)));
 			rb_erase(parent, &root->inode_tree);
 			RB_CLEAR_NODE(parent);
 			spin_unlock(&root->inode_lock);
@@ -3937,7 +3942,7 @@ again:
 			if (atomic_read(&inode->i_count) > 1)
 				d_prune_aliases(inode);
 			/*
-			 * btrfs_drop_inode will remove it from
+			 * btrfs_drop_inode will have it removed from
 			 * the inode cache when its usage count
 			 * hits zero.
 			 */
@@ -5642,7 +5647,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 	struct bio_vec *bvec = bio->bi_io_vec;
 	u64 start;
 	int skip_sum;
-	int write = rw & (1 << BIO_RW);
+	int write = rw & REQ_WRITE;
 	int ret = 0;
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -6331,13 +6336,14 @@ free:
 	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
 
-void btrfs_drop_inode(struct inode *inode)
+int btrfs_drop_inode(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
-		generic_delete_inode(inode);
+
+	if (btrfs_root_refs(&root->root_item) == 0)
+		return 1;
 	else
-		generic_drop_inode(inode);
+		return generic_drop_inode(inode);
 }
 
 static void init_once(void *foo)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2393b39031..1776dbd8dc9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -797,7 +797,7 @@ static int btrfs_unfreeze(struct super_block *sb)
 
 static const struct super_operations btrfs_super_ops = {
 	.drop_inode	= btrfs_drop_inode,
-	.delete_inode	= btrfs_delete_inode,
+	.evict_inode	= btrfs_evict_inode,
 	.put_super	= btrfs_put_super,
 	.sync_fs	= btrfs_sync_fs,
 	.show_options	= btrfs_show_options,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d6e3af8be95..dd318ff280b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -258,7 +258,7 @@ loop_lock:
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-		if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
+		if (cur->bi_rw & REQ_SYNC)
 			num_sync_run++;
 
 		submit_bio(cur->bi_rw, cur);
@@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 	int max_errors = 0;
 	struct btrfs_multi_bio *multi = NULL;
 
-	if (multi_ret && !(rw & (1 << BIO_RW)))
+	if (multi_ret && !(rw & REQ_WRITE))
 		stripes_allocated = 1;
 again:
 	if (multi_ret) {
@@ -2687,7 +2687,7 @@ again:
 		mirror_num = 0;
 
 	/* if our multi bio struct is too small, back off and try again */
-	if (rw & (1 << BIO_RW)) {
+	if (rw & REQ_WRITE) {
 		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
 				 BTRFS_BLOCK_GROUP_DUP)) {
 			stripes_required = map->num_stripes;
@@ -2697,7 +2697,7 @@ again:
 			max_errors = 1;
 		}
 	}
-	if (multi_ret && (rw & (1 << BIO_RW)) &&
+	if (multi_ret && (rw & REQ_WRITE) &&
 	    stripes_allocated < stripes_required) {
 		stripes_allocated = map->num_stripes;
 		free_extent_map(em);
@@ -2733,7 +2733,7 @@ again:
 	num_stripes = 1;
 	stripe_index = 0;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (unplug_page || (rw & (1 << BIO_RW)))
+		if (unplug_page || (rw & REQ_WRITE))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -2744,7 +2744,7 @@ again:
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (rw & (1 << BIO_RW))
+		if (rw & REQ_WRITE)
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -2755,7 +2755,7 @@ again:
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (unplug_page || (rw & (1 << BIO_RW)))
+		if (unplug_page || (rw & REQ_WRITE))
 			num_stripes = map->sub_stripes;
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
@@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
 	struct btrfs_pending_bios *pending_bios;
 
 	/* don't bother with additional async steps for reads, right now */
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		bio_get(bio);
 		submit_bio(rw, bio);
 		bio_put(bio);
@@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
 	bio->bi_rw |= rw;
 
 	spin_lock(&device->io_lock);
-	if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
+	if (bio->bi_rw & REQ_SYNC)
 		pending_bios = &device->pending_sync_bios;
 	else
 		pending_bios = &device->pending_bios;
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e..50efa339e05 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-static int __block_prepare_write(struct inode *inode, struct page *page,
-		unsigned from, unsigned to, get_block_t *get_block)
+int block_prepare_write(struct page *page, unsigned from, unsigned to,
+		get_block_t *get_block)
 {
+	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
 	int err = 0;
@@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (unlikely(err))
+	if (unlikely(err)) {
 		page_zero_new_buffers(page, from, to);
+		ClearPageUptodate(page);
+	}
 	return err;
 }
+EXPORT_SYMBOL(block_prepare_write);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
@@ -1948,90 +1952,41 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	return 0;
 }
 
-/*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
- * The filesystem needs to handle block truncation upon failure.
- */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+		get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	int status = 0;
-	struct page *page;
-	pgoff_t index;
-	unsigned start, end;
-	int ownpage = 0;
-
-	index = pos >> PAGE_CACHE_SHIFT;
-	start = pos & (PAGE_CACHE_SIZE - 1);
-	end = start + len;
-
-	page = *pagep;
-	if (page == NULL) {
-		ownpage = 1;
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page) {
-			status = -ENOMEM;
-			goto out;
-		}
-		*pagep = page;
-	} else
-		BUG_ON(!PageLocked(page));
-
-	status = __block_prepare_write(inode, page, start, end, get_block);
-	if (unlikely(status)) {
-		ClearPageUptodate(page);
+	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
 
-		if (ownpage) {
-			unlock_page(page);
-			page_cache_release(page);
-			*pagep = NULL;
-		}
-	}
-
-out:
-	return status;
+	return block_prepare_write(page, start, start + len, get_block);
 }
-EXPORT_SYMBOL(block_write_begin_newtrunc);
+EXPORT_SYMBOL(__block_write_begin);
 
 /*
  * block_write_begin takes care of the basic task of block allocation and
  * bringing partial write blocks uptodate first.
  *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * The filesystem needs to handle block truncation upon failure.
  */
-int block_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+		unsigned flags, struct page **pagep, get_block_t *get_block)
 {
-	int ret;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	struct page *page;
+	int status;
 
-	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
 
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size.  Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 *
-	 * Filesystems which pass down their own page also cannot
-	 * call into vmtruncate here because it would lead to lock
-	 * inversion problems (*pagep is locked). This is a further
-	 * example of where the old truncate sequence is inadequate.
-	 */
-	if (unlikely(ret) && *pagep == NULL) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
+	status = __block_write_begin(page, pos, len, get_block);
+	if (unlikely(status)) {
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
 	}
 
-	return ret;
+	*pagep = page;
+	return status;
 }
 EXPORT_SYMBOL(block_write_begin);
 
@@ -2351,7 +2306,7 @@ out:
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
  */
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2318,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 
 	err = cont_expand_zero(file, mapping, pos, bytes);
 	if (err)
-		goto out;
+		return err;
 
 	zerofrom = *bytes & ~PAGE_CACHE_MASK;
 	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2326,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		(*bytes)++;
 	}
 
-	*pagep = NULL;
-	err = block_write_begin_newtrunc(file, mapping, pos, len,
-				flags, pagep, fsdata, get_block);
-out:
-	return err;
-}
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block, loff_t *bytes)
-{
-	int ret;
-
-	ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block, bytes);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
+	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
 }
 EXPORT_SYMBOL(cont_write_begin);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
-			get_block_t *get_block)
-{
-	struct inode *inode = page->mapping->host;
-	int err = __block_prepare_write(inode, page, from, to, get_block);
-	if (err)
-		ClearPageUptodate(page);
-	return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
@@ -2510,11 +2431,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
  * The filesystem needs to handle block truncation upon failure.
  */
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
@@ -2547,8 +2468,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 		*pagep = NULL;
-		return block_write_begin_newtrunc(file, mapping, pos, len,
-					flags, pagep, fsdata, get_block);
+		return block_write_begin(mapping, pos, len, flags, pagep,
+					 get_block);
 	}
 
 	if (PageMappedToDisk(page))
@@ -2654,35 +2575,6 @@ out_release:
 
 	return ret;
 }
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size.  Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 */
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(nobh_write_begin);
 
 int nobh_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 2906077ac79..a2603e7c0bb 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -146,7 +146,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
 		goto error_unsupported;
 
 	/* get the cache size and blocksize */
-	ret = vfs_statfs(root, &stats);
+	ret = vfs_statfs(&path, &stats);
 	if (ret < 0)
 		goto error_unsupported;
 
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index c2413561ea7..24eb0d37241 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -683,6 +683,10 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
 			 unsigned fnr, unsigned bnr)
 {
 	struct kstatfs stats;
+	struct path path = {
+		.mnt	= cache->mnt,
+		.dentry	= cache->mnt->mnt_root,
+	};
 	int ret;
 
 	//_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
@@ -697,7 +701,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
 	/* find out how many pages of blockdev are available */
 	memset(&stats, 0, sizeof(stats));
 
-	ret = vfs_statfs(cache->mnt->mnt_root, &stats);
+	ret = vfs_statfs(&path, &stats);
 	if (ret < 0) {
 		if (ret == -EIO)
 			cachefiles_io_error(cache, "statfs failed");
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a5ed10c9afe..b7431afdd76 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -329,8 +329,10 @@ cifs_destroy_inode(struct inode *inode)
 }
 
 static void
-cifs_clear_inode(struct inode *inode)
+cifs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	cifs_fscache_release_inode_cookie(inode);
 }
 
@@ -479,14 +481,13 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
-void cifs_drop_inode(struct inode *inode)
+static int cifs_drop_inode(struct inode *inode)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
-		return generic_drop_inode(inode);
-
-	return generic_delete_inode(inode);
+	/* no serverino => unconditional eviction */
+	return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) ||
+		generic_drop_inode(inode);
 }
 
 static const struct super_operations cifs_super_ops = {
@@ -495,7 +496,7 @@ static const struct super_operations cifs_super_ops = {
 	.alloc_inode = cifs_alloc_inode,
 	.destroy_inode = cifs_destroy_inode,
 	.drop_inode	= cifs_drop_inode,
-	.clear_inode	= cifs_clear_inode,
+	.evict_inode	= cifs_evict_inode,
 /*	.delete_inode	= cifs_delete_inode,  */  /* Do not need above
 	function unless later we add lazy close of inodes or unless the
 	kernel forgets to call us with the same number of releases (closes)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index dc4c47ab958..4bc47e5b5f2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1698,26 +1698,16 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 	return rc;
 }
 
-static int cifs_vmtruncate(struct inode *inode, loff_t offset)
+static void cifs_setsize(struct inode *inode, loff_t offset)
 {
 	loff_t oldsize;
-	int err;
 
 	spin_lock(&inode->i_lock);
-	err = inode_newsize_ok(inode, offset);
-	if (err) {
-		spin_unlock(&inode->i_lock);
-		goto out;
-	}
-
 	oldsize = inode->i_size;
 	i_size_write(inode, offset);
 	spin_unlock(&inode->i_lock);
+
 	truncate_pagecache(inode, oldsize, offset);
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-out:
-	return err;
 }
 
 static int
@@ -1790,7 +1780,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 
 	if (rc == 0) {
 		cifsInode->server_eof = attrs->ia_size;
-		rc = cifs_vmtruncate(inode, attrs->ia_size);
+		cifs_setsize(inode, attrs->ia_size);
 		cifs_truncate_page(inode->i_mapping, inode->i_size);
 	}
 
@@ -1815,14 +1805,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 
 	xid = GetXid();
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
-		/* check if we have permission to change attrs */
-		rc = inode_change_ok(inode, attrs);
-		if (rc < 0)
-			goto out;
-		else
-			rc = 0;
-	}
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+		attrs->ia_valid |= ATTR_FORCE;
+
+	rc = inode_change_ok(inode, attrs);
+	if (rc < 0)
+		goto out;
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
@@ -1908,18 +1896,24 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 	}
 
-	if (!rc) {
-		rc = inode_setattr(inode, attrs);
+	if (rc)
+		goto out;
 
-		/* force revalidate when any of these times are set since some
-		   of the fs types (eg ext3, fat) do not have fine enough
-		   time granularity to match protocol, and we do not have a
-		   a way (yet) to query the server fs's time granularity (and
-		   whether it rounds times down).
-		*/
-		if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
-			cifsInode->time = 0;
-	}
+	if ((attrs->ia_valid & ATTR_SIZE) &&
+	    attrs->ia_size != i_size_read(inode))
+		truncate_setsize(inode, attrs->ia_size);
+
+	setattr_copy(inode, attrs);
+	mark_inode_dirty(inode);
+
+	/* force revalidate when any of these times are set since some
+	   of the fs types (eg ext3, fat) do not have fine enough
+	   time granularity to match protocol, and we do not have
+	   a way (yet) to query the server fs's time granularity (and
+	   whether it rounds times down).
+	*/
+	if (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+		cifsInode->time = 0;
 out:
 	kfree(args);
 	kfree(full_path);
@@ -1944,14 +1938,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 	cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
 		 direntry->d_name.name, attrs->ia_valid);
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
-		/* check if we have permission to change attrs */
-		rc = inode_change_ok(inode, attrs);
-		if (rc < 0) {
-			FreeXid(xid);
-			return rc;
-		} else
-			rc = 0;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+		attrs->ia_valid |= ATTR_FORCE;
+
+	rc = inode_change_ok(inode, attrs);
+	if (rc < 0) {
+		FreeXid(xid);
+		return rc;
 	}
 
 	full_path = build_path_from_dentry(direntry);
@@ -2059,8 +2052,17 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	/* do not need local check to inode_check_ok since the server does
 	   that */
-	if (!rc)
-		rc = inode_setattr(inode, attrs);
+	if (rc)
+		goto cifs_setattr_exit;
+
+	if ((attrs->ia_valid & ATTR_SIZE) &&
+	    attrs->ia_size != i_size_read(inode))
+		truncate_setsize(inode, attrs->ia_size);
+
+	setattr_copy(inode, attrs);
+	mark_inode_dirty(inode);
+	return 0;
+
 cifs_setattr_exit:
 	kfree(full_path);
 	FreeXid(xid);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d97f9935a02..6526e6f21ec 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -35,7 +35,7 @@
 #include "coda_int.h"
 
 /* VFS super_block ops */
-static void coda_clear_inode(struct inode *);
+static void coda_evict_inode(struct inode *);
 static void coda_put_super(struct super_block *);
 static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
 
@@ -93,7 +93,7 @@ static const struct super_operations coda_super_operations =
 {
 	.alloc_inode	= coda_alloc_inode,
 	.destroy_inode	= coda_destroy_inode,
-	.clear_inode	= coda_clear_inode,
+	.evict_inode	= coda_evict_inode,
 	.put_super	= coda_put_super,
 	.statfs		= coda_statfs,
 	.remount_fs	= coda_remount,
@@ -224,8 +224,10 @@ static void coda_put_super(struct super_block *sb)
 	printk("Coda: Bye bye.\n");
 }
 
-static void coda_clear_inode(struct inode *inode)
+static void coda_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	coda_cache_clear_inode(inode);
 }
 
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 66b9cf79c5b..de89645777c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -177,7 +177,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
 		nbytes = req->uc_outSize; /* don't have more space! */
 	}
         if (copy_from_user(req->uc_data, buf, nbytes)) {
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 		retval = -EFAULT;
 		goto out;
@@ -254,8 +254,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
 	        retval = -EFAULT;
         
 	/* If request was not a signal, enqueue and don't free */
-	if (!(req->uc_flags & REQ_ASYNC)) {
-		req->uc_flags |= REQ_READ;
+	if (!(req->uc_flags & CODA_REQ_ASYNC)) {
+		req->uc_flags |= CODA_REQ_READ;
 		list_add_tail(&(req->uc_chain), &vcp->vc_processing);
 		goto out;
 	}
@@ -315,19 +315,19 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
 		list_del(&req->uc_chain);
 
 		/* Async requests need to be freed here */
-		if (req->uc_flags & REQ_ASYNC) {
+		if (req->uc_flags & CODA_REQ_ASYNC) {
 			CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
 			kfree(req);
 			continue;
 		}
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 	}
 
 	list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) {
 		list_del(&req->uc_chain);
 
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 	}
 
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index f09c5ed76f6..b8893ab6f9e 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -604,7 +604,7 @@ static void coda_unblock_signals(sigset_t *old)
 			       (((r)->uc_opcode != CODA_CLOSE && \
 				 (r)->uc_opcode != CODA_STORE && \
 				 (r)->uc_opcode != CODA_RELEASE) || \
-				(r)->uc_flags & REQ_READ))
+				(r)->uc_flags & CODA_REQ_READ))
 
 static inline void coda_waitfor_upcall(struct upc_req *req)
 {
@@ -624,7 +624,7 @@ static inline void coda_waitfor_upcall(struct upc_req *req)
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
 		/* got a reply */
-		if (req->uc_flags & (REQ_WRITE | REQ_ABORT))
+		if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT))
 			break;
 
 		if (blocked && time_after(jiffies, timeout) &&
@@ -708,7 +708,7 @@ static int coda_upcall(struct venus_comm *vcp,
 	coda_waitfor_upcall(req);
 
 	/* Op went through, interrupt or not... */
-	if (req->uc_flags & REQ_WRITE) {
+	if (req->uc_flags & CODA_REQ_WRITE) {
 		out = (union outputArgs *)req->uc_data;
 		/* here we map positive Venus errors to kernel errors */
 		error = -out->oh.result;
@@ -717,13 +717,13 @@ static int coda_upcall(struct venus_comm *vcp,
 	}
 
 	error = -EINTR;
-	if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) {
+	if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
 		printk(KERN_WARNING "coda: Unexpected interruption.\n");
 		goto exit;
 	}
 
 	/* Interrupted before venus read it. */
-	if (!(req->uc_flags & REQ_READ))
+	if (!(req->uc_flags & CODA_REQ_READ))
 		goto exit;
 
 	/* Venus saw the upcall, make sure we can send interrupt signal */
@@ -747,7 +747,7 @@ static int coda_upcall(struct venus_comm *vcp,
 	sig_inputArgs->ih.opcode = CODA_SIGNAL;
 	sig_inputArgs->ih.unique = req->uc_unique;
 
-	sig_req->uc_flags = REQ_ASYNC;
+	sig_req->uc_flags = CODA_REQ_ASYNC;
 	sig_req->uc_opcode = sig_inputArgs->ih.opcode;
 	sig_req->uc_unique = sig_inputArgs->ih.unique;
 	sig_req->uc_inSize = sizeof(struct coda_in_hdr);
diff --git a/fs/compat.c b/fs/compat.c
index 5976bad85f6..e6d5d70cf3c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -267,7 +267,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(path.dentry, &tmp);
+		error = vfs_statfs(&path, &tmp);
 		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_put(&path);
@@ -285,7 +285,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_path.dentry, &tmp);
+	error = vfs_statfs(&file->f_path, &tmp);
 	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
@@ -335,7 +335,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(path.dentry, &tmp);
+		error = vfs_statfs(&path, &tmp);
 		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_put(&path);
@@ -356,7 +356,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_path.dentry, &tmp);
+	error = vfs_statfs(&file->f_path, &tmp);
 	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
@@ -379,7 +379,7 @@ asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
 	sb = user_get_super(new_decode_dev(dev));
 	if (!sb)
 		return -EINVAL;
-	err = vfs_statfs(sb->s_root, &sbuf);
+	err = statfs_by_dentry(sb->s_root, &sbuf);
 	drop_super(sb);
 	if (err)
 		return err;
@@ -1193,11 +1193,10 @@ out:
 	if (iov != iovstack)
 		kfree(iov);
 	if ((ret + (type == READ)) > 0) {
-		struct dentry *dentry = file->f_path.dentry;
 		if (type == READ)
-			fsnotify_access(dentry);
+			fsnotify_access(file);
 		else
-			fsnotify_modify(dentry);
+			fsnotify_modify(file);
 	}
 	return ret;
 }
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 63ae8583146..70227e0dc01 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -131,23 +131,6 @@ static int w_long(unsigned int fd, unsigned int cmd,
 	return err;
 }
 
-static int rw_long(unsigned int fd, unsigned int cmd,
-		compat_ulong_t __user *argp)
-{
-	mm_segment_t old_fs = get_fs();
-	int err;
-	unsigned long val;
-
-	if(get_user(val, argp))
-		return -EFAULT;
-	set_fs (KERNEL_DS);
-	err = sys_ioctl(fd, cmd, (unsigned long)&val);
-	set_fs (old_fs);
-	if (!err && put_user(val, argp))
-		return -EFAULT;
-	return err;
-}
-
 struct compat_video_event {
 	int32_t		type;
 	compat_time_t	timestamp;
@@ -594,12 +577,6 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
 	return err;
 }
 
-static int ioc_settimeout(unsigned int fd, unsigned int cmd,
-		compat_ulong_t __user *argp)
-{
-	return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp);
-}
-
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO		_IOW('U', 200, int)
 #define HCIUARTGETPROTO		_IOR('U', 201, int)
@@ -969,6 +946,7 @@ COMPATIBLE_IOCTL(TIOCGPGRP)
 COMPATIBLE_IOCTL(TIOCGPTN)
 COMPATIBLE_IOCTL(TIOCSPTLCK)
 COMPATIBLE_IOCTL(TIOCSERGETLSR)
+COMPATIBLE_IOCTL(TIOCSIG)
 #ifdef TCGETS2
 COMPATIBLE_IOCTL(TCGETS2)
 COMPATIBLE_IOCTL(TCSETS2)
@@ -1284,13 +1262,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
 COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
 COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
 COMPATIBLE_IOCTL(OSS_GETVERSION)
-/* AUTOFS */
-COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
-COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
-COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
-COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
 /* Raw devices */
 COMPATIBLE_IOCTL(RAW_SETBIND)
 COMPATIBLE_IOCTL(RAW_GETBIND)
@@ -1557,9 +1528,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
 	case RAW_GETBIND:
 		return raw_ioctl(fd, cmd, argp);
 #endif
-#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
-	case AUTOFS_IOC_SETTIMEOUT32:
-		return ioc_settimeout(fd, cmd, argp);
 	/* One SMB ioctl needs translations. */
 #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
 	case SMB_IOC_GETMOUNTUID_32:
@@ -1614,9 +1582,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
 	case KDSKBMETA:
 	case KDSKBLED:
 	case KDSETLED:
-	/* AUTOFS */
-	case AUTOFS_IOC_READY:
-	case AUTOFS_IOC_FAIL:
 	/* NBD */
 	case NBD_SET_SOCK:
 	case NBD_SET_BLKSIZE:
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index dd3634e4c96..a53b130b366 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,66 +39,55 @@ static DEFINE_MUTEX(read_mutex);
 #define CRAMINO(x)	(((x)->offset && (x)->size)?(x)->offset<<2:1)
 #define OFFSET(x)	((x)->i_ino)
 
-
-static int cramfs_iget5_test(struct inode *inode, void *opaque)
-{
-	struct cramfs_inode *cramfs_inode = opaque;
-	return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
-}
-
-static int cramfs_iget5_set(struct inode *inode, void *opaque)
+static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode)
 {
-	struct cramfs_inode *cramfs_inode = opaque;
-	inode->i_ino = CRAMINO(cramfs_inode);
-	return 0;
+	static struct timespec zerotime;
+	inode->i_mode = cramfs_inode->mode;
+	inode->i_uid = cramfs_inode->uid;
+	inode->i_size = cramfs_inode->size;
+	inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+	inode->i_gid = cramfs_inode->gid;
+	/* Struct copy intentional */
+	inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+	/* inode->i_nlink is left 1 - arguably wrong for directories,
+	   but it's the best we can do without reading the directory
+	   contents.  1 yields the right result in GNU find, even
+	   without -noleaf option. */
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_fop = &generic_ro_fops;
+		inode->i_data.a_ops = &cramfs_aops;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &cramfs_dir_inode_operations;
+		inode->i_fop = &cramfs_directory_operations;
+	} else if (S_ISLNK(inode->i_mode)) {
+		inode->i_op = &page_symlink_inode_operations;
+		inode->i_data.a_ops = &cramfs_aops;
+	} else {
+		init_special_inode(inode, inode->i_mode,
+			old_decode_dev(cramfs_inode->size));
+	}
 }
 
 static struct inode *get_cramfs_inode(struct super_block *sb,
 				struct cramfs_inode * cramfs_inode)
 {
-	struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
-					    cramfs_iget5_test, cramfs_iget5_set,
-					    cramfs_inode);
-	static struct timespec zerotime;
-
-	if (inode && (inode->i_state & I_NEW)) {
-		inode->i_mode = cramfs_inode->mode;
-		inode->i_uid = cramfs_inode->uid;
-		inode->i_size = cramfs_inode->size;
-		inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
-		inode->i_gid = cramfs_inode->gid;
-		/* Struct copy intentional */
-		inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
-		/* inode->i_nlink is left 1 - arguably wrong for directories,
-		   but it's the best we can do without reading the directory
-		   contents.  1 yields the right result in GNU find, even
-		   without -noleaf option. */
-		if (S_ISREG(inode->i_mode)) {
-			inode->i_fop = &generic_ro_fops;
-			inode->i_data.a_ops = &cramfs_aops;
-		} else if (S_ISDIR(inode->i_mode)) {
-			inode->i_op = &cramfs_dir_inode_operations;
-			inode->i_fop = &cramfs_directory_operations;
-		} else if (S_ISLNK(inode->i_mode)) {
-			inode->i_op = &page_symlink_inode_operations;
-			inode->i_data.a_ops = &cramfs_aops;
-		} else {
-			init_special_inode(inode, inode->i_mode,
-				old_decode_dev(cramfs_inode->size));
+	struct inode *inode;
+	if (CRAMINO(cramfs_inode) == 1) {
+		inode = new_inode(sb);
+		if (inode) {
+			inode->i_ino = 1;
+			setup_inode(inode, cramfs_inode);
+		}
+	} else {
+		inode = iget_locked(sb, CRAMINO(cramfs_inode));
+		if (inode) {
+			setup_inode(inode, cramfs_inode);
+			unlock_new_inode(inode);
 		}
-		unlock_new_inode(inode);
 	}
 	return inode;
 }
 
-static void cramfs_drop_inode(struct inode *inode)
-{
-	if (inode->i_ino == 1)
-		generic_delete_inode(inode);
-	else
-		generic_drop_inode(inode);
-}
-
 /*
  * We have our own block cache: don't fill up the buffer cache
  * with the rom-image, because the way the filesystem is set
@@ -542,7 +531,6 @@ static const struct super_operations cramfs_ops = {
 	.put_super	= cramfs_put_super,
 	.remount_fs	= cramfs_remount,
 	.statfs		= cramfs_statfs,
-	.drop_inode	= cramfs_drop_inode,
 };
 
 static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/dcache.c b/fs/dcache.c
index 86d4db15473..9f2c1341796 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
  */
 static void prune_dcache(int count)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 	int w_count;
 	int unused = dentry_stat.nr_unused;
 	int prune_ratio;
@@ -550,7 +550,7 @@ static void prune_dcache(int count)
 	else
 		prune_ratio = unused / count;
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_nr_dentry_unused == 0)
@@ -590,14 +590,16 @@ static void prune_dcache(int count)
 			up_read(&sb->s_umount);
 		}
 		spin_lock(&sb_lock);
-		/* lock was dropped, must reset next */
-		list_safe_reset_next(sb, n, s_list);
+		if (p)
+			__put_super(p);
 		count -= pruned;
-		__put_super(sb);
+		p = sb;
 		/* more work left to do? */
 		if (count <= 0)
 			break;
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 	spin_unlock(&dcache_lock);
 }
@@ -2049,16 +2051,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
 /*
  * Write full pathname from the root of the filesystem into the buffer.
  */
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 {
 	char *end = buf + buflen;
 	char *retval;
 
-	spin_lock(&dcache_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (d_unlinked(dentry) &&
-		(prepend(&end, &buflen, "//deleted", 9) != 0))
-			goto Elong;
 	if (buflen < 1)
 		goto Elong;
 	/* Get '/' right */
@@ -2076,7 +2074,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 		retval = end;
 		dentry = parent;
 	}
+	return retval;
+Elong:
+	return ERR_PTR(-ENAMETOOLONG);
+}
+EXPORT_SYMBOL(__dentry_path);
+
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+	char *p = NULL;
+	char *retval;
+
+	spin_lock(&dcache_lock);
+	if (d_unlinked(dentry)) {
+		p = buf + buflen;
+		if (prepend(&p, &buflen, "//deleted", 10) != 0)
+			goto Elong;
+		buflen++;
+	}
+	retval = __dentry_path(dentry, buf, buflen);
 	spin_unlock(&dcache_lock);
+	if (!IS_ERR(retval) && p)
+		*p = '/';	/* restore '/' overridden with '\0' */
 	return retval;
 Elong:
 	spin_unlock(&dcache_lock);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a10cb91cade..51f270b479b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1136,8 +1136,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	return ret;
 }
 
+/*
+ * This is a library function for use by filesystem drivers.
+ *
+ * The locking rules are governed by the flags parameter:
+ *  - if the flags value contains DIO_LOCKING we use a fancy locking
+ *    scheme for dumb filesystems.
+ *    For writes this function is called under i_mutex and returns with
+ *    i_mutex held, for reads, i_mutex is not held on entry, but it is
+ *    taken and dropped again before returning.
+ *    For reads and writes i_alloc_sem is taken in shared mode and released
+ *    on I/O completion (which may happen asynchronously after returning to
+ *    the caller).
+ *
+ *  - if the flags value does NOT contain DIO_LOCKING we don't use any
+ *    internal locking but rather rely on the filesystem to synchronize
+ *    direct I/O reads/writes versus each other and truncate.
+ *    For reads and writes both i_mutex and i_alloc_sem are not held on
+ *    entry and are never taken.
+ */
 ssize_t
-__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
@@ -1233,57 +1252,4 @@ __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
 out:
 	return retval;
 }
-EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
-
-/*
- * This is a library function for use by filesystem drivers.
- *
- * The locking rules are governed by the flags parameter:
- *  - if the flags value contains DIO_LOCKING we use a fancy locking
- *    scheme for dumb filesystems.
- *    For writes this function is called under i_mutex and returns with
- *    i_mutex held, for reads, i_mutex is not held on entry, but it is
- *    taken and dropped again before returning.
- *    For reads and writes i_alloc_sem is taken in shared mode and released
- *    on I/O completion (which may happen asynchronously after returning to
- *    the caller).
- *
- *  - if the flags value does NOT contain DIO_LOCKING we don't use any
- *    internal locking but rather rely on the filesystem to synchronize
- *    direct I/O reads/writes versus each other and truncate.
- *    For reads and writes both i_mutex and i_alloc_sem are not held on
- *    entry and are never taken.
- */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	dio_submit_t submit_io,	int flags)
-{
-	ssize_t retval;
-
-	retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
-			offset, nr_segs, get_block, end_io, submit_io, flags);
-	/*
-	 * In case of error extending write may have instantiated a few
-	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
-	 * their own manner. This is a further example of where the old
-	 * truncate sequence is inadequate.
-	 *
-	 * NOTE: filesystems with their own locking have to handle this
-	 * on their own.
-	 */
-	if (flags & DIO_LOCKING) {
-		if (unlikely((rw & WRITE) && retval < 0)) {
-			loff_t isize = i_size_read(inode);
-			loff_t end = offset + iov_length(iov, nr_segs);
-
-			if (end > isize)
-				vmtruncate(inode, isize);
-		}
-	}
-
-	return retval;
-}
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 83c4f600786..2195c213ab2 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 
 	spin_lock(&inode_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 		if (inode->i_mapping->nrpages == 0)
 			continue;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index e8fcf4e2ed7..622c9514080 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -199,7 +199,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 			       "the persistent file for the dentry with name "
 			       "[%s]; rc = [%d]\n", __func__,
 			       ecryptfs_dentry->d_name.name, rc);
-			goto out;
+			goto out_free;
 		}
 	}
 	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
@@ -207,7 +207,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		rc = -EPERM;
 		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
 		       "file must hence be opened RO\n", __func__);
-		goto out;
+		goto out_free;
 	}
 	ecryptfs_set_file_lower(
 		file, ecryptfs_inode_to_private(inode)->lower_file);
@@ -292,12 +292,40 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
 	return rc;
 }
 
-static int ecryptfs_ioctl(struct inode *inode, struct file *file,
-			  unsigned int cmd, unsigned long arg);
+static long
+ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct file *lower_file = NULL;
+	long rc = -ENOTTY;
+
+	if (ecryptfs_file_to_private(file))
+		lower_file = ecryptfs_file_to_lower(file);
+	if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
+		rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
+	return rc;
+}
+
+#ifdef CONFIG_COMPAT
+static long
+ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct file *lower_file = NULL;
+	long rc = -ENOIOCTLCMD;
+
+	if (ecryptfs_file_to_private(file))
+		lower_file = ecryptfs_file_to_lower(file);
+	if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
+		rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
+	return rc;
+}
+#endif
 
 const struct file_operations ecryptfs_dir_fops = {
 	.readdir = ecryptfs_readdir,
-	.ioctl = ecryptfs_ioctl,
+	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = ecryptfs_compat_ioctl,
+#endif
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
 	.release = ecryptfs_release,
@@ -313,7 +341,10 @@ const struct file_operations ecryptfs_main_fops = {
 	.write = do_sync_write,
 	.aio_write = generic_file_aio_write,
 	.readdir = ecryptfs_readdir,
-	.ioctl = ecryptfs_ioctl,
+	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = ecryptfs_compat_ioctl,
+#endif
 	.mmap = generic_file_mmap,
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
@@ -322,20 +353,3 @@ const struct file_operations ecryptfs_main_fops = {
 	.fasync = ecryptfs_fasync,
 	.splice_read = generic_file_splice_read,
 };
-
-static int
-ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-	       unsigned long arg)
-{
-	int rc = 0;
-	struct file *lower_file = NULL;
-
-	if (ecryptfs_file_to_private(file))
-		lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
-		rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
-					     lower_file, cmd, arg);
-	else
-		rc = -ENOTTY;
-	return rc;
-}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 31ef5252f0f..6c55113e722 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -264,7 +264,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		printk(KERN_ERR "%s: Out of memory whilst attempting "
 		       "to allocate ecryptfs_dentry_info struct\n",
 			__func__);
-		goto out_dput;
+		goto out_put;
 	}
 	ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
 	ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
@@ -339,14 +339,85 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 out_free_kmem:
 	kmem_cache_free(ecryptfs_header_cache_2, page_virt);
 	goto out;
-out_dput:
+out_put:
 	dput(lower_dentry);
+	mntput(lower_mnt);
 	d_drop(ecryptfs_dentry);
 out:
 	return rc;
 }
 
 /**
+ * ecryptfs_new_lower_dentry
+ * @name: The name of the new dentry.
+ * @lower_dir_dentry: Parent directory of the new dentry.
+ * @nd: nameidata from last lookup.
+ *
+ * Create a new dentry or get it from lower parent dir.
+ */
+static struct dentry *
+ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
+			  struct nameidata *nd)
+{
+	struct dentry *new_dentry;
+	struct dentry *tmp;
+	struct inode *lower_dir_inode;
+
+	lower_dir_inode = lower_dir_dentry->d_inode;
+
+	tmp = d_alloc(lower_dir_dentry, name);
+	if (!tmp)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&lower_dir_inode->i_mutex);
+	new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
+	mutex_unlock(&lower_dir_inode->i_mutex);
+
+	if (!new_dentry)
+		new_dentry = tmp;
+	else
+		dput(tmp);
+
+	return new_dentry;
+}
+
+
+/**
+ * ecryptfs_lookup_one_lower
+ * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
+ * @lower_dir_dentry: lower parent directory
+ *
+ * Get the lower dentry from vfs. If lower dentry does not exist yet,
+ * create it.
+ */
+static struct dentry *
+ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
+			  struct dentry *lower_dir_dentry)
+{
+	struct nameidata nd;
+	struct vfsmount *lower_mnt;
+	struct qstr *name;
+	int err;
+
+	name = &ecryptfs_dentry->d_name;
+	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
+				    ecryptfs_dentry->d_parent));
+	err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
+	mntput(lower_mnt);
+
+	if (!err) {
+		/* we don't need the mount */
+		mntput(nd.path.mnt);
+		return nd.path.dentry;
+	}
+	if (err != -ENOENT)
+		return ERR_PTR(err);
+
+	/* create a new lower dentry */
+	return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
+}
+
+/**
  * ecryptfs_lookup
  * @ecryptfs_dir_inode: The eCryptfs directory inode
  * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -373,14 +444,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 		goto out_d_drop;
 	}
 	lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-	mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
-	lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
-				      lower_dir_dentry,
-				      ecryptfs_dentry->d_name.len);
-	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+
+	lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
+						 lower_dir_dentry);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
 				"[%d] on lower_dentry = [%s]\n", __func__, rc,
 				encrypted_and_encoded_name);
 		goto out_d_drop;
@@ -402,14 +471,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 		       "filename; rc = [%d]\n", __func__, rc);
 		goto out_d_drop;
 	}
-	mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
-	lower_dentry = lookup_one_len(encrypted_and_encoded_name,
-				      lower_dir_dentry,
-				      encrypted_and_encoded_name_size - 1);
-	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+	lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
+						 lower_dir_dentry);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
 				"[%d] on lower_dentry = [%s]\n", __func__, rc,
 				encrypted_and_encoded_name);
 		goto out_d_drop;
@@ -804,10 +870,20 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
 		size_t num_zeros = (PAGE_CACHE_SIZE
 				    - (ia->ia_size & ~PAGE_CACHE_MASK));
 
+
+		/*
+		 * XXX(truncate) this should really happen at the beginning
+		 * of ->setattr.  But the code is too messy to do that as part
+		 * of a larger patch.  ecryptfs is also totally missing out
+		 * on the inode_change_ok check at the beginning of
+		 * ->setattr which would include this.
+		 */
+		rc = inode_newsize_ok(inode, ia->ia_size);
+		if (rc)
+			goto out;
+
 		if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
-			rc = simple_setsize(inode, ia->ia_size);
-			if (rc)
-				goto out;
+			truncate_setsize(inode, ia->ia_size);
 			lower_ia->ia_size = ia->ia_size;
 			lower_ia->ia_valid |= ATTR_SIZE;
 			goto out;
@@ -830,7 +906,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
 				goto out;
 			}
 		}
-		simple_setsize(inode, ia->ia_size);
+		truncate_setsize(inode, ia->ia_size);
 		rc = ecryptfs_write_inode_size_to_metadata(inode);
 		if (rc) {
 			printk(KERN_ERR	"Problem with "
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 46c4dd8dfcc..bcb68c0cb1f 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -274,7 +274,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
 			      struct user_namespace *user_ns, struct pid *pid,
 			      u32 seq)
 {
-	struct ecryptfs_daemon *daemon;
+	struct ecryptfs_daemon *uninitialized_var(daemon);
 	struct ecryptfs_msg_ctx *msg_ctx;
 	size_t msg_size;
 	struct nsproxy *nsproxy;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 0435886e4a9..f7fc286a3aa 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -118,11 +118,15 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
  */
 static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf);
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+
+	if (!lower_dentry->d_sb->s_op->statfs)
+		return -ENOSYS;
+	return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
 }
 
 /**
- * ecryptfs_clear_inode
+ * ecryptfs_evict_inode
  * @inode - The ecryptfs inode
  *
  * Called by iput() when the inode reference count reached zero
@@ -131,8 +135,10 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  * on the inode free list. We use this to drop out reference to the
  * lower inode.
  */
-static void ecryptfs_clear_inode(struct inode *inode)
+static void ecryptfs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	iput(ecryptfs_inode_to_lower(inode));
 }
 
@@ -184,6 +190,6 @@ const struct super_operations ecryptfs_sops = {
 	.drop_inode = generic_delete_inode,
 	.statfs = ecryptfs_statfs,
 	.remount_fs = NULL,
-	.clear_inode = ecryptfs_clear_inode,
+	.evict_inode = ecryptfs_evict_inode,
 	.show_options = ecryptfs_show_options
 };
diff --git a/fs/exec.c b/fs/exec.c
index dab85ecad68..7761837e450 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	fsnotify_open(file->f_path.dentry);
+	fsnotify_open(file);
 
 	error = -ENOEXEC;
 	if(file->f_op) {
@@ -683,7 +683,7 @@ struct file *open_exec(const char *name)
 	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	fsnotify_open(file->f_path.dentry);
+	fsnotify_open(file);
 
 	err = deny_write_access(file);
 	if (err)
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 22721b2fd89..2dc925fa101 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -256,7 +256,6 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
 }
 
 /* inode.c               */
-void exofs_truncate(struct inode *inode);
 int exofs_setattr(struct dentry *, struct iattr *);
 int exofs_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
@@ -264,7 +263,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 extern struct inode *exofs_iget(struct super_block *, unsigned long);
 struct inode *exofs_new_inode(struct inode *, int);
 extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
-extern void exofs_delete_inode(struct inode *);
+extern void exofs_evict_inode(struct inode *);
 
 /* dir.c:                */
 int exofs_add_link(struct dentry *, struct inode *);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index fef6899be39..f9bfe2b501d 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -86,6 +86,5 @@ const struct file_operations exofs_file_operations = {
 };
 
 const struct inode_operations exofs_file_inode_operations = {
-	.truncate	= exofs_truncate,
 	.setattr	= exofs_setattr,
 };
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5862ae87ed2..185ef1281e0 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -697,6 +697,13 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
 	return write_exec(&pcol);
 }
 
+/* i_mutex held using inode->i_size directly */
+static void _write_failed(struct inode *inode, loff_t to)
+{
+	if (to > inode->i_size)
+		truncate_pagecache(inode, to, inode->i_size);
+}
+
 int exofs_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
@@ -710,7 +717,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 					 fsdata);
 		if (ret) {
 			EXOFS_DBGMSG("simple_write_begin failed\n");
-			return ret;
+			goto out;
 		}
 
 		page = *pagep;
@@ -725,6 +732,9 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 			EXOFS_DBGMSG("__readpage_filler failed\n");
 		}
 	}
+out:
+	if (unlikely(ret))
+		_write_failed(mapping->host, pos + len);
 
 	return ret;
 }
@@ -750,6 +760,10 @@ static int exofs_write_end(struct file *file, struct address_space *mapping,
 	int ret;
 
 	ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
+	if (unlikely(ret))
+		_write_failed(inode, pos + len);
+
+	/* TODO: once simple_write_end marks inode dirty remove */
 	if (i_size != inode->i_size)
 		mark_inode_dirty(inode);
 	return ret;
@@ -808,87 +822,55 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
 	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
 }
 
-/*
- * get_block_t - Fill in a buffer_head
- * An OSD takes care of block allocation so we just fake an allocation by
- * putting in the inode's sector_t in the buffer_head.
- * TODO: What about the case of create==0 and @iblock does not exist in the
- * object?
- */
-static int exofs_get_block(struct inode *inode, sector_t iblock,
-		    struct buffer_head *bh_result, int create)
-{
-	map_bh(bh_result, inode->i_sb, iblock);
-	return 0;
-}
-
 const struct osd_attr g_attr_logical_length = ATTR_DEF(
 	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
 
-static int _do_truncate(struct inode *inode)
+static int _do_truncate(struct inode *inode, loff_t newsize)
 {
 	struct exofs_i_info *oi = exofs_i(inode);
-	loff_t isize = i_size_read(inode);
 	int ret;
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
-	nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
+	ret = exofs_oi_truncate(oi, (u64)newsize);
+	if (likely(!ret))
+		truncate_setsize(inode, newsize);
 
-	ret = exofs_oi_truncate(oi, (u64)isize);
-	EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
+	EXOFS_DBGMSG("(0x%lx) size=0x%llx ret=>%d\n",
+		     inode->i_ino, newsize, ret);
 	return ret;
 }
 
 /*
- * Truncate a file to the specified size - all we have to do is set the size
- * attribute.  We make sure the object exists first.
- */
-void exofs_truncate(struct inode *inode)
-{
-	struct exofs_i_info *oi = exofs_i(inode);
-	int ret;
-
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
-	     || S_ISLNK(inode->i_mode)))
-		return;
-	if (exofs_inode_is_fast_symlink(inode))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return;
-
-	/* if we are about to truncate an object, and it hasn't been
-	 * created yet, wait
-	 */
-	if (unlikely(wait_obj_created(oi)))
-		goto fail;
-
-	ret = _do_truncate(inode);
-	if (ret)
-		goto fail;
-
-out:
-	mark_inode_dirty(inode);
-	return;
-fail:
-	make_bad_inode(inode);
-	goto out;
-}
-
-/*
- * Set inode attributes - just call generic functions.
+ * Set inode attributes - update size attribute on OSD if needed,
+ *                        otherwise just call generic functions.
  */
 int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
 
+	/* if we are about to modify an object, and it hasn't been
+	 * created yet, wait
+	 */
+	error = wait_obj_created(exofs_i(inode));
+	if (unlikely(error))
+		return error;
+
 	error = inode_change_ok(inode, iattr);
-	if (error)
+	if (unlikely(error))
 		return error;
 
-	error = inode_setattr(inode, iattr);
-	return error;
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		error = _do_truncate(inode, iattr->ia_size);
+		if (unlikely(error))
+			return error;
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
@@ -1325,7 +1307,7 @@ static void delete_done(struct exofs_io_state *ios, void *p)
  * from the OSD here.  We make sure the object was created before we try and
  * delete it.
  */
-void exofs_delete_inode(struct inode *inode)
+void exofs_evict_inode(struct inode *inode)
 {
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
@@ -1335,30 +1317,27 @@ void exofs_delete_inode(struct inode *inode)
 
 	truncate_inode_pages(&inode->i_data, 0);
 
-	if (is_bad_inode(inode))
+	/* TODO: should do better here */
+	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
-	mark_inode_dirty(inode);
-	exofs_update_inode(inode, inode_needs_sync(inode));
-
 	inode->i_size = 0;
-	if (inode->i_blocks)
-		exofs_truncate(inode);
+	end_writeback(inode);
 
-	clear_inode(inode);
+	/* if we are deleting an obj that hasn't been created yet, wait */
+	if (!obj_created(oi)) {
+		BUG_ON(!obj_2bcreated(oi));
+		wait_event(oi->i_wq, obj_created(oi));
+		/* ignore the error attempt a remove anyway */
+	}
 
+	/* Now Remove the OSD objects */
 	ret = exofs_get_io_state(&sbi->layout, &ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
 		return;
 	}
 
-	/* if we are deleting an obj that hasn't been created yet, wait */
-	if (!obj_created(oi)) {
-		BUG_ON(!obj_2bcreated(oi));
-		wait_event(oi->i_wq, obj_created(oi));
-	}
-
 	ios->obj.id = exofs_oi_objno(oi);
 	ios->done = delete_done;
 	ios->private = sbi;
@@ -1374,5 +1353,5 @@ void exofs_delete_inode(struct inode *inode)
 	return;
 
 no_delete:
-	clear_inode(inode);
+	end_writeback(inode);
 }
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 95921f501f2..908cdbe4b99 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -599,7 +599,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 			} else {
 				bio = master_dev->bio;
 				/* FIXME: bio_set_dir() */
-				bio->bi_rw |= (1 << BIO_RW);
+				bio->bi_rw |= REQ_WRITE;
 			}
 
 			osd_req_write(or, &ios->obj, per_dev->offset, bio,
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 03149b9a517..32cfd61def5 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -743,7 +743,7 @@ static const struct super_operations exofs_sops = {
 	.alloc_inode    = exofs_alloc_inode,
 	.destroy_inode  = exofs_destroy_inode,
 	.write_inode    = exofs_write_inode,
-	.delete_inode   = exofs_delete_inode,
+	.evict_inode    = exofs_evict_inode,
 	.put_super      = exofs_put_super,
 	.write_super    = exofs_write_super,
 	.sync_fs	= exofs_sync_fs,
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e8766a39677..c6c684b44ea 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -571,7 +571,7 @@ do_more:
 error_return:
 	brelse(bitmap_bh);
 	release_blocks(sb, freed);
-	dquot_free_block(inode, freed);
+	dquot_free_block_nodirty(inode, freed);
 }
 
 /**
@@ -1418,7 +1418,8 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	dquot_free_block(inode, *count-num);
+	dquot_free_block_nodirty(inode, *count-num);
+	mark_inode_dirty(inode);
 	*count = num;
 	return ret_block;
 
@@ -1428,8 +1429,10 @@ out:
 	/*
 	 * Undo the block allocation
 	 */
-	if (!performed_allocation)
-		dquot_free_block(inode, *count);
+	if (!performed_allocation) {
+		dquot_free_block_nodirty(inode, *count);
+		mark_inode_dirty(inode);
+	}
 	brelse(bitmap_bh);
 	return 0;
 }
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 7516957273e..764109886ec 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -448,6 +448,11 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
 	return res;
 }
 
+static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len)
+{
+	return __block_write_begin(page, pos, len, ext2_get_block);
+}
+
 /* Releases the page */
 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 		   struct page *page, struct inode *inode, int update_times)
@@ -458,8 +463,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 	int err;
 
 	lock_page(page);
-	err = __ext2_write_begin(NULL, page->mapping, pos, len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ext2_prepare_chunk(page, pos, len);
 	BUG_ON(err);
 	de->inode = cpu_to_le32(inode->i_ino);
 	ext2_set_de_type(de, inode);
@@ -542,8 +546,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 got_it:
 	pos = page_offset(page) +
 		(char*)de - (char*)page_address(page);
-	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
-							&page, NULL);
+	err = ext2_prepare_chunk(page, pos, rec_len);
 	if (err)
 		goto out_unlock;
 	if (de->inode) {
@@ -576,8 +579,7 @@ out_unlock:
  */
 int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = mapping->host;
+	struct inode *inode = page->mapping->host;
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
 	unsigned to = ((char *)dir - kaddr) +
@@ -601,8 +603,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 		from = (char*)pde - (char*)page_address(page);
 	pos = page_offset(page) + from;
 	lock_page(page);
-	err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0,
-							&page, NULL);
+	err = ext2_prepare_chunk(page, pos, to - from);
 	BUG_ON(err);
 	if (pde)
 		pde->rec_len = ext2_rec_len_to_disk(to - from);
@@ -621,8 +622,7 @@ out:
  */
 int ext2_make_empty(struct inode *inode, struct inode *parent)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct page *page = grab_cache_page(mapping, 0);
+	struct page *page = grab_cache_page(inode->i_mapping, 0);
 	unsigned chunk_size = ext2_chunk_size(inode);
 	struct ext2_dir_entry_2 * de;
 	int err;
@@ -631,8 +631,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
 	if (!page)
 		return -ENOMEM;
 
-	err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0,
-							&page, NULL);
+	err = ext2_prepare_chunk(page, 0, chunk_size);
 	if (err) {
 		unlock_page(page);
 		goto fail;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 52b34f1d273..416daa62242 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -119,7 +119,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
 /* inode.c */
 extern struct inode *ext2_iget (struct super_block *, unsigned long);
 extern int ext2_write_inode (struct inode *, struct writeback_control *);
-extern void ext2_delete_inode (struct inode *);
+extern void ext2_evict_inode(struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
 extern int ext2_setattr (struct dentry *, struct iattr *);
@@ -127,9 +127,6 @@ extern void ext2_set_inode_flags(struct inode *inode);
 extern void ext2_get_inode_flags(struct ext2_inode_info *);
 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len);
-int __ext2_write_begin(struct file *file, struct address_space *mapping,
-		loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, void **fsdata);
 
 /* ioctl.c */
 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 938dbc739d0..ad70479aabf 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -118,19 +118,14 @@ void ext2_free_inode (struct inode * inode)
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	if (!is_bad_inode(inode)) {
-		/* Quota is already initialized in iput() */
-		ext2_xattr_delete_inode(inode);
-		dquot_free_inode(inode);
-		dquot_drop(inode);
-	}
+	/* Quota is already initialized in iput() */
+	ext2_xattr_delete_inode(inode);
+	dquot_free_inode(inode);
+	dquot_drop(inode);
 
 	es = EXT2_SB(sb)->s_es;
 	is_directory = S_ISDIR(inode->i_mode);
 
-	/* Do this BEFORE marking the inode not in use or returning an error */
-	clear_inode (inode);
-
 	if (ino < EXT2_FIRST_INO(sb) ||
 	    ino > le32_to_cpu(es->s_inodes_count)) {
 		ext2_error (sb, "ext2_free_inode",
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 3675088cb88..940c9616886 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -69,26 +69,42 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to)
 /*
  * Called at the last iput() if i_nlink is zero.
  */
-void ext2_delete_inode (struct inode * inode)
+void ext2_evict_inode(struct inode * inode)
 {
-	if (!is_bad_inode(inode))
+	struct ext2_block_alloc_info *rsv;
+	int want_delete = 0;
+
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
+		want_delete = 1;
 		dquot_initialize(inode);
+	} else {
+		dquot_drop(inode);
+	}
+
 	truncate_inode_pages(&inode->i_data, 0);
 
-	if (is_bad_inode(inode))
-		goto no_delete;
-	EXT2_I(inode)->i_dtime	= get_seconds();
-	mark_inode_dirty(inode);
-	__ext2_write_inode(inode, inode_needs_sync(inode));
+	if (want_delete) {
+		/* set dtime */
+		EXT2_I(inode)->i_dtime	= get_seconds();
+		mark_inode_dirty(inode);
+		__ext2_write_inode(inode, inode_needs_sync(inode));
+		/* truncate to 0 */
+		inode->i_size = 0;
+		if (inode->i_blocks)
+			ext2_truncate_blocks(inode, 0);
+	}
 
-	inode->i_size = 0;
-	if (inode->i_blocks)
-		ext2_truncate_blocks(inode, 0);
-	ext2_free_inode (inode);
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 
-	return;
-no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	ext2_discard_reservation(inode);
+	rsv = EXT2_I(inode)->i_block_alloc_info;
+	EXT2_I(inode)->i_block_alloc_info = NULL;
+	if (unlikely(rsv))
+		kfree(rsv);
+
+	if (want_delete)
+		ext2_free_inode(inode);
 }
 
 typedef struct {
@@ -423,6 +439,8 @@ static int ext2_alloc_blocks(struct inode *inode,
 failed_out:
 	for (i = 0; i <index; i++)
 		ext2_free_blocks(inode, new_blocks[i], 1);
+	if (index)
+		mark_inode_dirty(inode);
 	return ret;
 }
 
@@ -765,14 +783,6 @@ ext2_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
 }
 
-int __ext2_write_begin(struct file *file, struct address_space *mapping,
-		loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, void **fsdata)
-{
-	return block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, ext2_get_block);
-}
-
 static int
 ext2_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
@@ -780,8 +790,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
 {
 	int ret;
 
-	*pagep = NULL;
-	ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				ext2_get_block);
 	if (ret < 0)
 		ext2_write_failed(mapping, pos + len);
 	return ret;
@@ -806,13 +816,8 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
 {
 	int ret;
 
-	/*
-	 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
-	 * directory handling code to pass around offsets rather than struct
-	 * pages in order to make this work easily.
-	 */
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep,
-						fsdata, ext2_get_block);
+	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
+			       ext2_get_block);
 	if (ret < 0)
 		ext2_write_failed(mapping, pos + len);
 	return ret;
@@ -838,7 +843,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
 				iov, offset, nr_segs, ext2_get_block, NULL);
 	if (ret < 0 && (rw & WRITE))
 		ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
@@ -1006,8 +1011,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
 			else if (block_to_free == nr - count)
 				count++;
 			else {
-				mark_inode_dirty(inode);
 				ext2_free_blocks (inode, block_to_free, count);
+				mark_inode_dirty(inode);
 			free_this:
 				block_to_free = nr;
 				count = 1;
@@ -1015,8 +1020,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
 		}
 	}
 	if (count > 0) {
-		mark_inode_dirty(inode);
 		ext2_free_blocks (inode, block_to_free, count);
+		mark_inode_dirty(inode);
 	}
 }
 
@@ -1169,15 +1174,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
 	__ext2_truncate_blocks(inode, offset);
 }
 
-int ext2_setsize(struct inode *inode, loff_t newsize)
+static int ext2_setsize(struct inode *inode, loff_t newsize)
 {
-	loff_t oldsize;
 	int error;
 
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 	    S_ISLNK(inode->i_mode)))
 		return -EINVAL;
@@ -1197,10 +1197,7 @@ int ext2_setsize(struct inode *inode, loff_t newsize)
 	if (error)
 		return error;
 
-	oldsize = inode->i_size;
-	i_size_write(inode, newsize);
-	truncate_pagecache(inode, oldsize, newsize);
-
+	truncate_setsize(inode, newsize);
 	__ext2_truncate_blocks(inode, newsize);
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
@@ -1557,7 +1554,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 		if (error)
 			return error;
 	}
-	generic_setattr(inode, iattr);
+	setattr_copy(inode, iattr);
 	if (iattr->ia_valid & ATTR_MODE)
 		error = ext2_acl_chmod(inode);
 	mark_inode_dirty(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7ff43f4a59c..1ec602673ea 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -195,17 +195,6 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(ext2_inode_cachep);
 }
 
-static void ext2_clear_inode(struct inode *inode)
-{
-	struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
-
-	dquot_drop(inode);
-	ext2_discard_reservation(inode);
-	EXT2_I(inode)->i_block_alloc_info = NULL;
-	if (unlikely(rsv))
-		kfree(rsv);
-}
-
 static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct super_block *sb = vfs->mnt_sb;
@@ -299,13 +288,12 @@ static const struct super_operations ext2_sops = {
 	.alloc_inode	= ext2_alloc_inode,
 	.destroy_inode	= ext2_destroy_inode,
 	.write_inode	= ext2_write_inode,
-	.delete_inode	= ext2_delete_inode,
+	.evict_inode	= ext2_evict_inode,
 	.put_super	= ext2_put_super,
 	.write_super	= ext2_write_super,
 	.sync_fs	= ext2_sync_fs,
 	.statfs		= ext2_statfs,
 	.remount_fs	= ext2_remount,
-	.clear_inode	= ext2_clear_inode,
 	.show_options	= ext2_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext2_quota_read,
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7c3915780b1..8c29ae15129 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -674,6 +674,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			new_bh = sb_getblk(sb, block);
 			if (!new_bh) {
 				ext2_free_blocks(inode, block, 1);
+				mark_inode_dirty(inode);
 				error = -EIO;
 				goto cleanup;
 			}
@@ -703,8 +704,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 		 * written (only some dirty data were not) so we just proceed
 		 * as if nothing happened and cleanup the unused block */
 		if (error && error != -ENOSPC) {
-			if (new_bh && new_bh != old_bh)
-				dquot_free_block(inode, 1);
+			if (new_bh && new_bh != old_bh) {
+				dquot_free_block_nodirty(inode, 1);
+				mark_inode_dirty(inode);
+			}
 			goto cleanup;
 		}
 	} else
@@ -727,6 +730,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 				mb_cache_entry_free(ce);
 			ea_bdebug(old_bh, "freeing");
 			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
+			mark_inode_dirty(inode);
 			/* We let our caller release old_bh, so we
 			 * need to duplicate the buffer before. */
 			get_bh(old_bh);
@@ -736,7 +740,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
 			if (ce)
 				mb_cache_entry_release(ce);
-			dquot_free_block(inode, 1);
+			dquot_free_block_nodirty(inode, 1);
+			mark_inode_dirty(inode);
 			mark_buffer_dirty(old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
 				le32_to_cpu(HDR(old_bh)->h_refcount));
@@ -799,7 +804,7 @@ ext2_xattr_delete_inode(struct inode *inode)
 		mark_buffer_dirty(bh);
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
-		dquot_free_block(inode, 1);
+		dquot_free_block_nodirty(inode, 1);
 	}
 	EXT2_I(inode)->i_file_acl = 0;
 
@@ -838,7 +843,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh)
 	ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
 	if (!ce)
 		return -ENOMEM;
-	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
 	if (error) {
 		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
@@ -912,8 +917,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
-				       inode->i_sb->s_bdev, hash);
+	ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
+				       hash);
 	while (ce) {
 		struct buffer_head *bh;
 
@@ -945,7 +950,7 @@ again:
 			unlock_buffer(bh);
 			brelse(bh);
 		}
-		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
 	}
 	return NULL;
 }
@@ -1021,9 +1026,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
 int __init
 init_ext2_xattr(void)
 {
-	ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
-		sizeof(struct mb_cache_entry) +
-		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+	ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
 	if (!ext2_xattr_cache)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 498021eb88f..4ab72db3559 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -119,20 +119,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	ino = inode->i_ino;
 	ext3_debug ("freeing inode %lu\n", ino);
 
-	/*
-	 * Note: we must free any quota before locking the superblock,
-	 * as writing the quota to disk may need the lock as well.
-	 */
-	dquot_initialize(inode);
-	ext3_xattr_delete_inode(handle, inode);
-	dquot_free_inode(inode);
-	dquot_drop(inode);
-
 	is_directory = S_ISDIR(inode->i_mode);
 
-	/* Do this BEFORE marking the inode not in use or returning an error */
-	clear_inode (inode);
-
 	es = EXT3_SB(sb)->s_es;
 	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
 		ext3_error (sb, "ext3_free_inode",
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 001eb0e2d48..5e0faf4cda7 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -190,18 +190,28 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
 }
 
 /*
- * Called at the last iput() if i_nlink is zero.
+ * Called at inode eviction from icache
  */
-void ext3_delete_inode (struct inode * inode)
+void ext3_evict_inode (struct inode *inode)
 {
+	struct ext3_block_alloc_info *rsv;
 	handle_t *handle;
+	int want_delete = 0;
 
-	if (!is_bad_inode(inode))
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
 		dquot_initialize(inode);
+		want_delete = 1;
+	}
 
 	truncate_inode_pages(&inode->i_data, 0);
 
-	if (is_bad_inode(inode))
+	ext3_discard_reservation(inode);
+	rsv = EXT3_I(inode)->i_block_alloc_info;
+	EXT3_I(inode)->i_block_alloc_info = NULL;
+	if (unlikely(rsv))
+		kfree(rsv);
+
+	if (!want_delete)
 		goto no_delete;
 
 	handle = start_transaction(inode);
@@ -238,15 +248,22 @@ void ext3_delete_inode (struct inode * inode)
 	 * having errors), but we can't free the inode if the mark_dirty
 	 * fails.
 	 */
-	if (ext3_mark_inode_dirty(handle, inode))
-		/* If that failed, just do the required in-core inode clear. */
-		clear_inode(inode);
-	else
+	if (ext3_mark_inode_dirty(handle, inode)) {
+		/* If that failed, just dquot_drop() and be done with that */
+		dquot_drop(inode);
+		end_writeback(inode);
+	} else {
+		ext3_xattr_delete_inode(handle, inode);
+		dquot_free_inode(inode);
+		dquot_drop(inode);
+		end_writeback(inode);
 		ext3_free_inode(handle, inode);
+	}
 	ext3_journal_stop(handle);
 	return;
 no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	end_writeback(inode);
+	dquot_drop(inode);
 }
 
 typedef struct {
@@ -1212,8 +1229,7 @@ retry:
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-							ext3_get_block);
+	ret = __block_write_begin(page, pos, len, ext3_get_block);
 	if (ret)
 		goto write_begin_failed;
 
@@ -1798,6 +1814,17 @@ retry:
 	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext3_get_block, NULL);
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
@@ -2560,7 +2587,7 @@ out_stop:
 	 * If this was a simple ftruncate(), and the file will remain alive
 	 * then we need to clear up the orphan record which we created above.
 	 * However, if this was a real unlink then we were called by
-	 * ext3_delete_inode(), and we allow that function to clean up the
+	 * ext3_evict_inode(), and we allow that function to clean up the
 	 * orphan info for us.
 	 */
 	if (inode->i_nlink)
@@ -3204,9 +3231,17 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 		ext3_journal_stop(handle);
 	}
 
-	rc = inode_setattr(inode, attr);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		rc = vmtruncate(inode, attr->ia_size);
+		if (rc)
+			goto err_out;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 
-	if (!rc && (ia_valid & ATTR_MODE))
+	if (ia_valid & ATTR_MODE)
 		rc = ext3_acl_chmod(inode);
 
 err_out:
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9650a956fd0..5dbf4dba03c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -527,17 +527,6 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(ext3_inode_cachep);
 }
 
-static void ext3_clear_inode(struct inode *inode)
-{
-	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
-
-	dquot_drop(inode);
-	ext3_discard_reservation(inode);
-	EXT3_I(inode)->i_block_alloc_info = NULL;
-	if (unlikely(rsv))
-		kfree(rsv);
-}
-
 static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
 {
 #if defined(CONFIG_QUOTA)
@@ -780,14 +769,13 @@ static const struct super_operations ext3_sops = {
 	.destroy_inode	= ext3_destroy_inode,
 	.write_inode	= ext3_write_inode,
 	.dirty_inode	= ext3_dirty_inode,
-	.delete_inode	= ext3_delete_inode,
+	.evict_inode	= ext3_evict_inode,
 	.put_super	= ext3_put_super,
 	.sync_fs	= ext3_sync_fs,
 	.freeze_fs	= ext3_freeze,
 	.unfreeze_fs	= ext3_unfreeze,
 	.statfs		= ext3_statfs,
 	.remount_fs	= ext3_remount,
-	.clear_inode	= ext3_clear_inode,
 	.show_options	= ext3_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext3_quota_read,
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 71fb8d65e54..e69dc6dfaa8 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1139,7 +1139,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh)
 		ea_bdebug(bh, "out of memory");
 		return;
 	}
-	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
 	if (error) {
 		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
@@ -1211,8 +1211,8 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
-				       inode->i_sb->s_bdev, hash);
+	ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev,
+				       hash);
 	while (ce) {
 		struct buffer_head *bh;
 
@@ -1237,7 +1237,7 @@ again:
 			return bh;
 		}
 		brelse(bh);
-		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
 	}
 	return NULL;
 }
@@ -1313,9 +1313,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header,
 int __init
 init_ext3_xattr(void)
 {
-	ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
-		sizeof(struct mb_cache_entry) +
-		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+	ext3_xattr_cache = mb_cache_create("ext3_xattr", 6);
 	if (!ext3_xattr_cache)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e03841d9f30..889ec9d5e6a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1643,7 +1643,8 @@ extern int  ext4_write_inode(struct inode *, struct writeback_control *);
 extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 				struct kstat *stat);
-extern void ext4_delete_inode(struct inode *);
+extern void ext4_evict_inode(struct inode *);
+extern void ext4_clear_inode(struct inode *);
 extern int  ext4_sync_inode(handle_t *, struct inode *);
 extern void ext4_dirty_inode(struct inode *);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ac377505ed5..45853e0d1f2 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -222,7 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	is_directory = S_ISDIR(inode->i_mode);
 
 	/* Do this BEFORE marking the inode not in use or returning an error */
-	clear_inode(inode);
+	ext4_clear_inode(inode);
 
 	es = EXT4_SB(sb)->s_es;
 	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a0ab3754d0d..4b8debeb396 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 /*
  * Called at the last iput() if i_nlink is zero.
  */
-void ext4_delete_inode(struct inode *inode)
+void ext4_evict_inode(struct inode *inode)
 {
 	handle_t *handle;
 	int err;
 
+	if (inode->i_nlink) {
+		truncate_inode_pages(&inode->i_data, 0);
+		goto no_delete;
+	}
+
 	if (!is_bad_inode(inode))
 		dquot_initialize(inode);
 
@@ -246,13 +251,13 @@ void ext4_delete_inode(struct inode *inode)
 	 */
 	if (ext4_mark_inode_dirty(handle, inode))
 		/* If that failed, just do the required in-core inode clear. */
-		clear_inode(inode);
+		ext4_clear_inode(inode);
 	else
 		ext4_free_inode(handle, inode);
 	ext4_journal_stop(handle);
 	return;
 no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
 typedef struct {
@@ -1602,11 +1607,9 @@ retry:
 	*pagep = page;
 
 	if (ext4_should_dioread_nolock(inode))
-		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, ext4_get_block_write);
+		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
 	else
-		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, ext4_get_block);
+		ret = __block_write_begin(page, pos, len, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
@@ -1617,7 +1620,7 @@ retry:
 		unlock_page(page);
 		page_cache_release(page);
 		/*
-		 * block_write_begin may have instantiated a few blocks
+		 * __block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
 		 * i_size_read because we hold i_mutex.
 		 *
@@ -3205,8 +3208,7 @@ retry:
 	}
 	*pagep = page;
 
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				ext4_da_get_block_prep);
+	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
@@ -3565,15 +3567,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 
 retry:
 	if (rw == READ && ext4_should_dioread_nolock(inode))
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+		ret = __blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
-				 ext4_get_block, NULL);
-	else
+				 ext4_get_block, NULL, NULL, 0);
+	else {
 		ret = blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext4_get_block, NULL);
+
+		if (unlikely((rw & WRITE) && ret < 0)) {
+			loff_t isize = i_size_read(inode);
+			loff_t end = offset + iov_length(iov, nr_segs);
+
+			if (end > isize)
+				vmtruncate(inode, isize);
+		}
+	}
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
@@ -5536,11 +5547,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			ext4_truncate(inode);
 	}
 
-	rc = inode_setattr(inode, attr);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode))
+		rc = vmtruncate(inode, attr->ia_size);
 
-	/* If inode_setattr's call to ext4_truncate failed to get a
-	 * transaction handle at all, we need to clean up the in-core
-	 * orphan list manually. */
+	if (!rc) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
+	}
+
+	/*
+	 * If the call to ext4_truncate failed to get a transaction handle at
+	 * all, we need to clean up the in-core orphan list manually.
+	 */
 	if (inode->i_nlink)
 		ext4_orphan_del(NULL, inode);
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8d65575f8c8..26147746c27 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -868,8 +868,10 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(ext4_inode_cachep);
 }
 
-static void ext4_clear_inode(struct inode *inode)
+void ext4_clear_inode(struct inode *inode)
 {
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	if (EXT4_JOURNAL(inode))
@@ -1158,14 +1160,13 @@ static const struct super_operations ext4_sops = {
 	.destroy_inode	= ext4_destroy_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
-	.delete_inode	= ext4_delete_inode,
+	.evict_inode	= ext4_evict_inode,
 	.put_super	= ext4_put_super,
 	.sync_fs	= ext4_sync_fs,
 	.freeze_fs	= ext4_freeze,
 	.unfreeze_fs	= ext4_unfreeze,
 	.statfs		= ext4_statfs,
 	.remount_fs	= ext4_remount,
-	.clear_inode	= ext4_clear_inode,
 	.show_options	= ext4_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext4_quota_read,
@@ -1179,12 +1180,11 @@ static const struct super_operations ext4_nojournal_sops = {
 	.destroy_inode	= ext4_destroy_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
-	.delete_inode	= ext4_delete_inode,
+	.evict_inode	= ext4_evict_inode,
 	.write_super	= ext4_write_super,
 	.put_super	= ext4_put_super,
 	.statfs		= ext4_statfs,
 	.remount_fs	= ext4_remount,
-	.clear_inode	= ext4_clear_inode,
 	.show_options	= ext4_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext4_quota_read,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a6f31424957..3a8cd8dff1a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1417,7 +1417,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh)
 		ea_bdebug(bh, "out of memory");
 		return;
 	}
-	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
 	if (error) {
 		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
@@ -1489,8 +1489,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext4_xattr_cache, 0,
-				       inode->i_sb->s_bdev, hash);
+	ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev,
+				       hash);
 	while (ce) {
 		struct buffer_head *bh;
 
@@ -1514,7 +1514,7 @@ again:
 			return bh;
 		}
 		brelse(bh);
-		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
 	}
 	return NULL;
 }
@@ -1590,9 +1590,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
 int __init
 init_ext4_xattr(void)
 {
-	ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL,
-		sizeof(struct mb_cache_entry) +
-		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+	ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
 	if (!ext4_xattr_cache)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 27ac2572595..d75a77f85c2 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -306,7 +306,6 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
 extern const struct file_operations fat_file_operations;
 extern const struct inode_operations fat_file_inode_operations;
 extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
-extern int fat_setsize(struct inode *inode, loff_t offset);
 extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
 extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		       struct kstat *stat);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 990dfae022e..7257752b6d5 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -364,18 +364,6 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
 	return 0;
 }
 
-int fat_setsize(struct inode *inode, loff_t offset)
-{
-	int error;
-
-	error = simple_setsize(inode, offset);
-	if (error)
-		return error;
-	fat_truncate_blocks(inode, offset);
-
-	return error;
-}
-
 #define TIMES_SET_FLAGS	(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
 /* valid file mode bits */
 #define FAT_VALID_MODE	(S_IFREG | S_IFDIR | S_IRWXUGO)
@@ -387,21 +375,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	unsigned int ia_valid;
 	int error;
 
-	/*
-	 * Expand the file. Since inode_setattr() updates ->i_size
-	 * before calling the ->truncate(), but FAT needs to fill the
-	 * hole before it. XXX: this is no longer true with new truncate
-	 * sequence.
-	 */
-	if (attr->ia_valid & ATTR_SIZE) {
-		if (attr->ia_size > inode->i_size) {
-			error = fat_cont_expand(inode, attr->ia_size);
-			if (error || attr->ia_valid == ATTR_SIZE)
-				goto out;
-			attr->ia_valid &= ~ATTR_SIZE;
-		}
-	}
-
 	/* Check for setting the inode time. */
 	ia_valid = attr->ia_valid;
 	if (ia_valid & TIMES_SET_FLAGS) {
@@ -417,6 +390,21 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 		goto out;
 	}
 
+	/*
+	 * Expand the file. Since inode_setattr() updates ->i_size
+	 * before calling the ->truncate(), but FAT needs to fill the
+	 * hole before it. XXX: this is no longer true with new truncate
+	 * sequence.
+	 */
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (attr->ia_size > inode->i_size) {
+			error = fat_cont_expand(inode, attr->ia_size);
+			if (error || attr->ia_valid == ATTR_SIZE)
+				goto out;
+			attr->ia_valid &= ~ATTR_SIZE;
+		}
+	}
+
 	if (((attr->ia_valid & ATTR_UID) &&
 	     (attr->ia_uid != sbi->options.fs_uid)) ||
 	    ((attr->ia_valid & ATTR_GID) &&
@@ -441,12 +429,11 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		error = fat_setsize(inode, attr->ia_size);
-		if (error)
-			goto out;
+		truncate_setsize(inode, attr->ia_size);
+		fat_truncate_blocks(inode, attr->ia_size);
 	}
 
-	generic_setattr(inode, attr);
+	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
 out:
 	return error;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 7bf45aee56d..830058057d3 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -159,7 +159,7 @@ static int fat_write_begin(struct file *file, struct address_space *mapping,
 	int err;
 
 	*pagep = NULL;
-	err = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
+	err = cont_write_begin(file, mapping, pos, len, flags,
 				pagep, fsdata, fat_get_block,
 				&MSDOS_I(mapping->host)->mmu_private);
 	if (err < 0)
@@ -212,8 +212,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
-				iov, offset, nr_segs, fat_get_block, NULL);
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+				 iov, offset, nr_segs, fat_get_block, NULL);
 	if (ret < 0 && (rw & WRITE))
 		fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
 
@@ -263,7 +263,7 @@ static const struct address_space_operations fat_aops = {
  *			check if the location is still valid and retry if it
  *			isn't. Otherwise we do changes.
  *		5. Spinlock is used to protect hash/unhash/location check/lookup
- *		6. fat_clear_inode() unhashes the F-d-c entry.
+ *		6. fat_evict_inode() unhashes the F-d-c entry.
  *		7. lookup() and readdir() do igrab() if they find a F-d-c entry
  *			and consider negative result as cache miss.
  */
@@ -448,16 +448,15 @@ out:
 
 EXPORT_SYMBOL_GPL(fat_build_inode);
 
-static void fat_delete_inode(struct inode *inode)
+static void fat_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	fat_truncate_blocks(inode, 0);
-	clear_inode(inode);
-}
-
-static void fat_clear_inode(struct inode *inode)
-{
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		fat_truncate_blocks(inode, 0);
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	fat_cache_inval_inode(inode);
 	fat_detach(inode);
 }
@@ -674,12 +673,11 @@ static const struct super_operations fat_sops = {
 	.alloc_inode	= fat_alloc_inode,
 	.destroy_inode	= fat_destroy_inode,
 	.write_inode	= fat_write_inode,
-	.delete_inode	= fat_delete_inode,
+	.evict_inode	= fat_evict_inode,
 	.put_super	= fat_put_super,
 	.write_super	= fat_write_super,
 	.sync_fs	= fat_sync_fs,
 	.statfs		= fat_statfs,
-	.clear_inode	= fat_clear_inode,
 	.remount_fs	= fat_remount,
 
 	.show_options	= fat_show_options,
diff --git a/fs/file_table.c b/fs/file_table.c
index 5c7d10ead4a..b8a0bb63cbd 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -230,6 +230,15 @@ static void __fput(struct file *file)
 	might_sleep();
 
 	fsnotify_close(file);
+
+	/*
+	 * fsnotify_create_event may have taken one or more references on this
+	 * file.  If it did so it left one reference for us to drop to make sure
+	 * its calls to fput could not prematurely destroy the file.
+	 */
+	if (atomic_long_read(&file->f_count))
+		return fput(file);
+
 	/*
 	 * The function eventpoll_release() should be the first called
 	 * in the file cleanup chain.
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 50ab5eecb99..881aa3d217f 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -63,7 +63,7 @@ extern void			vxfs_put_fake_inode(struct inode *);
 extern struct vxfs_inode_info *	vxfs_blkiget(struct super_block *, u_long, ino_t);
 extern struct vxfs_inode_info *	vxfs_stiget(struct super_block *, ino_t);
 extern struct inode *		vxfs_iget(struct super_block *, ino_t);
-extern void			vxfs_clear_inode(struct inode *);
+extern void			vxfs_evict_inode(struct inode *);
 
 /* vxfs_lookup.c */
 extern const struct inode_operations	vxfs_dir_inode_ops;
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 03a6ea5e99f..79d1b4ea13e 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,15 +337,17 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
 }
 
 /**
- * vxfs_clear_inode - remove inode from main memory
+ * vxfs_evict_inode - remove inode from main memory
  * @ip:		inode to discard.
  *
  * Description:
- *  vxfs_clear_inode() is called on the final iput and frees the private
+ *  vxfs_evict_inode() is called on the final iput and frees the private
  *  inode area.
  */
 void
-vxfs_clear_inode(struct inode *ip)
+vxfs_evict_inode(struct inode *ip)
 {
+	truncate_inode_pages(&ip->i_data, 0);
+	end_writeback(ip);
 	kmem_cache_free(vxfs_inode_cachep, ip->i_private);
 }
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 5132c99b1ca..dc0c041e85c 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -61,7 +61,7 @@ static int		vxfs_statfs(struct dentry *, struct kstatfs *);
 static int		vxfs_remount(struct super_block *, int *, char *);
 
 static const struct super_operations vxfs_super_ops = {
-	.clear_inode =		vxfs_clear_inode,
+	.evict_inode =		vxfs_evict_inode,
 	.put_super =		vxfs_put_super,
 	.statfs =		vxfs_statfs,
 	.remount_fs =		vxfs_remount,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 30ac305e829..2f76c4a081a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported through /proc...
+ */
+int nr_pdflush_threads;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -65,22 +74,21 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
-	spin_lock(&bdi->wb_lock);
-	list_add_tail(&work->list, &bdi->work_list);
-	spin_unlock(&bdi->wb_lock);
+	trace_writeback_queue(bdi, work);
 
-	/*
-	 * If the default thread isn't there, make sure we add it. When
-	 * it gets created and wakes up, we'll run this work.
-	 */
-	if (unlikely(list_empty_careful(&bdi->wb_list)))
+	spin_lock_bh(&bdi->wb_lock);
+	list_add_tail(&work->list, &bdi->work_list);
+	if (bdi->wb.task) {
+		wake_up_process(bdi->wb.task);
+	} else {
+		/*
+		 * The bdi thread isn't there, wake up the forker thread which
+		 * will create and run it.
+		 */
+		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
-	else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
 	}
+	spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task)
+		if (bdi->wb.task) {
+			trace_writeback_nowork(bdi);
 			wake_up_process(bdi->wb.task);
+		}
 		return;
 	}
 
@@ -352,7 +362,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_SYNC;
-	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
+	if (!(inode->i_state & I_FREEING)) {
 		if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
 			/*
 			 * More pages get dirtied by a fast dirtier.
@@ -499,7 +509,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		if (inode_dirtied_after(inode, wbc->wb_start))
 			return 1;
 
-		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
+		BUG_ON(inode->i_state & I_FREEING);
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
@@ -643,10 +653,14 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
+
+		trace_wbc_writeback_start(&wbc, wb->bdi);
 		if (work->sb)
 			__writeback_inodes_sb(work->sb, wb, &wbc);
 		else
 			writeback_inodes_wb(wb, &wbc);
+		trace_wbc_writeback_written(&wbc, wb->bdi);
+
 		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -674,6 +688,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		if (!list_empty(&wb->b_more_io))  {
 			inode = list_entry(wb->b_more_io.prev,
 						struct inode, i_list);
+			trace_wbc_writeback_wait(&wbc, wb->bdi);
 			inode_wait_for_writeback(inode);
 		}
 		spin_unlock(&inode_lock);
@@ -686,17 +701,17 @@ static long wb_writeback(struct bdi_writeback *wb,
  * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
 static struct wb_writeback_work *
-get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+get_next_work_item(struct backing_dev_info *bdi)
 {
 	struct wb_writeback_work *work = NULL;
 
-	spin_lock(&bdi->wb_lock);
+	spin_lock_bh(&bdi->wb_lock);
 	if (!list_empty(&bdi->work_list)) {
 		work = list_entry(bdi->work_list.next,
 				  struct wb_writeback_work, list);
 		list_del_init(&work->list);
 	}
-	spin_unlock(&bdi->wb_lock);
+	spin_unlock_bh(&bdi->wb_lock);
 	return work;
 }
 
@@ -744,7 +759,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 	struct wb_writeback_work *work;
 	long wrote = 0;
 
-	while ((work = get_next_work_item(bdi, wb)) != NULL) {
+	while ((work = get_next_work_item(bdi)) != NULL) {
 		/*
 		 * Override sync mode, in case we must wait for completion
 		 * because this thread is exiting now.
@@ -752,6 +767,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		if (force_wait)
 			work->sync_mode = WB_SYNC_ALL;
 
+		trace_writeback_exec(bdi, work);
+
 		wrote += wb_writeback(wb, work);
 
 		/*
@@ -776,47 +793,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  */
-int bdi_writeback_task(struct bdi_writeback *wb)
+int bdi_writeback_thread(void *data)
 {
-	unsigned long last_active = jiffies;
-	unsigned long wait_jiffies = -1UL;
+	struct bdi_writeback *wb = data;
+	struct backing_dev_info *bdi = wb->bdi;
 	long pages_written;
 
+	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+	wb->last_active = jiffies;
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(current, 0);
+
+	trace_writeback_thread_start(bdi);
+
 	while (!kthread_should_stop()) {
+		/*
+		 * Remove own delayed wake-up timer, since we are already awake
+		 * and we'll take care of the periodic write-back.
+		 */
+		del_timer(&wb->wakeup_timer);
+
 		pages_written = wb_do_writeback(wb, 0);
 
+		trace_writeback_pages_written(pages_written);
+
 		if (pages_written)
-			last_active = jiffies;
-		else if (wait_jiffies != -1UL) {
-			unsigned long max_idle;
+			wb->last_active = jiffies;
 
-			/*
-			 * Longest period of inactivity that we tolerate. If we
-			 * see dirty data again later, the task will get
-			 * recreated automatically.
-			 */
-			max_idle = max(5UL * 60 * HZ, wait_jiffies);
-			if (time_after(jiffies, max_idle + last_active))
-				break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!list_empty(&bdi->work_list)) {
+			__set_current_state(TASK_RUNNING);
+			continue;
 		}
 
-		if (dirty_writeback_interval) {
-			wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-			schedule_timeout_interruptible(wait_jiffies);
-		} else {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (list_empty_careful(&wb->bdi->work_list) &&
-			    !kthread_should_stop())
-				schedule();
-			__set_current_state(TASK_RUNNING);
+		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
+		else {
+			/*
+			 * We have nothing to do, so can go sleep without any
+			 * timeout and save power. When a work is queued or
+			 * something is made dirty - we will be woken up.
+			 */
+			schedule();
 		}
 
 		try_to_freeze();
 	}
 
+	/* Flush any work that raced with us exiting */
+	if (!list_empty(&bdi->work_list))
+		wb_do_writeback(wb, 1);
+
+	trace_writeback_thread_stop(bdi);
 	return 0;
 }
 
+
 /*
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
@@ -891,6 +927,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = NULL;
+	bool wakeup_bdi = false;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -936,7 +974,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			if (hlist_unhashed(&inode->i_hash))
 				goto out;
 		}
-		if (inode->i_state & (I_FREEING|I_CLEAR))
+		if (inode->i_state & I_FREEING)
 			goto out;
 
 		/*
@@ -944,22 +982,31 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
-			struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-			struct backing_dev_info *bdi = wb->bdi;
-
-			if (bdi_cap_writeback_dirty(bdi) &&
-			    !test_bit(BDI_registered, &bdi->state)) {
-				WARN_ON(1);
-				printk(KERN_ERR "bdi-%s not registered\n",
-								bdi->name);
+			bdi = inode_to_bdi(inode);
+
+			if (bdi_cap_writeback_dirty(bdi)) {
+				WARN(!test_bit(BDI_registered, &bdi->state),
+				     "bdi-%s not registered\n", bdi->name);
+
+				/*
+				 * If this is the first dirty inode for this
+				 * bdi, we have to wake-up the corresponding
+				 * bdi thread to make sure background
+				 * write-back happens later.
+				 */
+				if (!wb_has_dirty_io(&bdi->wb))
+					wakeup_bdi = true;
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &wb->b_dirty);
+			list_move(&inode->i_list, &bdi->wb.b_dirty);
 		}
 	}
 out:
 	spin_unlock(&inode_lock);
+
+	if (wakeup_bdi)
+		bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
@@ -1002,7 +1049,7 @@ static void wait_sb_inodes(struct super_block *sb)
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		struct address_space *mapping;
 
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 		mapping = inode->i_mapping;
 		if (mapping->nrpages == 0)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 431be0795b6..c9627c95482 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1270,21 +1270,18 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
 	if (!fuse_allow_task(fc, current))
 		return -EACCES;
 
-	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
-		err = inode_change_ok(inode, attr);
-		if (err)
-			return err;
-	}
+	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+		attr->ia_valid |= ATTR_FORCE;
+
+	err = inode_change_ok(inode, attr);
+	if (err)
+		return err;
 
 	if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
 		return 0;
 
-	if (attr->ia_valid & ATTR_SIZE) {
-		err = inode_newsize_ok(inode, attr->ia_size);
-		if (err)
-			return err;
+	if (attr->ia_valid & ATTR_SIZE)
 		is_truncate = true;
-	}
 
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ec14d19ce50..da9e6e11374 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -122,8 +122,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
 	fuse_request_send_noreply(fc, req);
 }
 
-static void fuse_clear_inode(struct inode *inode)
+static void fuse_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (inode->i_sb->s_flags & MS_ACTIVE) {
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
@@ -736,7 +738,7 @@ static const struct export_operations fuse_export_operations = {
 static const struct super_operations fuse_super_operations = {
 	.alloc_inode    = fuse_alloc_inode,
 	.destroy_inode  = fuse_destroy_inode,
-	.clear_inode	= fuse_clear_inode,
+	.evict_inode	= fuse_evict_inode,
 	.drop_inode	= generic_delete_inode,
 	.remount_fs	= fuse_remount_fs,
 	.put_super	= fuse_put_super,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 5e96cbd8a45..194fe16d841 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -697,12 +697,12 @@ out:
 	page_cache_release(page);
 
 	/*
-	 * XXX(hch): the call below should probably be replaced with
+	 * XXX(truncate): the call below should probably be replaced with
 	 * a call to the gfs2-specific truncate blocks helper to actually
 	 * release disk blocks..
 	 */
 	if (pos + len > ip->i_inode.i_size)
-		simple_setsize(&ip->i_inode, ip->i_inode.i_size);
+		truncate_setsize(&ip->i_inode, ip->i_inode.i_size);
 out_endtrans:
 	gfs2_trans_end(sdp);
 out_trans_fail:
@@ -1042,9 +1042,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	if (rv != 1)
 		goto out; /* dio not valid, fall back to buffered i/o */
 
-	rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
-					   iov, offset, nr_segs,
-					   gfs2_get_block_direct, NULL);
+	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+				  offset, nr_segs, gfs2_get_block_direct,
+				  NULL, NULL, 0);
 out:
 	gfs2_glock_dq_m(1, &gh);
 	gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f03afd9c44b..08140f185a3 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -84,7 +84,7 @@ static int iget_skip_test(struct inode *inode, void *opaque)
 	struct gfs2_skip_data *data = opaque;
 
 	if (ip->i_no_addr == data->no_addr) {
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
+		if (inode->i_state & (I_FREEING|I_WILL_FREE)){
 			data->skipped = 1;
 			return 0;
 		}
@@ -991,18 +991,29 @@ fail:
 
 static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
+	struct inode *inode = &ip->i_inode;
 	struct buffer_head *dibh;
 	int error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		error = inode_setattr(&ip->i_inode, attr);
-		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
 	}
-	return error;
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
+	gfs2_assert_warn(GFS2_SB(inode), !error);
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+	gfs2_dinode_out(ip, dibh->b_data);
+	brelse(dibh);
+	return 0;
 }
 
 /**
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6a857e24f94..cde1248a622 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
 		goto skip_barrier;
 	get_bh(bh);
-	submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh);
+	submit_bh(WRITE_BARRIER | REQ_META, bh);
 	wait_on_buffer(bh);
 	if (buffer_eopnotsupp(bh)) {
 		clear_buffer_eopnotsupp(bh);
@@ -605,7 +605,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 		lock_buffer(bh);
 skip_barrier:
 		get_bh(bh);
-		submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+		submit_bh(WRITE_SYNC | REQ_META, bh);
 		wait_on_buffer(bh);
 	}
 	if (!buffer_uptodate(bh))
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 18176d0b75d..f3b071f921a 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -36,8 +36,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 {
 	struct buffer_head *bh, *head;
 	int nr_underway = 0;
-	int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE));
+	int write_op = REQ_META |
+		(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!page_has_buffers(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 	}
 	bh->b_end_io = end_buffer_read_sync;
 	get_bh(bh);
-	submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
+	submit_bh(READ_SYNC | REQ_META, bh);
 	if (!(flags & DIO_WAIT))
 		return 0;
 
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	if (buffer_uptodate(first_bh))
 		goto out;
 	if (!buffer_locked(first_bh))
-		ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
+		ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
 
 	dblock++;
 	extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4f44bdeb2f0..4d4b1e8ac64 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -274,7 +274,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 
 	bio->bi_end_io = end_bio_io_page;
 	bio->bi_private = page;
-	submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+	submit_bio(READ_SYNC | REQ_META, bio);
 	wait_on_page_locked(page);
 	bio_put(bio);
 	if (!PageUptodate(page)) {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 98cdd05f331..1009be2c973 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1072,7 +1072,7 @@ int gfs2_permission(struct inode *inode, int mask)
 }
 
 /*
- * XXX: should be changed to have proper ordering by opencoding simple_setsize
+ * XXX(truncate): the truncate_setsize calls should be moved to the end.
  */
 static int setattr_size(struct inode *inode, struct iattr *attr)
 {
@@ -1084,10 +1084,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
 		error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
 		if (error)
 			return error;
-		error = simple_setsize(inode, attr->ia_size);
+		truncate_setsize(inode, attr->ia_size);
 		gfs2_trans_end(sdp);
-		if (error) 
-			return error;
 	}
 
 	error = gfs2_truncatei(ip, attr->ia_size);
@@ -1136,8 +1134,16 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (error)
 		goto out_end_trans;
 
-	error = inode_setattr(inode, attr);
-	gfs2_assert_warn(sdp, !error);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, attr->ia_size);
+		gfs2_assert_warn(sdp, !error);
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4140811a921..77cb9f830ee 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1188,7 +1188,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
  * node for later deallocation.
  */
 
-static void gfs2_drop_inode(struct inode *inode)
+static int gfs2_drop_inode(struct inode *inode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 
@@ -1197,26 +1197,7 @@ static void gfs2_drop_inode(struct inode *inode)
 		if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
 			clear_nlink(inode);
 	}
-	generic_drop_inode(inode);
-}
-
-/**
- * gfs2_clear_inode - Deallocate an inode when VFS is done with it
- * @inode: The VFS inode
- *
- */
-
-static void gfs2_clear_inode(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	ip->i_gl->gl_object = NULL;
-	gfs2_glock_put(ip->i_gl);
-	ip->i_gl = NULL;
-	if (ip->i_iopen_gh.gh_gl) {
-		ip->i_iopen_gh.gh_gl->gl_object = NULL;
-		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-	}
+	return generic_drop_inode(inode);
 }
 
 static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
@@ -1344,13 +1325,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
  * is safe, just less efficient.
  */
 
-static void gfs2_delete_inode(struct inode *inode)
+static void gfs2_evict_inode(struct inode *inode)
 {
 	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	int error;
 
+	if (inode->i_nlink)
+		goto out;
+
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
@@ -1404,10 +1388,18 @@ out_unlock:
 	gfs2_holder_uninit(&ip->i_iopen_gh);
 	gfs2_glock_dq_uninit(&gh);
 	if (error && error != GLR_TRYFAILED && error != -EROFS)
-		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
+		fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
 out:
 	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	end_writeback(inode);
+
+	ip->i_gl->gl_object = NULL;
+	gfs2_glock_put(ip->i_gl);
+	ip->i_gl = NULL;
+	if (ip->i_iopen_gh.gh_gl) {
+		ip->i_iopen_gh.gh_gl->gl_object = NULL;
+		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+	}
 }
 
 static struct inode *gfs2_alloc_inode(struct super_block *sb)
@@ -1431,14 +1423,13 @@ const struct super_operations gfs2_super_ops = {
 	.alloc_inode		= gfs2_alloc_inode,
 	.destroy_inode		= gfs2_destroy_inode,
 	.write_inode		= gfs2_write_inode,
-	.delete_inode		= gfs2_delete_inode,
+	.evict_inode		= gfs2_evict_inode,
 	.put_super		= gfs2_put_super,
 	.sync_fs		= gfs2_sync_fs,
 	.freeze_fs 		= gfs2_freeze,
 	.unfreeze_fs		= gfs2_unfreeze,
 	.statfs			= gfs2_statfs,
 	.remount_fs		= gfs2_remount_fs,
-	.clear_inode		= gfs2_clear_inode,
 	.drop_inode		= gfs2_drop_inode,
 	.show_options		= gfs2_show_options,
 };
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 82f93da00d1..776af6eb4bc 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,6 +1296,7 @@ fail:
 
 int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 {
+	struct inode *inode = &ip->i_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_ea_location el;
 	struct buffer_head *dibh;
@@ -1321,14 +1322,25 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 		return error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		error = inode_setattr(&ip->i_inode, attr);
-		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
+	if (error)
+		goto out_trans_end;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, attr->ia_size);
+		gfs2_assert_warn(GFS2_SB(inode), !error);
 	}
 
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+	gfs2_dinode_out(ip, dibh->b_data);
+	brelse(dibh);
+
+out_trans_end:
 	gfs2_trans_end(sdp);
 	return error;
 }
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index fe35e3b626c..4f55651aaa5 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -193,7 +193,7 @@ extern int hfs_inode_setattr(struct dentry *, struct iattr *);
 extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
 			__be32 log_size, __be32 phys_size, u32 clump_size);
 extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *);
-extern void hfs_clear_inode(struct inode *);
+extern void hfs_evict_inode(struct inode *);
 extern void hfs_delete_inode(struct inode *);
 
 /* attr.c */
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 14f5cb1b9fd..397b7adc7ce 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -39,10 +39,19 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hfs_get_block,
 				&HFS_I(mapping->host)->phys_size);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
@@ -112,9 +121,24 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, hfs_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 static int hfs_writepages(struct address_space *mapping,
@@ -507,8 +531,10 @@ out:
 	return NULL;
 }
 
-void hfs_clear_inode(struct inode *inode)
+void hfs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
 		HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		iput(HFS_I(inode)->rsrc_inode);
@@ -588,13 +614,43 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
 			attr->ia_mode = inode->i_mode & ~S_IWUGO;
 		attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask;
 	}
-	error = inode_setattr(inode, attr);
-	if (error)
-		return error;
 
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 	return 0;
 }
 
+static int hfs_file_fsync(struct file *filp, int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct super_block * sb;
+	int ret, err;
+
+	/* sync the inode to buffers */
+	ret = write_inode_now(inode, 0);
+
+	/* sync the superblock to buffers */
+	sb = inode->i_sb;
+	if (sb->s_dirt) {
+		lock_super(sb);
+		sb->s_dirt = 0;
+		if (!(sb->s_flags & MS_RDONLY))
+			hfs_mdb_commit(sb);
+		unlock_super(sb);
+	}
+	/* .. finally sync the buffers to disk */
+	err = sync_blockdev(sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}
 
 static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
@@ -604,7 +660,7 @@ static const struct file_operations hfs_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
-	.fsync		= file_fsync,
+	.fsync		= hfs_file_fsync,
 	.open		= hfs_file_open,
 	.release	= hfs_file_release,
 };
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0a81eb7111f..34235d4bf08 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -181,7 +181,7 @@ static const struct super_operations hfs_super_operations = {
 	.alloc_inode	= hfs_alloc_inode,
 	.destroy_inode	= hfs_destroy_inode,
 	.write_inode	= hfs_write_inode,
-	.clear_inode	= hfs_clear_inode,
+	.evict_inode	= hfs_evict_inode,
 	.put_super	= hfs_put_super,
 	.write_super	= hfs_write_super,
 	.sync_fs	= hfs_sync_fs,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 6505c30ad96..dc856be3c2b 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -351,6 +351,7 @@ int hfsplus_show_options(struct seq_file *, struct vfsmount *);
 
 /* super.c */
 struct inode *hfsplus_iget(struct super_block *, unsigned long);
+int hfsplus_sync_fs(struct super_block *sb, int wait);
 
 /* tables.c */
 extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 9bbb82924a2..c5a979d62c6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -31,10 +31,19 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hfsplus_get_block,
 				&HFSPLUS_I(mapping->host).phys_size);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
@@ -105,9 +114,24 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, hfsplus_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 static int hfsplus_writepages(struct address_space *mapping,
@@ -266,9 +290,56 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
+static int hfsplus_file_fsync(struct file *filp, int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct super_block * sb;
+	int ret, err;
+
+	/* sync the inode to buffers */
+	ret = write_inode_now(inode, 0);
+
+	/* sync the superblock to buffers */
+	sb = inode->i_sb;
+	if (sb->s_dirt) {
+		if (!(sb->s_flags & MS_RDONLY))
+			hfsplus_sync_fs(sb, 1);
+		else
+			sb->s_dirt = 0;
+	}
+
+	/* .. finally sync the buffers to disk */
+	err = sync_blockdev(sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}
+
 static const struct inode_operations hfsplus_file_inode_operations = {
 	.lookup		= hfsplus_file_lookup,
 	.truncate	= hfsplus_file_truncate,
+	.setattr	= hfsplus_setattr,
 	.setxattr	= hfsplus_setxattr,
 	.getxattr	= hfsplus_getxattr,
 	.listxattr	= hfsplus_listxattr,
@@ -282,7 +353,7 @@ static const struct file_operations hfsplus_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
-	.fsync		= file_fsync,
+	.fsync		= hfsplus_file_fsync,
 	.open		= hfsplus_file_open,
 	.release	= hfsplus_file_release,
 	.unlocked_ioctl = hfsplus_ioctl,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 74b473a8ef9..3b55c050c74 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -145,16 +145,18 @@ static int hfsplus_write_inode(struct inode *inode,
 	return ret;
 }
 
-static void hfsplus_clear_inode(struct inode *inode)
+static void hfsplus_evict_inode(struct inode *inode)
 {
-	dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino);
+	dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (HFSPLUS_IS_RSRC(inode)) {
 		HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
 		iput(HFSPLUS_I(inode).rsrc_inode);
 	}
 }
 
-static int hfsplus_sync_fs(struct super_block *sb, int wait)
+int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
 	struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
@@ -293,7 +295,7 @@ static const struct super_operations hfsplus_sops = {
 	.alloc_inode	= hfsplus_alloc_inode,
 	.destroy_inode	= hfsplus_destroy_inode,
 	.write_inode	= hfsplus_write_inode,
-	.clear_inode	= hfsplus_clear_inode,
+	.evict_inode	= hfsplus_evict_inode,
 	.put_super	= hfsplus_put_super,
 	.write_super	= hfsplus_write_super,
 	.sync_fs	= hfsplus_sync_fs,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 2f34f8f2134..6bbd75c5589 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -53,18 +53,28 @@ struct hostfs_iattr {
 	struct timespec	ia_ctime;
 };
 
-extern int stat_file(const char *path, unsigned long long *inode_out,
-		     int *mode_out, int *nlink_out, int *uid_out, int *gid_out,
-		     unsigned long long *size_out, struct timespec *atime_out,
-		     struct timespec *mtime_out, struct timespec *ctime_out,
-		     int *blksize_out, unsigned long long *blocks_out, int fd);
+struct hostfs_stat {
+	unsigned long long ino;
+	unsigned int mode;
+	unsigned int nlink;
+	unsigned int uid;
+	unsigned int gid;
+	unsigned long long size;
+	struct timespec atime, mtime, ctime;
+	unsigned int blksize;
+	unsigned long long blocks;
+	unsigned int maj;
+	unsigned int min;
+};
+
+extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
 extern int access_file(char *path, int r, int w, int x);
 extern int open_file(char *path, int r, int w, int append);
-extern int file_type(const char *path, int *maj, int *min);
 extern void *open_dir(char *path, int *err_out);
 extern char *read_dir(void *stream, unsigned long long *pos,
 		      unsigned long long *ino_out, int *len_out);
 extern void close_file(void *stream);
+extern int replace_file(int oldfd, int fd);
 extern void close_dir(void *stream);
 extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
 extern int write_file(int fd, unsigned long long *offset, const char *buf,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 87ac1891a18..dd1e55535a4 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -14,12 +14,12 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/namei.h>
 #include "hostfs.h"
 #include "init.h"
 #include "kern.h"
 
 struct hostfs_inode_info {
-	char *host_filename;
 	int fd;
 	fmode_t mode;
 	struct inode vfs_inode;
@@ -49,7 +49,7 @@ static int append = 0;
 
 static const struct inode_operations hostfs_iops;
 static const struct inode_operations hostfs_dir_iops;
-static const struct address_space_operations hostfs_link_aops;
+static const struct inode_operations hostfs_link_iops;
 
 #ifndef MODULE
 static int __init hostfs_args(char *options, int *add)
@@ -90,71 +90,58 @@ __uml_setup("hostfs=", hostfs_args,
 );
 #endif
 
-static char *dentry_name(struct dentry *dentry, int extra)
+static char *__dentry_name(struct dentry *dentry, char *name)
 {
-	struct dentry *parent;
-	char *root, *name;
-	int len;
-
-	len = 0;
-	parent = dentry;
-	while (parent->d_parent != parent) {
-		len += parent->d_name.len + 1;
-		parent = parent->d_parent;
-	}
+	char *p = __dentry_path(dentry, name, PATH_MAX);
+	char *root;
+	size_t len;
 
-	root = HOSTFS_I(parent->d_inode)->host_filename;
-	len += strlen(root);
-	name = kmalloc(len + extra + 1, GFP_KERNEL);
-	if (name == NULL)
-		return NULL;
+	spin_unlock(&dcache_lock);
 
-	name[len] = '\0';
-	parent = dentry;
-	while (parent->d_parent != parent) {
-		len -= parent->d_name.len + 1;
-		name[len] = '/';
-		strncpy(&name[len + 1], parent->d_name.name,
-			parent->d_name.len);
-		parent = parent->d_parent;
+	root = dentry->d_sb->s_fs_info;
+	len = strlen(root);
+	if (IS_ERR(p)) {
+		__putname(name);
+		return NULL;
+	}
+	if (len > p - name) {
+		__putname(name);
+		return NULL;
+	}
+	memcpy(name, root, len);	/* no NUL, no padding: keeps *p intact */
+	if (p > name + len) {
+		char *s = name + len;
+		while ((*s++ = *p++) != '\0')
+			;
 	}
-	strncpy(name, root, strlen(root));
 	return name;
 }
 
-static char *inode_name(struct inode *ino, int extra)
+static char *dentry_name(struct dentry *dentry)
 {
-	struct dentry *dentry;
+	char *name = __getname();
+	if (!name)
+		return NULL;
 
-	dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias);
-	return dentry_name(dentry, extra);
+	spin_lock(&dcache_lock);
+	return __dentry_name(dentry, name); /* will unlock */
 }
 
-static int read_name(struct inode *ino, char *name)
+static char *inode_name(struct inode *ino)
 {
-	/*
-	 * The non-int inode fields are copied into ints by stat_file and
-	 * then copied into the inode because passing the actual pointers
-	 * in and having them treated as int * breaks on big-endian machines
-	 */
-	int err;
-	int i_mode, i_nlink, i_blksize;
-	unsigned long long i_size;
-	unsigned long long i_ino;
-	unsigned long long i_blocks;
-
-	err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
-			&ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
-			&ino->i_ctime, &i_blksize, &i_blocks, -1);
-	if (err)
-		return err;
+	struct dentry *dentry;
+	char *name = __getname();
+	if (!name)
+		return NULL;
 
-	ino->i_ino = i_ino;
-	ino->i_mode = i_mode;
-	ino->i_nlink = i_nlink;
-	ino->i_size = i_size;
-	ino->i_blocks = i_blocks;
-	return 0;
+	spin_lock(&dcache_lock);
+	if (list_empty(&ino->i_dentry)) {
+		spin_unlock(&dcache_lock);
+		__putname(name);
+		return NULL;
+	}
+	dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias);
+	return __dentry_name(dentry, name); /* will unlock */
 }
 
 static char *follow_link(char *link)
@@ -205,53 +192,11 @@ static char *follow_link(char *link)
 	return ERR_PTR(n);
 }
 
-static int hostfs_read_inode(struct inode *ino)
-{
-	char *name;
-	int err = 0;
-
-	/*
-	 * Unfortunately, we are called from iget() when we don't have a dentry
-	 * allocated yet.
-	 */
-	if (list_empty(&ino->i_dentry))
-		goto out;
-
-	err = -ENOMEM;
-	name = inode_name(ino, 0);
-	if (name == NULL)
-		goto out;
-
-	if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) {
-		name = follow_link(name);
-		if (IS_ERR(name)) {
-			err = PTR_ERR(name);
-			goto out;
-		}
-	}
-
-	err = read_name(ino, name);
-	kfree(name);
- out:
-	return err;
-}
-
 static struct inode *hostfs_iget(struct super_block *sb)
 {
-	struct inode *inode;
-	long ret;
-
-	inode = iget_locked(sb, 0);
+	struct inode *inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
-	if (inode->i_state & I_NEW) {
-		ret = hostfs_read_inode(inode);
-		if (ret < 0) {
-			iget_failed(inode);
-			return ERR_PTR(ret);
-		}
-		unlock_new_inode(inode);
-	}
 	return inode;
 }
 
@@ -269,7 +214,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
 	long long f_files;
 	long long f_ffree;
 
-	err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename,
+	err = do_statfs(dentry->d_sb->s_fs_info,
 			&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
 			&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
 			&sf->f_namelen, sf->f_spare);
@@ -288,47 +233,32 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 {
 	struct hostfs_inode_info *hi;
 
-	hi = kmalloc(sizeof(*hi), GFP_KERNEL);
+	hi = kzalloc(sizeof(*hi), GFP_KERNEL);
 	if (hi == NULL)
 		return NULL;
-
-	*hi = ((struct hostfs_inode_info) { .host_filename	= NULL,
-					    .fd			= -1,
-					    .mode		= 0 });
+	hi->fd = -1;
 	inode_init_once(&hi->vfs_inode);
 	return &hi->vfs_inode;
 }
 
-static void hostfs_delete_inode(struct inode *inode)
+static void hostfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		HOSTFS_I(inode)->fd = -1;
 	}
-	clear_inode(inode);
 }
 
 static void hostfs_destroy_inode(struct inode *inode)
 {
-	kfree(HOSTFS_I(inode)->host_filename);
-
-	/*
-	 * XXX: This should not happen, probably. The check is here for
-	 * additional safety.
-	 */
-	if (HOSTFS_I(inode)->fd != -1) {
-		close_file(&HOSTFS_I(inode)->fd);
-		printk(KERN_DEBUG "Closing host fd in .destroy_inode\n");
-	}
-
 	kfree(HOSTFS_I(inode));
 }
 
 static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
-	struct inode *root = vfs->mnt_sb->s_root->d_inode;
-	const char *root_path = HOSTFS_I(root)->host_filename;
+	const char *root_path = vfs->mnt_sb->s_fs_info;
 	size_t offset = strlen(root_ino) + 1;
 
 	if (strlen(root_path) > offset)
@@ -339,9 +269,8 @@ static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 static const struct super_operations hostfs_sbops = {
 	.alloc_inode	= hostfs_alloc_inode,
-	.drop_inode	= generic_delete_inode,
-	.delete_inode   = hostfs_delete_inode,
 	.destroy_inode	= hostfs_destroy_inode,
+	.evict_inode	= hostfs_evict_inode,
 	.statfs		= hostfs_statfs,
 	.show_options	= hostfs_show_options,
 };
@@ -353,11 +282,11 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
 	unsigned long long next, ino;
 	int error, len;
 
-	name = dentry_name(file->f_path.dentry, 0);
+	name = dentry_name(file->f_path.dentry);
 	if (name == NULL)
 		return -ENOMEM;
 	dir = open_dir(name, &error);
-	kfree(name);
+	__putname(name);
 	if (dir == NULL)
 		return -error;
 	next = file->f_pos;
@@ -373,40 +302,59 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
 
 int hostfs_file_open(struct inode *ino, struct file *file)
 {
+	static DEFINE_MUTEX(open_mutex);
 	char *name;
 	fmode_t mode = 0;
+	int err;
 	int r = 0, w = 0, fd;
 
 	mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
 	if ((mode & HOSTFS_I(ino)->mode) == mode)
 		return 0;
 
-	/*
-	 * The file may already have been opened, but with the wrong access,
-	 * so this resets things and reopens the file with the new access.
-	 */
-	if (HOSTFS_I(ino)->fd != -1) {
-		close_file(&HOSTFS_I(ino)->fd);
-		HOSTFS_I(ino)->fd = -1;
-	}
+	mode |= HOSTFS_I(ino)->mode;
 
-	HOSTFS_I(ino)->mode |= mode;
-	if (HOSTFS_I(ino)->mode & FMODE_READ)
+retry:
+	if (mode & FMODE_READ)
 		r = 1;
-	if (HOSTFS_I(ino)->mode & FMODE_WRITE)
+	if (mode & FMODE_WRITE)
 		w = 1;
 	if (w)
 		r = 1;
 
-	name = dentry_name(file->f_path.dentry, 0);
+	name = dentry_name(file->f_path.dentry);
 	if (name == NULL)
 		return -ENOMEM;
 
 	fd = open_file(name, r, w, append);
-	kfree(name);
+	__putname(name);
 	if (fd < 0)
 		return fd;
-	FILE_HOSTFS_I(file)->fd = fd;
+
+	mutex_lock(&open_mutex);
+	if ((mode & HOSTFS_I(ino)->mode) == mode) {	/* lost the race */
+		mutex_unlock(&open_mutex);
+		close_file(&fd);	/* inode already has a usable fd */
+		return 0;
+	}
+	if ((mode | HOSTFS_I(ino)->mode) != mode) {
+		mode |= HOSTFS_I(ino)->mode;
+		mutex_unlock(&open_mutex);
+		close_file(&fd);
+		goto retry;
+	}
+	if (HOSTFS_I(ino)->fd == -1) {
+		HOSTFS_I(ino)->fd = fd;
+	} else {
+		err = replace_file(fd, HOSTFS_I(ino)->fd);
+		close_file(&fd);
+		if (err < 0) {
+			mutex_unlock(&open_mutex);
+			return err;
+		}
+	}
+	HOSTFS_I(ino)->mode = mode;
+	mutex_unlock(&open_mutex);
 
 	return 0;
 }
@@ -544,54 +492,50 @@ static const struct address_space_operations hostfs_aops = {
 	.write_end	= hostfs_write_end,
 };
 
-static int init_inode(struct inode *inode, struct dentry *dentry)
+static int read_name(struct inode *ino, char *name)
 {
-	char *name;
-	int type, err = -ENOMEM;
-	int maj, min;
-	dev_t rdev = 0;
+	dev_t rdev;
+	struct hostfs_stat st;
+	int err = stat_file(name, &st, -1);
+	if (err)
+		return err;
 
-	if (dentry) {
-		name = dentry_name(dentry, 0);
-		if (name == NULL)
-			goto out;
-		type = file_type(name, &maj, &min);
-		/* Reencode maj and min with the kernel encoding.*/
-		rdev = MKDEV(maj, min);
-		kfree(name);
-	}
-	else type = OS_TYPE_DIR;
+	/* Reencode maj and min with the kernel encoding.*/
+	rdev = MKDEV(st.maj, st.min);
 
-	err = 0;
-	if (type == OS_TYPE_SYMLINK)
-		inode->i_op = &page_symlink_inode_operations;
-	else if (type == OS_TYPE_DIR)
-		inode->i_op = &hostfs_dir_iops;
-	else inode->i_op = &hostfs_iops;
-
-	if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
-	else inode->i_fop = &hostfs_file_fops;
-
-	if (type == OS_TYPE_SYMLINK)
-		inode->i_mapping->a_ops = &hostfs_link_aops;
-	else inode->i_mapping->a_ops = &hostfs_aops;
-
-	switch (type) {
-	case OS_TYPE_CHARDEV:
-		init_special_inode(inode, S_IFCHR, rdev);
+	switch (st.mode & S_IFMT) {
+	case S_IFLNK:
+		ino->i_op = &hostfs_link_iops;
 		break;
-	case OS_TYPE_BLOCKDEV:
-		init_special_inode(inode, S_IFBLK, rdev);
+	case S_IFDIR:
+		ino->i_op = &hostfs_dir_iops;
+		ino->i_fop = &hostfs_dir_fops;
 		break;
-	case OS_TYPE_FIFO:
-		init_special_inode(inode, S_IFIFO, 0);
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		init_special_inode(ino, st.mode & S_IFMT, rdev);
+		ino->i_op = &hostfs_iops;
 		break;
-	case OS_TYPE_SOCK:
-		init_special_inode(inode, S_IFSOCK, 0);
-		break;
-	}
- out:
-	return err;
+
+	default:
+		ino->i_op = &hostfs_iops;
+		ino->i_fop = &hostfs_file_fops;
+		ino->i_mapping->a_ops = &hostfs_aops;
+	}
+
+	ino->i_ino = st.ino;
+	ino->i_mode = st.mode;
+	ino->i_nlink = st.nlink;
+	ino->i_uid = st.uid;
+	ino->i_gid = st.gid;
+	ino->i_atime = st.atime;
+	ino->i_mtime = st.mtime;
+	ino->i_ctime = st.ctime;
+	ino->i_size = st.size;
+	ino->i_blocks = st.blocks;
+	return 0;
 }
 
 int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -607,12 +551,8 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		goto out;
 	}
 
-	error = init_inode(inode, dentry);
-	if (error)
-		goto out_put;
-
 	error = -ENOMEM;
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		goto out_put;
 
@@ -622,9 +562,10 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
 	if (fd < 0)
 		error = fd;
-	else error = read_name(inode, name);
+	else
+		error = read_name(inode, name);
 
-	kfree(name);
+	__putname(name);
 	if (error)
 		goto out_put;
 
@@ -652,17 +593,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 		goto out;
 	}
 
-	err = init_inode(inode, dentry);
-	if (err)
-		goto out_put;
-
 	err = -ENOMEM;
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		goto out_put;
 
 	err = read_name(inode, name);
-	kfree(name);
+
+	__putname(name);
 	if (err == -ENOENT) {
 		iput(inode);
 		inode = NULL;
@@ -680,36 +618,21 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 	return ERR_PTR(err);
 }
 
-static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
-{
-	char *file;
-	int len;
-
-	file = inode_name(ino, dentry->d_name.len + 1);
-	if (file == NULL)
-		return NULL;
-	strcat(file, "/");
-	len = strlen(file);
-	strncat(file, dentry->d_name.name, dentry->d_name.len);
-	file[len + dentry->d_name.len] = '\0';
-	return file;
-}
-
 int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
 {
 	char *from_name, *to_name;
 	int err;
 
-	if ((from_name = inode_dentry_name(ino, from)) == NULL)
+	if ((from_name = dentry_name(from)) == NULL)
 		return -ENOMEM;
-	to_name = dentry_name(to, 0);
+	to_name = dentry_name(to);
 	if (to_name == NULL) {
-		kfree(from_name);
+		__putname(from_name);
 		return -ENOMEM;
 	}
 	err = link_file(to_name, from_name);
-	kfree(from_name);
-	kfree(to_name);
+	__putname(from_name);
+	__putname(to_name);
 	return err;
 }
 
@@ -718,13 +641,14 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
-		return -ENOMEM;
 	if (append)
 		return -EPERM;
 
+	if ((file = dentry_name(dentry)) == NULL)
+		return -ENOMEM;
+
 	err = unlink_file(file);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -733,10 +657,10 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = make_symlink(file, to);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -745,10 +669,10 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = do_mkdir(file, mode);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -757,10 +681,10 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = do_rmdir(file);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -776,22 +700,20 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 		goto out;
 	}
 
-	err = init_inode(inode, dentry);
-	if (err)
-		goto out_put;
-
 	err = -ENOMEM;
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		goto out_put;
 
 	init_special_inode(inode, mode, dev);
 	err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
-	if (err)
+	if (!err)
 		goto out_free;
 
 	err = read_name(inode, name);
-	kfree(name);
+	__putname(name);
+	if (err)
+		goto out_put;
 	if (err)
 		goto out_put;
 
@@ -799,7 +721,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	return 0;
 
  out_free:
-	kfree(name);
+	__putname(name);
  out_put:
 	iput(inode);
  out:
@@ -812,15 +734,15 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
 	char *from_name, *to_name;
 	int err;
 
-	if ((from_name = inode_dentry_name(from_ino, from)) == NULL)
+	if ((from_name = dentry_name(from)) == NULL)
 		return -ENOMEM;
-	if ((to_name = inode_dentry_name(to_ino, to)) == NULL) {
-		kfree(from_name);
+	if ((to_name = dentry_name(to)) == NULL) {
+		__putname(from_name);
 		return -ENOMEM;
 	}
 	err = rename_file(from_name, to_name);
-	kfree(from_name);
-	kfree(to_name);
+	__putname(from_name);
+	__putname(to_name);
 	return err;
 }
 
@@ -832,7 +754,7 @@ int hostfs_permission(struct inode *ino, int desired)
 	if (desired & MAY_READ) r = 1;
 	if (desired & MAY_WRITE) w = 1;
 	if (desired & MAY_EXEC) x = 1;
-	name = inode_name(ino, 0);
+	name = inode_name(ino);
 	if (name == NULL)
 		return -ENOMEM;
 
@@ -841,7 +763,7 @@ int hostfs_permission(struct inode *ino, int desired)
 		err = 0;
 	else
 		err = access_file(name, r, w, x);
-	kfree(name);
+	__putname(name);
 	if (!err)
 		err = generic_permission(ino, desired, NULL);
 	return err;
@@ -849,13 +771,14 @@ int hostfs_permission(struct inode *ino, int desired)
 
 int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
+	struct inode *inode = dentry->d_inode;
 	struct hostfs_iattr attrs;
 	char *name;
 	int err;
 
-	int fd = HOSTFS_I(dentry->d_inode)->fd;
+	int fd = HOSTFS_I(inode)->fd;
 
-	err = inode_change_ok(dentry->d_inode, attr);
+	err = inode_change_ok(inode, attr);
 	if (err)
 		return err;
 
@@ -897,15 +820,26 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (attr->ia_valid & ATTR_MTIME_SET) {
 		attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
 	}
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		return -ENOMEM;
 	err = set_attr(name, &attrs, fd);
-	kfree(name);
+	__putname(name);
 	if (err)
 		return err;
 
-	return inode_setattr(dentry->d_inode, attr);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)	/* vmtruncate()'s result, not the outer err */
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static const struct inode_operations hostfs_iops = {
@@ -935,32 +869,41 @@ static const struct inode_operations hostfs_dir_iops = {
 	.setattr	= hostfs_setattr,
 };
 
-int hostfs_link_readpage(struct file *file, struct page *page)
-{
-	char *buffer, *name;
-	int err;
-
-	buffer = kmap(page);
-	name = inode_name(page->mapping->host, 0);
-	if (name == NULL)
-		return -ENOMEM;
-	err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE);
-	kfree(name);
-	if (err == PAGE_CACHE_SIZE)
-		err = -E2BIG;
-	else if (err > 0) {
-		flush_dcache_page(page);
-		SetPageUptodate(page);
-		if (PageError(page)) ClearPageError(page);
-		err = 0;
+static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *link = __getname();
+	if (link) {
+		char *path = dentry_name(dentry);
+		int err = -ENOMEM;	/* result if dentry_name() fails */
+		if (path) {
+			err = hostfs_do_readlink(path, link, PATH_MAX);
+			if (err == PATH_MAX)
+				err = -E2BIG;
+			__putname(path);
+		}
+		if (err < 0) {
+			__putname(link);
+			link = ERR_PTR(err);
+		}
+	} else {
+		link = ERR_PTR(-ENOMEM);
 	}
-	kunmap(page);
-	unlock_page(page);
-	return err;
+
+	nd_set_link(nd, link);
+	return NULL;
 }
 
-static const struct address_space_operations hostfs_link_aops = {
-	.readpage	= hostfs_link_readpage,
+static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		__putname(s);
+}
+
+static const struct inode_operations hostfs_link_iops = {
+	.readlink	= generic_readlink,
+	.follow_link	= hostfs_follow_link,
+	.put_link	= hostfs_put_link,
 };
 
 static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
@@ -980,49 +923,41 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 		req_root = "";
 
 	err = -ENOMEM;
-	host_root_path = kmalloc(strlen(root_ino) + 1
-				 + strlen(req_root) + 1, GFP_KERNEL);
+	sb->s_fs_info = host_root_path =
+		kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL);
 	if (host_root_path == NULL)
 		goto out;
 
 	sprintf(host_root_path, "%s/%s", root_ino, req_root);
 
-	root_inode = hostfs_iget(sb);
-	if (IS_ERR(root_inode)) {
-		err = PTR_ERR(root_inode);
-		goto out_free;
-	}
+	root_inode = new_inode(sb);
+	if (!root_inode)
+		goto out;
 
-	err = init_inode(root_inode, NULL);
+	err = read_name(root_inode, host_root_path);
 	if (err)
 		goto out_put;
 
-	HOSTFS_I(root_inode)->host_filename = host_root_path;
-	/*
-	 * Avoid that in the error path, iput(root_inode) frees again
-	 * host_root_path through hostfs_destroy_inode!
-	 */
-	host_root_path = NULL;
+	if (S_ISLNK(root_inode->i_mode)) {
+		char *name = follow_link(host_root_path);
+		err = PTR_ERR(name);
+		if (IS_ERR(name))	/* error pointer: nothing to kfree() */
+			goto out_put;
+		err = read_name(root_inode, name);
+		kfree(name);
+		if (err)
+			goto out_put;
+	}
 
 	err = -ENOMEM;
 	sb->s_root = d_alloc_root(root_inode);
 	if (sb->s_root == NULL)
 		goto out_put;
 
-	err = hostfs_read_inode(root_inode);
-	if (err) {
-		/* No iput in this case because the dput does that for us */
-		dput(sb->s_root);
-		sb->s_root = NULL;
-		goto out;
-	}
-
 	return 0;
 
 out_put:
 	iput(root_inode);
-out_free:
-	kfree(host_root_path);
 out:
 	return err;
 }
@@ -1034,11 +969,17 @@ static int hostfs_read_sb(struct file_system_type *type,
 	return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
 }
 
+static void hostfs_kill_sb(struct super_block *s)
+{
+	kill_anon_super(s);
+	kfree(s->s_fs_info);
+}
+
 static struct file_system_type hostfs_type = {
 	.owner 		= THIS_MODULE,
 	.name 		= "hostfs",
 	.get_sb 	= hostfs_read_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= hostfs_kill_sb,
 	.fs_flags 	= 0,
 };
 
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index b79424f9328..6777aa06ce2 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -19,11 +19,27 @@
 #include "user.h"
 #include <utime.h>
 
-int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
-	      int *nlink_out, int *uid_out, int *gid_out,
-	      unsigned long long *size_out, struct timespec *atime_out,
-	      struct timespec *mtime_out, struct timespec *ctime_out,
-	      int *blksize_out, unsigned long long *blocks_out, int fd)
+static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
+{
+	p->ino = buf->st_ino;
+	p->mode = buf->st_mode;
+	p->nlink = buf->st_nlink;
+	p->uid = buf->st_uid;
+	p->gid = buf->st_gid;
+	p->size = buf->st_size;
+	p->atime.tv_sec = buf->st_atime;
+	p->atime.tv_nsec = 0;
+	p->ctime.tv_sec = buf->st_ctime;
+	p->ctime.tv_nsec = 0;
+	p->mtime.tv_sec = buf->st_mtime;
+	p->mtime.tv_nsec = 0;
+	p->blksize = buf->st_blksize;
+	p->blocks = buf->st_blocks;
+	p->maj = os_major(buf->st_rdev);
+	p->min = os_minor(buf->st_rdev);
+}
+
+int stat_file(const char *path, struct hostfs_stat *p, int fd)
 {
 	struct stat64 buf;
 
@@ -33,68 +49,10 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
 	} else if (lstat64(path, &buf) < 0) {
 		return -errno;
 	}
-
-	if (inode_out != NULL)
-		*inode_out = buf.st_ino;
-	if (mode_out != NULL)
-		*mode_out = buf.st_mode;
-	if (nlink_out != NULL)
-		*nlink_out = buf.st_nlink;
-	if (uid_out != NULL)
-		*uid_out = buf.st_uid;
-	if (gid_out != NULL)
-		*gid_out = buf.st_gid;
-	if (size_out != NULL)
-		*size_out = buf.st_size;
-	if (atime_out != NULL) {
-		atime_out->tv_sec = buf.st_atime;
-		atime_out->tv_nsec = 0;
-	}
-	if (mtime_out != NULL) {
-		mtime_out->tv_sec = buf.st_mtime;
-		mtime_out->tv_nsec = 0;
-	}
-	if (ctime_out != NULL) {
-		ctime_out->tv_sec = buf.st_ctime;
-		ctime_out->tv_nsec = 0;
-	}
-	if (blksize_out != NULL)
-		*blksize_out = buf.st_blksize;
-	if (blocks_out != NULL)
-		*blocks_out = buf.st_blocks;
+	stat64_to_hostfs(&buf, p);
 	return 0;
 }
 
-int file_type(const char *path, int *maj, int *min)
-{
- 	struct stat64 buf;
-
-	if (lstat64(path, &buf) < 0)
-		return -errno;
-	/*
-	 * We cannot pass rdev as is because glibc and the kernel disagree
-	 * about its definition.
-	 */
-	if (maj != NULL)
-		*maj = major(buf.st_rdev);
-	if (min != NULL)
-		*min = minor(buf.st_rdev);
-
-	if (S_ISDIR(buf.st_mode))
-		return OS_TYPE_DIR;
-	else if (S_ISLNK(buf.st_mode))
-		return OS_TYPE_SYMLINK;
-	else if (S_ISCHR(buf.st_mode))
-		return OS_TYPE_CHARDEV;
-	else if (S_ISBLK(buf.st_mode))
-		return OS_TYPE_BLOCKDEV;
-	else if (S_ISFIFO(buf.st_mode))
-		return OS_TYPE_FIFO;
-	else if (S_ISSOCK(buf.st_mode))
-		return OS_TYPE_SOCK;
-	else return OS_TYPE_FILE;
-}
-
 int access_file(char *path, int r, int w, int x)
 {
 	int mode = 0;
@@ -202,6 +160,11 @@ int fsync_file(int fd, int datasync)
 	return 0;
 }
 
+int replace_file(int oldfd, int fd)
+{
+	return dup2(oldfd, fd);
+}
+
 void close_file(void *stream)
 {
 	close(*((int *) stream));
@@ -235,8 +198,8 @@ int file_create(char *name, int ur, int uw, int ux, int gr,
 
 int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 {
+	struct hostfs_stat st;
 	struct timeval times[2];
-	struct timespec atime_ts, mtime_ts;
 	int err, ma;
 
 	if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
@@ -279,15 +242,14 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 	 */
 	ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET);
 	if (attrs->ia_valid & ma) {
-		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
-				&atime_ts, &mtime_ts, NULL, NULL, NULL, fd);
+		err = stat_file(file, &st, fd);
 		if (err != 0)
 			return err;
 
-		times[0].tv_sec = atime_ts.tv_sec;
-		times[0].tv_usec = atime_ts.tv_nsec / 1000;
-		times[1].tv_sec = mtime_ts.tv_sec;
-		times[1].tv_usec = mtime_ts.tv_nsec / 1000;
+		times[0].tv_sec = st.atime.tv_sec;
+		times[0].tv_usec = st.atime.tv_nsec / 1000;
+		times[1].tv_sec = st.mtime.tv_sec;
+		times[1].tv_usec = st.mtime.tv_nsec / 1000;
 
 		if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
 			times[0].tv_sec = attrs->ia_atime.tv_sec;
@@ -308,9 +270,9 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 
 	/* Note: ctime is not handled */
 	if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) {
-		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
-				&attrs->ia_atime, &attrs->ia_mtime, NULL,
-				NULL, NULL, fd);
+		err = stat_file(file, &st, fd);
 		if (err != 0)
 			return err;
+		attrs->ia_atime = st.atime;	/* st is only filled in on success */
+		attrs->ia_mtime = st.mtime;
 	}
@@ -361,7 +323,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
 {
 	int err;
 
-	err = mknod(file, mode, makedev(major, minor));
+	err = mknod(file, mode, os_makedev(major, minor));
 	if (err)
 		return -errno;
 	return 0;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index a9ae9bfa752..c0340887c7e 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -97,10 +97,19 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hpfs_get_block,
 				&hpfs_i(mapping->host)->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 75f9d432485..b59eac0232a 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -281,7 +281,7 @@ void hpfs_write_inode(struct inode *);
 void hpfs_write_inode_nolock(struct inode *);
 int hpfs_setattr(struct dentry *, struct iattr *);
 void hpfs_write_if_changed(struct inode *);
-void hpfs_delete_inode(struct inode *);
+void hpfs_evict_inode(struct inode *);
 
 /* map.c */
 
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 1042a9bc97f..56f0da1cfd1 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -277,9 +277,15 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		goto out_unlock;
 
-	error = inode_setattr(inode, attr);
-	if (error)
-		goto out_unlock;
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			goto out_unlock;	/* exit via out_unlock like the checks above */
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 
 	hpfs_write_inode(inode);
 
@@ -296,11 +302,13 @@ void hpfs_write_if_changed(struct inode *inode)
 		hpfs_write_inode(inode);
 }
 
-void hpfs_delete_inode(struct inode *inode)
+void hpfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	lock_kernel();
-	hpfs_remove_fnode(inode->i_sb, inode->i_ino);
-	unlock_kernel();
-	clear_inode(inode);
+	end_writeback(inode);
+	if (!inode->i_nlink) {
+		lock_kernel();
+		hpfs_remove_fnode(inode->i_sb, inode->i_ino);
+		unlock_kernel();
+	}
 }
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index aa53842c599..2607010be2f 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -450,7 +450,7 @@ static const struct super_operations hpfs_sops =
 {
 	.alloc_inode	= hpfs_alloc_inode,
 	.destroy_inode	= hpfs_destroy_inode,
-	.delete_inode	= hpfs_delete_inode,
+	.evict_inode	= hpfs_evict_inode,
 	.put_super	= hpfs_put_super,
 	.statfs		= hpfs_statfs,
 	.remount_fs	= hpfs_remount_fs,
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 826c3f9d29a..7b027720d82 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/statfs.h>
 #include <linux/types.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 #include "os.h"
 
@@ -623,12 +624,11 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
 	return &hi->vfs_inode;
 }
 
-void hppfs_delete_inode(struct inode *ino)
+void hppfs_evict_inode(struct inode *ino)
 {
+	end_writeback(ino);
 	dput(HPPFS_I(ino)->proc_dentry);
 	mntput(ino->i_sb->s_fs_info);
-
-	clear_inode(ino);
 }
 
 static void hppfs_destroy_inode(struct inode *inode)
@@ -639,7 +639,7 @@ static void hppfs_destroy_inode(struct inode *inode)
 static const struct super_operations hppfs_sbops = {
 	.alloc_inode	= hppfs_alloc_inode,
 	.destroy_inode	= hppfs_destroy_inode,
-	.delete_inode	= hppfs_delete_inode,
+	.evict_inode	= hppfs_evict_inode,
 	.statfs		= hppfs_statfs,
 };
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a4e9a7ec369..6e5bd42f386 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -371,27 +371,10 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 	hugetlb_unreserve_pages(inode, start, freed);
 }
 
-static void hugetlbfs_delete_inode(struct inode *inode)
+static void hugetlbfs_evict_inode(struct inode *inode)
 {
 	truncate_hugepages(inode, 0);
-	clear_inode(inode);
-}
-
-static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
-{
-	if (generic_detach_inode(inode)) {
-		truncate_hugepages(inode, 0);
-		clear_inode(inode);
-		destroy_inode(inode);
-	}
-}
-
-static void hugetlbfs_drop_inode(struct inode *inode)
-{
-	if (!inode->i_nlink)
-		generic_delete_inode(inode);
-	else
-		hugetlbfs_forget_inode(inode);
+	end_writeback(inode);
 }
 
 static inline void
@@ -448,19 +431,20 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	error = inode_change_ok(inode, attr);
 	if (error)
-		goto out;
+		return error;
 
 	if (ia_valid & ATTR_SIZE) {
 		error = -EINVAL;
-		if (!(attr->ia_size & ~huge_page_mask(h)))
-			error = hugetlb_vmtruncate(inode, attr->ia_size);
+		if (attr->ia_size & ~huge_page_mask(h))
+			return -EINVAL;
+		error = hugetlb_vmtruncate(inode, attr->ia_size);
 		if (error)
-			goto out;
-		attr->ia_valid &= ~ATTR_SIZE;
+			return error;
 	}
-	error = inode_setattr(inode, attr);
-out:
-	return error;
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 
@@ -712,9 +696,8 @@ static const struct inode_operations hugetlbfs_inode_operations = {
 static const struct super_operations hugetlbfs_ops = {
 	.alloc_inode    = hugetlbfs_alloc_inode,
 	.destroy_inode  = hugetlbfs_destroy_inode,
+	.evict_inode	= hugetlbfs_evict_inode,
 	.statfs		= hugetlbfs_statfs,
-	.delete_inode	= hugetlbfs_delete_inode,
-	.drop_inode	= hugetlbfs_drop_inode,
 	.put_super	= hugetlbfs_put_super,
 	.show_options	= generic_show_options,
 };
diff --git a/fs/inode.c b/fs/inode.c
index 722860b323a..86464332e59 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -20,7 +20,6 @@
 #include <linux/pagemap.h>
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
-#include <linux/inotify.h>
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
@@ -264,12 +263,8 @@ void inode_init_once(struct inode *inode)
 	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
 	i_size_ordered_init(inode);
-#ifdef CONFIG_INOTIFY
-	INIT_LIST_HEAD(&inode->inotify_watches);
-	mutex_init(&inode->inotify_mutex);
-#endif
 #ifdef CONFIG_FSNOTIFY
-	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
+	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
 #endif
 }
 EXPORT_SYMBOL(inode_init_once);
@@ -294,32 +289,34 @@ void __iget(struct inode *inode)
 	inodes_stat.nr_unused--;
 }
 
-/**
- * clear_inode - clear an inode
- * @inode: inode to clear
- *
- * This is called by the filesystem to tell us
- * that the inode is no longer useful. We just
- * terminate it with extreme prejudice.
- */
-void clear_inode(struct inode *inode)
+void end_writeback(struct inode *inode)
 {
 	might_sleep();
-	invalidate_inode_buffers(inode);
-
 	BUG_ON(inode->i_data.nrpages);
+	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	inode_sync_wait(inode);
-	if (inode->i_sb->s_op->clear_inode)
-		inode->i_sb->s_op->clear_inode(inode);
+	inode->i_state = I_FREEING | I_CLEAR;
+}
+EXPORT_SYMBOL(end_writeback);
+
+static void evict(struct inode *inode)
+{
+	const struct super_operations *op = inode->i_sb->s_op;
+
+	if (op->evict_inode) {
+		op->evict_inode(inode);
+	} else {
+		if (inode->i_data.nrpages)
+			truncate_inode_pages(&inode->i_data, 0);
+		end_writeback(inode);
+	}
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 		bd_forget(inode);
 	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
 		cd_forget(inode);
-	inode->i_state = I_CLEAR;
 }
-EXPORT_SYMBOL(clear_inode);
 
 /*
  * dispose_list - dispose of the contents of a local list
@@ -338,9 +335,7 @@ static void dispose_list(struct list_head *head)
 		inode = list_first_entry(head, struct inode, i_list);
 		list_del(&inode->i_list);
 
-		if (inode->i_data.nrpages)
-			truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
+		evict(inode);
 
 		spin_lock(&inode_lock);
 		hlist_del_init(&inode->i_hash);
@@ -413,7 +408,6 @@ int invalidate_inodes(struct super_block *sb)
 
 	down_write(&iprune_sem);
 	spin_lock(&inode_lock);
-	inotify_unmount_inodes(&sb->s_inodes);
 	fsnotify_unmount_inodes(&sb->s_inodes);
 	busy = invalidate_list(&sb->s_inodes, &throw_away);
 	spin_unlock(&inode_lock);
@@ -553,7 +547,7 @@ repeat:
 			continue;
 		if (!test(inode, data))
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -578,7 +572,7 @@ repeat:
 			continue;
 		if (inode->i_sb != sb)
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -840,7 +834,7 @@ EXPORT_SYMBOL(iunique);
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		__iget(inode);
 	else
 		/*
@@ -1089,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
 				continue;
 			if (old->i_sb != sb)
 				continue;
-			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+			if (old->i_state & (I_FREEING|I_WILL_FREE))
 				continue;
 			break;
 		}
@@ -1128,7 +1122,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 				continue;
 			if (!test(old, data))
 				continue;
-			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+			if (old->i_state & (I_FREEING|I_WILL_FREE))
 				continue;
 			break;
 		}
@@ -1180,69 +1174,51 @@ void remove_inode_hash(struct inode *inode)
 }
 EXPORT_SYMBOL(remove_inode_hash);
 
+int generic_delete_inode(struct inode *inode)
+{
+	return 1;
+}
+EXPORT_SYMBOL(generic_delete_inode);
+
 /*
- * Tell the filesystem that this inode is no longer of any interest and should
- * be completely destroyed.
- *
- * We leave the inode in the inode hash table until *after* the filesystem's
- * ->delete_inode completes.  This ensures that an iget (such as nfsd might
- * instigate) will always find up-to-date information either in the hash or on
- * disk.
- *
- * I_FREEING is set so that no-one will take a new reference to the inode while
- * it is being deleted.
+ * Normal UNIX filesystem behaviour: delete the
+ * inode when the usage count drops to zero, and
+ * i_nlink is zero.
  */
-void generic_delete_inode(struct inode *inode)
+int generic_drop_inode(struct inode *inode)
 {
-	const struct super_operations *op = inode->i_sb->s_op;
-
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	WARN_ON(inode->i_state & I_NEW);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-
-	if (op->delete_inode) {
-		void (*delete)(struct inode *) = op->delete_inode;
-		/* Filesystems implementing their own
-		 * s_op->delete_inode are required to call
-		 * truncate_inode_pages and clear_inode()
-		 * internally */
-		delete(inode);
-	} else {
-		truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
-	}
-	spin_lock(&inode_lock);
-	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
-	wake_up_inode(inode);
-	BUG_ON(inode->i_state != I_CLEAR);
-	destroy_inode(inode);
+	return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
 }
-EXPORT_SYMBOL(generic_delete_inode);
+EXPORT_SYMBOL_GPL(generic_drop_inode);
 
-/**
- *	generic_detach_inode - remove inode from inode lists
- *	@inode: inode to remove
- *
- *	Remove inode from inode lists, write it if it's dirty. This is just an
- *	internal VFS helper exported for hugetlbfs. Do not use!
+/*
+ * Called when we're dropping the last reference
+ * to an inode.
  *
- *	Returns 1 if inode should be completely destroyed.
+ * Call the FS "drop_inode()" function, defaulting to
+ * the legacy UNIX filesystem behaviour.  If it tells
+ * us to evict inode, do so.  Otherwise, retain inode
+ * in cache if fs is alive, sync and evict if fs is
+ * shutting down.
  */
-int generic_detach_inode(struct inode *inode)
+static void iput_final(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
+	const struct super_operations *op = inode->i_sb->s_op;
+	int drop;
 
-	if (!hlist_unhashed(&inode->i_hash)) {
+	if (op && op->drop_inode)
+		drop = op->drop_inode(inode);
+	else
+		drop = generic_drop_inode(inode);
+
+	if (!drop) {
 		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
 			list_move(&inode->i_list, &inode_unused);
 		inodes_stat.nr_unused++;
 		if (sb->s_flags & MS_ACTIVE) {
 			spin_unlock(&inode_lock);
-			return 0;
+			return;
 		}
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_WILL_FREE;
@@ -1260,56 +1236,15 @@ int generic_detach_inode(struct inode *inode)
 	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
-	return 1;
-}
-EXPORT_SYMBOL_GPL(generic_detach_inode);
-
-static void generic_forget_inode(struct inode *inode)
-{
-	if (!generic_detach_inode(inode))
-		return;
-	if (inode->i_data.nrpages)
-		truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	evict(inode);
+	spin_lock(&inode_lock);
+	hlist_del_init(&inode->i_hash);
+	spin_unlock(&inode_lock);
 	wake_up_inode(inode);
+	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 	destroy_inode(inode);
 }
 
-/*
- * Normal UNIX filesystem behaviour: delete the
- * inode when the usage count drops to zero, and
- * i_nlink is zero.
- */
-void generic_drop_inode(struct inode *inode)
-{
-	if (!inode->i_nlink)
-		generic_delete_inode(inode);
-	else
-		generic_forget_inode(inode);
-}
-EXPORT_SYMBOL_GPL(generic_drop_inode);
-
-/*
- * Called when we're dropping the last reference
- * to an inode.
- *
- * Call the FS "drop()" function, defaulting to
- * the legacy UNIX filesystem behaviour..
- *
- * NOTE! NOTE! NOTE! We're called with the inode lock
- * held, and the drop function is supposed to release
- * the lock!
- */
-static inline void iput_final(struct inode *inode)
-{
-	const struct super_operations *op = inode->i_sb->s_op;
-	void (*drop)(struct inode *) = generic_drop_inode;
-
-	if (op && op->drop_inode)
-		drop = op->drop_inode;
-	drop(inode);
-}
-
 /**
  *	iput	- put an inode
  *	@inode: inode to put
@@ -1322,7 +1257,7 @@ static inline void iput_final(struct inode *inode)
 void iput(struct inode *inode)
 {
 	if (inode) {
-		BUG_ON(inode->i_state == I_CLEAR);
+		BUG_ON(inode->i_state & I_CLEAR);
 
 		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
 			iput_final(inode);
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 55f1dde2fa8..404111b016c 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index c5e1450d79f..a906f538d11 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index f0294410868..617a1e5694c 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -2,11 +2,12 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
- * Created by Arjan van de Ven <arjanv@redhat.com>
- *
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
  *		    University of Szeged, Hungary
  *
+ * Created by Arjan van de Ven <arjan@infradead.org>
+ *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
  */
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index 7d1d72faa77..e471a9106fd 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -3,6 +3,7 @@
  *
  * Copyright © 2004   Ferenc Havasi <havasi@inf.u-szeged.hu>,
  *		      University of Szeged, Hungary
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index cd02acafde8..ed25ae7c98e 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2007 Nokia Corporation. All rights reserved.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by Richard Purdie <rpurdie@openedhand.com>
  *
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 546d1538d07..9696ad9ef5f 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by Arjan van de Ven <arjanv@redhat.com>
  *
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index 170d289ac78..a12b4f76337 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by Arjan van de Ven <arjanv@redhat.com>
  *
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index b46661a4275..97fc45de6f8 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index ec353841392..e0b76c87a91 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index a113ecc3baf..c4f8eef5ca6 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 166062a6823..ed78a3cf3cb 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
@@ -232,9 +233,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	jffs2_free_raw_inode(ri);
 	return ret;
 }
@@ -454,9 +453,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ret;
 }
 
@@ -601,9 +598,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ret;
 }
 
@@ -778,9 +773,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ret;
 }
 
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 6286ad9b00f..abac961f617 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 81349702443..1c0a08d711a 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 459d39d1ea0..6b2964a1985 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
@@ -169,13 +170,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	mutex_unlock(&f->sem);
 	jffs2_complete_reservation(c);
 
-	/* We have to do the simple_setsize() without f->sem held, since
+	/* We have to do the truncate_setsize() without f->sem held, since
 	   some pages may be locked and waiting for it in readpage().
 	   We are protected from a simultaneous write() extending i_size
 	   back past iattr->ia_size, because do_truncate() holds the
 	   generic inode semaphore. */
 	if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
-		simple_setsize(inode, iattr->ia_size);
+		truncate_setsize(inode, iattr->ia_size);
 		inode->i_blocks = (inode->i_size + 511) >> 9;
 	}	
 
@@ -225,7 +226,7 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 }
 
 
-void jffs2_clear_inode (struct inode *inode)
+void jffs2_evict_inode (struct inode *inode)
 {
 	/* We can forget about this inode for now - drop all
 	 *  the nodelists associated with it, etc.
@@ -233,7 +234,9 @@ void jffs2_clear_inode (struct inode *inode)
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 
-	D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
+	D1(printk(KERN_DEBUG "jffs2_evict_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	jffs2_do_clear_inode(c, f);
 }
 
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index f5e96bd656e..846a7945249 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index 9d41f43e47b..859a598af02 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index c6923da9826..2e4a86763c0 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 85ef6dbb1be..6784bc89add 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index a881a42f19e..523a9169105 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -24,7 +24,6 @@
 #ifdef __ECOS
 #include "os-ecos.h"
 #else
-#include <linux/mtd/compatmac.h> /* For compatibility with older kernels */
 #include "os-linux.h"
 #endif
 
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 4791aacf308..00bae7cc2e4 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -171,7 +171,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations;
 int jffs2_setattr (struct dentry *, struct iattr *);
 int jffs2_do_setattr (struct inode *, struct iattr *);
 struct inode *jffs2_iget(struct super_block *, unsigned long);
-void jffs2_clear_inode (struct inode *);
+void jffs2_evict_inode (struct inode *);
 void jffs2_dirty_inode(struct inode *inode);
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 511e2d609d1..662bba09950 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -135,7 +135,7 @@ static const struct super_operations jffs2_super_operations =
 	.write_super =	jffs2_write_super,
 	.statfs =	jffs2_statfs,
 	.remount_fs =	jffs2_remount_fs,
-	.clear_inode =	jffs2_clear_inode,
+	.evict_inode =	jffs2_evict_inode,
 	.dirty_inode =	jffs2_dirty_inode,
 	.sync_fs =	jffs2_sync_fs,
 };
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d258e261bdc..9b572ca40a4 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -588,7 +588,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re
 
 void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
 {
-	/* It's called from jffs2_clear_inode() on inode removing.
+	/* It's called from jffs2_evict_inode() on inode removing.
 	   When an inode with XATTR is removed, those XATTRs must be removed. */
 	struct jffs2_xattr_ref *ref, *_ref;
 
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 127263cc865..c5ce6c1d1ff 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -17,6 +17,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
@@ -107,11 +108,18 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 			return rc;
 	}
 
-	rc = inode_setattr(inode, iattr);
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		rc = vmtruncate(inode, iattr->ia_size);
+		if (rc)
+			return rc;
+	}
 
-	if (!rc && (iattr->ia_valid & ATTR_MODE))
-		rc = jfs_acl_chmod(inode);
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
 
+	if (iattr->ia_valid & ATTR_MODE)
+		rc = jfs_acl_chmod(inode);
 	return rc;
 }
 
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index ed9ba6fe04f..9978803ceed 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -145,31 +145,32 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 		return 0;
 }
 
-void jfs_delete_inode(struct inode *inode)
+void jfs_evict_inode(struct inode *inode)
 {
-	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
+	jfs_info("In jfs_evict_inode, inode = 0x%p", inode);
 
-	if (!is_bad_inode(inode))
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
 		dquot_initialize(inode);
 
-	if (!is_bad_inode(inode) &&
-	    (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
-		truncate_inode_pages(&inode->i_data, 0);
+		if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
+			truncate_inode_pages(&inode->i_data, 0);
 
-		if (test_cflag(COMMIT_Freewmap, inode))
-			jfs_free_zero_link(inode);
+			if (test_cflag(COMMIT_Freewmap, inode))
+				jfs_free_zero_link(inode);
 
-		diFree(inode);
+			diFree(inode);
 
-		/*
-		 * Free the inode from the quota allocation.
-		 */
-		dquot_initialize(inode);
-		dquot_free_inode(inode);
-		dquot_drop(inode);
+			/*
+			 * Free the inode from the quota allocation.
+			 */
+			dquot_initialize(inode);
+			dquot_free_inode(inode);
+		}
+	} else {
+		truncate_inode_pages(&inode->i_data, 0);
 	}
-
-	clear_inode(inode);
+	end_writeback(inode);
+	dquot_drop(inode);
 }
 
 void jfs_dirty_inode(struct inode *inode)
@@ -303,8 +304,17 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
 {
-	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	int ret;
+
+	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
 				jfs_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
@@ -317,9 +327,24 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				offset, nr_segs, jfs_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 const struct address_space_operations jfs_aops = {
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 11042b1f44b..155e91eff07 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -27,7 +27,7 @@ extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
 extern struct inode *jfs_iget(struct super_block *, unsigned long);
 extern int jfs_commit_inode(struct inode *, int);
 extern int jfs_write_inode(struct inode *, struct writeback_control *);
-extern void jfs_delete_inode(struct inode *);
+extern void jfs_evict_inode(struct inode *);
 extern void jfs_dirty_inode(struct inode *);
 extern void jfs_truncate(struct inode *);
 extern void jfs_truncate_nolock(struct inode *, loff_t);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b38f96bef82..ec8c3e4baca 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -132,11 +132,6 @@ static void jfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(jfs_inode_cachep, ji);
 }
 
-static void jfs_clear_inode(struct inode *inode)
-{
-	dquot_drop(inode);
-}
-
 static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
@@ -765,8 +760,7 @@ static const struct super_operations jfs_super_operations = {
 	.destroy_inode	= jfs_destroy_inode,
 	.dirty_inode	= jfs_dirty_inode,
 	.write_inode	= jfs_write_inode,
-	.delete_inode	= jfs_delete_inode,
-	.clear_inode	= jfs_clear_inode,
+	.evict_inode	= jfs_evict_inode,
 	.put_super	= jfs_put_super,
 	.sync_fs	= jfs_sync_fs,
 	.freeze_fs	= jfs_freeze,
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index fa96bbb2634..2d7f165d0f1 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -86,46 +86,25 @@ struct ea_buffer {
 #define EA_MALLOC	0x0008
 
 
+static int is_known_namespace(const char *name)
+{
+	if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
+	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+	    strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+	    strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+		return false;
+
+	return true;
+}
+
 /*
  * These three routines are used to recognize on-disk extended attributes
  * that are in a recognized namespace.  If the attribute is not recognized,
  * "os2." is prepended to the name
  */
-static inline int is_os2_xattr(struct jfs_ea *ea)
+static int is_os2_xattr(struct jfs_ea *ea)
 {
-	/*
-	 * Check for "system."
-	 */
-	if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
-		return false;
-	/*
-	 * Check for "user."
-	 */
-	if ((ea->namelen >= XATTR_USER_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
-		return false;
-	/*
-	 * Check for "security."
-	 */
-	if ((ea->namelen >= XATTR_SECURITY_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_SECURITY_PREFIX,
-		     XATTR_SECURITY_PREFIX_LEN))
-		return false;
-	/*
-	 * Check for "trusted."
-	 */
-	if ((ea->namelen >= XATTR_TRUSTED_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
-		return false;
-	/*
-	 * Add any other valid namespace prefixes here
-	 */
-
-	/*
-	 * We assume it's OS/2's flat namespace
-	 */
-	return true;
+	return !is_known_namespace(ea->name);
 }
 
 static inline int name_size(struct jfs_ea *ea)
@@ -764,13 +743,23 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return can_set_system_xattr(inode, name, value, value_len);
 
+	if (!strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) {
+		/*
+		 * This makes sure that we aren't trying to set an
+		 * attribute in a different namespace by prefixing it
+		 * with "os2."
+		 */
+		if (is_known_namespace(name + XATTR_OS2_PREFIX_LEN))
+				return -EOPNOTSUPP;
+		return 0;
+	}
+
 	/*
 	 * Don't allow setting an attribute in an unknown namespace.
 	 */
 	if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
 	    strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
-	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
-	    strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
+	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
 		return -EOPNOTSUPP;
 
 	return 0;
@@ -952,19 +941,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
 	int xattr_size;
 	ssize_t size;
 	int namelen = strlen(name);
-	char *os2name = NULL;
 	char *value;
 
-	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
-		os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
-				  GFP_KERNEL);
-		if (!os2name)
-			return -ENOMEM;
-		strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
-		name = os2name;
-		namelen -= XATTR_OS2_PREFIX_LEN;
-	}
-
 	down_read(&JFS_IP(inode)->xattr_sem);
 
 	xattr_size = ea_get(inode, &ea_buf, 0);
@@ -1002,8 +980,6 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
       out:
 	up_read(&JFS_IP(inode)->xattr_sem);
 
-	kfree(os2name);
-
 	return size;
 }
 
@@ -1012,6 +988,19 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data,
 {
 	int err;
 
+	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
+		/*
+		 * skip past "os2." prefix
+		 */
+		name += XATTR_OS2_PREFIX_LEN;
+		/*
+		 * Don't allow retrieving properly prefixed attributes
+		 * by prepending them with "os2."
+		 */
+		if (is_known_namespace(name))
+			return -EOPNOTSUPP;
+	}
+
 	err = __jfs_getxattr(dentry->d_inode, name, data, buf_size);
 
 	return err;
diff --git a/fs/libfs.c b/fs/libfs.c
index dcaf972cbf1..0a9da95317f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -327,77 +327,35 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
 }
 
 /**
- * simple_setsize - handle core mm and vfs requirements for file size change
- * @inode: inode
- * @newsize: new file size
- *
- * Returns 0 on success, -error on failure.
- *
- * simple_setsize must be called with inode_mutex held.
- *
- * simple_setsize will check that the requested new size is OK (see
- * inode_newsize_ok), and then will perform the necessary i_size update
- * and pagecache truncation (if necessary). It will be typically be called
- * from the filesystem's setattr function when ATTR_SIZE is passed in.
- *
- * The inode itself must have correct permissions and attributes to allow
- * i_size to be changed, this function then just checks that the new size
- * requested is valid.
- *
- * In the case of simple in-memory filesystems with inodes stored solely
- * in the inode cache, and file data in the pagecache, nothing more needs
- * to be done to satisfy a truncate request. Filesystems with on-disk
- * blocks for example will need to free them in the case of truncate, in
- * that case it may be easier not to use simple_setsize (but each of its
- * components will likely be required at some point to update pagecache
- * and inode etc).
- */
-int simple_setsize(struct inode *inode, loff_t newsize)
-{
-	loff_t oldsize;
-	int error;
-
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
-	oldsize = inode->i_size;
-	i_size_write(inode, newsize);
-	truncate_pagecache(inode, oldsize, newsize);
-
-	return error;
-}
-EXPORT_SYMBOL(simple_setsize);
-
-/**
- * simple_setattr - setattr for simple in-memory filesystem
+ * simple_setattr - setattr for simple filesystem
  * @dentry: dentry
  * @iattr: iattr structure
  *
  * Returns 0 on success, -error on failure.
  *
- * simple_setattr implements setattr for an in-memory filesystem which
- * does not store its own file data or metadata (eg. uses the page cache
- * and inode cache as its data store).
+ * simple_setattr is a simple ->setattr implementation without a proper
+ * implementation of size changes.
+ *
+ * It can either be used for in-memory filesystems or special files
+ * on simple regular filesystems.  Anything that needs to change on-disk
+ * or wire state on size changes needs its own setattr method.
  */
 int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
 
+	WARN_ON_ONCE(inode->i_op->truncate);
+
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
 
-	if (iattr->ia_valid & ATTR_SIZE) {
-		error = simple_setsize(inode, iattr->ia_size);
-		if (error)
-			return error;
-	}
-
-	generic_setattr(inode, iattr);
-
-	return error;
+	if (iattr->ia_valid & ATTR_SIZE)
+		truncate_setsize(inode, iattr->ia_size);
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 EXPORT_SYMBOL(simple_setattr);
 
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 72d1893ddd3..675cc49197f 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -434,8 +434,11 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
 	int ret;
 
 	ta = kzalloc(sizeof(*ta), GFP_KERNEL);
-	if (!ta)
+	if (!ta) {
+		inode->i_nlink--;
+		iput(inode);
 		return -ENOMEM;
+	}
 
 	ta->state = CREATE_1;
 	ta->ino = inode->i_ino;
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index abe1cafbd4c..4dd0f7c06e3 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -232,15 +232,19 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
 	struct inode *inode = dentry->d_inode;
 	int err = 0;
 
-	if (attr->ia_valid & ATTR_SIZE)
+	err = inode_change_ok(inode, attr);
+	if (err)
+		return err;
+
+	if (attr->ia_valid & ATTR_SIZE) {
 		err = logfs_truncate(inode, attr->ia_size);
-	attr->ia_valid &= ~ATTR_SIZE;
+		if (err)
+			return err;
+	}
 
-	if (!err)
-		err = inode_change_ok(inode, attr);
-	if (!err)
-		err = inode_setattr(inode, attr);
-	return err;
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations logfs_reg_iops = {
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index f602e230e16..d8c71ece098 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -235,33 +235,21 @@ static struct inode *logfs_alloc_inode(struct super_block *sb)
  * purpose is to create a new inode that will not trigger the warning if such
  * an inode is still in use.  An ugly hack, no doubt.  Suggections for
  * improvement are welcome.
+ *
+ * AV: that's what ->put_super() is for...
  */
 struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino)
 {
 	struct inode *inode;
 
-	inode = logfs_alloc_inode(sb);
+	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
 	inode->i_mode = S_IFREG;
 	inode->i_ino = ino;
-	inode->i_sb = sb;
-
-	/* This is a blatant copy of alloc_inode code.  We'd need alloc_inode
-	 * to be nonstatic, alas. */
-	{
-		struct address_space * const mapping = &inode->i_data;
-
-		mapping->a_ops = &logfs_reg_aops;
-		mapping->host = inode;
-		mapping->flags = 0;
-		mapping_set_gfp_mask(mapping, GFP_NOFS);
-		mapping->assoc_mapping = NULL;
-		mapping->backing_dev_info = &default_backing_dev_info;
-		inode->i_mapping = mapping;
-		inode->i_nlink = 1;
-	}
+	inode->i_data.a_ops = &logfs_reg_aops;
+	mapping_set_gfp_mask(&inode->i_data, GFP_NOFS);
 
 	return inode;
 }
@@ -277,7 +265,7 @@ struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino)
 
 	err = logfs_read_inode(inode);
 	if (err) {
-		destroy_meta_inode(inode);
+		iput(inode);
 		return ERR_PTR(err);
 	}
 	logfs_inode_setops(inode);
@@ -298,18 +286,8 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-void destroy_meta_inode(struct inode *inode)
-{
-	if (inode) {
-		if (inode->i_data.nrpages)
-			truncate_inode_pages(&inode->i_data, 0);
-		logfs_clear_inode(inode);
-		kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
-	}
-}
-
 /* called with inode_lock held */
-static void logfs_drop_inode(struct inode *inode)
+static int logfs_drop_inode(struct inode *inode)
 {
 	struct logfs_super *super = logfs_super(inode->i_sb);
 	struct logfs_inode *li = logfs_inode(inode);
@@ -317,7 +295,7 @@ static void logfs_drop_inode(struct inode *inode)
 	spin_lock(&logfs_inode_lock);
 	list_move(&li->li_freeing_list, &super->s_freeing_list);
 	spin_unlock(&logfs_inode_lock);
-	generic_drop_inode(inode);
+	return generic_drop_inode(inode);
 }
 
 static void logfs_set_ino_generation(struct super_block *sb,
@@ -384,12 +362,21 @@ static int logfs_sync_fs(struct super_block *sb, int wait)
 	return 0;
 }
 
+static void logfs_put_super(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	/* kill the meta-inodes */
+	iput(super->s_master_inode);
+	iput(super->s_segfile_inode);
+	iput(super->s_mapping_inode);
+}
+
 const struct super_operations logfs_super_operations = {
 	.alloc_inode	= logfs_alloc_inode,
-	.clear_inode	= logfs_clear_inode,
-	.delete_inode	= logfs_delete_inode,
 	.destroy_inode	= logfs_destroy_inode,
+	.evict_inode	= logfs_evict_inode,
 	.drop_inode	= logfs_drop_inode,
+	.put_super	= logfs_put_super,
 	.write_inode	= logfs_write_inode,
 	.statfs		= logfs_statfs,
 	.sync_fs	= logfs_sync_fs,
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 4b0e0616b35..f46ee8b0e13 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -889,8 +889,6 @@ void logfs_cleanup_journal(struct super_block *sb)
 	struct logfs_super *super = logfs_super(sb);
 
 	btree_grim_visitor32(&super->s_reserved_segments, 0, NULL);
-	destroy_meta_inode(super->s_master_inode);
-	super->s_master_inode = NULL;
 
 	kfree(super->s_compressed_je);
 	kfree(super->s_je);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index c838c4d7211..5e3b7207795 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -525,13 +525,11 @@ struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
 struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
 int logfs_init_inode_cache(void);
 void logfs_destroy_inode_cache(void);
-void destroy_meta_inode(struct inode *inode);
 void logfs_set_blocks(struct inode *inode, u64 no);
 /* these logically belong into inode.c but actually reside in readwrite.c */
 int logfs_read_inode(struct inode *inode);
 int __logfs_write_inode(struct inode *inode, long flags);
-void logfs_delete_inode(struct inode *inode);
-void logfs_clear_inode(struct inode *inode);
+void logfs_evict_inode(struct inode *inode);
 
 /* journal.c */
 void logfs_write_anchor(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 0718d112a1a..6127baf0e18 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1972,31 +1972,6 @@ static struct page *inode_to_page(struct inode *inode)
 	return page;
 }
 
-/* Cheaper version of write_inode.  All changes are concealed in
- * aliases, which are moved back.  No write to the medium happens.
- */
-void logfs_clear_inode(struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
-	struct logfs_inode *li = logfs_inode(inode);
-	struct logfs_block *block = li->li_block;
-	struct page *page;
-
-	/* Only deleted files may be dirty at this point */
-	BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
-	if (!block)
-		return;
-	if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
-		block->ops->free_block(inode->i_sb, block);
-		return;
-	}
-
-	BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
-	page = inode_to_page(inode);
-	BUG_ON(!page); /* FIXME: Use emergency page */
-	logfs_put_write_page(page);
-}
-
 static int do_write_inode(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
@@ -2164,18 +2139,40 @@ static int do_delete_inode(struct inode *inode)
  * ZOMBIE inodes have already been deleted before and should remain dead,
  * if it weren't for valid checking.  No need to kill them again here.
  */
-void logfs_delete_inode(struct inode *inode)
+void logfs_evict_inode(struct inode *inode)
 {
+	struct super_block *sb = inode->i_sb;
 	struct logfs_inode *li = logfs_inode(inode);
+	struct logfs_block *block = li->li_block;
+	struct page *page;
 
-	if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
-		li->li_flags |= LOGFS_IF_ZOMBIE;
-		if (i_size_read(inode) > 0)
-			logfs_truncate(inode, 0);
-		do_delete_inode(inode);
+	if (!inode->i_nlink) {
+		if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
+			li->li_flags |= LOGFS_IF_ZOMBIE;
+			if (i_size_read(inode) > 0)
+				logfs_truncate(inode, 0);
+			do_delete_inode(inode);
+		}
 	}
 	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	end_writeback(inode);
+
+	/* Cheaper version of write_inode.  All changes are concealed in
+	 * aliases, which are moved back.  No write to the medium happens.
+	 */
+	/* Only deleted files may be dirty at this point */
+	BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
+	if (!block)
+		return;
+	if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
+		block->ops->free_block(inode->i_sb, block);
+		return;
+	}
+
+	BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
+	page = inode_to_page(inode);
+	BUG_ON(!page); /* FIXME: Use emergency page */
+	logfs_put_write_page(page);
 }
 
 void btree_write_block(struct logfs_block *block)
@@ -2272,7 +2269,6 @@ void logfs_cleanup_rw(struct super_block *sb)
 {
 	struct logfs_super *super = logfs_super(sb);
 
-	destroy_meta_inode(super->s_segfile_inode);
 	logfs_mempool_destroy(super->s_block_pool);
 	logfs_mempool_destroy(super->s_shadow_pool);
 }
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index a9657afb70a..9d518735325 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -929,5 +929,4 @@ void logfs_cleanup_areas(struct super_block *sb)
 	for_each_area(i)
 		free_area(super->s_area[i]);
 	free_area(super->s_journal_area);
-	destroy_meta_inode(super->s_mapping_inode);
 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index d651e10a1e9..5336155c5d8 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -342,24 +342,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
 		goto fail;
 	}
 
+	/* at that point we know that ->put_super() will be called */
 	super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
 	if (!super->s_erase_page)
-		goto fail;
+		return -ENOMEM;
 	memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
 
 	/* FIXME: check for read-only mounts */
 	err = logfs_make_writeable(sb);
-	if (err)
-		goto fail1;
+	if (err) {
+		__free_page(super->s_erase_page);
+		return err;
+	}
 
 	log_super("LogFS: Finished mounting\n");
 	simple_set_mnt(mnt, sb);
 	return 0;
 
-fail1:
-	__free_page(super->s_erase_page);
 fail:
-	iput(logfs_super(sb)->s_master_inode);
+	iput(super->s_master_inode);
+	iput(super->s_segfile_inode);
+	iput(super->s_mapping_inode);
 	return -EIO;
 }
 
@@ -580,10 +583,14 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
 	sb->s_flags |= MS_ACTIVE;
 	err = logfs_get_sb_final(sb, mnt);
 	if (err)
-		goto err1;
-	return 0;
+		deactivate_locked_super(sb);
+	return err;
 
 err1:
+	/* no ->s_root, no ->put_super() */
+	iput(super->s_master_inode);
+	iput(super->s_segfile_inode);
+	iput(super->s_mapping_inode);
 	deactivate_locked_super(sb);
 	return err;
 err0:
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e28f21b9534..cf4e6cdfd15 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -79,15 +79,11 @@ EXPORT_SYMBOL(mb_cache_entry_find_next);
 struct mb_cache {
 	struct list_head		c_cache_list;
 	const char			*c_name;
-	struct mb_cache_op		c_op;
 	atomic_t			c_entry_count;
 	int				c_bucket_bits;
-#ifndef MB_CACHE_INDEXES_COUNT
-	int				c_indexes_count;
-#endif
-	struct kmem_cache			*c_entry_cache;
+	struct kmem_cache		*c_entry_cache;
 	struct list_head		*c_block_hash;
-	struct list_head		*c_indexes_hash[0];
+	struct list_head		*c_index_hash;
 };
 
 
@@ -101,16 +97,6 @@ static LIST_HEAD(mb_cache_list);
 static LIST_HEAD(mb_cache_lru_list);
 static DEFINE_SPINLOCK(mb_cache_spinlock);
 
-static inline int
-mb_cache_indexes(struct mb_cache *cache)
-{
-#ifdef MB_CACHE_INDEXES_COUNT
-	return MB_CACHE_INDEXES_COUNT;
-#else
-	return cache->c_indexes_count;
-#endif
-}
-
 /*
  * What the mbcache registers as to get shrunk dynamically.
  */
@@ -132,12 +118,9 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
 static void
 __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 {
-	int n;
-
 	if (__mb_cache_entry_is_hashed(ce)) {
 		list_del_init(&ce->e_block_list);
-		for (n=0; n<mb_cache_indexes(ce->e_cache); n++)
-			list_del(&ce->e_indexes[n].o_list);
+		list_del(&ce->e_index.o_list);
 	}
 }
 
@@ -148,16 +131,8 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 	struct mb_cache *cache = ce->e_cache;
 
 	mb_assert(!(ce->e_used || ce->e_queued));
-	if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) {
-		/* free failed -- put back on the lru list
-		   for freeing later. */
-		spin_lock(&mb_cache_spinlock);
-		list_add(&ce->e_lru_list, &mb_cache_lru_list);
-		spin_unlock(&mb_cache_spinlock);
-	} else {
-		kmem_cache_free(cache->c_entry_cache, ce);
-		atomic_dec(&cache->c_entry_count);
-	}
+	kmem_cache_free(cache->c_entry_cache, ce);
+	atomic_dec(&cache->c_entry_count);
 }
 
 
@@ -201,22 +176,12 @@ static int
 mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
-	struct list_head *l, *ltmp;
+	struct mb_cache *cache;
+	struct mb_cache_entry *entry, *tmp;
 	int count = 0;
 
-	spin_lock(&mb_cache_spinlock);
-	list_for_each(l, &mb_cache_list) {
-		struct mb_cache *cache =
-			list_entry(l, struct mb_cache, c_cache_list);
-		mb_debug("cache %s (%d)", cache->c_name,
-			  atomic_read(&cache->c_entry_count));
-		count += atomic_read(&cache->c_entry_count);
-	}
 	mb_debug("trying to free %d entries", nr_to_scan);
-	if (nr_to_scan == 0) {
-		spin_unlock(&mb_cache_spinlock);
-		goto out;
-	}
+	spin_lock(&mb_cache_spinlock);
 	while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) {
 		struct mb_cache_entry *ce =
 			list_entry(mb_cache_lru_list.next,
@@ -224,12 +189,15 @@ mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 		list_move_tail(&ce->e_lru_list, &free_list);
 		__mb_cache_entry_unhash(ce);
 	}
+	list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
+		mb_debug("cache %s (%d)", cache->c_name,
+			  atomic_read(&cache->c_entry_count));
+		count += atomic_read(&cache->c_entry_count);
+	}
 	spin_unlock(&mb_cache_spinlock);
-	list_for_each_safe(l, ltmp, &free_list) {
-		__mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
-						   e_lru_list), gfp_mask);
+	list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
+		__mb_cache_entry_forget(entry, gfp_mask);
 	}
-out:
 	return (count / 100) * sysctl_vfs_cache_pressure;
 }
 
@@ -243,72 +211,49 @@ out:
  * memory was available.
  *
  * @name: name of the cache (informal)
- * @cache_op: contains the callback called when freeing a cache entry
- * @entry_size: The size of a cache entry, including
- *              struct mb_cache_entry
- * @indexes_count: number of additional indexes in the cache. Must equal
- *                 MB_CACHE_INDEXES_COUNT if the number of indexes is
- *                 hardwired.
  * @bucket_bits: log2(number of hash buckets)
  */
 struct mb_cache *
-mb_cache_create(const char *name, struct mb_cache_op *cache_op,
-		size_t entry_size, int indexes_count, int bucket_bits)
+mb_cache_create(const char *name, int bucket_bits)
 {
-	int m=0, n, bucket_count = 1 << bucket_bits;
+	int n, bucket_count = 1 << bucket_bits;
 	struct mb_cache *cache = NULL;
 
-	if(entry_size < sizeof(struct mb_cache_entry) +
-	   indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]))
-		return NULL;
-
-	cache = kmalloc(sizeof(struct mb_cache) +
-	                indexes_count * sizeof(struct list_head), GFP_KERNEL);
+	cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
 	if (!cache)
-		goto fail;
+		return NULL;
 	cache->c_name = name;
-	cache->c_op.free = NULL;
-	if (cache_op)
-		cache->c_op.free = cache_op->free;
 	atomic_set(&cache->c_entry_count, 0);
 	cache->c_bucket_bits = bucket_bits;
-#ifdef MB_CACHE_INDEXES_COUNT
-	mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT);
-#else
-	cache->c_indexes_count = indexes_count;
-#endif
 	cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
 	                              GFP_KERNEL);
 	if (!cache->c_block_hash)
 		goto fail;
 	for (n=0; n<bucket_count; n++)
 		INIT_LIST_HEAD(&cache->c_block_hash[n]);
-	for (m=0; m<indexes_count; m++) {
-		cache->c_indexes_hash[m] = kmalloc(bucket_count *
-		                                 sizeof(struct list_head),
-		                                 GFP_KERNEL);
-		if (!cache->c_indexes_hash[m])
-			goto fail;
-		for (n=0; n<bucket_count; n++)
-			INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]);
-	}
-	cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
+	cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head),
+				      GFP_KERNEL);
+	if (!cache->c_index_hash)
+		goto fail;
+	for (n=0; n<bucket_count; n++)
+		INIT_LIST_HEAD(&cache->c_index_hash[n]);
+	cache->c_entry_cache = kmem_cache_create(name,
+		sizeof(struct mb_cache_entry), 0,
 		SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
 	if (!cache->c_entry_cache)
-		goto fail;
+		goto fail2;
 
 	spin_lock(&mb_cache_spinlock);
 	list_add(&cache->c_cache_list, &mb_cache_list);
 	spin_unlock(&mb_cache_spinlock);
 	return cache;
 
+fail2:
+	kfree(cache->c_index_hash);
+
 fail:
-	if (cache) {
-		while (--m >= 0)
-			kfree(cache->c_indexes_hash[m]);
-		kfree(cache->c_block_hash);
-		kfree(cache);
-	}
+	kfree(cache->c_block_hash);
+	kfree(cache);
 	return NULL;
 }
 
@@ -357,7 +302,6 @@ mb_cache_destroy(struct mb_cache *cache)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
-	int n;
 
 	spin_lock(&mb_cache_spinlock);
 	list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
@@ -384,8 +328,7 @@ mb_cache_destroy(struct mb_cache *cache)
 
 	kmem_cache_destroy(cache->c_entry_cache);
 
-	for (n=0; n < mb_cache_indexes(cache); n++)
-		kfree(cache->c_indexes_hash[n]);
+	kfree(cache->c_index_hash);
 	kfree(cache->c_block_hash);
 	kfree(cache);
 }
@@ -429,17 +372,16 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
  *
  * @bdev: device the cache entry belongs to
  * @block: block number
- * @keys: array of additional keys. There must be indexes_count entries
- *        in the array (as specified when creating the cache).
+ * @key: lookup key
  */
 int
 mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
-		      sector_t block, unsigned int keys[])
+		      sector_t block, unsigned int key)
 {
 	struct mb_cache *cache = ce->e_cache;
 	unsigned int bucket;
 	struct list_head *l;
-	int error = -EBUSY, n;
+	int error = -EBUSY;
 
 	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 
 			   cache->c_bucket_bits);
@@ -454,12 +396,9 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
 	ce->e_bdev = bdev;
 	ce->e_block = block;
 	list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
-	for (n=0; n<mb_cache_indexes(cache); n++) {
-		ce->e_indexes[n].o_key = keys[n];
-		bucket = hash_long(keys[n], cache->c_bucket_bits);
-		list_add(&ce->e_indexes[n].o_list,
-			 &cache->c_indexes_hash[n][bucket]);
-	}
+	ce->e_index.o_key = key;
+	bucket = hash_long(key, cache->c_bucket_bits);
+	list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]);
 	error = 0;
 out:
 	spin_unlock(&mb_cache_spinlock);
@@ -555,13 +494,12 @@ cleanup:
 
 static struct mb_cache_entry *
 __mb_cache_entry_find(struct list_head *l, struct list_head *head,
-		      int index, struct block_device *bdev, unsigned int key)
+		      struct block_device *bdev, unsigned int key)
 {
 	while (l != head) {
 		struct mb_cache_entry *ce =
-			list_entry(l, struct mb_cache_entry,
-			           e_indexes[index].o_list);
-		if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) {
+			list_entry(l, struct mb_cache_entry, e_index.o_list);
+		if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
 			DEFINE_WAIT(wait);
 
 			if (!list_empty(&ce->e_lru_list))
@@ -603,23 +541,20 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
  * returned cache entry is locked for shared access ("multiple readers").
  *
  * @cache: the cache to search
- * @index: the number of the additonal index to search (0<=index<indexes_count)
  * @bdev: the device the cache entry should belong to
  * @key: the key in the index
  */
 struct mb_cache_entry *
-mb_cache_entry_find_first(struct mb_cache *cache, int index,
-			  struct block_device *bdev, unsigned int key)
+mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
+			  unsigned int key)
 {
 	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
 	struct list_head *l;
 	struct mb_cache_entry *ce;
 
-	mb_assert(index < mb_cache_indexes(cache));
 	spin_lock(&mb_cache_spinlock);
-	l = cache->c_indexes_hash[index][bucket].next;
-	ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket],
-	                           index, bdev, key);
+	l = cache->c_index_hash[bucket].next;
+	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
 	spin_unlock(&mb_cache_spinlock);
 	return ce;
 }
@@ -640,12 +575,11 @@ mb_cache_entry_find_first(struct mb_cache *cache, int index,
  * }
  *
  * @prev: The previous match
- * @index: the number of the additonal index to search (0<=index<indexes_count)
  * @bdev: the device the cache entry should belong to
  * @key: the key in the index
  */
 struct mb_cache_entry *
-mb_cache_entry_find_next(struct mb_cache_entry *prev, int index,
+mb_cache_entry_find_next(struct mb_cache_entry *prev,
 			 struct block_device *bdev, unsigned int key)
 {
 	struct mb_cache *cache = prev->e_cache;
@@ -653,11 +587,9 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index,
 	struct list_head *l;
 	struct mb_cache_entry *ce;
 
-	mb_assert(index < mb_cache_indexes(cache));
 	spin_lock(&mb_cache_spinlock);
-	l = prev->e_indexes[index].o_list.next;
-	ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket],
-	                           index, bdev, key);
+	l = prev->e_index.o_list.next;
+	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
 	__mb_cache_entry_release_unlock(prev);
 	return ce;
 }
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 482779fe4e7..3f32bcb0d9b 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -200,13 +200,13 @@ void minix_free_inode(struct inode * inode)
 	ino = inode->i_ino;
 	if (ino < 1 || ino > sbi->s_ninodes) {
 		printk("minix_free_inode: inode 0 or nonexistent inode\n");
-		goto out;
+		return;
 	}
 	bit = ino & ((1<<k) - 1);
 	ino >>= k;
 	if (ino >= sbi->s_imap_blocks) {
 		printk("minix_free_inode: nonexistent imap in superblock\n");
-		goto out;
+		return;
 	}
 
 	minix_clear_inode(inode);	/* clear on-disk copy */
@@ -217,8 +217,6 @@ void minix_free_inode(struct inode * inode)
 		printk("minix_free_inode: bit %lu already cleared\n", bit);
 	spin_unlock(&bitmap_lock);
 	mark_buffer_dirty(bh);
- out:
-	clear_inode(inode);		/* clear in-memory copy */
 }
 
 struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 1dbf921ca44..085a9262c69 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -271,8 +271,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
 
 got_it:
 	pos = page_offset(page) + p - (char *)page_address(page);
-	err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
 	if (err)
 		goto out_unlock;
 	memcpy (namx, name, namelen);
@@ -297,8 +296,7 @@ out_unlock:
 
 int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = (struct inode*)mapping->host;
+	struct inode *inode = page->mapping->host;
 	char *kaddr = page_address(page);
 	loff_t pos = page_offset(page) + (char*)de - kaddr;
 	struct minix_sb_info *sbi = minix_sb(inode->i_sb);
@@ -306,8 +304,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 	int err;
 
 	lock_page(page);
-	err = __minix_write_begin(NULL, mapping, pos, len,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, pos, len);
 	if (err == 0) {
 		if (sbi->s_version == MINIX_V3)
 			((minix3_dirent *) de)->inode = 0;
@@ -325,16 +322,14 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 
 int minix_make_empty(struct inode *inode, struct inode *dir)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct page *page = grab_cache_page(mapping, 0);
+	struct page *page = grab_cache_page(inode->i_mapping, 0);
 	struct minix_sb_info *sbi = minix_sb(inode->i_sb);
 	char *kaddr;
 	int err;
 
 	if (!page)
 		return -ENOMEM;
-	err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, 0, 2 * sbi->s_dirsize);
 	if (err) {
 		unlock_page(page);
 		goto fail;
@@ -425,8 +420,7 @@ not_empty:
 void minix_set_link(struct minix_dir_entry *de, struct page *page,
 	struct inode *inode)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *dir = mapping->host;
+	struct inode *dir = page->mapping->host;
 	struct minix_sb_info *sbi = minix_sb(dir->i_sb);
 	loff_t pos = page_offset(page) +
 			(char *)de-(char*)page_address(page);
@@ -434,8 +428,7 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page,
 
 	lock_page(page);
 
-	err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
 	if (err == 0) {
 		if (sbi->s_version == MINIX_V3)
 			((minix3_dirent *) de)->inode = inode->i_ino;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index d5320ff23fa..4493ce695ab 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,29 @@ const struct file_operations minix_file_operations = {
 	.splice_read	= generic_file_splice_read,
 };
 
+static int minix_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
 const struct inode_operations minix_file_inode_operations = {
 	.truncate	= minix_truncate,
+	.setattr	= minix_setattr,
 	.getattr	= minix_getattr,
 };
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 756f8c93780..e39d6bf2e8f 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,12 +24,17 @@ static int minix_write_inode(struct inode *inode,
 static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int minix_remount (struct super_block * sb, int * flags, char * data);
 
-static void minix_delete_inode(struct inode *inode)
+static void minix_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	minix_truncate(inode);
-	minix_free_inode(inode);
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		minix_truncate(inode);
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+	if (!inode->i_nlink)
+		minix_free_inode(inode);
 }
 
 static void minix_put_super(struct super_block *sb)
@@ -96,7 +101,7 @@ static const struct super_operations minix_sops = {
 	.alloc_inode	= minix_alloc_inode,
 	.destroy_inode	= minix_destroy_inode,
 	.write_inode	= minix_write_inode,
-	.delete_inode	= minix_delete_inode,
+	.evict_inode	= minix_evict_inode,
 	.put_super	= minix_put_super,
 	.statfs		= minix_statfs,
 	.remount_fs	= minix_remount,
@@ -357,20 +362,26 @@ static int minix_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page,minix_get_block);
 }
 
-int __minix_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
+int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				minix_get_block);
+	return __block_write_begin(page, pos, len, minix_get_block);
 }
 
 static int minix_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				minix_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t minix_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 111f34ee9e3..407b1c84911 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -53,9 +53,7 @@ extern int minix_new_block(struct inode * inode);
 extern void minix_free_block(struct inode *inode, unsigned long block);
 extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
 extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int __minix_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata);
+extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
 extern void V1_minix_truncate(struct inode *);
 extern void V2_minix_truncate(struct inode *);
diff --git a/fs/namei.c b/fs/namei.c
index 42d2d28fb82..13ff4abdbdc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2633,7 +2633,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	int error;
 	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
-	const char *old_name;
+	const unsigned char *old_name;
 
 	if (old_dentry->d_inode == new_dentry->d_inode)
  		return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index 88058de59c7..66c4f7e781c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -29,6 +29,7 @@
 #include <linux/log2.h>
 #include <linux/idr.h>
 #include <linux/fs_struct.h>
+#include <linux/fsnotify.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include "pnode.h"
@@ -150,6 +151,9 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
+#ifdef CONFIG_FSNOTIFY
+		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
+#endif
 #ifdef CONFIG_SMP
 		mnt->mnt_writers = alloc_percpu(int);
 		if (!mnt->mnt_writers)
@@ -610,6 +614,7 @@ static inline void __mntput(struct vfsmount *mnt)
 	 * provides barriers, so count_mnt_writers() below is safe.  AV
 	 */
 	WARN_ON(count_mnt_writers(mnt));
+	fsnotify_vfsmount_delete(mnt);
 	dput(mnt->mnt_root);
 	free_vfsmnt(mnt);
 	deactivate_super(sb);
@@ -1984,7 +1989,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 	if (flags & MS_RDONLY)
 		mnt_flags |= MNT_READONLY;
 
-	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
+	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
 		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
 		   MS_STRICTATIME);
 
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1e634deff94..b4de38cf49f 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -43,7 +43,7 @@
 #define NCP_DEFAULT_TIME_OUT 10
 #define NCP_DEFAULT_RETRY_COUNT 20
 
-static void ncp_delete_inode(struct inode *);
+static void ncp_evict_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
 static int  ncp_statfs(struct dentry *, struct kstatfs *);
 static int  ncp_show_options(struct seq_file *, struct vfsmount *);
@@ -100,7 +100,7 @@ static const struct super_operations ncp_sops =
 	.alloc_inode	= ncp_alloc_inode,
 	.destroy_inode	= ncp_destroy_inode,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= ncp_delete_inode,
+	.evict_inode	= ncp_evict_inode,
 	.put_super	= ncp_put_super,
 	.statfs		= ncp_statfs,
 	.remount_fs	= ncp_remount,
@@ -282,19 +282,19 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
 }
 
 static void
-ncp_delete_inode(struct inode *inode)
+ncp_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 
 	if (S_ISDIR(inode->i_mode)) {
-		DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
+		DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
 	}
 
 	if (ncp_make_closed(inode) != 0) {
 		/* We can't do anything but complain. */
-		printk(KERN_ERR "ncp_delete_inode: could not close\n");
+		printk(KERN_ERR "ncp_evict_inode: could not close\n");
 	}
-	clear_inode(inode);
 }
 
 static void ncp_stop_tasks(struct ncp_server *server) {
@@ -924,9 +924,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 				tmpattr.ia_valid = ATTR_MODE;
 				tmpattr.ia_mode = attr->ia_mode;
 
-				result = inode_setattr(inode, &tmpattr);
-				if (result)
-					goto out;
+				setattr_copy(inode, &tmpattr);
+				mark_inode_dirty(inode);
 			}
 		}
 #endif
@@ -954,15 +953,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 		result = ncp_make_closed(inode);
 		if (result)
 			goto out;
-		{
-			struct iattr tmpattr;
-			
-			tmpattr.ia_valid = ATTR_SIZE;
-			tmpattr.ia_size = attr->ia_size;
-			
-			result = inode_setattr(inode, &tmpattr);
+
+		if (attr->ia_size != i_size_read(inode)) {
+			result = vmtruncate(inode, attr->ia_size);
 			if (result)
 				goto out;
+			mark_inode_dirty(inode);
 		}
 	}
 	if ((attr->ia_valid & ATTR_CTIME) != 0) {
@@ -1002,8 +998,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 			NCP_FINFO(inode)->nwattr = info.attributes;
 #endif
 	}
-	if (!result)
-		result = inode_setattr(inode, attr);
+	if (result)
+		goto out;
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
 out:
 	unlock_kernel();
 	return result;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 023c03d0207..84a8cfc4e38 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -20,7 +20,6 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 
 #include <linux/ncp_fs.h>
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 581d8f081e6..7d2d6c72aa7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -98,7 +98,7 @@ u64 nfs_compat_user_ino64(u64 fileid)
 	return ino;
 }
 
-void nfs_clear_inode(struct inode *inode)
+static void nfs_clear_inode(struct inode *inode)
 {
 	/*
 	 * The following should never happen...
@@ -110,6 +110,13 @@ void nfs_clear_inode(struct inode *inode)
 	nfs_fscache_release_inode_cookie(inode);
 }
 
+void nfs_evict_inode(struct inode *inode)
+{
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+	nfs_clear_inode(inode);
+}
+
 /**
  * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
  */
@@ -1398,8 +1405,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  * to open() calls that passed nfs_atomic_lookup, but failed to call
  * nfs_open().
  */
-void nfs4_clear_inode(struct inode *inode)
+void nfs4_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4c2150d8671..c961bc92c10 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -213,9 +213,9 @@ extern struct workqueue_struct *nfsiod_workqueue;
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
 extern int nfs_write_inode(struct inode *, struct writeback_control *);
-extern void nfs_clear_inode(struct inode *);
+extern void nfs_evict_inode(struct inode *);
 #ifdef CONFIG_NFS_V4
-extern void nfs4_clear_inode(struct inode *);
+extern void nfs4_evict_inode(struct inode *);
 #endif
 void nfs_zap_acl_cache(struct inode *inode);
 extern int nfs_wait_bit_killable(void *word);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f1ae39f6cb0..ee26316ad1f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -270,7 +270,7 @@ static const struct super_operations nfs_sops = {
 	.write_inode	= nfs_write_inode,
 	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
-	.clear_inode	= nfs_clear_inode,
+	.evict_inode	= nfs_evict_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
 	.show_stats	= nfs_show_stats,
@@ -340,7 +340,7 @@ static const struct super_operations nfs4_sops = {
 	.write_inode	= nfs_write_inode,
 	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
-	.clear_inode	= nfs4_clear_inode,
+	.evict_inode	= nfs4_evict_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
 	.show_stats	= nfs_show_stats,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f8931acb05f..1a468bbd330 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1756,6 +1756,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	struct nfs4_acl *acl = NULL;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	u32 minorversion = resp->cstate.minorversion;
+	struct path path = {
+		.mnt	= exp->ex_path.mnt,
+		.dentry	= dentry,
+	};
 
 	BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
 	BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
@@ -1776,7 +1780,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			FATTR4_WORD0_MAXNAME)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
 		       FATTR4_WORD1_SPACE_TOTAL))) {
-		err = vfs_statfs(dentry, &statfs);
+		err = vfs_statfs(&path, &statfs);
 		if (err)
 			goto out_nfserr;
 	}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9df85a13af2..96360a83cb9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -934,7 +934,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		nfsdstats.io_read += host_err;
 		*count = host_err;
 		err = 0;
-		fsnotify_access(file->f_path.dentry);
+		fsnotify_access(file);
 	} else 
 		err = nfserrno(host_err);
 out:
@@ -1045,7 +1045,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		goto out_nfserr;
 	*cnt = host_err;
 	nfsdstats.io_write += host_err;
-	fsnotify_modify(file->f_path.dentry);
+	fsnotify_modify(file);
 
 	/* clear setuid/setgid flag after write */
 	if (inode->i_mode & (S_ISUID | S_ISGID))
@@ -2033,8 +2033,14 @@ out:
 __be32
 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
 {
-	__be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
-	if (!err && vfs_statfs(fhp->fh_dentry,stat))
+	struct path path = {
+		.mnt	= fhp->fh_export->ex_path.mnt,
+		.dentry	= fhp->fh_dentry,
+	};
+	__be32 err;
+
+	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
+	if (!err && vfs_statfs(&path, stat))
 		err = nfserr_io;
 	return err;
 }
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index b60277b4446..cb003c8ee1f 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -80,23 +80,10 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
 	return last_byte;
 }
 
-static int nilfs_prepare_chunk_uninterruptible(struct page *page,
-					       struct address_space *mapping,
-					       unsigned from, unsigned to)
+static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to)
 {
 	loff_t pos = page_offset(page) + from;
-	return block_write_begin(NULL, mapping, pos, to - from,
-				 AOP_FLAG_UNINTERRUPTIBLE, &page,
-				 NULL, nilfs_get_block);
-}
-
-static int nilfs_prepare_chunk(struct page *page,
-			       struct address_space *mapping,
-			       unsigned from, unsigned to)
-{
-	loff_t pos = page_offset(page) + from;
-	return block_write_begin(NULL, mapping, pos, to - from, 0, &page,
-				 NULL, nilfs_get_block);
+	return __block_write_begin(page, pos, to - from, nilfs_get_block);
 }
 
 static void nilfs_commit_chunk(struct page *page,
@@ -447,7 +434,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
 	int err;
 
 	lock_page(page);
-	err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to);
+	err = nilfs_prepare_chunk(page, from, to);
 	BUG_ON(err);
 	de->inode = cpu_to_le64(inode->i_ino);
 	nilfs_set_de_type(de, inode);
@@ -528,7 +515,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
 got_it:
 	from = (char *)de - (char *)page_address(page);
 	to = from + rec_len;
-	err = nilfs_prepare_chunk(page, page->mapping, from, to);
+	err = nilfs_prepare_chunk(page, from, to);
 	if (err)
 		goto out_unlock;
 	if (de->inode) {
@@ -586,7 +573,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
 	if (pde)
 		from = (char *)pde - (char *)page_address(page);
 	lock_page(page);
-	err = nilfs_prepare_chunk(page, mapping, from, to);
+	err = nilfs_prepare_chunk(page, from, to);
 	BUG_ON(err);
 	if (pde)
 		pde->rec_len = nilfs_rec_len_to_disk(to - from);
@@ -614,7 +601,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
 	if (!page)
 		return -ENOMEM;
 
-	err = nilfs_prepare_chunk(page, mapping, 0, chunk_size);
+	err = nilfs_prepare_chunk(page, 0, chunk_size);
 	if (unlikely(err)) {
 		unlock_page(page);
 		goto fail;
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index dd5f7e0a95f..84a45d1d546 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -78,7 +78,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
 	struct inode *gcdat = nilfs->ns_gc_dat;
 	struct nilfs_inode_info *gii = NILFS_I(gcdat);
 
-	gcdat->i_state = I_CLEAR;
+	gcdat->i_state = I_FREEING | I_CLEAR;
 	gii->i_flags = 0;
 
 	nilfs_palloc_clear_cache(gcdat);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 39e038ac8fc..eccb2f2e231 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -27,6 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/uio.h>
 #include "nilfs.h"
+#include "btnode.h"
 #include "segment.h"
 #include "page.h"
 #include "mdt.h"
@@ -197,11 +198,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
 	if (unlikely(err))
 		return err;
 
-	*pagep = NULL;
-	err = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, nilfs_get_block);
-	if (unlikely(err))
+	err = block_write_begin(mapping, pos, len, flags, pagep,
+				nilfs_get_block);
+	if (unlikely(err)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+
 		nilfs_transaction_abort(inode->i_sb);
+	}
 	return err;
 }
 
@@ -237,6 +242,19 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	/* Needs synchronization with the cleaner */
 	size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, nilfs_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && size < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
 	return size;
 }
 
@@ -337,7 +355,6 @@ void nilfs_free_inode(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 	struct nilfs_sb_info *sbi = NILFS_SB(sb);
 
-	clear_inode(inode);
 	/* XXX: check error code? Is there any thing I can do? */
 	(void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino);
 	atomic_dec(&sbi->s_inodes_count);
@@ -597,16 +614,34 @@ void nilfs_truncate(struct inode *inode)
 	   But truncate has no return value. */
 }
 
-void nilfs_delete_inode(struct inode *inode)
+static void nilfs_clear_inode(struct inode *inode)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+
+	/*
+	 * Free resources allocated in nilfs_read_inode(), here.
+	 */
+	BUG_ON(!list_empty(&ii->i_dirty));
+	brelse(ii->i_bh);
+	ii->i_bh = NULL;
+
+	if (test_bit(NILFS_I_BMAP, &ii->i_state))
+		nilfs_bmap_clear(ii->i_bmap);
+
+	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
+}
+
+void nilfs_evict_inode(struct inode *inode)
 {
 	struct nilfs_transaction_info ti;
 	struct super_block *sb = inode->i_sb;
 	struct nilfs_inode_info *ii = NILFS_I(inode);
 
-	if (unlikely(is_bad_inode(inode))) {
+	if (inode->i_nlink || unlikely(is_bad_inode(inode))) {
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
+		end_writeback(inode);
+		nilfs_clear_inode(inode);
 		return;
 	}
 	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
@@ -616,6 +651,8 @@ void nilfs_delete_inode(struct inode *inode)
 
 	nilfs_truncate_bmap(ii, 0);
 	nilfs_mark_inode_dirty(inode);
+	end_writeback(inode);
+	nilfs_clear_inode(inode);
 	nilfs_free_inode(inode);
 	/* nilfs_free_inode() marks inode buffer dirty */
 	if (IS_SYNC(inode))
@@ -639,14 +676,27 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	err = nilfs_transaction_begin(sb, &ti, 0);
 	if (unlikely(err))
 		return err;
-	err = inode_setattr(inode, iattr);
-	if (!err && (iattr->ia_valid & ATTR_MODE))
+
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		err = vmtruncate(inode, iattr->ia_size);
+		if (unlikely(err))
+			goto out_err;
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+
+	if (iattr->ia_valid & ATTR_MODE) {
 		err = nilfs_acl_chmod(inode);
-	if (likely(!err))
-		err = nilfs_transaction_commit(sb);
-	else
-		nilfs_transaction_abort(sb);
+		if (unlikely(err))
+			goto out_err;
+	}
+
+	return nilfs_transaction_commit(sb);
 
+out_err:
+	nilfs_transaction_abort(sb);
 	return err;
 }
 
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 0842d775b3e..d3d54046e5f 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -250,7 +250,7 @@ extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
 extern struct inode *nilfs_iget(struct super_block *, unsigned long);
 extern void nilfs_update_inode(struct inode *, struct buffer_head *);
 extern void nilfs_truncate(struct inode *);
-extern void nilfs_delete_inode(struct inode *);
+extern void nilfs_evict_inode(struct inode *);
 extern int nilfs_setattr(struct dentry *, struct iattr *);
 extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
 				  struct buffer_head **);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 83e3d8c61a0..d0c35ef39f6 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -523,11 +523,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
 		}
 
 		pos = rb->blkoff << inode->i_blkbits;
-		page = NULL;
-		err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
-					0, &page, NULL, nilfs_get_block);
-		if (unlikely(err))
+		err = block_write_begin(inode->i_mapping, pos, blocksize,
+					0, &page, nilfs_get_block);
+		if (unlikely(err)) {
+			loff_t isize = inode->i_size;
+			if (pos + blocksize > isize)
+				vmtruncate(inode, isize);
 			goto failed_inode;
+		}
 
 		err = nilfs_recovery_copy_block(nilfs, rb, page);
 		if (unlikely(err))
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2e6a2723b8f..4588fb9e93d 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -508,7 +508,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
 		 * Last BIO is always sent through the following
 		 * submission.
 		 */
-		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+		rw |= REQ_SYNC | REQ_UNPLUG;
 		res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
 	}
 
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 26078b3407c..1fa86b9df73 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -171,23 +171,6 @@ void nilfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
 }
 
-static void nilfs_clear_inode(struct inode *inode)
-{
-	struct nilfs_inode_info *ii = NILFS_I(inode);
-
-	/*
-	 * Free resources allocated in nilfs_read_inode(), here.
-	 */
-	BUG_ON(!list_empty(&ii->i_dirty));
-	brelse(ii->i_bh);
-	ii->i_bh = NULL;
-
-	if (test_bit(NILFS_I_BMAP, &ii->i_state))
-		nilfs_bmap_clear(ii->i_bmap);
-
-	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
-}
-
 static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
 {
 	struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -548,7 +531,7 @@ static const struct super_operations nilfs_sops = {
 	/* .write_inode    = nilfs_write_inode, */
 	/* .put_inode      = nilfs_put_inode, */
 	/* .drop_inode	  = nilfs_drop_inode, */
-	.delete_inode   = nilfs_delete_inode,
+	.evict_inode    = nilfs_evict_inode,
 	.put_super      = nilfs_put_super,
 	/* .write_super    = nilfs_write_super, */
 	.sync_fs        = nilfs_sync_fs,
@@ -556,7 +539,6 @@ static const struct super_operations nilfs_sops = {
 	/* .unlockfs */
 	.statfs         = nilfs_statfs,
 	.remount_fs     = nilfs_remount,
-	.clear_inode    = nilfs_clear_inode,
 	/* .umount_begin */
 	.show_options = nilfs_show_options
 };
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index dffbb0911d0..22c629eedd8 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,3 +3,4 @@ config FSNOTIFY
 
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
+source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 0922cc826c4..ae5f33a6d86 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,4 +1,6 @@
-obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o
+obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o \
+				   mark.o vfsmount_mark.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
+obj-y			+= fanotify/
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 7e54e52964d..3344bdd5506 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -29,17 +29,17 @@
 int dir_notify_enable __read_mostly = 1;
 
 static struct kmem_cache *dnotify_struct_cache __read_mostly;
-static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_cache __read_mostly;
 static struct fsnotify_group *dnotify_group __read_mostly;
 static DEFINE_MUTEX(dnotify_mark_mutex);
 
 /*
- * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * dnotify will attach one of these to each inode (i_fsnotify_marks) which
  * is being watched by dnotify.  If multiple userspace applications are watching
  * the same directory with dnotify their information is chained in dn
  */
-struct dnotify_mark_entry {
-	struct fsnotify_mark_entry fsn_entry;
+struct dnotify_mark {
+	struct fsnotify_mark fsn_mark;
 	struct dnotify_struct *dn;
 };
 
@@ -51,27 +51,27 @@ struct dnotify_mark_entry {
  * it calls the fsnotify function so it can update the set of all events relevant
  * to this inode.
  */
-static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
+static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
 {
 	__u32 new_mask, old_mask;
 	struct dnotify_struct *dn;
-	struct dnotify_mark_entry *dnentry  = container_of(entry,
-							   struct dnotify_mark_entry,
-							   fsn_entry);
+	struct dnotify_mark *dn_mark  = container_of(fsn_mark,
+						     struct dnotify_mark,
+						     fsn_mark);
 
-	assert_spin_locked(&entry->lock);
+	assert_spin_locked(&fsn_mark->lock);
 
-	old_mask = entry->mask;
+	old_mask = fsn_mark->mask;
 	new_mask = 0;
-	for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
+	for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
 		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
-	entry->mask = new_mask;
+	fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
 
 	if (old_mask == new_mask)
 		return;
 
-	if (entry->inode)
-		fsnotify_recalc_inode_mask(entry->inode);
+	if (fsn_mark->i.inode)
+		fsnotify_recalc_inode_mask(fsn_mark->i.inode);
 }
 
 /*
@@ -83,29 +83,25 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
  * events.
  */
 static int dnotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
 				struct fsnotify_event *event)
 {
-	struct fsnotify_mark_entry *entry = NULL;
-	struct dnotify_mark_entry *dnentry;
+	struct dnotify_mark *dn_mark;
 	struct inode *to_tell;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct fown_struct *fown;
 	__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
 
-	to_tell = event->to_tell;
+	BUG_ON(vfsmount_mark);
 
-	spin_lock(&to_tell->i_lock);
-	entry = fsnotify_find_mark_entry(group, to_tell);
-	spin_unlock(&to_tell->i_lock);
+	to_tell = event->to_tell;
 
-	/* unlikely since we alreay passed dnotify_should_send_event() */
-	if (unlikely(!entry))
-		return 0;
-	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+	dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
 
-	spin_lock(&entry->lock);
-	prev = &dnentry->dn;
+	spin_lock(&inode_mark->lock);
+	prev = &dn_mark->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_mask & test_mask) == 0) {
 			prev = &dn->dn_next;
@@ -118,12 +114,11 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 		else {
 			*prev = dn->dn_next;
 			kmem_cache_free(dnotify_struct_cache, dn);
-			dnotify_recalc_inode_mask(entry);
+			dnotify_recalc_inode_mask(inode_mark);
 		}
 	}
 
-	spin_unlock(&entry->lock);
-	fsnotify_put_mark(entry);
+	spin_unlock(&inode_mark->lock);
 
 	return 0;
 }
@@ -133,44 +128,27 @@ static int dnotify_handle_event(struct fsnotify_group *group,
  * userspace notification for that pair.
  */
 static bool dnotify_should_send_event(struct fsnotify_group *group,
-				      struct inode *inode, __u32 mask)
+				      struct inode *inode,
+				      struct fsnotify_mark *inode_mark,
+				      struct fsnotify_mark *vfsmount_mark,
+				      __u32 mask, void *data, int data_type)
 {
-	struct fsnotify_mark_entry *entry;
-	bool send;
-
-	/* !dir_notify_enable should never get here, don't waste time checking
-	if (!dir_notify_enable)
-		return 0; */
-
 	/* not a dir, dnotify doesn't care */
 	if (!S_ISDIR(inode->i_mode))
 		return false;
 
-	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(group, inode);
-	spin_unlock(&inode->i_lock);
-
-	/* no mark means no dnotify watch */
-	if (!entry)
-		return false;
-
-	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (mask & entry->mask);
-
-	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
-
-	return send;
+	return true;
 }
 
-static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
-	struct dnotify_mark_entry *dnentry = container_of(entry,
-							  struct dnotify_mark_entry,
-							  fsn_entry);
+	struct dnotify_mark *dn_mark = container_of(fsn_mark,
+						    struct dnotify_mark,
+						    fsn_mark);
 
-	BUG_ON(dnentry->dn);
+	BUG_ON(dn_mark->dn);
 
-	kmem_cache_free(dnotify_mark_entry_cache, dnentry);
+	kmem_cache_free(dnotify_mark_cache, dn_mark);
 }
 
 static struct fsnotify_ops dnotify_fsnotify_ops = {
@@ -183,15 +161,15 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
 
 /*
  * Called every time a file is closed.  Looks first for a dnotify mark on the
- * inode.  If one is found run all of the ->dn entries attached to that
+ * inode.  If one is found run all of the ->dn structures attached to that
  * mark for one relevant to this process closing the file and remove that
  * dnotify_struct.  If that was the last dnotify_struct also remove the
- * fsnotify_mark_entry.
+ * fsnotify_mark.
  */
 void dnotify_flush(struct file *filp, fl_owner_t id)
 {
-	struct fsnotify_mark_entry *entry;
-	struct dnotify_mark_entry *dnentry;
+	struct fsnotify_mark *fsn_mark;
+	struct dnotify_mark *dn_mark;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct inode *inode;
@@ -200,38 +178,34 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	if (!S_ISDIR(inode->i_mode))
 		return;
 
-	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(dnotify_group, inode);
-	spin_unlock(&inode->i_lock);
-	if (!entry)
+	fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
+	if (!fsn_mark)
 		return;
-	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
 
 	mutex_lock(&dnotify_mark_mutex);
 
-	spin_lock(&entry->lock);
-	prev = &dnentry->dn;
+	spin_lock(&fsn_mark->lock);
+	prev = &dn_mark->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
 			*prev = dn->dn_next;
 			kmem_cache_free(dnotify_struct_cache, dn);
-			dnotify_recalc_inode_mask(entry);
+			dnotify_recalc_inode_mask(fsn_mark);
 			break;
 		}
 		prev = &dn->dn_next;
 	}
 
-	spin_unlock(&entry->lock);
+	spin_unlock(&fsn_mark->lock);
 
 	/* nothing else could have found us thanks to the dnotify_mark_mutex */
-	if (dnentry->dn == NULL)
-		fsnotify_destroy_mark_by_entry(entry);
-
-	fsnotify_recalc_group_mask(dnotify_group);
+	if (dn_mark->dn == NULL)
+		fsnotify_destroy_mark(fsn_mark);
 
 	mutex_unlock(&dnotify_mark_mutex);
 
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(fsn_mark);
 }
 
 /* this conversion is done only at watch creation */
@@ -259,16 +233,16 @@ static __u32 convert_arg(unsigned long arg)
 
 /*
  * If multiple processes watch the same inode with dnotify there is only one
- * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
  * onto that mark.  This function either attaches the new dnotify_struct onto
  * that list, or it |= the mask onto an existing dnofiy_struct.
  */
-static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
 		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
 {
 	struct dnotify_struct *odn;
 
-	odn = dnentry->dn;
+	odn = dn_mark->dn;
 	while (odn != NULL) {
 		/* adding more events to existing dnofiy_struct? */
 		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
@@ -283,8 +257,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
 	dn->dn_fd = fd;
 	dn->dn_filp = filp;
 	dn->dn_owner = id;
-	dn->dn_next = dnentry->dn;
-	dnentry->dn = dn;
+	dn->dn_next = dn_mark->dn;
+	dn_mark->dn = dn;
 
 	return 0;
 }
@@ -296,8 +270,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
  */
 int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 {
-	struct dnotify_mark_entry *new_dnentry, *dnentry;
-	struct fsnotify_mark_entry *new_entry, *entry;
+	struct dnotify_mark *new_dn_mark, *dn_mark;
+	struct fsnotify_mark *new_fsn_mark, *fsn_mark;
 	struct dnotify_struct *dn;
 	struct inode *inode;
 	fl_owner_t id = current->files;
@@ -306,7 +280,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	__u32 mask;
 
 	/* we use these to tell if we need to kfree */
-	new_entry = NULL;
+	new_fsn_mark = NULL;
 	dn = NULL;
 
 	if (!dir_notify_enable) {
@@ -336,8 +310,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	}
 
 	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
-	new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
-	if (!new_dnentry) {
+	new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
+	if (!new_dn_mark) {
 		error = -ENOMEM;
 		goto out_err;
 	}
@@ -345,29 +319,27 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
 	mask = convert_arg(arg);
 
-	/* set up the new_entry and new_dnentry */
-	new_entry = &new_dnentry->fsn_entry;
-	fsnotify_init_mark(new_entry, dnotify_free_mark);
-	new_entry->mask = mask;
-	new_dnentry->dn = NULL;
+	/* set up the new_fsn_mark and new_dn_mark */
+	new_fsn_mark = &new_dn_mark->fsn_mark;
+	fsnotify_init_mark(new_fsn_mark, dnotify_free_mark);
+	new_fsn_mark->mask = mask;
+	new_dn_mark->dn = NULL;
 
 	/* this is needed to prevent the fcntl/close race described below */
 	mutex_lock(&dnotify_mark_mutex);
 
-	/* add the new_entry or find an old one. */
-	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(dnotify_group, inode);
-	spin_unlock(&inode->i_lock);
-	if (entry) {
-		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
-		spin_lock(&entry->lock);
+	/* add the new_fsn_mark or find an old one. */
+	fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
+	if (fsn_mark) {
+		dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
+		spin_lock(&fsn_mark->lock);
 	} else {
-		fsnotify_add_mark(new_entry, dnotify_group, inode);
-		spin_lock(&new_entry->lock);
-		entry = new_entry;
-		dnentry = new_dnentry;
-		/* we used new_entry, so don't free it */
-		new_entry = NULL;
+		fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0);
+		spin_lock(&new_fsn_mark->lock);
+		fsn_mark = new_fsn_mark;
+		dn_mark = new_dn_mark;
+		/* we used new_fsn_mark, so don't free it */
+		new_fsn_mark = NULL;
 	}
 
 	rcu_read_lock();
@@ -376,17 +348,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 
 	/* if (f != filp) means that we lost a race and another task/thread
 	 * actually closed the fd we are still playing with before we grabbed
-	 * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
-	 * only time we clean up the mark entries we need to get our mark off
+	 * the dnotify_mark_mutex and fsn_mark->lock.  Since closing the fd is the
+	 * only time we clean up the marks we need to get our mark off
 	 * the list. */
 	if (f != filp) {
 		/* if we added ourselves, shoot ourselves, it's possible that
-		 * the flush actually did shoot this entry.  That's fine too
+		 * the flush actually did shoot this fsn_mark.  That's fine too
 		 * since multiple calls to destroy_mark is perfectly safe, if
-		 * we found a dnentry already attached to the inode, just sod
+		 * we found a dn_mark already attached to the inode, just sod
 		 * off silently as the flush at close time dealt with it.
 		 */
-		if (dnentry == new_dnentry)
+		if (dn_mark == new_dn_mark)
 			destroy = 1;
 		goto out;
 	}
@@ -394,13 +366,13 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
 	if (error) {
 		/* if we added, we must shoot */
-		if (dnentry == new_dnentry)
+		if (dn_mark == new_dn_mark)
 			destroy = 1;
 		goto out;
 	}
 
-	error = attach_dn(dn, dnentry, id, fd, filp, mask);
-	/* !error means that we attached the dn to the dnentry, so don't free it */
+	error = attach_dn(dn, dn_mark, id, fd, filp, mask);
+	/* !error means that we attached the dn to the dn_mark, so don't free it */
 	if (!error)
 		dn = NULL;
 	/* -EEXIST means that we didn't add this new dn and used an old one.
@@ -408,20 +380,18 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	else if (error == -EEXIST)
 		error = 0;
 
-	dnotify_recalc_inode_mask(entry);
+	dnotify_recalc_inode_mask(fsn_mark);
 out:
-	spin_unlock(&entry->lock);
+	spin_unlock(&fsn_mark->lock);
 
 	if (destroy)
-		fsnotify_destroy_mark_by_entry(entry);
-
-	fsnotify_recalc_group_mask(dnotify_group);
+		fsnotify_destroy_mark(fsn_mark);
 
 	mutex_unlock(&dnotify_mark_mutex);
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(fsn_mark);
 out_err:
-	if (new_entry)
-		fsnotify_put_mark(new_entry);
+	if (new_fsn_mark)
+		fsnotify_put_mark(new_fsn_mark);
 	if (dn)
 		kmem_cache_free(dnotify_struct_cache, dn);
 	return error;
@@ -430,10 +400,9 @@ out_err:
 static int __init dnotify_init(void)
 {
 	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
 
-	dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
-					      0, &dnotify_fsnotify_ops);
+	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
 		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
new file mode 100644
index 00000000000..3ac36b7bf6b
--- /dev/null
+++ b/fs/notify/fanotify/Kconfig
@@ -0,0 +1,26 @@
+config FANOTIFY
+	bool "Filesystem wide access notification"
+	select FSNOTIFY
+	select ANON_INODES
+	default n
+	---help---
+	   Say Y here to enable fanotify support.  fanotify is a file access
+	   notification system which differs from inotify in that it sends
+	   an open file descriptor to the userspace listener along with
+	   the event.
+
+	   If unsure, say Y.
+
+config FANOTIFY_ACCESS_PERMISSIONS
+	bool "fanotify permissions checking"
+	depends on FANOTIFY
+	depends on SECURITY
+	default n
+	---help---
+	   Say Y here if you want fanotify listeners to be able to make permissions
+	   decisions concerning filesystem events.  This is used by some fanotify
+	   listeners which need to scan files before allowing the system access to
+	   use those files.  This is used by some anti-malware vendors and by some
+	   hierarchical storage management systems.
+
+	   If unsure, say N.
diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile
new file mode 100644
index 00000000000..0999213e7e6
--- /dev/null
+++ b/fs/notify/fanotify/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_FANOTIFY)		+= fanotify.o fanotify_user.o
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
new file mode 100644
index 00000000000..eb8f73c9c13
--- /dev/null
+++ b/fs/notify/fanotify/fanotify.c
@@ -0,0 +1,212 @@
+#include <linux/fanotify.h>
+#include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h> /* UINT_MAX */
+#include <linux/mount.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+/* true if @new describes the same object and tgid as @old and may be merged */
+static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+	pr_debug("%s: old=%p new=%p\n", __func__, old, new);
+
+	if (old->to_tell != new->to_tell || old->tgid != new->tgid ||
+	    old->data_type != new->data_type)
+		return false;
+	switch (old->data_type) {
+	case (FSNOTIFY_EVENT_FILE):
+		/* no fall through: events on different paths must not merge */
+		return old->file->f_path.mnt == new->file->f_path.mnt &&
+		       old->file->f_path.dentry == new->file->f_path.dentry;
+	case (FSNOTIFY_EVENT_NONE):
+		return true;
+	default:
+		BUG();
+	}
+	return false;
+}
+
+/* find an event on @list to merge @event into; the caller must lock @list */
+static struct fsnotify_event *fanotify_merge(struct list_head *list,
+					     struct fsnotify_event *event)
+{
+	struct fsnotify_event_holder *test_holder;
+	struct fsnotify_event *test_event = NULL;
+	struct fsnotify_event *new_event;
+
+	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
+
+	/* scan in reverse for the first event should_merge() accepts */
+	list_for_each_entry_reverse(test_holder, list, event_list) {
+		if (should_merge(test_holder->event, event)) {
+			test_event = test_holder->event;
+			break;
+		}
+	}
+
+	if (!test_event)
+		return NULL;
+
+	fsnotify_get_event(test_event);
+
+	/* if they are exactly the same we are done */
+	if (test_event->mask == event->mask)
+		return test_event;
+
+	/*
+	 * if the refcnt == 2 this is the only queue
+	 * for this event and so we can update the mask
+	 * in place.
+	 */
+	if (atomic_read(&test_event->refcnt) == 2) {
+		test_event->mask |= event->mask;
+		return test_event;
+	}
+
+	new_event = fsnotify_clone_event(test_event);
+
+	/* drop our ref; NOTE(review): test_event->mask is still read below */
+	fsnotify_put_event(test_event);
+
+	/* couldn't allocate memory, merge was not possible */
+	if (unlikely(!new_event))
+		return ERR_PTR(-ENOMEM);
+
+	/* build new event and replace it on the list */
+	new_event->mask = (test_event->mask | event->mask);
+	fsnotify_replace_event(test_holder, new_event);
+
+	/* we hold a reference on new_event from clone_event */
+	return new_event;
+}
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+static int fanotify_get_response_from_access(struct fsnotify_group *group,
+					     struct fsnotify_event *event)
+{
+	int ret;
+
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+	wait_event(group->fanotify_data.access_waitq, event->response);
+
+	/* userspace responded, convert to something usable */
+	spin_lock(&event->lock);
+	switch (event->response) {
+	case FAN_ALLOW:
+		ret = 0;
+		break;
+	case FAN_DENY:
+	default:
+		ret = -EPERM;
+	}
+	event->response = 0;
+	spin_unlock(&event->lock);
+
+	pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
+		 group, event, ret);
+	
+	return ret;
+}
+#endif
+
+static int fanotify_handle_event(struct fsnotify_group *group,
+				 struct fsnotify_mark *inode_mark,
+				 struct fsnotify_mark *fanotify_mark,
+				 struct fsnotify_event *event)
+{
+	int ret = 0;
+	struct fsnotify_event *notify_event = NULL;
+
+	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
+	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
+	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
+	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+	notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
+	if (IS_ERR(notify_event))
+		return PTR_ERR(notify_event);
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (event->mask & FAN_ALL_PERM_EVENTS) {
+		/* if we merged we need to wait on the new event */
+		if (notify_event)
+			event = notify_event;
+		ret = fanotify_get_response_from_access(group, event);
+	}
+#endif
+
+	if (notify_event)
+		fsnotify_put_event(notify_event);
+
+	return ret;
+}
+
+static bool fanotify_should_send_event(struct fsnotify_group *group,
+				       struct inode *to_tell,
+				       struct fsnotify_mark *inode_mark,
+				       struct fsnotify_mark *vfsmnt_mark,
+				       __u32 event_mask, void *data, int data_type)
+{
+	__u32 marks_mask, marks_ignored_mask;
+
+	pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
+		 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
+		 inode_mark, vfsmnt_mark, event_mask, data, data_type);
+
+	pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n",
+		 __func__, group, vfsmnt_mark, inode_mark, event_mask);
+
+	/* sorry, fanotify only gives a damn about files and dirs */
+	if (!S_ISREG(to_tell->i_mode) &&
+	    !S_ISDIR(to_tell->i_mode))
+		return false;
+
+	/* if we don't have enough info to send an event to userspace say no */
+	if (data_type != FSNOTIFY_EVENT_FILE)
+		return false;
+
+	if (inode_mark && vfsmnt_mark) {
+		marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
+		marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
+	} else if (inode_mark) {
+		/*
+		 * if the event is for a child and this inode doesn't care about
+		 * events on the child, don't send it!
+		 */
+		if ((event_mask & FS_EVENT_ON_CHILD) &&
+		    !(inode_mark->mask & FS_EVENT_ON_CHILD))
+			return false;
+		marks_mask = inode_mark->mask;
+		marks_ignored_mask = inode_mark->ignored_mask;
+	} else if (vfsmnt_mark) {
+		marks_mask = vfsmnt_mark->mask;
+		marks_ignored_mask = vfsmnt_mark->ignored_mask;
+	} else {
+		BUG();
+	}
+
+	if (event_mask & marks_mask & ~marks_ignored_mask)
+		return true;
+
+	return false;
+}
+
+const struct fsnotify_ops fanotify_fsnotify_ops = {
+	.handle_event = fanotify_handle_event,
+	.should_send_event = fanotify_should_send_event,
+	.free_group_priv = NULL,
+	.free_event_priv = NULL,
+	.freeing_mark = NULL,
+};
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
new file mode 100644
index 00000000000..25a3b4dfcf6
--- /dev/null
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -0,0 +1,760 @@
+#include <linux/fanotify.h>
+#include <linux/fcntl.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/init.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include <asm/ioctls.h>
+
+extern const struct fsnotify_ops fanotify_fsnotify_ops;
+
+static struct kmem_cache *fanotify_mark_cache __read_mostly;
+static struct kmem_cache *fanotify_response_event_cache __read_mostly;
+
+struct fanotify_response_event {
+	struct list_head list;
+	__s32 fd;
+	struct fsnotify_event *event;
+};
+
+/*
+ * Get an fsnotify notification event if one exists and is small
+ * enough to fit in "count".  Returns NULL if the queue is empty and
+ * ERR_PTR(-EINVAL) if "count" is smaller than FAN_EVENT_METADATA_LEN.
+ *
+ * Called with the group->notification_mutex held.
+ */
+static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
+					    size_t count)
+{
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+
+	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
+
+	if (fsnotify_notify_queue_is_empty(group))
+		return NULL;
+
+	if (FAN_EVENT_METADATA_LEN > count)
+		return ERR_PTR(-EINVAL);
+
+	/* the caller held notification_mutex the whole time, so the queue
+	 * seen non-empty above still has an event for us to remove */
+	return fsnotify_remove_notify_event(group);
+}
+
+/* open the event's file for the listener; returns the new fd or a -errno */
+static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	int client_fd;
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+	struct file *new_file;
+
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+	client_fd = get_unused_fd();
+	if (client_fd < 0)
+		return client_fd;
+
+	if (event->data_type != FSNOTIFY_EVENT_FILE) {
+		WARN_ON(1);
+		put_unused_fd(client_fd);
+		return -EINVAL;
+	}
+
+	/*
+	 * open a fresh struct file for the listener so it can read the object
+	 * even if the triggering file was originally opened O_WRONLY.
+	 */
+	dentry = dget(event->file->f_path.dentry);
+	mnt = mntget(event->file->f_path.mnt);
+	/* NOTE(review): an overflow event is said to have NULL dentry and mnt;
+	 * in that case skip dentry_open() and report -EOVERFLOW instead */
+	if (dentry && mnt)
+		new_file = dentry_open(dentry, mnt,
+				       group->fanotify_data.f_flags | FMODE_NONOTIFY,
+				       current_cred());
+	else
+		new_file = ERR_PTR(-EOVERFLOW);
+	if (IS_ERR(new_file)) {
+		/*
+		 * we can't open the file.  this can happen when say tasks are
+		 * gone and we try to open their /proc files or we try to open
+		 * a WRONLY file like in sysfs.  return the errno in place of
+		 * the fd since there isn't much else we can do.
+		 */
+		put_unused_fd(client_fd);
+		client_fd = PTR_ERR(new_file);
+	} else {
+		fd_install(client_fd, new_file);
+	}
+
+	return client_fd;
+}
+
+static ssize_t fill_event_metadata(struct fsnotify_group *group,
+				   struct fanotify_event_metadata *metadata,
+				   struct fsnotify_event *event)
+{
+	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
+		 group, metadata, event);
+
+	metadata->event_len = FAN_EVENT_METADATA_LEN;
+	metadata->vers = FANOTIFY_METADATA_VERSION;
+	metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
+	metadata->pid = pid_vnr(event->tgid);
+	metadata->fd = create_fd(group, event);
+
+	return metadata->fd;
+}
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group,
+						  __s32 fd)
+{
+	struct fanotify_response_event *re, *return_re = NULL;
+
+	mutex_lock(&group->fanotify_data.access_mutex);
+	list_for_each_entry(re, &group->fanotify_data.access_list, list) {
+		if (re->fd != fd)
+			continue;
+
+		list_del_init(&re->list);
+		return_re = re;
+		break;
+	}
+	mutex_unlock(&group->fanotify_data.access_mutex);
+
+	pr_debug("%s: found return_re=%p\n", __func__, return_re);
+
+	return return_re;
+}
+
+/* store userspace's verdict on the event queued for that fd and wake waiters */
+static int process_access_response(struct fsnotify_group *group,
+				   struct fanotify_response *response_struct)
+{
+	struct fanotify_response_event *re;
+	__s32 fd = response_struct->fd;
+	__u32 response = response_struct->response;
+
+	pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
+		 fd, response);
+	/*
+	 * an invalid response is ignored; userspace can still send a valid
+	 * response later (NOTE(review): old text cited a timeout - confirm)
+	 */
+	switch (response) {
+	case FAN_ALLOW:
+	case FAN_DENY:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (fd < 0)
+		return -EINVAL;
+
+	re = dequeue_re(group, fd);
+	if (!re)
+		return -ENOENT;
+
+	re->event->response = response;
+
+	wake_up(&group->fanotify_data.access_waitq);
+
+	kmem_cache_free(fanotify_response_event_cache, re);
+
+	return 0;
+}
+
+static int prepare_for_access_response(struct fsnotify_group *group,
+				       struct fsnotify_event *event,
+				       __s32 fd)
+{
+	struct fanotify_response_event *re;
+
+	if (!(event->mask & FAN_ALL_PERM_EVENTS))
+		return 0;
+
+	re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL);
+	if (!re)
+		return -ENOMEM;
+
+	re->event = event;
+	re->fd = fd;
+
+	mutex_lock(&group->fanotify_data.access_mutex);
+	list_add_tail(&re->list, &group->fanotify_data.access_list);
+	mutex_unlock(&group->fanotify_data.access_mutex);
+
+	return 0;
+}
+
+static void remove_access_response(struct fsnotify_group *group,
+				   struct fsnotify_event *event,
+				   __s32 fd)
+{
+	struct fanotify_response_event *re;
+
+	if (!(event->mask & FAN_ALL_PERM_EVENTS))
+		return;
+
+	re = dequeue_re(group, fd);
+	if (!re)
+		return;
+
+	BUG_ON(re->event != event);
+
+	kmem_cache_free(fanotify_response_event_cache, re);
+
+	return;
+}
+#else
+static int prepare_for_access_response(struct fsnotify_group *group,
+				       struct fsnotify_event *event,
+				       __s32 fd)
+{
+	return 0;
+}
+
+static void remove_access_response(struct fsnotify_group *group,
+				   struct fsnotify_event *event,
+				   __s32 fd)
+{
+	return;
+}
+#endif
+
+static ssize_t copy_event_to_user(struct fsnotify_group *group,
+				  struct fsnotify_event *event,
+				  char __user *buf)
+{
+	struct fanotify_event_metadata fanotify_event_metadata;
+	int fd, ret;
+
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+	fd = fill_event_metadata(group, &fanotify_event_metadata, event);
+	if (fd < 0)
+		return fd;
+
+	ret = prepare_for_access_response(group, event, fd);
+	if (ret)
+		goto out_close_fd;
+
+	ret = -EFAULT;
+	if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
+		goto out_kill_access_response;
+
+	return FAN_EVENT_METADATA_LEN;
+
+out_kill_access_response:
+	remove_access_response(group, event, fd);
+out_close_fd:
+	sys_close(fd);
+	return ret;
+}
+
+/* fanotify userspace file descriptor functions */
+static unsigned int fanotify_poll(struct file *file, poll_table *wait)
+{
+	struct fsnotify_group *group = file->private_data;
+	int ret = 0;
+
+	poll_wait(file, &group->notification_waitq, wait);
+	mutex_lock(&group->notification_mutex);
+	if (!fsnotify_notify_queue_is_empty(group))
+		ret = POLLIN | POLLRDNORM;
+	mutex_unlock(&group->notification_mutex);
+
+	return ret;
+}
+
+static ssize_t fanotify_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *pos)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event *kevent;
+	char __user *start;
+	int ret;
+	DEFINE_WAIT(wait);
+
+	start = buf;
+	group = file->private_data;
+
+	pr_debug("%s: group=%p\n", __func__, group);
+
+	while (1) {
+		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
+
+		mutex_lock(&group->notification_mutex);
+		kevent = get_one_event(group, count);
+		mutex_unlock(&group->notification_mutex);
+
+		if (kevent) {
+			ret = PTR_ERR(kevent);
+			if (IS_ERR(kevent))
+				break;
+			ret = copy_event_to_user(group, kevent, buf);
+			fsnotify_put_event(kevent);
+			if (ret < 0)
+				break;
+			buf += ret;
+			count -= ret;
+			continue;
+		}
+
+		ret = -EAGAIN;
+		if (file->f_flags & O_NONBLOCK)
+			break;
+		ret = -EINTR;
+		if (signal_pending(current))
+			break;
+
+		if (start != buf)
+			break;
+
+		schedule();
+	}
+
+	finish_wait(&group->notification_waitq, &wait);
+	if (start != buf && ret != -EFAULT)
+		ret = buf - start;
+	return ret;
+}
+
+static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
+{
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	struct fanotify_response response = { .fd = -1, .response = -1 };
+	struct fsnotify_group *group;
+	int ret;
+
+	group = file->private_data;
+
+	if (count > sizeof(response))
+		count = sizeof(response);
+
+	pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
+
+	if (copy_from_user(&response, buf, count))
+		return -EFAULT;
+
+	ret = process_access_response(group, &response);
+	if (ret < 0)
+		count = ret;
+
+	return count;
+#else
+	return -EINVAL;
+#endif
+}
+
+static int fanotify_release(struct inode *ignored, struct file *file)
+{
+	struct fsnotify_group *group = file->private_data;
+
+	pr_debug("%s: file=%p group=%p\n", __func__, file, group);
+
+	/* matches the fanotify_init->fsnotify_alloc_group */
+	fsnotify_put_group(group);
+
+	return 0;
+}
+
+static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event_holder *holder;
+	void __user *p;
+	int ret = -ENOTTY;
+	size_t send_len = 0;
+
+	group = file->private_data;
+
+	p = (void __user *) arg;
+
+	switch (cmd) {
+	case FIONREAD:
+		mutex_lock(&group->notification_mutex);
+		list_for_each_entry(holder, &group->notification_list, event_list)
+			send_len += FAN_EVENT_METADATA_LEN;
+		mutex_unlock(&group->notification_mutex);
+		ret = put_user(send_len, (int __user *) p);
+		break;
+	}
+
+	return ret;
+}
+
+static const struct file_operations fanotify_fops = {
+	.poll		= fanotify_poll,
+	.read		= fanotify_read,
+	.write		= fanotify_write,
+	.fasync		= NULL,
+	.release	= fanotify_release,
+	.unlocked_ioctl	= fanotify_ioctl,
+	.compat_ioctl	= fanotify_ioctl,
+};
+
+static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
+{
+	kmem_cache_free(fanotify_mark_cache, fsn_mark);
+}
+
+static int fanotify_find_path(int dfd, const char __user *filename,
+			      struct path *path, unsigned int flags)
+{
+	int ret;
+
+	pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
+		 dfd, filename, flags);
+
+	if (filename == NULL) {
+		struct file *file;
+		int fput_needed;
+
+		ret = -EBADF;
+		file = fget_light(dfd, &fput_needed);
+		if (!file)
+			goto out;
+
+		ret = -ENOTDIR;
+		if ((flags & FAN_MARK_ONLYDIR) &&
+		    !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
+			fput_light(file, fput_needed);
+			goto out;
+		}
+
+		*path = file->f_path;
+		path_get(path);
+		fput_light(file, fput_needed);
+	} else {
+		unsigned int lookup_flags = 0;
+
+		if (!(flags & FAN_MARK_DONT_FOLLOW))
+			lookup_flags |= LOOKUP_FOLLOW;
+		if (flags & FAN_MARK_ONLYDIR)
+			lookup_flags |= LOOKUP_DIRECTORY;
+
+		ret = user_path_at(dfd, filename, lookup_flags, path);
+		if (ret)
+			goto out;
+	}
+
+	/* you can only watch an inode if you have read permissions on it */
+	ret = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (ret)
+		path_put(path);
+out:
+	return ret;
+}
+
+static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
+					    __u32 mask,
+					    unsigned int flags)
+{
+	__u32 oldmask;
+
+	spin_lock(&fsn_mark->lock);
+	if (!(flags & FAN_MARK_IGNORED_MASK)) {
+		oldmask = fsn_mark->mask;
+		fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
+	} else {
+		oldmask = fsn_mark->ignored_mask;
+		fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
+	}
+	spin_unlock(&fsn_mark->lock);
+
+	if (!(oldmask & ~mask))
+		fsnotify_destroy_mark(fsn_mark);
+
+	return mask & oldmask;
+}
+
+static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
+					 struct vfsmount *mnt, __u32 mask,
+					 unsigned int flags)
+{
+	struct fsnotify_mark *fsn_mark = NULL;
+	__u32 removed;
+
+	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
+	if (!fsn_mark)
+		return -ENOENT;
+
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
+	fsnotify_put_mark(fsn_mark);
+	if (removed & mnt->mnt_fsnotify_mask)
+		fsnotify_recalc_vfsmount_mask(mnt);
+
+	return 0;
+}
+
+static int fanotify_remove_inode_mark(struct fsnotify_group *group,
+				      struct inode *inode, __u32 mask,
+				      unsigned int flags)
+{
+	struct fsnotify_mark *fsn_mark = NULL;
+	__u32 removed;
+
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
+	if (!fsn_mark)
+		return -ENOENT;
+
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
+	/* matches the fsnotify_find_inode_mark() */
+	fsnotify_put_mark(fsn_mark);
+	if (removed & inode->i_fsnotify_mask)
+		fsnotify_recalc_inode_mask(inode);
+
+	return 0;
+}
+
+/* OR @mask into the mark's event or ignored mask; returns the bits newly set. */
+static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+				       __u32 mask, unsigned int flags)
+{
+	__u32 before;
+
+	spin_lock(&fsn_mark->lock);
+	if (flags & FAN_MARK_IGNORED_MASK) {
+		before = fsn_mark->ignored_mask;
+		fsnotify_set_mark_ignored_mask_locked(fsn_mark, (before | mask));
+		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
+			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
+	} else {
+		before = fsn_mark->mask;
+		fsnotify_set_mark_mask_locked(fsn_mark, (before | mask));
+	}
+	spin_unlock(&fsn_mark->lock);
+
+	return mask & ~before;
+}
+
+/* Add @mask to @group's mark on @mnt, creating and attaching the mark first
+ * if the group has none there.  Returns 0, -ENOMEM or the attach error. */
+static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
+				      struct vfsmount *mnt, __u32 mask,
+				      unsigned int flags)
+{
+	struct fsnotify_mark *mark = fsnotify_find_vfsmount_mark(group, mnt);
+	__u32 newly_set;
+	int err;
+
+	if (mark == NULL) {
+		mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
+		if (mark == NULL)
+			return -ENOMEM;
+
+		fsnotify_init_mark(mark, fanotify_free_mark);
+		err = fsnotify_add_mark(mark, group, NULL, mnt, 0);
+		if (err != 0) {
+			fanotify_free_mark(mark);
+			return err;
+		}
+	}
+	newly_set = fanotify_mark_add_to_mask(mark, mask, flags);
+	fsnotify_put_mark(mark);
+	if ((newly_set & ~mnt->mnt_fsnotify_mask) != 0)
+		fsnotify_recalc_vfsmount_mask(mnt);
+
+	return 0;
+}
+
+/* Add @mask to @group's mark on @inode, creating the mark if there is none. */
+static int fanotify_add_inode_mark(struct fsnotify_group *group,
+				   struct inode *inode, __u32 mask,
+				   unsigned int flags)
+{
+	struct fsnotify_mark *mark;
+	__u32 newly_set;
+	int err;
+
+	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
+
+	mark = fsnotify_find_inode_mark(group, inode);
+	if (mark == NULL) {
+		mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
+		if (mark == NULL)
+			return -ENOMEM;
+
+		fsnotify_init_mark(mark, fanotify_free_mark);
+		err = fsnotify_add_mark(mark, group, inode, NULL, 0);
+		if (err != 0) {
+			fanotify_free_mark(mark);
+			return err;
+		}
+	}
+	newly_set = fanotify_mark_add_to_mask(mark, mask, flags);
+	fsnotify_put_mark(mark);
+	if ((newly_set & ~inode->i_fsnotify_mask) != 0)
+		fsnotify_recalc_inode_mask(inode);
+	return 0;
+}
+
+/* fanotify syscalls */
+/*
+ * fanotify_init - create an fanotify group and return an fd referring to it.
+ * Returns the new fd, -EPERM without CAP_SYS_ADMIN, -EINVAL for unknown
+ * @flags, or the error from group allocation / fd creation.
+ */
+SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
+{
+	struct fsnotify_group *group;
+	int f_flags, fd;
+
+	pr_debug("%s: flags=%d event_f_flags=%d\n",
+		__func__, flags, event_f_flags);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (flags & ~FAN_ALL_INIT_FLAGS)
+		return -EINVAL;
+
+	f_flags = O_RDWR | FMODE_NONOTIFY;
+	if (flags & FAN_CLOEXEC)
+		f_flags |= O_CLOEXEC;
+	if (flags & FAN_NONBLOCK)
+		f_flags |= O_NONBLOCK;
+
+	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
+	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	group->fanotify_data.f_flags = event_f_flags;
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	mutex_init(&group->fanotify_data.access_mutex);
+	init_waitqueue_head(&group->fanotify_data.access_waitq);
+	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+#endif
+
+	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
+	if (fd < 0)
+		fsnotify_put_group(group);	/* fanotify_release won't run */
+	return fd;
+}
+
+SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
+			      __u64 mask, int dfd,
+			      const char  __user * pathname)
+{
+	struct inode *inode = NULL;
+	struct vfsmount *mnt = NULL;
+	struct fsnotify_group *group;
+	struct file *filp;
+	struct path path;
+	int ret, fput_needed;
+	/* add, remove or flush marks for the fanotify group behind fanotify_fd */
+	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
+		 __func__, fanotify_fd, flags, dfd, pathname, mask);
+
+	/* we only use the lower 32 bits as of right now. */
+	if (mask & ((__u64)0xffffffff << 32))
+		return -EINVAL;
+
+	if (flags & ~FAN_ALL_MARK_FLAGS)
+		return -EINVAL;
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
+	case FAN_MARK_ADD:
+	case FAN_MARK_REMOVE:
+	case FAN_MARK_FLUSH:
+		break;
+	default:	/* no action given, or a combination of them */
+		return -EINVAL;
+	}
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
+#else
+	if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD))
+#endif
+		return -EINVAL;
+
+	filp = fget_light(fanotify_fd, &fput_needed);
+	if (unlikely(!filp))
+		return -EBADF;
+
+	/* verify that this is indeed an fanotify instance */
+	ret = -EINVAL;
+	if (unlikely(filp->f_op != &fanotify_fops))
+		goto fput_and_out;
+
+	ret = fanotify_find_path(dfd, pathname, &path, flags);
+	if (ret)
+		goto fput_and_out;
+
+	/* inode held in place by reference to path; group by fget on fd */
+	if (!(flags & FAN_MARK_MOUNT))
+		inode = path.dentry->d_inode;
+	else
+		mnt = path.mnt;
+	group = filp->private_data;
+
+	/* add to, remove from, or flush the group's inode/vfsmount marks */
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
+	case FAN_MARK_ADD:
+		if (flags & FAN_MARK_MOUNT)
+			ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
+		else
+			ret = fanotify_add_inode_mark(group, inode, mask, flags);
+		break;
+	case FAN_MARK_REMOVE:
+		if (flags & FAN_MARK_MOUNT)
+			ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
+		else
+			ret = fanotify_remove_inode_mark(group, inode, mask, flags);
+		break;
+	case FAN_MARK_FLUSH:	/* ret is still 0 from fanotify_find_path() */
+		if (flags & FAN_MARK_MOUNT)
+			fsnotify_clear_vfsmount_marks_by_group(group);
+		else
+			fsnotify_clear_inode_marks_by_group(group);
+		break;
+	default:	/* not reached: the action was validated above */
+		ret = -EINVAL;
+	}
+
+	path_put(&path);
+fput_and_out:
+	fput_light(filp, fput_needed);
+	return ret;
+}
+
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_fanotify_mark(long fanotify_fd, long flags, __u64 mask,
+				  long dfd, long pathname)
+{	/* cast the long-sized arguments back to the types SYSC_ expects */
+	return SYSC_fanotify_mark((int) fanotify_fd, (unsigned int) flags,
+				  mask, (int) dfd,
+				  (const char  __user *) pathname);
+}
+SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
+#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
+/*
+ * fanotify_user_setup - Our initialization function.  Note that we cannot return
+ * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
+ * must result in panic(), which is why both caches are created with SLAB_PANIC.
+ */
+static int __init fanotify_user_setup(void)
+{
+	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+	fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
+						   SLAB_PANIC);
+
+	return 0;
+}
+device_initcall(fanotify_user_setup);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index fcc2f064af8..4d2a82c1ceb 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -21,6 +21,7 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mount.h>
 #include <linux/srcu.h>
 
 #include <linux/fsnotify_backend.h>
@@ -35,6 +36,11 @@ void __fsnotify_inode_delete(struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
 
+void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
+{
+	fsnotify_clear_marks_by_mount(mnt);	/* flag all of mnt's marks to be freed */
+}
+
 /*
  * Given an inode, first check if we care what happens to our children.  Inotify
  * and dnotify both tell their parents about events.  If we care about any event
@@ -78,13 +84,16 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 }
 
 /* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 {
 	struct dentry *parent;
 	struct inode *p_inode;
 	bool send = false;
 	bool should_update_children = false;
 
+	if (!dentry)
+		dentry = file->f_path.dentry;
+
 	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
 		return;
 
@@ -115,8 +124,12 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
-		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-			 dentry->d_name.name, 0);
+		if (file)
+			fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE,
+				 dentry->d_name.name, 0);
+		else
+			fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+				 dentry->d_name.name, 0);
 		dput(parent);
 	}
 
@@ -127,63 +140,181 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
+/* Offer one event to the group owning @inode_mark and/or @vfsmount_mark (either
+ * may be NULL, not both); returns 0, -ENOMEM or the handle_event() result. */
+static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
+			 struct fsnotify_mark *inode_mark,
+			 struct fsnotify_mark *vfsmount_mark,
+			 __u32 mask, void *data, int data_is, u32 cookie,
+			 const unsigned char *file_name,
+			 struct fsnotify_event **event)
+{
+	struct fsnotify_group *group = inode_mark ? inode_mark->group :
+						    vfsmount_mark->group;
+	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+	__u32 inode_test_mask = 0, vfsmount_test_mask = 0;
+
+	pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p"
+		 " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
+		 mnt, inode_mark, mask, data, data_is, cookie, *event);
+
+	/* clear ignored on inode modification */
+	if (mask & FS_MODIFY) {
+		if (inode_mark &&
+		    !(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+			inode_mark->ignored_mask = 0;
+		if (vfsmount_mark &&
+		    !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+			vfsmount_mark->ignored_mask = 0;
+	}
+
+	/* does the inode mark tell us to do something? (no mark: no interest) */
+	if (inode_mark) {
+		inode_test_mask = test_mask & inode_mark->mask;
+		inode_test_mask &= ~inode_mark->ignored_mask;
+	}
+
+	/* does the vfsmount_mark tell us to do something? */
+	if (vfsmount_mark) {
+		vfsmount_test_mask = test_mask & vfsmount_mark->mask;
+		vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
+		if (inode_mark)
+			vfsmount_test_mask &= ~inode_mark->ignored_mask;
+	}
+
+	if (!inode_test_mask && !vfsmount_test_mask)
+		return 0;
+
+	if (!group->ops->should_send_event(group, to_tell, inode_mark,
+					   vfsmount_mark, mask, data, data_is))
+		return 0;
+
+	if (!*event) {
+		*event = fsnotify_create_event(to_tell, mask, data, data_is,
+					       file_name, cookie, GFP_KERNEL);
+		if (!*event)
+			return -ENOMEM;
+	}
+	return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
+}
+
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
+int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+	     const unsigned char *file_name, u32 cookie)
 {
-	struct fsnotify_group *group;
+	struct hlist_node *inode_node, *vfsmount_node;
+	struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
+	struct fsnotify_group *inode_group, *vfsmount_group;
 	struct fsnotify_event *event = NULL;
-	int idx;
+	struct vfsmount *mnt;
+	int idx, ret = 0;
+	bool used_inode = false, used_vfsmount = false;
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
-	if (list_empty(&fsnotify_groups))
-		return;
+	if (data_is == FSNOTIFY_EVENT_FILE)
+		mnt = ((struct file *)data)->f_path.mnt;
+	else
+		mnt = NULL;
 
-	if (!(test_mask & fsnotify_mask))
-		return;
-
-	if (!(test_mask & to_tell->i_fsnotify_mask))
-		return;
 	/*
-	 * SRCU!!  the groups list is very very much read only and the path is
-	 * very hot.  The VAST majority of events are not going to need to do
-	 * anything other than walk the list so it's crazy to pre-allocate.
+	 * if this is a modify event we may need to clear the ignored masks
+	 * otherwise return if neither the inode nor the vfsmount care about
+	 * this type of event.
 	 */
-	idx = srcu_read_lock(&fsnotify_grp_srcu);
-	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
-		if (test_mask & group->mask) {
-			if (!group->ops->should_send_event(group, to_tell, mask))
-				continue;
-			if (!event) {
-				event = fsnotify_create_event(to_tell, mask, data,
-							      data_is, file_name, cookie,
-							      GFP_KERNEL);
-				/* shit, we OOM'd and now we can't tell, maybe
-				 * someday someone else will want to do something
-				 * here */
-				if (!event)
-					break;
-			}
-			group->ops->handle_event(group, event);
+	if (!(mask & FS_MODIFY) &&
+	    !(test_mask & to_tell->i_fsnotify_mask) &&
+	    !(mnt && test_mask & mnt->mnt_fsnotify_mask))
+		return 0;
+
+	idx = srcu_read_lock(&fsnotify_mark_srcu);
+
+	if ((mask & FS_MODIFY) ||
+	    (test_mask & to_tell->i_fsnotify_mask))
+		inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
+					      &fsnotify_mark_srcu);
+	else
+		inode_node = NULL;
+
+	if (mnt) {
+		if ((mask & FS_MODIFY) ||
+		    (test_mask & mnt->mnt_fsnotify_mask))
+			vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
+							 &fsnotify_mark_srcu);
+		else
+			vfsmount_node = NULL;
+	} else {
+		vfsmount_node = NULL;
+	}
+
+	while (inode_node || vfsmount_node) {
+		/* forget last pass, or a stale flag walks off a NULL node */
+		used_inode = used_vfsmount = false;
+		if (inode_node) {
+			inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
+						 struct fsnotify_mark, i.i_list);
+			inode_group = inode_mark->group;
+		} else
+			inode_group = (void *)-1;
+
+		if (vfsmount_node) {
+			vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
+							struct fsnotify_mark, m.m_list);
+			vfsmount_group = vfsmount_mark->group;
+		} else
+			vfsmount_group = (void *)-1;
+
+		if (inode_group < vfsmount_group) {
+			/* handle inode */
+			send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
+				      data_is, cookie, file_name, &event);
+			used_inode = true;
+		} else if (vfsmount_group < inode_group) {
+			send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
+				      data_is, cookie, file_name, &event);
+			used_vfsmount = true;
+		} else {
+			send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+				      mask, data, data_is, cookie, file_name, &event);
+			used_vfsmount = true;
+			used_inode = true;
 		}
+
+		if (used_inode)
+			inode_node = srcu_dereference(inode_node->next,
+						      &fsnotify_mark_srcu);
+		if (used_vfsmount)
+			vfsmount_node = srcu_dereference(vfsmount_node->next,
+							 &fsnotify_mark_srcu);
 	}
-	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+
+	srcu_read_unlock(&fsnotify_mark_srcu, idx);
 	/*
 	 * fsnotify_create_event() took a reference so the event can't be cleaned
 	 * up while we are still trying to add it to lists, drop that one.
 	 */
 	if (event)
 		fsnotify_put_event(event);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(fsnotify);
 
 static __init int fsnotify_init(void)
 {
-	return init_srcu_struct(&fsnotify_grp_srcu);
+	int ret;
+	/* sanity check: ALL_FSNOTIFY_EVENTS must have exactly 23 bits set */
+	BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
+
+	ret = init_srcu_struct(&fsnotify_mark_srcu);
+	if (ret)
+		panic("initializing fsnotify_mark_srcu");
+
+	return 0;
 }
-subsys_initcall(fsnotify_init);
+core_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 4dc240824b2..85e7d2b431d 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -6,21 +6,34 @@
 #include <linux/srcu.h>
 #include <linux/types.h>
 
-/* protects reads of fsnotify_groups */
-extern struct srcu_struct fsnotify_grp_srcu;
-/* all groups which receive fsnotify events */
-extern struct list_head fsnotify_groups;
-/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
-extern __u32 fsnotify_mask;
-
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
+/* protects reads of inode and vfsmount marks list */
+extern struct srcu_struct fsnotify_mark_srcu;
+
+extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
+						__u32 mask);
+/* add a mark to an inode */
+extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
+				   struct fsnotify_group *group, struct inode *inode,
+				   int allow_dups);
+/* add a mark to a vfsmount */
+extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
+				      struct fsnotify_group *group, struct vfsmount *mnt,
+				      int allow_dups);
+
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 
+/* vfsmount specific destruction of a mark */
+extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
+/* inode specific destruction of a mark */
+extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
 /* run the list of all marks associated with inode and flag them to be freed */
 extern void fsnotify_clear_marks_by_inode(struct inode *inode);
+/* run the list of all marks associated with vfsmount and flag them to be freed */
+extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt);
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
  * about events that happen to its children.
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0e1677144bc..d309f38449c 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -28,64 +28,6 @@
 
 #include <asm/atomic.h>
 
-/* protects writes to fsnotify_groups and fsnotify_mask */
-static DEFINE_MUTEX(fsnotify_grp_mutex);
-/* protects reads while running the fsnotify_groups list */
-struct srcu_struct fsnotify_grp_srcu;
-/* all groups registered to receive filesystem notifications */
-LIST_HEAD(fsnotify_groups);
-/* bitwise OR of all events (FS_*) interesting to some group on this system */
-__u32 fsnotify_mask;
-
-/*
- * When a new group registers or changes it's set of interesting events
- * this function updates the fsnotify_mask to contain all interesting events
- */
-void fsnotify_recalc_global_mask(void)
-{
-	struct fsnotify_group *group;
-	__u32 mask = 0;
-	int idx;
-
-	idx = srcu_read_lock(&fsnotify_grp_srcu);
-	list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
-		mask |= group->mask;
-	srcu_read_unlock(&fsnotify_grp_srcu, idx);
-	fsnotify_mask = mask;
-}
-
-/*
- * Update the group->mask by running all of the marks associated with this
- * group and finding the bitwise | of all of the mark->mask.  If we change
- * the group->mask we need to update the global mask of events interesting
- * to the system.
- */
-void fsnotify_recalc_group_mask(struct fsnotify_group *group)
-{
-	__u32 mask = 0;
-	__u32 old_mask = group->mask;
-	struct fsnotify_mark_entry *entry;
-
-	spin_lock(&group->mark_lock);
-	list_for_each_entry(entry, &group->mark_entries, g_list)
-		mask |= entry->mask;
-	spin_unlock(&group->mark_lock);
-
-	group->mask = mask;
-
-	if (old_mask != mask)
-		fsnotify_recalc_global_mask();
-}
-
-/*
- * Take a reference to a group so things found under the fsnotify_grp_mutex
- * can't get freed under us
- */
-static void fsnotify_get_group(struct fsnotify_group *group)
-{
-	atomic_inc(&group->refcnt);
-}
-
 /*
  * Final freeing of a group
  */
@@ -110,145 +52,53 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group)
  */
 static void fsnotify_destroy_group(struct fsnotify_group *group)
 {
-	/* clear all inode mark entries for this group */
+	/* clear all inode marks for this group */
 	fsnotify_clear_marks_by_group(group);
 
+	synchronize_srcu(&fsnotify_mark_srcu);	/* wait out mark-list readers */
+
 	/* past the point of no return, matches the initial value of 1 */
 	if (atomic_dec_and_test(&group->num_marks))
 		fsnotify_final_destroy_group(group);
 }
 
 /*
- * Remove this group from the global list of groups that will get events
- * this can be done even if there are still references and things still using
- * this group.  This just stops the group from getting new events.
- */
-static void __fsnotify_evict_group(struct fsnotify_group *group)
-{
-	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
-
-	if (group->on_group_list)
-		list_del_rcu(&group->group_list);
-	group->on_group_list = 0;
-}
-
-/*
- * Called when a group is no longer interested in getting events.  This can be
- * used if a group is misbehaving or if for some reason a group should no longer
- * get any filesystem events.
- */
-void fsnotify_evict_group(struct fsnotify_group *group)
-{
-	mutex_lock(&fsnotify_grp_mutex);
-	__fsnotify_evict_group(group);
-	mutex_unlock(&fsnotify_grp_mutex);
-}
-
-/*
  * Drop a reference to a group.  Free it if it's through.
  */
 void fsnotify_put_group(struct fsnotify_group *group)
 {
-	if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
-		return;
-
-	/*
-	 * OK, now we know that there's no other users *and* we hold mutex,
-	 * so no new references will appear
-	 */
-	__fsnotify_evict_group(group);
-
-	/*
-	 * now it's off the list, so the only thing we might care about is
-	 * srcu access....
-	 */
-	mutex_unlock(&fsnotify_grp_mutex);
-	synchronize_srcu(&fsnotify_grp_srcu);
-
-	/* and now it is really dead. _Nothing_ could be seeing it */
-	fsnotify_recalc_global_mask();
-	fsnotify_destroy_group(group);
-}
-
-/*
- * Simply run the fsnotify_groups list and find a group which matches
- * the given parameters.  If a group is found we take a reference to that
- * group.
- */
-static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
-						  const struct fsnotify_ops *ops)
-{
-	struct fsnotify_group *group_iter;
-	struct fsnotify_group *group = NULL;
-
-	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
-
-	list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
-		if (group_iter->group_num == group_num) {
-			if ((group_iter->mask == mask) &&
-			    (group_iter->ops == ops)) {
-				fsnotify_get_group(group_iter);
-				group = group_iter;
-			} else
-				group = ERR_PTR(-EEXIST);
-		}
-	}
-	return group;
+	if (atomic_dec_and_test(&group->refcnt))
+		fsnotify_destroy_group(group);
 }
 
 /*
- * Either finds an existing group which matches the group_num, mask, and ops or
- * creates a new group and adds it to the global group list.  In either case we
- * take a reference for the group returned.
+ * Create a new fsnotify_group and hold a reference for the group returned.
  */
-struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
-					     const struct fsnotify_ops *ops)
+struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 {
-	struct fsnotify_group *group, *tgroup;
+	struct fsnotify_group *group;
 
-	/* very low use, simpler locking if we just always alloc */
-	group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
+	group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
 	if (!group)
 		return ERR_PTR(-ENOMEM);
 
+	/* set to 0 when there are no external references to this group */
 	atomic_set(&group->refcnt, 1);
-
-	group->on_group_list = 0;
-	group->group_num = group_num;
-	group->mask = mask;
+	/*
+	 * hits 0 when there are no external references AND no marks for
+	 * this group
+	 */
+	atomic_set(&group->num_marks, 1);
 
 	mutex_init(&group->notification_mutex);
 	INIT_LIST_HEAD(&group->notification_list);
 	init_waitqueue_head(&group->notification_waitq);
-	group->q_len = 0;
 	group->max_events = UINT_MAX;
 
 	spin_lock_init(&group->mark_lock);
-	atomic_set(&group->num_marks, 0);
-	INIT_LIST_HEAD(&group->mark_entries);
+	INIT_LIST_HEAD(&group->marks_list);
 
 	group->ops = ops;
 
-	mutex_lock(&fsnotify_grp_mutex);
-	tgroup = fsnotify_find_group(group_num, mask, ops);
-	if (tgroup) {
-		/* group already exists */
-		mutex_unlock(&fsnotify_grp_mutex);
-		/* destroy the new one we made */
-		fsnotify_put_group(group);
-		return tgroup;
-	}
-
-	/* group not found, add a new one */
-	list_add_rcu(&group->group_list, &fsnotify_groups);
-	group->on_group_list = 1;
-	/* being on the fsnotify_groups list holds one num_marks */
-	atomic_inc(&group->num_marks);
-
-	mutex_unlock(&fsnotify_grp_mutex);
-
-	if (mask)
-		fsnotify_recalc_global_mask();
-
 	return group;
 }
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 0399bcbe09c..33297c00506 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -16,72 +16,6 @@
  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-/*
- * fsnotify inode mark locking/lifetime/and refcnting
- *
- * REFCNT:
- * The mark->refcnt tells how many "things" in the kernel currently are
- * referencing this object.  The object typically will live inside the kernel
- * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
- * which can find this object holding the appropriete locks, can take a reference
- * and the object itself is guarenteed to survive until the reference is dropped.
- *
- * LOCKING:
- * There are 3 spinlocks involved with fsnotify inode marks and they MUST
- * be taken in order as follows:
- *
- * entry->lock
- * group->mark_lock
- * inode->i_lock
- *
- * entry->lock protects 2 things, entry->group and entry->inode.  You must hold
- * that lock to dereference either of these things (they could be NULL even with
- * the lock)
- *
- * group->mark_lock protects the mark_entries list anchored inside a given group
- * and each entry is hooked via the g_list.  It also sorta protects the
- * free_g_list, which when used is anchored by a private list on the stack of the
- * task which held the group->mark_lock.
- *
- * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
- * given inode and each entry is hooked via the i_list. (and sorta the
- * free_i_list)
- *
- *
- * LIFETIME:
- * Inode marks survive between when they are added to an inode and when their
- * refcnt==0.
- *
- * The inode mark can be cleared for a number of different reasons including:
- * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
- * - The inode is being evicted from cache. (fsnotify_inode_delete)
- * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
- * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark_by_entry)
- * - The fsnotify_group associated with the mark is going away and all such marks
- *   need to be cleaned up. (fsnotify_clear_marks_by_group)
- *
- * Worst case we are given an inode and need to clean up all the marks on that
- * inode.  We take i_lock and walk the i_fsnotify_mark_entries safely.  For each
- * mark on the list we take a reference (so the mark can't disappear under us).
- * We remove that mark form the inode's list of marks and we add this mark to a
- * private list anchored on the stack using i_free_list;  At this point we no
- * longer fear anything finding the mark using the inode's list of marks.
- *
- * We can safely and locklessly run the private list on the stack of everything
- * we just unattached from the original inode.  For each mark on the private list
- * we grab the mark-> and can thus dereference mark->group and mark->inode.  If
- * we see the group and inode are not NULL we take those locks.  Now holding all
- * 3 locks we can completely remove the mark from other tasks finding it in the
- * future.  Remember, 10 things might already be referencing this mark, but they
- * better be holding a ref.  We drop our reference we took before we unhooked it
- * from the inode.  When the ref hits 0 we can free the mark.
- *
- * Very similarly for freeing by group, except we use free_g_list.
- *
- * This has the very interesting property of being able to run concurrently with
- * any (or all) other directions.
- */
-
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -95,30 +29,19 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
-void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
-{
-	atomic_inc(&entry->refcnt);
-}
-
-void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
-{
-	if (atomic_dec_and_test(&entry->refcnt))
-		entry->free_mark(entry);
-}
-
 /*
  * Recalculate the mask of events relevant to a given inode locked.
  */
 static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
 	__u32 new_mask = 0;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
-		new_mask |= entry->mask;
+	hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list)
+		new_mask |= mark->mask;
 	inode->i_fsnotify_mask = new_mask;
 }
 
@@ -135,107 +58,26 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
 	__fsnotify_update_child_dentry_flags(inode);
 }
 
-/*
- * Any time a mark is getting freed we end up here.
- * The caller had better be holding a reference to this mark so we don't actually
- * do the final put under the entry->lock
- */
-void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
+void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
 {
-	struct fsnotify_group *group;
-	struct inode *inode;
+	struct inode *inode = mark->i.inode;
 
-	spin_lock(&entry->lock);
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&mark->group->mark_lock);
 
-	group = entry->group;
-	inode = entry->inode;
-
-	BUG_ON(group && !inode);
-	BUG_ON(!group && inode);
-
-	/* if !group something else already marked this to die */
-	if (!group) {
-		spin_unlock(&entry->lock);
-		return;
-	}
-
-	/* 1 from caller and 1 for being on i_list/g_list */
-	BUG_ON(atomic_read(&entry->refcnt) < 2);
-
-	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
-	hlist_del_init(&entry->i_list);
-	entry->inode = NULL;
-
-	list_del_init(&entry->g_list);
-	entry->group = NULL;
-
-	fsnotify_put_mark(entry); /* for i_list and g_list */
+	hlist_del_init_rcu(&mark->i.i_list);
+	mark->i.inode = NULL;
 
 	/*
-	 * this mark is now off the inode->i_fsnotify_mark_entries list and we
+	 * this mark is now off the inode->i_fsnotify_marks list and we
 	 * hold the inode->i_lock, so this is the perfect time to update the
 	 * inode->i_fsnotify_mask
 	 */
 	fsnotify_recalc_inode_mask_locked(inode);
 
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&group->mark_lock);
-	spin_unlock(&entry->lock);
-
-	/*
-	 * Some groups like to know that marks are being freed.  This is a
-	 * callback to the group function to let it know that this entry
-	 * is being freed.
-	 */
-	if (group->ops->freeing_mark)
-		group->ops->freeing_mark(entry, group);
-
-	/*
-	 * __fsnotify_update_child_dentry_flags(inode);
-	 *
-	 * I really want to call that, but we can't, we have no idea if the inode
-	 * still exists the second we drop the entry->lock.
-	 *
-	 * The next time an event arrive to this inode from one of it's children
-	 * __fsnotify_parent will see that the inode doesn't care about it's
-	 * children and will update all of these flags then.  So really this
-	 * is just a lazy update (and could be a perf win...)
-	 */
-
-
-	iput(inode);
-
-	/*
-	 * it's possible that this group tried to destroy itself, but this
-	 * this mark was simultaneously being freed by inode.  If that's the
-	 * case, we finish freeing the group here.
-	 */
-	if (unlikely(atomic_dec_and_test(&group->num_marks)))
-		fsnotify_final_destroy_group(group);
-}
-
-/*
- * Given a group, destroy all of the marks associated with that group.
- */
-void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
-{
-	struct fsnotify_mark_entry *lentry, *entry;
-	LIST_HEAD(free_list);
-
-	spin_lock(&group->mark_lock);
-	list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
-		list_add(&entry->free_g_list, &free_list);
-		list_del_init(&entry->g_list);
-		fsnotify_get_mark(entry);
-	}
-	spin_unlock(&group->mark_lock);
-
-	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
-		fsnotify_destroy_mark_by_entry(entry);
-		fsnotify_put_mark(entry);
-	}
 }
 
 /*
@@ -243,112 +85,145 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
  */
 void fsnotify_clear_marks_by_inode(struct inode *inode)
 {
-	struct fsnotify_mark_entry *entry, *lentry;
+	struct fsnotify_mark *mark, *lmark;
 	struct hlist_node *pos, *n;
 	LIST_HEAD(free_list);
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
-		list_add(&entry->free_i_list, &free_list);
-		hlist_del_init(&entry->i_list);
-		fsnotify_get_mark(entry);
+	hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) {
+		list_add(&mark->i.free_i_list, &free_list);
+		hlist_del_init_rcu(&mark->i.i_list);
+		fsnotify_get_mark(mark);
 	}
 	spin_unlock(&inode->i_lock);
 
-	list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
-		fsnotify_destroy_mark_by_entry(entry);
-		fsnotify_put_mark(entry);
+	list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
+		fsnotify_destroy_mark(mark);
+		fsnotify_put_mark(mark);
 	}
 }
 
 /*
+ * Given a group clear all of the inode marks associated with that group.
+ */
+void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
+{
+	fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE);
+}
+
+/*
  * given a group and inode, find the mark associated with that combination.
  * if found take a reference to that mark and return it, else return NULL
  */
-struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
-						     struct inode *inode)
+struct fsnotify_mark *fsnotify_find_inode_mark_locked(struct fsnotify_group *group,
+						      struct inode *inode)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
-		if (entry->group == group) {
-			fsnotify_get_mark(entry);
-			return entry;
+	hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) {
+		if (mark->group == group) {
+			fsnotify_get_mark(mark);
+			return mark;
 		}
 	}
 	return NULL;
 }
 
 /*
- * Nothing fancy, just initialize lists and locks and counters.
+ * given a group and inode, take inode->i_lock and find the mark associated
+ * with that combination.  if found take a reference to it and return it, else NULL
  */
-void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
-			void (*free_mark)(struct fsnotify_mark_entry *entry))
+struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
+					       struct inode *inode)
+{
+	struct fsnotify_mark *mark;
+
+	spin_lock(&inode->i_lock);
+	mark = fsnotify_find_inode_mark_locked(group, inode);
+	spin_unlock(&inode->i_lock);
 
+	return mark;
+}
+
+/*
+ * If we are setting a mark mask on an inode mark we should pin the inode
+ * in memory.
+ */
+void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
+					 __u32 mask)
 {
-	spin_lock_init(&entry->lock);
-	atomic_set(&entry->refcnt, 1);
-	INIT_HLIST_NODE(&entry->i_list);
-	entry->group = NULL;
-	entry->mask = 0;
-	entry->inode = NULL;
-	entry->free_mark = free_mark;
+	struct inode *inode;
+
+	assert_spin_locked(&mark->lock);
+
+	if (mask &&
+	    mark->i.inode &&
+	    !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) {
+		mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED;
+		inode = igrab(mark->i.inode);
+		/*
+		 * we shouldn't be able to get here if the inode wasn't
+		 * already safely held in memory.  But BUG in case that
+		 * assumption is ever wrong.
+		 */
+		BUG_ON(!inode);
+	}
 }
 
 /*
- * Attach an initialized mark entry to a given group and inode.
+ * Attach an initialized mark to a given inode.
  * These marks may be used for the fsnotify backend to determine which
- * event types should be delivered to which group and for which inodes.
+ * event types should be delivered to which group and for which inodes.  These
+ * marks are ordered according to the group's location in memory.
  */
-int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
-		      struct fsnotify_group *group, struct inode *inode)
+int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
+			    struct fsnotify_group *group, struct inode *inode,
+			    int allow_dups)
 {
-	struct fsnotify_mark_entry *lentry;
+	struct fsnotify_mark *lmark;
+	struct hlist_node *node, *last = NULL;
 	int ret = 0;
 
-	inode = igrab(inode);
-	if (unlikely(!inode))
-		return -EINVAL;
+	mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
+
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&group->mark_lock);
 
-	/*
-	 * LOCKING ORDER!!!!
-	 * entry->lock
-	 * group->mark_lock
-	 * inode->i_lock
-	 */
-	spin_lock(&entry->lock);
-	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
-	lentry = fsnotify_find_mark_entry(group, inode);
-	if (!lentry) {
-		entry->group = group;
-		entry->inode = inode;
+	mark->i.inode = inode;
 
-		hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
-		list_add(&entry->g_list, &group->mark_entries);
+	/* is mark the first mark? */
+	if (hlist_empty(&inode->i_fsnotify_marks)) {
+		hlist_add_head_rcu(&mark->i.i_list, &inode->i_fsnotify_marks);
+		goto out;
+	}
 
-		fsnotify_get_mark(entry); /* for i_list and g_list */
+	/* should mark be in the middle of the current list? */
+	hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) {
+		last = node;
+
+		if ((lmark->group == group) && !allow_dups) {
+			ret = -EEXIST;
+			goto out;
+		}
 
-		atomic_inc(&group->num_marks);
+		if (mark->group < lmark->group)
+			continue;
 
-		fsnotify_recalc_inode_mask_locked(inode);
+		hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
+		goto out;
 	}
 
+	BUG_ON(last == NULL);
+	/* mark should be the last entry.  last is the current last entry */
+	hlist_add_after_rcu(last, &mark->i.i_list);
+out:
+	fsnotify_recalc_inode_mask_locked(inode);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&group->mark_lock);
-	spin_unlock(&entry->lock);
-
-	if (lentry) {
-		ret = -EEXIST;
-		iput(inode);
-		fsnotify_put_mark(lentry);
-	} else {
-		__fsnotify_update_child_dentry_flags(inode);
-	}
 
 	return ret;
 }
@@ -369,11 +244,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		struct inode *need_iput_tmp;
 
 		/*
-		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+		 * We cannot __iget() an inode in state I_FREEING,
 		 * I_WILL_FREE, or I_NEW which is fine because by that point
 		 * the inode cannot have any associated watches.
 		 */
-		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 
 		/*
@@ -397,7 +272,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		/* In case the dropping of a reference would nuke next_i. */
 		if ((&next_i->i_sb_list != list) &&
 		    atomic_read(&next_i->i_count) &&
-		    !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+		    !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
 			__iget(next_i);
 			need_iput = next_i;
 		}
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b3a159b21cf..b981fc0c837 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,18 +1,3 @@
-config INOTIFY
-	bool "Inotify file change notification support"
-	default n
-	---help---
-	  Say Y here to enable legacy in kernel inotify support.  Inotify is a
-	  file change notification system.  It is a replacement for dnotify.
-	  This option only provides the legacy inotify in kernel API.  There
-	  are no in tree kernel users of this interface since it is deprecated.
-	  You only need this if you are loading an out of tree kernel module
-	  that uses inotify.
-
-	  For more information, see <file:Documentation/filesystems/inotify.txt>
-
-	  If unsure, say N.
-
 config INOTIFY_USER
 	bool "Inotify support for userspace"
 	select ANON_INODES
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index 94382817136..a380dabe09d 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1 @@
-obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
deleted file mode 100644
index 27b75ebc746..00000000000
--- a/fs/notify/inotify/inotify.c
+++ /dev/null
@@ -1,873 +0,0 @@
-/*
- * fs/inotify.c - inode-based file event notifications
- *
- * Authors:
- *	John McCutchan	<ttb@tentacle.dhs.org>
- *	Robert Love	<rml@novell.com>
- *
- * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
- *
- * Copyright (C) 2005 John McCutchan
- * Copyright 2006 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/writeback.h>
-#include <linux/inotify.h>
-#include <linux/fsnotify_backend.h>
-
-static atomic_t inotify_cookie;
-
-/*
- * Lock ordering:
- *
- * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
- * iprune_mutex (synchronize shrink_icache_memory())
- * 	inode_lock (protects the super_block->s_inodes list)
- * 	inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
- * 		inotify_handle->mutex (protects inotify_handle and watches->h_list)
- *
- * The inode->inotify_mutex and inotify_handle->mutex and held during execution
- * of a caller's event handler.  Thus, the caller must not hold any locks
- * taken in their event handler while calling any of the published inotify
- * interfaces.
- */
-
-/*
- * Lifetimes of the three main data structures--inotify_handle, inode, and
- * inotify_watch--are managed by reference count.
- *
- * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
- * Additional references can bump the count via get_inotify_handle() and drop
- * the count via put_inotify_handle().
- *
- * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
- * to remove_watch_no_event().  Additional references can bump the count via
- * get_inotify_watch() and drop the count via put_inotify_watch().  The caller
- * is reponsible for the final put after receiving IN_IGNORED, or when using
- * IN_ONESHOT after receiving the first event.  Inotify does the final put if
- * inotify_destroy() is called.
- *
- * inode: Pinned so long as the inode is associated with a watch, from
- * inotify_add_watch() to the final put_inotify_watch().
- */
-
-/*
- * struct inotify_handle - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_handle {
-	struct idr		idr;		/* idr mapping wd -> watch */
-	struct mutex		mutex;		/* protects this bad boy */
-	struct list_head	watches;	/* list of watches */
-	atomic_t		count;		/* reference count */
-	u32			last_wd;	/* the last wd allocated */
-	const struct inotify_operations *in_ops; /* inotify caller operations */
-};
-
-static inline void get_inotify_handle(struct inotify_handle *ih)
-{
-	atomic_inc(&ih->count);
-}
-
-static inline void put_inotify_handle(struct inotify_handle *ih)
-{
-	if (atomic_dec_and_test(&ih->count)) {
-		idr_destroy(&ih->idr);
-		kfree(ih);
-	}
-}
-
-/**
- * get_inotify_watch - grab a reference to an inotify_watch
- * @watch: watch to grab
- */
-void get_inotify_watch(struct inotify_watch *watch)
-{
-	atomic_inc(&watch->count);
-}
-EXPORT_SYMBOL_GPL(get_inotify_watch);
-
-int pin_inotify_watch(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	if (atomic_inc_not_zero(&sb->s_active)) {
-		atomic_inc(&watch->count);
-		return 1;
-	}
-	return 0;
-}
-
-/**
- * put_inotify_watch - decrements the ref count on a given watch.  cleans up
- * watch references if the count reaches zero.  inotify_watch is freed by
- * inotify callers via the destroy_watch() op.
- * @watch: watch to release
- */
-void put_inotify_watch(struct inotify_watch *watch)
-{
-	if (atomic_dec_and_test(&watch->count)) {
-		struct inotify_handle *ih = watch->ih;
-
-		iput(watch->inode);
-		ih->in_ops->destroy_watch(watch);
-		put_inotify_handle(ih);
-	}
-}
-EXPORT_SYMBOL_GPL(put_inotify_watch);
-
-void unpin_inotify_watch(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	put_inotify_watch(watch);
-	deactivate_super(sb);
-}
-
-/*
- * inotify_handle_get_wd - returns the next WD for use by the given handle
- *
- * Callers must hold ih->mutex.  This function can sleep.
- */
-static int inotify_handle_get_wd(struct inotify_handle *ih,
-				 struct inotify_watch *watch)
-{
-	int ret;
-
-	do {
-		if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
-			return -ENOSPC;
-		ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
-	} while (ret == -EAGAIN);
-
-	if (likely(!ret))
-		ih->last_wd = watch->wd;
-
-	return ret;
-}
-
-/*
- * inotify_inode_watched - returns nonzero if there are watches on this inode
- * and zero otherwise.  We call this lockless, we do not care if we race.
- */
-static inline int inotify_inode_watched(struct inode *inode)
-{
-	return !list_empty(&inode->inotify_watches);
-}
-
-/*
- * Get child dentry flag into synch with parent inode.
- * Flag should always be clear for negative dentrys.
- */
-static void set_dentry_child_flags(struct inode *inode, int watched)
-{
-	struct dentry *alias;
-
-	spin_lock(&dcache_lock);
-	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
-		struct dentry *child;
-
-		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
-			if (!child->d_inode)
-				continue;
-
-			spin_lock(&child->d_lock);
-			if (watched)
-				child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-			else
-				child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
-			spin_unlock(&child->d_lock);
-		}
-	}
-	spin_unlock(&dcache_lock);
-}
-
-/*
- * inotify_find_handle - find the watch associated with the given inode and
- * handle
- *
- * Callers must hold inode->inotify_mutex.
- */
-static struct inotify_watch *inode_find_handle(struct inode *inode,
-					       struct inotify_handle *ih)
-{
-	struct inotify_watch *watch;
-
-	list_for_each_entry(watch, &inode->inotify_watches, i_list) {
-		if (watch->ih == ih)
-			return watch;
-	}
-
-	return NULL;
-}
-
-/*
- * remove_watch_no_event - remove watch without the IN_IGNORED event.
- *
- * Callers must hold both inode->inotify_mutex and ih->mutex.
- */
-static void remove_watch_no_event(struct inotify_watch *watch,
-				  struct inotify_handle *ih)
-{
-	list_del(&watch->i_list);
-	list_del(&watch->h_list);
-
-	if (!inotify_inode_watched(watch->inode))
-		set_dentry_child_flags(watch->inode, 0);
-
-	idr_remove(&ih->idr, watch->wd);
-}
-
-/**
- * inotify_remove_watch_locked - Remove a watch from both the handle and the
- * inode.  Sends the IN_IGNORED event signifying that the inode is no longer
- * watched.  May be invoked from a caller's event handler.
- * @ih: inotify handle associated with watch
- * @watch: watch to remove
- *
- * Callers must hold both inode->inotify_mutex and ih->mutex.
- */
-void inotify_remove_watch_locked(struct inotify_handle *ih,
-				 struct inotify_watch *watch)
-{
-	remove_watch_no_event(watch, ih);
-	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
-}
-EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
-
-/* Kernel API for producing events */
-
-/*
- * inotify_d_instantiate - instantiate dcache entry for inode
- */
-void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
-{
-	struct dentry *parent;
-
-	if (!inode)
-		return;
-
-	spin_lock(&entry->d_lock);
-	parent = entry->d_parent;
-	if (parent->d_inode && inotify_inode_watched(parent->d_inode))
-		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-	spin_unlock(&entry->d_lock);
-}
-
-/*
- * inotify_d_move - dcache entry has been moved
- */
-void inotify_d_move(struct dentry *entry)
-{
-	struct dentry *parent;
-
-	parent = entry->d_parent;
-	if (inotify_inode_watched(parent->d_inode))
-		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-	else
-		entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
-}
-
-/**
- * inotify_inode_queue_event - queue an event to all watches on this inode
- * @inode: inode event is originating from
- * @mask: event mask describing this event
- * @cookie: cookie for synchronization, or zero
- * @name: filename, if any
- * @n_inode: inode associated with name
- */
-void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
-			       const char *name, struct inode *n_inode)
-{
-	struct inotify_watch *watch, *next;
-
-	if (!inotify_inode_watched(inode))
-		return;
-
-	mutex_lock(&inode->inotify_mutex);
-	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
-		u32 watch_mask = watch->mask;
-		if (watch_mask & mask) {
-			struct inotify_handle *ih= watch->ih;
-			mutex_lock(&ih->mutex);
-			if (watch_mask & IN_ONESHOT)
-				remove_watch_no_event(watch, ih);
-			ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
-						 name, n_inode);
-			mutex_unlock(&ih->mutex);
-		}
-	}
-	mutex_unlock(&inode->inotify_mutex);
-}
-EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
-
-/**
- * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
- * @dentry: the dentry in question, we queue against this dentry's parent
- * @mask: event mask describing this event
- * @cookie: cookie for synchronization, or zero
- * @name: filename, if any
- */
-void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
-				       u32 cookie, const char *name)
-{
-	struct dentry *parent;
-	struct inode *inode;
-
-	if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
-		return;
-
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	inode = parent->d_inode;
-
-	if (inotify_inode_watched(inode)) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		inotify_inode_queue_event(inode, mask, cookie, name,
-					  dentry->d_inode);
-		dput(parent);
-	} else
-		spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
-
-/**
- * inotify_get_cookie - return a unique cookie for use in synchronizing events.
- */
-u32 inotify_get_cookie(void)
-{
-	return atomic_inc_return(&inotify_cookie);
-}
-EXPORT_SYMBOL_GPL(inotify_get_cookie);
-
-/**
- * inotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
- * @list: list of inodes being unmounted (sb->s_inodes)
- *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
- */
-void inotify_unmount_inodes(struct list_head *list)
-{
-	struct inode *inode, *next_i, *need_iput = NULL;
-
-	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
-		struct inotify_watch *watch, *next_w;
-		struct inode *need_iput_tmp;
-		struct list_head *watches;
-
-		/*
-		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
-		 * I_WILL_FREE, or I_NEW which is fine because by that point
-		 * the inode cannot have any associated watches.
-		 */
-		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
-			continue;
-
-		/*
-		 * If i_count is zero, the inode cannot have any watches and
-		 * doing an __iget/iput with MS_ACTIVE clear would actually
-		 * evict all inodes with zero i_count from icache which is
-		 * unnecessarily violent and may in fact be illegal to do.
-		 */
-		if (!atomic_read(&inode->i_count))
-			continue;
-
-		need_iput_tmp = need_iput;
-		need_iput = NULL;
-		/* In case inotify_remove_watch_locked() drops a reference. */
-		if (inode != need_iput_tmp)
-			__iget(inode);
-		else
-			need_iput_tmp = NULL;
-		/* In case the dropping of a reference would nuke next_i. */
-		if ((&next_i->i_sb_list != list) &&
-				atomic_read(&next_i->i_count) &&
-				!(next_i->i_state & (I_CLEAR | I_FREEING |
-					I_WILL_FREE))) {
-			__iget(next_i);
-			need_iput = next_i;
-		}
-
-		/*
-		 * We can safely drop inode_lock here because we hold
-		 * references on both inode and next_i.  Also no new inodes
-		 * will be added since the umount has begun.  Finally,
-		 * iprune_mutex keeps shrink_icache_memory() away.
-		 */
-		spin_unlock(&inode_lock);
-
-		if (need_iput_tmp)
-			iput(need_iput_tmp);
-
-		/* for each watch, send IN_UNMOUNT and then remove it */
-		mutex_lock(&inode->inotify_mutex);
-		watches = &inode->inotify_watches;
-		list_for_each_entry_safe(watch, next_w, watches, i_list) {
-			struct inotify_handle *ih= watch->ih;
-			get_inotify_watch(watch);
-			mutex_lock(&ih->mutex);
-			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
-						 NULL, NULL);
-			inotify_remove_watch_locked(ih, watch);
-			mutex_unlock(&ih->mutex);
-			put_inotify_watch(watch);
-		}
-		mutex_unlock(&inode->inotify_mutex);
-		iput(inode);		
-
-		spin_lock(&inode_lock);
-	}
-}
-EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
-
-/**
- * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
- * @inode: inode that is about to be removed
- */
-void inotify_inode_is_dead(struct inode *inode)
-{
-	struct inotify_watch *watch, *next;
-
-	mutex_lock(&inode->inotify_mutex);
-	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
-		struct inotify_handle *ih = watch->ih;
-		mutex_lock(&ih->mutex);
-		inotify_remove_watch_locked(ih, watch);
-		mutex_unlock(&ih->mutex);
-	}
-	mutex_unlock(&inode->inotify_mutex);
-}
-EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
-
-/* Kernel Consumer API */
-
-/**
- * inotify_init - allocate and initialize an inotify instance
- * @ops: caller's inotify operations
- */
-struct inotify_handle *inotify_init(const struct inotify_operations *ops)
-{
-	struct inotify_handle *ih;
-
-	ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
-	if (unlikely(!ih))
-		return ERR_PTR(-ENOMEM);
-
-	idr_init(&ih->idr);
-	INIT_LIST_HEAD(&ih->watches);
-	mutex_init(&ih->mutex);
-	ih->last_wd = 0;
-	ih->in_ops = ops;
-	atomic_set(&ih->count, 0);
-	get_inotify_handle(ih);
-
-	return ih;
-}
-EXPORT_SYMBOL_GPL(inotify_init);
-
-/**
- * inotify_init_watch - initialize an inotify watch
- * @watch: watch to initialize
- */
-void inotify_init_watch(struct inotify_watch *watch)
-{
-	INIT_LIST_HEAD(&watch->h_list);
-	INIT_LIST_HEAD(&watch->i_list);
-	atomic_set(&watch->count, 0);
-	get_inotify_watch(watch); /* initial get */
-}
-EXPORT_SYMBOL_GPL(inotify_init_watch);
-
-/*
- * Watch removals suck violently.  To kick the watch out we need (in this
- * order) inode->inotify_mutex and ih->mutex.  That's fine if we have
- * a hold on inode; however, for all other cases we need to make damn sure
- * we don't race with umount.  We can *NOT* just grab a reference to a
- * watch - inotify_unmount_inodes() will happily sail past it and we'll end
- * with reference to inode potentially outliving its superblock.  Ideally
- * we just want to grab an active reference to superblock if we can; that
- * will make sure we won't go into inotify_umount_inodes() until we are
- * done.  Cleanup is just deactivate_super().  However, that leaves a messy
- * case - what if we *are* racing with umount() and active references to
- * superblock can't be acquired anymore?  We can bump ->s_count, grab
- * ->s_umount, which will wait until the superblock is shut down and the
- * watch in question is pining for fjords.
- *
- * And yes, this is far beyond mere "not very pretty"; so's the entire
- * concept of inotify to start with.
- */
-
-/**
- * pin_to_kill - pin the watch down for removal
- * @ih: inotify handle
- * @watch: watch to kill
- *
- * Called with ih->mutex held, drops it.  Possible return values:
- * 0 - nothing to do, it has died
- * 1 - remove it, drop the reference and deactivate_super()
- */
-static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-
-	if (atomic_inc_not_zero(&sb->s_active)) {
-		get_inotify_watch(watch);
-		mutex_unlock(&ih->mutex);
-		return 1;	/* the best outcome */
-	}
-	spin_lock(&sb_lock);
-	sb->s_count++;
-	spin_unlock(&sb_lock);
-	mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
-	down_read(&sb->s_umount);
-	/* fs is already shut down; the watch is dead */
-	drop_super(sb);
-	return 0;
-}
-
-static void unpin_and_kill(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	put_inotify_watch(watch);
-	deactivate_super(sb);
-}
-
-/**
- * inotify_destroy - clean up and destroy an inotify instance
- * @ih: inotify handle
- */
-void inotify_destroy(struct inotify_handle *ih)
-{
-	/*
-	 * Destroy all of the watches for this handle. Unfortunately, not very
-	 * pretty.  We cannot do a simple iteration over the list, because we
-	 * do not know the inode until we iterate to the watch.  But we need to
-	 * hold inode->inotify_mutex before ih->mutex.  The following works.
-	 *
-	 * AV: it had to become even uglier to start working ;-/
-	 */
-	while (1) {
-		struct inotify_watch *watch;
-		struct list_head *watches;
-		struct super_block *sb;
-		struct inode *inode;
-
-		mutex_lock(&ih->mutex);
-		watches = &ih->watches;
-		if (list_empty(watches)) {
-			mutex_unlock(&ih->mutex);
-			break;
-		}
-		watch = list_first_entry(watches, struct inotify_watch, h_list);
-		sb = watch->inode->i_sb;
-		if (!pin_to_kill(ih, watch))
-			continue;
-
-		inode = watch->inode;
-		mutex_lock(&inode->inotify_mutex);
-		mutex_lock(&ih->mutex);
-
-		/* make sure we didn't race with another list removal */
-		if (likely(idr_find(&ih->idr, watch->wd))) {
-			remove_watch_no_event(watch, ih);
-			put_inotify_watch(watch);
-		}
-
-		mutex_unlock(&ih->mutex);
-		mutex_unlock(&inode->inotify_mutex);
-		unpin_and_kill(watch);
-	}
-
-	/* free this handle: the put matching the get in inotify_init() */
-	put_inotify_handle(ih);
-}
-EXPORT_SYMBOL_GPL(inotify_destroy);
-
-/**
- * inotify_find_watch - find an existing watch for an (ih,inode) pair
- * @ih: inotify handle
- * @inode: inode to watch
- * @watchp: pointer to existing inotify_watch
- *
- * Caller must pin given inode (via nameidata).
- */
-s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
-		       struct inotify_watch **watchp)
-{
-	struct inotify_watch *old;
-	int ret = -ENOENT;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	old = inode_find_handle(inode, ih);
-	if (unlikely(old)) {
-		get_inotify_watch(old); /* caller must put watch */
-		*watchp = old;
-		ret = old->wd;
-	}
-
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_find_watch);
-
-/**
- * inotify_find_update_watch - find and update the mask of an existing watch
- * @ih: inotify handle
- * @inode: inode's watch to update
- * @mask: mask of events to watch
- *
- * Caller must pin given inode (via nameidata).
- */
-s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
-			      u32 mask)
-{
-	struct inotify_watch *old;
-	int mask_add = 0;
-	int ret;
-
-	if (mask & IN_MASK_ADD)
-		mask_add = 1;
-
-	/* don't allow invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS | IN_ONESHOT;
-	if (unlikely(!mask))
-		return -EINVAL;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/*
-	 * Handle the case of re-adding a watch on an (inode,ih) pair that we
-	 * are already watching.  We just update the mask and return its wd.
-	 */
-	old = inode_find_handle(inode, ih);
-	if (unlikely(!old)) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	if (mask_add)
-		old->mask |= mask;
-	else
-		old->mask = mask;
-	ret = old->wd;
-out:
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_find_update_watch);
-
-/**
- * inotify_add_watch - add a watch to an inotify instance
- * @ih: inotify handle
- * @watch: caller allocated watch structure
- * @inode: inode to watch
- * @mask: mask of events to watch
- *
- * Caller must pin given inode (via nameidata).
- * Caller must ensure it only calls inotify_add_watch() once per watch.
- * Calls inotify_handle_get_wd() so may sleep.
- */
-s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
-		      struct inode *inode, u32 mask)
-{
-	int ret = 0;
-	int newly_watched;
-
-	/* don't allow invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS | IN_ONESHOT;
-	if (unlikely(!mask))
-		return -EINVAL;
-	watch->mask = mask;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/* Initialize a new watch */
-	ret = inotify_handle_get_wd(ih, watch);
-	if (unlikely(ret))
-		goto out;
-	ret = watch->wd;
-
-	/* save a reference to handle and bump the count to make it official */
-	get_inotify_handle(ih);
-	watch->ih = ih;
-
-	/*
-	 * Save a reference to the inode and bump the ref count to make it
-	 * official.  We hold a reference to nameidata, which makes this safe.
-	 */
-	watch->inode = igrab(inode);
-
-	/* Add the watch to the handle's and the inode's list */
-	newly_watched = !inotify_inode_watched(inode);
-	list_add(&watch->h_list, &ih->watches);
-	list_add(&watch->i_list, &inode->inotify_watches);
-	/*
-	 * Set child flags _after_ adding the watch, so there is no race
-	 * windows where newly instantiated children could miss their parent's
-	 * watched flag.
-	 */
-	if (newly_watched)
-		set_dentry_child_flags(inode, 1);
-
-out:
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_add_watch);
-
-/**
- * inotify_clone_watch - put the watch next to existing one
- * @old: already installed watch
- * @new: new watch
- *
- * Caller must hold the inotify_mutex of inode we are dealing with;
- * it is expected to remove the old watch before unlocking the inode.
- */
-s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
-{
-	struct inotify_handle *ih = old->ih;
-	int ret = 0;
-
-	new->mask = old->mask;
-	new->ih = ih;
-
-	mutex_lock(&ih->mutex);
-
-	/* Initialize a new watch */
-	ret = inotify_handle_get_wd(ih, new);
-	if (unlikely(ret))
-		goto out;
-	ret = new->wd;
-
-	get_inotify_handle(ih);
-
-	new->inode = igrab(old->inode);
-
-	list_add(&new->h_list, &ih->watches);
-	list_add(&new->i_list, &old->inode->inotify_watches);
-out:
-	mutex_unlock(&ih->mutex);
-	return ret;
-}
-
-void inotify_evict_watch(struct inotify_watch *watch)
-{
-	get_inotify_watch(watch);
-	mutex_lock(&watch->ih->mutex);
-	inotify_remove_watch_locked(watch->ih, watch);
-	mutex_unlock(&watch->ih->mutex);
-}
-
-/**
- * inotify_rm_wd - remove a watch from an inotify instance
- * @ih: inotify handle
- * @wd: watch descriptor to remove
- *
- * Can sleep.
- */
-int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
-{
-	struct inotify_watch *watch;
-	struct super_block *sb;
-	struct inode *inode;
-
-	mutex_lock(&ih->mutex);
-	watch = idr_find(&ih->idr, wd);
-	if (unlikely(!watch)) {
-		mutex_unlock(&ih->mutex);
-		return -EINVAL;
-	}
-	sb = watch->inode->i_sb;
-	if (!pin_to_kill(ih, watch))
-		return 0;
-
-	inode = watch->inode;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/* make sure that we did not race */
-	if (likely(idr_find(&ih->idr, wd) == watch))
-		inotify_remove_watch_locked(ih, watch);
-
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	unpin_and_kill(watch);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(inotify_rm_wd);
-
-/**
- * inotify_rm_watch - remove a watch from an inotify instance
- * @ih: inotify handle
- * @watch: watch to remove
- *
- * Can sleep.
- */
-int inotify_rm_watch(struct inotify_handle *ih,
-		     struct inotify_watch *watch)
-{
-	return inotify_rm_wd(ih, watch->wd);
-}
-EXPORT_SYMBOL_GPL(inotify_rm_watch);
-
-/*
- * inotify_setup - core initialization function
- */
-static int __init inotify_setup(void)
-{
-	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
-	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
-	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
-	BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
-	BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
-	BUILD_BUG_ON(IN_OPEN != FS_OPEN);
-	BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
-	BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
-	BUILD_BUG_ON(IN_CREATE != FS_CREATE);
-	BUILD_BUG_ON(IN_DELETE != FS_DELETE);
-	BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
-	BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
-	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
-
-	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
-	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
-	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
-	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
-
-	atomic_set(&inotify_cookie, 0);
-
-	return 0;
-}
-
-module_init(inotify_setup);
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index f234f3a4c8c..b6642e4de4b 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -9,13 +9,12 @@ struct inotify_event_private_data {
 	int wd;
 };
 
-struct inotify_inode_mark_entry {
-	/* fsnotify_mark_entry MUST be the first thing */
-	struct fsnotify_mark_entry fsn_entry;
+struct inotify_inode_mark {
+	struct fsnotify_mark fsn_mark;
 	int wd;
 };
 
-extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
+extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 					   struct fsnotify_group *group);
 extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index e27960cd76a..5e73eeb2c69 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -22,6 +22,7 @@
  * General Public License for more details.
  */
 
+#include <linux/dcache.h> /* d_unlinked */
 #include <linux/fs.h> /* struct inode */
 #include <linux/fsnotify_backend.h>
 #include <linux/inotify.h>
@@ -32,26 +33,84 @@
 
 #include "inotify.h"
 
-static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
+/*
+ * Check if 2 events contain the same information.  We do not compare private data
+ * but at this moment that isn't a problem for any known fsnotify listeners.
+ */
+static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+	if ((old->mask == new->mask) &&
+	    (old->to_tell == new->to_tell) &&
+	    (old->data_type == new->data_type) &&
+	    (old->name_len == new->name_len)) {
+		switch (old->data_type) {
+		case (FSNOTIFY_EVENT_INODE):
+			/* remember, after old was put on the wait_q we aren't
+			 * allowed to look at the inode any more, only thing
+			 * left to check was if the file_name is the same */
+			if (!old->name_len ||
+			    !strcmp(old->file_name, new->file_name))
+				return true;
+			break;
+		case (FSNOTIFY_EVENT_FILE):
+			if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
+			    (old->file->f_path.dentry == new->file->f_path.dentry))
+				return true;
+			break;
+		case (FSNOTIFY_EVENT_NONE):
+			if (old->mask & FS_Q_OVERFLOW)
+				return true;
+			else if (old->mask & FS_IN_IGNORED)
+				return false;
+			return true;
+		};
+	}
+	return false;
+}
+
+static struct fsnotify_event *inotify_merge(struct list_head *list,
+					    struct fsnotify_event *event)
 {
-	struct fsnotify_mark_entry *entry;
-	struct inotify_inode_mark_entry *ientry;
+	struct fsnotify_event_holder *last_holder;
+	struct fsnotify_event *last_event;
+
+	/* and the list better be locked by something too */
+	spin_lock(&event->lock);
+
+	last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
+	last_event = last_holder->event;
+	if (event_compare(last_event, event))
+		fsnotify_get_event(last_event);
+	else
+		last_event = NULL;
+
+	spin_unlock(&event->lock);
+
+	return last_event;
+}
+
+static int inotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
+				struct fsnotify_event *event)
+{
+	struct inotify_inode_mark *i_mark;
 	struct inode *to_tell;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
-	int wd, ret;
+	struct fsnotify_event *added_event;
+	int wd, ret = 0;
+
+	BUG_ON(vfsmount_mark);
+
+	pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
+		 event, event->to_tell, event->mask);
 
 	to_tell = event->to_tell;
 
-	spin_lock(&to_tell->i_lock);
-	entry = fsnotify_find_mark_entry(group, to_tell);
-	spin_unlock(&to_tell->i_lock);
-	/* race with watch removal?  We already passes should_send */
-	if (unlikely(!entry))
-		return 0;
-	ientry = container_of(entry, struct inotify_inode_mark_entry,
-			      fsn_entry);
-	wd = ientry->wd;
+	i_mark = container_of(inode_mark, struct inotify_inode_mark,
+			      fsn_mark);
+	wd = i_mark->wd;
 
 	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
 	if (unlikely(!event_priv))
@@ -62,48 +121,40 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	fsn_event_priv->group = group;
 	event_priv->wd = wd;
 
-	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
-	if (ret) {
+	added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
+	if (added_event) {
 		inotify_free_event_priv(fsn_event_priv);
-		/* EEXIST says we tail matched, EOVERFLOW isn't something
-		 * to report up the stack. */
-		if ((ret == -EEXIST) ||
-		    (ret == -EOVERFLOW))
-			ret = 0;
+		if (!IS_ERR(added_event))
+			fsnotify_put_event(added_event);
+		else
+			ret = PTR_ERR(added_event);
 	}
 
-	/*
-	 * If we hold the entry until after the event is on the queue
-	 * IN_IGNORED won't be able to pass this event in the queue
-	 */
-	fsnotify_put_mark(entry);
+	if (inode_mark->mask & IN_ONESHOT)
+		fsnotify_destroy_mark(inode_mark);
 
 	return ret;
 }
 
-static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
 {
-	inotify_ignored_and_remove_idr(entry, group);
+	inotify_ignored_and_remove_idr(fsn_mark, group);
 }
 
-static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
+static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
+				      struct fsnotify_mark *inode_mark,
+				      struct fsnotify_mark *vfsmount_mark,
+				      __u32 mask, void *data, int data_type)
 {
-	struct fsnotify_mark_entry *entry;
-	bool send;
-
-	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(group, inode);
-	spin_unlock(&inode->i_lock);
-	if (!entry)
-		return false;
+	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+	    (data_type == FSNOTIFY_EVENT_FILE)) {
+		struct file *file  = data;
 
-	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (entry->mask & mask);
-
-	/* find took a reference */
-	fsnotify_put_mark(entry);
+		if (d_unlinked(file->f_path.dentry))
+			return false;
+	}
 
-	return send;
+	return true;
 }
 
 /*
@@ -115,18 +166,18 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
  */
 static int idr_callback(int id, void *p, void *data)
 {
-	struct fsnotify_mark_entry *entry;
-	struct inotify_inode_mark_entry *ientry;
+	struct fsnotify_mark *fsn_mark;
+	struct inotify_inode_mark *i_mark;
 	static bool warned = false;
 
 	if (warned)
 		return 0;
 
 	warned = true;
-	entry = p;
-	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	fsn_mark = p;
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
-	WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
+	WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in "
 		"idr.  Probably leaking memory\n", id, p, data);
 
 	/*
@@ -135,9 +186,9 @@ static int idr_callback(int id, void *p, void *data)
 	 * out why we got here and the panic is no worse than the original
 	 * BUG() that was here.
 	 */
-	if (entry)
-		printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
-			entry->group, entry->inode, ientry->wd);
+	if (fsn_mark)
+		printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
+			fsn_mark->group, fsn_mark->i.inode, i_mark->wd);
 	return 0;
 }
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e46ca685b9b..bf7f6d776c3 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -46,17 +46,11 @@
 /* these are configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_user_instances __read_mostly;
 static int inotify_max_queued_events __read_mostly;
-int inotify_max_user_watches __read_mostly;
+static int inotify_max_user_watches __read_mostly;
 
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
 struct kmem_cache *event_priv_cachep __read_mostly;
 
-/*
- * When inotify registers a new group it increments this and uses that
- * value as an offset to set the fsnotify group "name" and priority.
- */
-static atomic_t inotify_grp_num;
-
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -96,11 +90,14 @@ static inline __u32 inotify_arg_to_mask(u32 arg)
 {
 	__u32 mask;
 
-	/* everything should accept their own ignored and cares about children */
-	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
+	/*
+	 * everything should accept their own ignored, cares about children,
+	 * and should receive events when the inode is unmounted
+	 */
+	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
 
 	/* mask off the flags used to open the fd */
-	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
+	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
 
 	return mask;
 }
@@ -144,6 +141,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
 	event = fsnotify_peek_notify_event(group);
 
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
 	if (event->name_len)
 		event_size += roundup(event->name_len + 1, event_size);
 
@@ -173,6 +172,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	size_t event_size = sizeof(struct inotify_event);
 	size_t name_len = 0;
 
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
 	/* we get the inotify watch descriptor from the event private data */
 	spin_lock(&event->lock);
 	fsn_priv = fsnotify_remove_priv_from_event(group, event);
@@ -245,6 +246,8 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 		kevent = get_one_event(group, count);
 		mutex_unlock(&group->notification_mutex);
 
+		pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
+
 		if (kevent) {
 			ret = PTR_ERR(kevent);
 			if (IS_ERR(kevent))
@@ -289,6 +292,8 @@ static int inotify_release(struct inode *ignored, struct file *file)
 	struct fsnotify_group *group = file->private_data;
 	struct user_struct *user = group->inotify_data.user;
 
+	pr_debug("%s: group=%p\n", __func__, group);
+
 	fsnotify_clear_marks_by_group(group);
 
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
@@ -312,6 +317,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 	group = file->private_data;
 	p = (void __user *) arg;
 
+	pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd);
+
 	switch (cmd) {
 	case FIONREAD:
 		mutex_lock(&group->notification_mutex);
@@ -357,59 +364,159 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
 	return error;
 }
 
+static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
+			      int *last_wd,
+			      struct inotify_inode_mark *i_mark)
+{
+	int ret;
+
+	do {
+		if (unlikely(!idr_pre_get(idr, GFP_KERNEL)))
+			return -ENOMEM;
+
+		spin_lock(idr_lock);
+		ret = idr_get_new_above(idr, i_mark, *last_wd + 1,
+					&i_mark->wd);
+		/* we added the mark to the idr, take a reference */
+		if (!ret) {
+			*last_wd = i_mark->wd;
+			fsnotify_get_mark(&i_mark->fsn_mark);
+		}
+		spin_unlock(idr_lock);
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
+
+static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
+								int wd)
+{
+	struct idr *idr = &group->inotify_data.idr;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+	struct inotify_inode_mark *i_mark;
+
+	assert_spin_locked(idr_lock);
+
+	i_mark = idr_find(idr, wd);
+	if (i_mark) {
+		struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark;
+
+		fsnotify_get_mark(fsn_mark);
+		/* One ref for being in the idr, one ref we just took */
+		BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
+	}
+
+	return i_mark;
+}
+
+static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
+							 int wd)
+{
+	struct inotify_inode_mark *i_mark;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+
+	spin_lock(idr_lock);
+	i_mark = inotify_idr_find_locked(group, wd);
+	spin_unlock(idr_lock);
+
+	return i_mark;
+}
+
+static void do_inotify_remove_from_idr(struct fsnotify_group *group,
+				       struct inotify_inode_mark *i_mark)
+{
+	struct idr *idr = &group->inotify_data.idr;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+	int wd = i_mark->wd;
+
+	assert_spin_locked(idr_lock);
+
+	idr_remove(idr, wd);
+
+	/* removed from the idr, drop that ref */
+	fsnotify_put_mark(&i_mark->fsn_mark);
+}
+
 /*
  * Remove the mark from the idr (if present) and drop the reference
  * on the mark because it was in the idr.
  */
 static void inotify_remove_from_idr(struct fsnotify_group *group,
-				    struct inotify_inode_mark_entry *ientry)
+				    struct inotify_inode_mark *i_mark)
 {
-	struct idr *idr;
-	struct fsnotify_mark_entry *entry;
-	struct inotify_inode_mark_entry *found_ientry;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+	struct inotify_inode_mark *found_i_mark = NULL;
 	int wd;
 
-	spin_lock(&group->inotify_data.idr_lock);
-	idr = &group->inotify_data.idr;
-	wd = ientry->wd;
+	spin_lock(idr_lock);
+	wd = i_mark->wd;
 
-	if (wd == -1)
+	/*
+	 * does this i_mark think it is in the idr?  we shouldn't get called
+	 * if it wasn't....
+	 */
+	if (wd == -1) {
+		WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
+			" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
+			i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
 		goto out;
+	}
 
-	entry = idr_find(&group->inotify_data.idr, wd);
-	if (unlikely(!entry))
+	/* Let's look in the idr to see if we find it */
+	found_i_mark = inotify_idr_find_locked(group, wd);
+	if (unlikely(!found_i_mark)) {
+		WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
+			" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
+			i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
 		goto out;
+	}
 
-	found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
-	if (unlikely(found_ientry != ientry)) {
-		/* We found an entry in the idr with the right wd, but it's
-		 * not the entry we were told to remove.  eparis seriously
-		 * fucked up somewhere. */
-		WARN_ON(1);
-		ientry->wd = -1;
+	/*
+	 * We found a mark in the idr at the right wd, but it's
+	 * not the mark we were told to remove.  eparis seriously
+	 * fucked up somewhere.
+	 */
+	if (unlikely(found_i_mark != i_mark)) {
+		WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
+			"mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
+			"found_i_mark->group=%p found_i_mark->inode=%p\n",
+			__func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
+			i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd,
+			found_i_mark->fsn_mark.group,
+			found_i_mark->fsn_mark.i.inode);
 		goto out;
 	}
 
-	/* One ref for being in the idr, one ref held by the caller */
-	BUG_ON(atomic_read(&entry->refcnt) < 2);
-
-	idr_remove(idr, wd);
-	ientry->wd = -1;
+	/*
+	 * One ref for being in the idr
+	 * one ref held by the caller trying to kill us
+	 * one ref grabbed by inotify_idr_find
+	 */
+	if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
+		printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
+			" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
+			i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
+		/* we can't really recover with bad ref cnting.. */
+		BUG();
+	}
 
-	/* removed from the idr, drop that ref */
-	fsnotify_put_mark(entry);
+	do_inotify_remove_from_idr(group, i_mark);
 out:
-	spin_unlock(&group->inotify_data.idr_lock);
+	/* match the ref taken by inotify_idr_find_locked() */
+	if (found_i_mark)
+		fsnotify_put_mark(&found_i_mark->fsn_mark);
+	i_mark->wd = -1;
+	spin_unlock(idr_lock);
 }
 
 /*
  * Send IN_IGNORED for this wd, remove this wd from the idr.
  */
-void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
+void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 				    struct fsnotify_group *group)
 {
-	struct inotify_inode_mark_entry *ientry;
-	struct fsnotify_event *ignored_event;
+	struct inotify_inode_mark *i_mark;
+	struct fsnotify_event *ignored_event, *notify_event;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
 	int ret;
@@ -420,7 +527,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 	if (!ignored_event)
 		return;
 
-	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
 	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
 	if (unlikely(!event_priv))
@@ -429,37 +536,44 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
 
 	fsn_event_priv->group = group;
-	event_priv->wd = ientry->wd;
-
-	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
-	if (ret)
+	event_priv->wd = i_mark->wd;
+
+	notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
+	if (notify_event) {
+		if (IS_ERR(notify_event))
+			ret = PTR_ERR(notify_event);
+		else
+			fsnotify_put_event(notify_event);
 		inotify_free_event_priv(fsn_event_priv);
+	}
 
 skip_send_ignore:
 
 	/* matches the reference taken when the event was created */
 	fsnotify_put_event(ignored_event);
 
-	/* remove this entry from the idr */
-	inotify_remove_from_idr(group, ientry);
+	/* remove this mark from the idr */
+	inotify_remove_from_idr(group, i_mark);
 
 	atomic_dec(&group->inotify_data.user->inotify_watches);
 }
 
 /* ding dong the mark is dead */
-static void inotify_free_mark(struct fsnotify_mark_entry *entry)
+static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
-	struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
+	struct inotify_inode_mark *i_mark;
+
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
-	kmem_cache_free(inotify_inode_mark_cachep, ientry);
+	kmem_cache_free(inotify_inode_mark_cachep, i_mark);
 }
 
 static int inotify_update_existing_watch(struct fsnotify_group *group,
 					 struct inode *inode,
 					 u32 arg)
 {
-	struct fsnotify_mark_entry *entry;
-	struct inotify_inode_mark_entry *ientry;
+	struct fsnotify_mark *fsn_mark;
+	struct inotify_inode_mark *i_mark;
 	__u32 old_mask, new_mask;
 	__u32 mask;
 	int add = (arg & IN_MASK_ADD);
@@ -467,52 +581,43 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 
 	/* don't allow invalid bits: we don't want flags set */
 	mask = inotify_arg_to_mask(arg);
-	if (unlikely(!mask))
+	if (unlikely(!(mask & IN_ALL_EVENTS)))
 		return -EINVAL;
 
-	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(group, inode);
-	spin_unlock(&inode->i_lock);
-	if (!entry)
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
+	if (!fsn_mark)
 		return -ENOENT;
 
-	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
-	spin_lock(&entry->lock);
+	spin_lock(&fsn_mark->lock);
 
-	old_mask = entry->mask;
-	if (add) {
-		entry->mask |= mask;
-		new_mask = entry->mask;
-	} else {
-		entry->mask = mask;
-		new_mask = entry->mask;
-	}
+	old_mask = fsn_mark->mask;
+	if (add)
+		fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
+	else
+		fsnotify_set_mark_mask_locked(fsn_mark, mask);
+	new_mask = fsn_mark->mask;
 
-	spin_unlock(&entry->lock);
+	spin_unlock(&fsn_mark->lock);
 
 	if (old_mask != new_mask) {
 		/* more bits in old than in new? */
 		int dropped = (old_mask & ~new_mask);
-		/* more bits in this entry than the inode's mask? */
+		/* more bits in this fsn_mark than the inode's mask? */
 		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
-		/* more bits in this entry than the group? */
-		int do_group = (new_mask & ~group->mask);
 
-		/* update the inode with this new entry */
+		/* update the inode with this new fsn_mark */
 		if (dropped || do_inode)
 			fsnotify_recalc_inode_mask(inode);
 
-		/* update the group mask with the new mask */
-		if (dropped || do_group)
-			fsnotify_recalc_group_mask(group);
 	}
 
 	/* return the wd */
-	ret = ientry->wd;
+	ret = i_mark->wd;
 
-	/* match the get from fsnotify_find_mark_entry() */
-	fsnotify_put_mark(entry);
+	/* match the get from fsnotify_find_mark() */
+	fsnotify_put_mark(fsn_mark);
 
 	return ret;
 }
@@ -521,73 +626,51 @@ static int inotify_new_watch(struct fsnotify_group *group,
 			     struct inode *inode,
 			     u32 arg)
 {
-	struct inotify_inode_mark_entry *tmp_ientry;
+	struct inotify_inode_mark *tmp_i_mark;
 	__u32 mask;
 	int ret;
+	struct idr *idr = &group->inotify_data.idr;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
 
 	/* don't allow invalid bits: we don't want flags set */
 	mask = inotify_arg_to_mask(arg);
-	if (unlikely(!mask))
+	if (unlikely(!(mask & IN_ALL_EVENTS)))
 		return -EINVAL;
 
-	tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
-	if (unlikely(!tmp_ientry))
+	tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+	if (unlikely(!tmp_i_mark))
 		return -ENOMEM;
 
-	fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
-	tmp_ientry->fsn_entry.mask = mask;
-	tmp_ientry->wd = -1;
+	fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
+	tmp_i_mark->fsn_mark.mask = mask;
+	tmp_i_mark->wd = -1;
 
 	ret = -ENOSPC;
 	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
 		goto out_err;
-retry:
-	ret = -ENOMEM;
-	if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
-		goto out_err;
 
-	/* we are putting the mark on the idr, take a reference */
-	fsnotify_get_mark(&tmp_ientry->fsn_entry);
-
-	spin_lock(&group->inotify_data.idr_lock);
-	ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
-				group->inotify_data.last_wd+1,
-				&tmp_ientry->wd);
-	spin_unlock(&group->inotify_data.idr_lock);
-	if (ret) {
-		/* we didn't get on the idr, drop the idr reference */
-		fsnotify_put_mark(&tmp_ientry->fsn_entry);
-
-		/* idr was out of memory allocate and try again */
-		if (ret == -EAGAIN)
-			goto retry;
+	ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
+				 tmp_i_mark);
+	if (ret)
 		goto out_err;
-	}
 
 	/* we are on the idr, now get on the inode */
-	ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
+	ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0);
 	if (ret) {
 		/* we failed to get on the inode, get off the idr */
-		inotify_remove_from_idr(group, tmp_ientry);
+		inotify_remove_from_idr(group, tmp_i_mark);
 		goto out_err;
 	}
 
-	/* update the idr hint, who cares about races, it's just a hint */
-	group->inotify_data.last_wd = tmp_ientry->wd;
-
 	/* increment the number of watches the user has */
 	atomic_inc(&group->inotify_data.user->inotify_watches);
 
-	/* return the watch descriptor for this new entry */
-	ret = tmp_ientry->wd;
-
-	/* if this mark added a new event update the group mask */
-	if (mask & ~group->mask)
-		fsnotify_recalc_group_mask(group);
+	/* return the watch descriptor for this new mark */
+	ret = tmp_i_mark->wd;
 
 out_err:
-	/* match the ref from fsnotify_init_markentry() */
-	fsnotify_put_mark(&tmp_ientry->fsn_entry);
+	/* match the ref from fsnotify_init_mark() */
+	fsnotify_put_mark(&tmp_i_mark->fsn_mark);
 
 	return ret;
 }
@@ -616,11 +699,8 @@ retry:
 static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
 {
 	struct fsnotify_group *group;
-	unsigned int grp_num;
 
-	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
-	grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
-	group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
+	group = fsnotify_alloc_group(&inotify_fsnotify_ops);
 	if (IS_ERR(group))
 		return group;
 
@@ -726,7 +806,7 @@ fput_and_out:
 SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
 	struct fsnotify_group *group;
-	struct fsnotify_mark_entry *entry;
+	struct inotify_inode_mark *i_mark;
 	struct file *filp;
 	int ret = 0, fput_needed;
 
@@ -735,25 +815,23 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 		return -EBADF;
 
 	/* verify that this is indeed an inotify instance */
-	if (unlikely(filp->f_op != &inotify_fops)) {
-		ret = -EINVAL;
+	ret = -EINVAL;
+	if (unlikely(filp->f_op != &inotify_fops))
 		goto out;
-	}
 
 	group = filp->private_data;
 
-	spin_lock(&group->inotify_data.idr_lock);
-	entry = idr_find(&group->inotify_data.idr, wd);
-	if (unlikely(!entry)) {
-		spin_unlock(&group->inotify_data.idr_lock);
-		ret = -EINVAL;
+	ret = -EINVAL;
+	i_mark = inotify_idr_find(group, wd);
+	if (unlikely(!i_mark))
 		goto out;
-	}
-	fsnotify_get_mark(entry);
-	spin_unlock(&group->inotify_data.idr_lock);
 
-	fsnotify_destroy_mark_by_entry(entry);
-	fsnotify_put_mark(entry);
+	ret = 0;
+
+	fsnotify_destroy_mark(&i_mark->fsn_mark);
+
+	/* match ref taken by inotify_idr_find */
+	fsnotify_put_mark(&i_mark->fsn_mark);
 
 out:
 	fput_light(filp, fput_needed);
@@ -767,7 +845,28 @@ out:
  */
 static int __init inotify_user_setup(void)
 {
-	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
+	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
+	BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(IN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
+	BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
+	BUILD_BUG_ON(IN_CREATE != FS_CREATE);
+	BUILD_BUG_ON(IN_DELETE != FS_DELETE);
+	BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
+	BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
+	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
+	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
+	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
+	BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
+	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
+
+	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
+
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
 
 	inotify_max_queued_events = 16384;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
new file mode 100644
index 00000000000..325185e514b
--- /dev/null
+++ b/fs/notify/mark.c
@@ -0,0 +1,371 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * fsnotify inode mark locking/lifetime/and refcnting
+ *
+ * REFCNT:
+ * The mark->refcnt tells how many "things" in the kernel currently are
+ * referencing this object.  The object typically will live inside the kernel
+ * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
+ * which can find this object holding the appropriate locks, can take a reference
+ * and the object itself is guaranteed to survive until the reference is dropped.
+ *
+ * LOCKING:
+ * There are 3 spinlocks involved with fsnotify inode marks and they MUST
+ * be taken in order as follows:
+ *
+ * mark->lock
+ * group->mark_lock
+ * inode->i_lock
+ *
+ * mark->lock protects 2 things, mark->group and mark->inode.  You must hold
+ * that lock to dereference either of these things (they could be NULL even with
+ * the lock)
+ *
+ * group->mark_lock protects the marks_list anchored inside a given group
+ * and each mark is hooked via the g_list.  It also sorta protects the
+ * free_g_list, which when used is anchored by a private list on the stack of the
+ * task which held the group->mark_lock.
+ *
+ * inode->i_lock protects the i_fsnotify_marks list anchored inside a
+ * given inode and each mark is hooked via the i_list. (and sorta the
+ * free_i_list)
+ *
+ *
+ * LIFETIME:
+ * Inode marks survive between when they are added to an inode and when their
+ * refcnt==0.
+ *
+ * The inode mark can be cleared for a number of different reasons including:
+ * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
+ * - The inode is being evicted from cache. (fsnotify_inode_delete)
+ * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
+ * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
+ * - The fsnotify_group associated with the mark is going away and all such marks
+ *   need to be cleaned up. (fsnotify_clear_marks_by_group)
+ *
+ * Worst case we are given an inode and need to clean up all the marks on that
+ * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
+ * mark on the list we take a reference (so the mark can't disappear under us).
+ * We remove that mark from the inode's list of marks and we add this mark to a
+ * private list anchored on the stack using free_i_list.  At this point we no
+ * longer fear anything finding the mark using the inode's list of marks.
+ *
+ * We can safely and locklessly run the private list on the stack of everything
+ * we just unattached from the original inode.  For each mark on the private list
+ * we grab the mark->lock and can thus dereference mark->group and mark->inode.  If
+ * we see the group and inode are not NULL we take those locks.  Now holding all
+ * 3 locks we can completely remove the mark from other tasks finding it in the
+ * future.  Remember, 10 things might already be referencing this mark, but they
+ * better be holding a ref.  We drop our reference we took before we unhooked it
+ * from the inode.  When the ref hits 0 we can free the mark.
+ *
+ * Very similarly for freeing by group, except we use free_g_list.
+ *
+ * This has the very interesting property of being able to run concurrently with
+ * any (or all) other directions.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/srcu.h>
+#include <linux/writeback.h> /* for inode_lock */
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+struct srcu_struct fsnotify_mark_srcu;
+static DEFINE_SPINLOCK(destroy_lock);
+static LIST_HEAD(destroy_list);
+static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
+
+void fsnotify_get_mark(struct fsnotify_mark *mark)
+{
+	atomic_inc(&mark->refcnt);
+}
+
+void fsnotify_put_mark(struct fsnotify_mark *mark)
+{
+	if (atomic_dec_and_test(&mark->refcnt))
+		mark->free_mark(mark);
+}
+
+/*
+ * Any time a mark is getting freed we end up here.
+ * The caller had better be holding a reference to this mark so we don't actually
+ * do the final put under the mark->lock
+ */
+void fsnotify_destroy_mark(struct fsnotify_mark *mark)
+{
+	struct fsnotify_group *group;
+	struct inode *inode = NULL;
+
+	spin_lock(&mark->lock);
+
+	group = mark->group;
+
+	/* something else already called this function on this mark */
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
+		spin_unlock(&mark->lock);
+		return;
+	}
+
+	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
+
+	/* 1 from caller and 1 for being on i_list/g_list */
+	BUG_ON(atomic_read(&mark->refcnt) < 2);
+
+	spin_lock(&group->mark_lock);
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+		inode = mark->i.inode;
+		fsnotify_destroy_inode_mark(mark);
+	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
+		fsnotify_destroy_vfsmount_mark(mark);
+	else
+		BUG();
+
+	list_del_init(&mark->g_list);
+
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&mark->lock);
+
+	spin_lock(&destroy_lock);
+	list_add(&mark->destroy_list, &destroy_list);
+	spin_unlock(&destroy_lock);
+	wake_up(&destroy_waitq);
+
+	/*
+	 * Some groups like to know that marks are being freed.  This is a
+	 * callback to the group function to let it know that this mark
+	 * is being freed.
+	 */
+	if (group->ops->freeing_mark)
+		group->ops->freeing_mark(mark, group);
+
+	/*
+	 * __fsnotify_update_child_dentry_flags(inode);
+	 *
+	 * I really want to call that, but we can't, we have no idea if the inode
+	 * still exists the second we drop the mark->lock.
+	 *
+	 * The next time an event arrives at this inode from one of its children
+	 * __fsnotify_parent will see that the inode doesn't care about its
+	 * children and will update all of these flags then.  So really this
+	 * is just a lazy update (and could be a perf win...)
+	 */
+
+	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
+		iput(inode);
+
+	/*
+	 * it's possible that this group tried to destroy itself, but this
+	 * mark was simultaneously being freed by inode.  If that's the
+	 * case, we finish freeing the group here.
+	 */
+	if (unlikely(atomic_dec_and_test(&group->num_marks)))
+		fsnotify_final_destroy_group(group);
+}
+
+void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
+{
+	assert_spin_locked(&mark->lock);
+
+	mark->mask = mask;
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
+		fsnotify_set_inode_mark_mask_locked(mark, mask);
+}
+
+void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
+{
+	assert_spin_locked(&mark->lock);
+
+	mark->ignored_mask = mask;
+}
+
+/*
+ * Attach an initialized mark to a given group and fs object.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which group.
+ */
+int fsnotify_add_mark(struct fsnotify_mark *mark,
+		      struct fsnotify_group *group, struct inode *inode,
+		      struct vfsmount *mnt, int allow_dups)
+{
+	int ret = 0;
+
+	BUG_ON(inode && mnt);
+	BUG_ON(!inode && !mnt);
+
+	/*
+	 * LOCKING ORDER!!!!
+	 * mark->lock
+	 * group->mark_lock
+	 * inode->i_lock
+	 */
+	spin_lock(&mark->lock);
+	spin_lock(&group->mark_lock);
+
+	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
+
+	mark->group = group;
+	list_add(&mark->g_list, &group->marks_list);
+	atomic_inc(&group->num_marks);
+	fsnotify_get_mark(mark); /* for i_list and g_list */
+
+	if (inode) {
+		ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
+		if (ret)
+			goto err;
+	} else if (mnt) {
+		ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
+		if (ret)
+			goto err;
+	} else {
+		BUG();
+	}
+
+	spin_unlock(&group->mark_lock);
+
+	/* this will pin the object if appropriate */
+	fsnotify_set_mark_mask_locked(mark, mark->mask);
+
+	spin_unlock(&mark->lock);
+
+	if (inode)
+		__fsnotify_update_child_dentry_flags(inode);
+
+	return ret;
+err:
+	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
+	list_del_init(&mark->g_list);
+	mark->group = NULL;
+	atomic_dec(&group->num_marks);
+
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&mark->lock);
+
+	spin_lock(&destroy_lock);
+	list_add(&mark->destroy_list, &destroy_list);
+	spin_unlock(&destroy_lock);
+	wake_up(&destroy_waitq);
+
+	return ret;
+}
+
+/*
+ * clear any marks in a group in which mark->flags & flags is true
+ */
+void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
+					 unsigned int flags)
+{
+	struct fsnotify_mark *lmark, *mark;
+	LIST_HEAD(free_list);
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
+		if (mark->flags & flags) {
+			list_add(&mark->free_g_list, &free_list);
+			list_del_init(&mark->g_list);
+			fsnotify_get_mark(mark);
+		}
+	}
+	spin_unlock(&group->mark_lock);
+
+	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
+		fsnotify_destroy_mark(mark);
+		fsnotify_put_mark(mark);
+	}
+}
+
+/*
+ * Given a group, destroy all of the marks associated with that group.
+ */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+{
+	fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
+}
+
+void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
+{
+	assert_spin_locked(&old->lock);
+	new->i.inode = old->i.inode;
+	new->m.mnt = old->m.mnt;
+	new->group = old->group;
+	new->mask = old->mask;
+	new->free_mark = old->free_mark;
+}
+
+/*
+ * Nothing fancy, just initialize lists and locks and counters.
+ */
+void fsnotify_init_mark(struct fsnotify_mark *mark,
+			void (*free_mark)(struct fsnotify_mark *mark))
+{
+	memset(mark, 0, sizeof(*mark));
+	spin_lock_init(&mark->lock);
+	atomic_set(&mark->refcnt, 1);
+	mark->free_mark = free_mark;
+}
+
+static int fsnotify_mark_destroy(void *ignored)
+{
+	struct fsnotify_mark *mark, *next;
+	LIST_HEAD(private_destroy_list);
+
+	for (;;) {
+		spin_lock(&destroy_lock);
+		/* exchange the list head */
+		list_replace_init(&destroy_list, &private_destroy_list);
+		spin_unlock(&destroy_lock);
+
+		synchronize_srcu(&fsnotify_mark_srcu);
+
+		list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
+			list_del_init(&mark->destroy_list);
+			fsnotify_put_mark(mark);
+		}
+
+		wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
+	}
+
+	return 0;
+}
+
+static int __init fsnotify_mark_init(void)
+{
+	struct task_struct *thread;
+
+	thread = kthread_run(fsnotify_mark_destroy, NULL,
+			     "fsnotify_mark");
+	if (IS_ERR(thread))
+		panic("unable to start fsnotify mark destruction thread.");
+
+	return 0;
+}
+device_initcall(fsnotify_mark_init);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b8bf53b4c10..d6c435adc7a 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -31,6 +31,7 @@
  * allocated and used.
  */
 
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -56,7 +57,7 @@ static struct kmem_cache *fsnotify_event_holder_cachep;
  * it is needed.  It's refcnt is set 1 at kernel init time and will never
  * get set to 0 so it will never get 'freed'
  */
-static struct fsnotify_event q_overflow_event;
+static struct fsnotify_event *q_overflow_event;
 static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
 
 /**
@@ -87,12 +88,15 @@ void fsnotify_put_event(struct fsnotify_event *event)
 		return;
 
 	if (atomic_dec_and_test(&event->refcnt)) {
-		if (event->data_type == FSNOTIFY_EVENT_PATH)
-			path_put(&event->path);
+		pr_debug("%s: event=%p\n", __func__, event);
+
+		if (event->data_type == FSNOTIFY_EVENT_FILE)
+			fput(event->file);
 
 		BUG_ON(!list_empty(&event->private_data_list));
 
 		kfree(event->file_name);
+		put_pid(event->tgid);
 		kmem_cache_free(fsnotify_event_cachep, event);
 	}
 }
@@ -104,7 +108,8 @@ struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
 
 void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
 {
-	kmem_cache_free(fsnotify_event_holder_cachep, holder);
+	if (holder)
+		kmem_cache_free(fsnotify_event_holder_cachep, holder);
 }
 
 /*
@@ -129,53 +134,20 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
 }
 
 /*
- * Check if 2 events contain the same information.  We do not compare private data
- * but at this moment that isn't a problem for any know fsnotify listeners.
- */
-static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
-{
-	if ((old->mask == new->mask) &&
-	    (old->to_tell == new->to_tell) &&
-	    (old->data_type == new->data_type) &&
-	    (old->name_len == new->name_len)) {
-		switch (old->data_type) {
-		case (FSNOTIFY_EVENT_INODE):
-			/* remember, after old was put on the wait_q we aren't
-			 * allowed to look at the inode any more, only thing
-			 * left to check was if the file_name is the same */
-			if (!old->name_len ||
-			    !strcmp(old->file_name, new->file_name))
-				return true;
-			break;
-		case (FSNOTIFY_EVENT_PATH):
-			if ((old->path.mnt == new->path.mnt) &&
-			    (old->path.dentry == new->path.dentry))
-				return true;
-			break;
-		case (FSNOTIFY_EVENT_NONE):
-			if (old->mask & FS_Q_OVERFLOW)
-				return true;
-			else if (old->mask & FS_IN_IGNORED)
-				return false;
-			return false;
-		};
-	}
-	return false;
-}
-
-/*
  * Add an event to the group notification queue.  The group can later pull this
  * event off the queue to deal with.  If the event is successfully added to the
  * group's notification queue, a reference is taken on event.
  */
-int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
-			      struct fsnotify_event_private_data *priv)
+struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+						 struct fsnotify_event_private_data *priv,
+						 struct fsnotify_event *(*merge)(struct list_head *,
+										 struct fsnotify_event *))
 {
+	struct fsnotify_event *return_event = NULL;
 	struct fsnotify_event_holder *holder = NULL;
 	struct list_head *list = &group->notification_list;
-	struct fsnotify_event_holder *last_holder;
-	struct fsnotify_event *last_event;
-	int ret = 0;
+
+	pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
 
 	/*
 	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -189,18 +161,40 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
 alloc_holder:
 		holder = fsnotify_alloc_event_holder();
 		if (!holder)
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 	}
 
 	mutex_lock(&group->notification_mutex);
 
 	if (group->q_len >= group->max_events) {
-		event = &q_overflow_event;
-		ret = -EOVERFLOW;
+		event = q_overflow_event;
+
+		/*
+		 * we need to return the overflow event
+		 * which means we need a ref
+		 */
+		fsnotify_get_event(event);
+		return_event = event;
+
 		/* sorry, no private data on the overflow event */
 		priv = NULL;
 	}
 
+	if (!list_empty(list) && merge) {
+		struct fsnotify_event *tmp;
+
+		tmp = merge(list, event);
+		if (tmp) {
+			mutex_unlock(&group->notification_mutex);
+
+			if (return_event)
+				fsnotify_put_event(return_event);
+			if (holder != &event->holder)
+				fsnotify_destroy_event_holder(holder);
+			return tmp;
+		}
+	}
+
 	spin_lock(&event->lock);
 
 	if (list_empty(&event->holder.event_list)) {
@@ -212,19 +206,13 @@ alloc_holder:
 		 * event holder was used, go back and get a new one */
 		spin_unlock(&event->lock);
 		mutex_unlock(&group->notification_mutex);
-		goto alloc_holder;
-	}
 
-	if (!list_empty(list)) {
-		last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
-		last_event = last_holder->event;
-		if (event_compare(last_event, event)) {
-			spin_unlock(&event->lock);
-			mutex_unlock(&group->notification_mutex);
-			if (holder != &event->holder)
-				fsnotify_destroy_event_holder(holder);
-			return -EEXIST;
+		if (return_event) {
+			fsnotify_put_event(return_event);
+			return_event = NULL;
 		}
+
+		goto alloc_holder;
 	}
 
 	group->q_len++;
@@ -238,7 +226,7 @@ alloc_holder:
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
-	return ret;
+	return return_event;
 }
 
 /*
@@ -253,6 +241,8 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
 
 	BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
+	pr_debug("%s: group=%p\n", __func__, group);
+
 	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
 
 	event = holder->event;
@@ -314,25 +304,82 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
 
 static void initialize_event(struct fsnotify_event *event)
 {
-	event->holder.event = NULL;
 	INIT_LIST_HEAD(&event->holder.event_list);
 	atomic_set(&event->refcnt, 1);
 
 	spin_lock_init(&event->lock);
 
-	event->path.dentry = NULL;
-	event->path.mnt = NULL;
-	event->inode = NULL;
-	event->data_type = FSNOTIFY_EVENT_NONE;
-
 	INIT_LIST_HEAD(&event->private_data_list);
+}
 
-	event->to_tell = NULL;
+/*
+ * Caller damn well better be holding whatever mutex is protecting the
+ * old_holder->event_list and the new_event must be a clean event which
+ * cannot be found anywhere else in the kernel.
+ */
+int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
+			   struct fsnotify_event *new_event)
+{
+	struct fsnotify_event *old_event = old_holder->event;
+	struct fsnotify_event_holder *new_holder = &new_event->holder;
 
-	event->file_name = NULL;
-	event->name_len = 0;
+	enum event_spinlock_class {
+		SPINLOCK_OLD,
+		SPINLOCK_NEW,
+	};
 
-	event->sync_cookie = 0;
+	pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
+
+	/*
+	 * if the new_event's embedded holder is in use someone
+	 * screwed up and didn't give us a clean new event.
+	 */
+	BUG_ON(!list_empty(&new_holder->event_list));
+
+	spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
+	spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
+
+	new_holder->event = new_event;
+	list_replace_init(&old_holder->event_list, &new_holder->event_list);
+
+	spin_unlock(&new_event->lock);
+	spin_unlock(&old_event->lock);
+
+	/* event == holder means we are referenced through the in event holder */
+	if (old_holder != &old_event->holder)
+		fsnotify_destroy_event_holder(old_holder);
+
+	fsnotify_get_event(new_event); /* on the list take reference */
+	fsnotify_put_event(old_event); /* off the list, drop reference */
+
+	return 0;
+}
+
+struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
+{
+	struct fsnotify_event *event;
+
+	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
+
+	memcpy(event, old_event, sizeof(*event));
+	initialize_event(event);
+
+	if (event->name_len) {
+		event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
+		if (!event->file_name) {
+			kmem_cache_free(fsnotify_event_cachep, event);
+			return NULL;
+		}
+	}
+	event->tgid = get_pid(old_event->tgid);
+	if (event->data_type == FSNOTIFY_EVENT_FILE)
+		get_file(event->file);
+
+	return event;
 }
 
 /*
@@ -348,15 +395,18 @@ static void initialize_event(struct fsnotify_event *event)
  * @name the filename, if available
  */
 struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
-					     int data_type, const char *name, u32 cookie,
-					     gfp_t gfp)
+					     int data_type, const unsigned char *name,
+					     u32 cookie, gfp_t gfp)
 {
 	struct fsnotify_event *event;
 
-	event = kmem_cache_alloc(fsnotify_event_cachep, gfp);
+	event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
 	if (!event)
 		return NULL;
 
+	pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
+		 __func__, event, to_tell, mask, data, data_type);
+
 	initialize_event(event);
 
 	if (name) {
@@ -368,35 +418,36 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		event->name_len = strlen(event->file_name);
 	}
 
+	event->tgid = get_pid(task_tgid(current));
 	event->sync_cookie = cookie;
 	event->to_tell = to_tell;
+	event->data_type = data_type;
 
 	switch (data_type) {
 	case FSNOTIFY_EVENT_FILE: {
-		struct file *file = data;
-		struct path *path = &file->f_path;
-		event->path.dentry = path->dentry;
-		event->path.mnt = path->mnt;
-		path_get(&event->path);
-		event->data_type = FSNOTIFY_EVENT_PATH;
-		break;
-	}
-	case FSNOTIFY_EVENT_PATH: {
-		struct path *path = data;
-		event->path.dentry = path->dentry;
-		event->path.mnt = path->mnt;
-		path_get(&event->path);
-		event->data_type = FSNOTIFY_EVENT_PATH;
+		event->file = data;
+		/*
+		 * if this file is about to disappear hold an extra reference
+		 * until we return to __fput so we don't have to worry about
+		 * future get/put destroying the file under us or generating
+		 * additional events.  Notice that we change f_mode without
+		 * holding f_lock.  This is safe since this is the only possible
+		 * reference to this object in the kernel (it was about to be
+		 * freed, remember?)
+		 */
+		if (!atomic_long_read(&event->file->f_count)) {
+			event->file->f_mode |= FMODE_NONOTIFY;
+			get_file(event->file);
+		}
+		get_file(event->file);
 		break;
 	}
 	case FSNOTIFY_EVENT_INODE:
 		event->inode = data;
-		event->data_type = FSNOTIFY_EVENT_INODE;
 		break;
 	case FSNOTIFY_EVENT_NONE:
 		event->inode = NULL;
-		event->path.dentry = NULL;
-		event->path.mnt = NULL;
+		event->file = NULL;
 		break;
 	default:
 		BUG();
@@ -412,8 +463,11 @@ __init int fsnotify_notification_init(void)
 	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
 	fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
 
-	initialize_event(&q_overflow_event);
-	q_overflow_event.mask = FS_Q_OVERFLOW;
+	q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
+						 FSNOTIFY_EVENT_NONE, NULL, 0,
+						 GFP_KERNEL);
+	if (!q_overflow_event)
+		panic("unable to allocate fsnotify q_overflow_event\n");
 
 	return 0;
 }
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
new file mode 100644
index 00000000000..56772b578fb
--- /dev/null
+++ b/fs/notify/vfsmount_mark.c
@@ -0,0 +1,187 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/writeback.h> /* for inode_lock */
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark, *lmark;
+	struct hlist_node *pos, *n;
+	LIST_HEAD(free_list);
+
+	spin_lock(&mnt->mnt_root->d_lock);
+	hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) {
+		list_add(&mark->m.free_m_list, &free_list);
+		hlist_del_init_rcu(&mark->m.m_list);
+		fsnotify_get_mark(mark);
+	}
+	spin_unlock(&mnt->mnt_root->d_lock);
+
+	list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
+		fsnotify_destroy_mark(mark);
+		fsnotify_put_mark(mark);
+	}
+}
+
+void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
+{
+	fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT);
+}
+
+/*
+ * Recalculate the mask of events relevant to a given vfsmount locked.
+ */
+static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark;
+	struct hlist_node *pos;
+	__u32 new_mask = 0;
+
+	assert_spin_locked(&mnt->mnt_root->d_lock);
+
+	hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list)
+		new_mask |= mark->mask;
+	mnt->mnt_fsnotify_mask = new_mask;
+}
+
+/*
+ * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types
+ * any notifier is interested in hearing for this mount point
+ */
+void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt)
+{
+	spin_lock(&mnt->mnt_root->d_lock);
+	fsnotify_recalc_vfsmount_mask_locked(mnt);
+	spin_unlock(&mnt->mnt_root->d_lock);
+}
+
+void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
+{
+	struct vfsmount *mnt = mark->m.mnt;
+
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&mark->group->mark_lock);
+
+	spin_lock(&mnt->mnt_root->d_lock);
+
+	hlist_del_init_rcu(&mark->m.m_list);
+	mark->m.mnt = NULL;
+
+	fsnotify_recalc_vfsmount_mask_locked(mnt);
+
+	spin_unlock(&mnt->mnt_root->d_lock);
+}
+
+static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
+								struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark;
+	struct hlist_node *pos;
+
+	assert_spin_locked(&mnt->mnt_root->d_lock);
+
+	hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) {
+		if (mark->group == group) {
+			fsnotify_get_mark(mark);
+			return mark;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * given a group and vfsmount, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group,
+						  struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark;
+
+	spin_lock(&mnt->mnt_root->d_lock);
+	mark = fsnotify_find_vfsmount_mark_locked(group, mnt);
+	spin_unlock(&mnt->mnt_root->d_lock);
+
+	return mark;
+}
+
+/*
+ * Attach an initialized mark to a given group and vfsmount.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which groups.
+ */
+int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
+			       struct fsnotify_group *group, struct vfsmount *mnt,
+			       int allow_dups)
+{
+	struct fsnotify_mark *lmark;
+	struct hlist_node *node, *last = NULL;
+	int ret = 0;
+
+	mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
+
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&group->mark_lock);
+
+	spin_lock(&mnt->mnt_root->d_lock);
+
+	mark->m.mnt = mnt;
+
+	/* is mark the first mark? */
+	if (hlist_empty(&mnt->mnt_fsnotify_marks)) {
+		hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
+		goto out;
+	}
+
+	/* should mark be in the middle of the current list? */
+	hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
+		last = node;
+
+		if ((lmark->group == group) && !allow_dups) {
+			ret = -EEXIST;
+			goto out;
+		}
+
+		if (mark->group < lmark->group)
+			continue;
+
+		hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
+		goto out;
+	}
+
+	BUG_ON(last == NULL);
+	/* mark should be the last entry.  last is the current last entry */
+	hlist_add_after_rcu(last, &mark->m.m_list);
+out:
+	fsnotify_recalc_vfsmount_mask_locked(mnt);
+	spin_unlock(&mnt->mnt_root->d_lock);
+
+	return ret;
+}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4b57fb1eac2..93622b175fc 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2238,7 +2238,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
 }
 
 /**
- * ntfs_clear_big_inode - clean up the ntfs specific part of an inode
+ * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
  * @vi:		vfs inode pending annihilation
  *
  * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
@@ -2247,10 +2247,13 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
  *
  * If the MFT record is dirty, we commit it before doing anything else.
  */
-void ntfs_clear_big_inode(struct inode *vi)
+void ntfs_evict_big_inode(struct inode *vi)
 {
 	ntfs_inode *ni = NTFS_I(vi);
 
+	truncate_inode_pages(&vi->i_data, 0);
+	end_writeback(vi);
+
 #ifdef NTFS_RW
 	if (NInoDirty(ni)) {
 		bool was_bad = (is_bad_inode(vi));
@@ -2879,9 +2882,6 @@ void ntfs_truncate_vfs(struct inode *vi) {
  *
  * Called with ->i_mutex held.  For the ATTR_SIZE (i.e. ->truncate) case, also
  * called with ->i_alloc_sem held for writing.
- *
- * Basically this is a copy of generic notify_change() and inode_setattr()
- * functionality, except we intercept and abort changes in i_size.
  */
 int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 9a113544605..2dabf813456 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -279,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
 
 extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
 extern void ntfs_destroy_big_inode(struct inode *inode);
-extern void ntfs_clear_big_inode(struct inode *vi);
+extern void ntfs_evict_big_inode(struct inode *vi);
 
 extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni);
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0de1db6cddb..512806171bf 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2700,7 +2700,7 @@ static const struct super_operations ntfs_sops = {
 	.put_super	= ntfs_put_super,	/* Syscall: umount. */
 	.statfs		= ntfs_statfs,		/* Syscall: statfs */
 	.remount_fs	= ntfs_remount,		/* Syscall: mount -o remount. */
-	.clear_inode	= ntfs_clear_big_inode,	/* VFS: Called when an inode is
+	.evict_inode	= ntfs_evict_big_inode,	/* VFS: Called when an inode is
 						   removed from memory. */
 	//.umount_begin	= NULL,			/* Forced umount. */
 	.show_options	= ntfs_show_options,	/* Show mount options in
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 96337a4fbbd..0de69c9a08b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -643,11 +643,10 @@ static ssize_t ocfs2_direct_IO(int rw,
 	if (i_size_read(inode) <= offset)
 		return 0;
 
-	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-					    inode->i_sb->s_bdev, iov, offset,
-					    nr_segs,
-					    ocfs2_direct_IO_get_blocks,
-					    ocfs2_dio_end_io);
+	ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+				   iov, offset, nr_segs,
+				   ocfs2_direct_IO_get_blocks,
+				   ocfs2_dio_end_io, NULL, 0);
 
 	mlog_exit(ret);
 	return ret;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index bef34d0528d..c2903b84bb7 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -213,10 +213,12 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
 
 	attr->ia_valid &= ~ATTR_SIZE;
 	error = inode_change_ok(inode, attr);
-	if (!error)
-		error = inode_setattr(inode, attr);
+	if (error)
+		return error;
 
-	return error;
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
@@ -354,13 +356,12 @@ static void dlmfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
 }
 
-static void dlmfs_clear_inode(struct inode *inode)
+static void dlmfs_evict_inode(struct inode *inode)
 {
 	int status;
 	struct dlmfs_inode_private *ip;
 
-	if (!inode)
-		return;
+	end_writeback(inode);
 
 	mlog(0, "inode %lu\n", inode->i_ino);
 
@@ -630,7 +631,7 @@ static const struct super_operations dlmfs_ops = {
 	.statfs		= simple_statfs,
 	.alloc_inode	= dlmfs_alloc_inode,
 	.destroy_inode	= dlmfs_destroy_inode,
-	.clear_inode	= dlmfs_clear_inode,
+	.evict_inode	= dlmfs_evict_inode,
 	.drop_inode	= generic_delete_inode,
 };
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2b10b36d157..81296b4e364 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1233,18 +1233,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	/*
-	 * This will intentionally not wind up calling simple_setsize(),
+	 * This will intentionally not wind up calling truncate_setsize(),
 	 * since all the work for a size change has been done above.
 	 * Otherwise, we could get into problems with truncate as
 	 * ip_alloc_sem is used there to protect against i_size
 	 * changes.
+	 *
+	 * XXX: this means the conditional below can probably be removed.
 	 */
-	status = inode_setattr(inode, attr);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail_commit;
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		status = vmtruncate(inode, attr->ia_size);
+		if (status) {
+			mlog_errno(status);
+			goto bail_commit;
+		}
 	}
 
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
 	status = ocfs2_mark_inode_dirty(handle, inode, bh);
 	if (status < 0)
 		mlog_errno(status);
@@ -2300,12 +2308,12 @@ relock:
 			 * blocks outside i_size. Trim these off again.
 			 * Don't need i_size_read because we hold i_mutex.
 			 *
-			 * XXX(hch): this looks buggy because ocfs2 did not
+			 * XXX(truncate): this looks buggy because ocfs2 did not
 			 * actually implement ->truncate.  Take a look at
 			 * the new truncate sequence and update this accordingly
 			 */
 			if (*ppos + count > inode->i_size)
-				simple_setsize(inode, inode->i_size);
+				truncate_setsize(inode, inode->i_size);
 			ret = written;
 			goto out_dio;
 		}
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index abb0a95cc71..0492464916b 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -969,7 +969,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
 	truncate_inode_pages(&inode->i_data, 0);
 }
 
-void ocfs2_delete_inode(struct inode *inode)
+static void ocfs2_delete_inode(struct inode *inode)
 {
 	int wipe, status;
 	sigset_t oldset;
@@ -1075,20 +1075,17 @@ bail_unlock_nfs_sync:
 bail_unblock:
 	ocfs2_unblock_signals(&oldset);
 bail:
-	clear_inode(inode);
 	mlog_exit_void();
 }
 
-void ocfs2_clear_inode(struct inode *inode)
+static void ocfs2_clear_inode(struct inode *inode)
 {
 	int status;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	mlog_entry_void();
 
-	if (!inode)
-		goto bail;
-
+	end_writeback(inode);
 	mlog(0, "Clearing inode: %llu, nlink = %u\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink);
 
@@ -1180,16 +1177,34 @@ void ocfs2_clear_inode(struct inode *inode)
 	jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
 				       &oi->ip_jinode);
 
-bail:
 	mlog_exit_void();
 }
 
+/*
+ * Combined replacement for the old delete/clear inode callbacks.
+ *
+ * An inode with no links left, or one flagged OCFS2_INODE_MAYBE_ORPHANED,
+ * goes through ocfs2_delete_inode(); any other inode only has its page
+ * cache truncated.  ocfs2_clear_inode() then runs in both cases.
+ */
+void ocfs2_evict_inode(struct inode *inode)
+{
+	if (!inode->i_nlink ||
+	    (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
+		ocfs2_delete_inode(inode);
+	} else {
+		truncate_inode_pages(&inode->i_data, 0);
+	}
+	ocfs2_clear_inode(inode);
+}
+
 /* Called under inode_lock, with no more references on the
  * struct inode, so it's safe here to check the flags field
  * and to manipulate i_nlink without any other locks. */
-void ocfs2_drop_inode(struct inode *inode)
+int ocfs2_drop_inode(struct inode *inode)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	int res;
 
 	mlog_entry_void();
 
@@ -1197,11 +1205,12 @@ void ocfs2_drop_inode(struct inode *inode)
 	     (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
 
 	if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
-		generic_delete_inode(inode);
+		res = 1;
 	else
-		generic_drop_inode(inode);
+		res = generic_drop_inode(inode);
 
 	mlog_exit_void();
+	return res;
 }
 
 /*
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 9f5f5fcadc4..6de5a869db3 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -123,9 +123,8 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
 	return &OCFS2_I(inode)->ip_metadata_cache;
 }
 
-void ocfs2_clear_inode(struct inode *inode);
-void ocfs2_delete_inode(struct inode *inode);
-void ocfs2_drop_inode(struct inode *inode);
+void ocfs2_evict_inode(struct inode *inode);
+int ocfs2_drop_inode(struct inode *inode);
 
 /* Flags for ocfs2_iget() */
 #define OCFS2_FI_FLAG_SYSFILE		0x1
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 03a799fdd74..fa1be1b304d 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -145,8 +145,7 @@ static const struct super_operations ocfs2_sops = {
 	.alloc_inode	= ocfs2_alloc_inode,
 	.destroy_inode	= ocfs2_destroy_inode,
 	.drop_inode	= ocfs2_drop_inode,
-	.clear_inode	= ocfs2_clear_inode,
-	.delete_inode	= ocfs2_delete_inode,
+	.evict_inode	= ocfs2_evict_inode,
 	.sync_fs	= ocfs2_sync_fs,
 	.put_super	= ocfs2_put_super,
 	.remount_fs	= ocfs2_remount,
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index b42d6241903..393f3f659da 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -25,11 +25,10 @@ static struct buffer_head *omfs_get_bucket(struct inode *dir,
 		const char *name, int namelen, int *ofs)
 {
 	int nbuckets = (dir->i_size - OMFS_DIR_START)/8;
-	int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);
 	int bucket = omfs_hash(name, namelen, nbuckets);
 
 	*ofs = OMFS_DIR_START + bucket * 8;
-	return sb_bread(dir->i_sb, block);
+	return omfs_bread(dir->i_sb, dir->i_ino);
 }
 
 static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
@@ -42,8 +41,7 @@ static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
 	*prev_block = ~0;
 
 	while (block != ~0) {
-		bh = sb_bread(dir->i_sb,
-			clus_to_blk(OMFS_SB(dir->i_sb), block));
+		bh = omfs_bread(dir->i_sb, block);
 		if (!bh) {
 			err = -EIO;
 			goto err;
@@ -86,11 +84,10 @@ static struct buffer_head *omfs_find_entry(struct inode *dir,
 int omfs_make_empty(struct inode *inode, struct super_block *sb)
 {
 	struct omfs_sb_info *sbi = OMFS_SB(sb);
-	int block = clus_to_blk(sbi, inode->i_ino);
 	struct buffer_head *bh;
 	struct omfs_inode *oi;
 
-	bh = sb_bread(sb, block);
+	bh = omfs_bread(sb, inode->i_ino);
 	if (!bh)
 		return -ENOMEM;
 
@@ -134,7 +131,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode)
 	brelse(bh);
 
 	/* now set the sibling and parent pointers on the new inode */
-	bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));
+	bh = omfs_bread(dir->i_sb, inode->i_ino);
 	if (!bh)
 		goto out;
 
@@ -190,8 +187,7 @@ static int omfs_delete_entry(struct dentry *dentry)
 	if (prev != ~0) {
 		/* found in middle of list, get list ptr */
 		brelse(bh);
-		bh = sb_bread(dir->i_sb,
-			clus_to_blk(OMFS_SB(dir->i_sb), prev));
+		bh = omfs_bread(dir->i_sb, prev);
 		if (!bh)
 			goto out;
 
@@ -224,8 +220,7 @@ static int omfs_dir_is_empty(struct inode *inode)
 	u64 *ptr;
 	int i;
 
-	bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb),
-			inode->i_ino));
+	bh = omfs_bread(inode->i_sb, inode->i_ino);
 
 	if (!bh)
 		return 0;
@@ -353,8 +348,7 @@ static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
 
 	/* follow chain in this bucket */
 	while (fsblock != ~0) {
-		bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb),
-				fsblock));
+		bh = omfs_bread(dir->i_sb, fsblock);
 		if (!bh)
 			goto out;
 
@@ -466,7 +460,7 @@ static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	hchain = (filp->f_pos >> 20) - 1;
 	hindex = filp->f_pos & 0xfffff;
 
-	bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino));
+	bh = omfs_bread(dir->i_sb, dir->i_ino);
 	if (!bh)
 		goto out;
 
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 6e7a3291bbe..8a6d34fa668 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -50,7 +50,7 @@ int omfs_shrink_inode(struct inode *inode)
 	if (inode->i_size != 0)
 		goto out;
 
-	bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+	bh = omfs_bread(inode->i_sb, next);
 	if (!bh)
 		goto out;
 
@@ -90,7 +90,7 @@ int omfs_shrink_inode(struct inode *inode)
 		if (next == ~0)
 			break;
 
-		bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+		bh = omfs_bread(inode->i_sb, next);
 		if (!bh)
 			goto out;
 		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
@@ -222,7 +222,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
 	struct buffer_head *bh;
 	sector_t next, offset;
 	int ret;
-	u64 new_block;
+	u64 uninitialized_var(new_block);
 	u32 max_extents;
 	int extent_count;
 	struct omfs_extent *oe;
@@ -232,7 +232,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
 	int remain;
 
 	ret = -EIO;
-	bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino));
+	bh = omfs_bread(inode->i_sb, inode->i_ino);
 	if (!bh)
 		goto out;
 
@@ -265,7 +265,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
 			break;
 
 		brelse(bh);
-		bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+		bh = omfs_bread(inode->i_sb, next);
 		if (!bh)
 			goto out;
 		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
@@ -312,9 +312,17 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags,
-				pagep, fsdata, omfs_get_block);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				omfs_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
@@ -333,7 +341,37 @@ const struct file_operations omfs_file_operations = {
 	.splice_read = generic_file_splice_read,
 };
 
+/*
+ * ->setattr handler installed in omfs_file_inops.
+ *
+ * Validates the request with inode_change_ok(), applies a size change
+ * through vmtruncate() when ATTR_SIZE is set and the size really differs,
+ * then copies the attributes into the inode and marks it dirty.
+ * Returns 0 on success or the first error encountered.
+ */
+static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
 const struct inode_operations omfs_file_inops = {
+	.setattr = omfs_setattr,
 	.truncate = omfs_truncate
 };
 
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 089839a6cc6..14a22863291 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -19,6 +19,21 @@ MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
 MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux");
 MODULE_LICENSE("GPL");
 
+/*
+ * Bounds-checked block read.
+ *
+ * Returns NULL when @block is not below sbi->s_num_blocks; otherwise
+ * converts it with clus_to_blk() and returns whatever sb_bread() gives.
+ */
+struct buffer_head *omfs_bread(struct super_block *sb, sector_t block)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	if (block >= sbi->s_num_blocks)
+		return NULL;
+
+	return sb_bread(sb, clus_to_blk(sbi, block));
+}
+
 struct inode *omfs_new_inode(struct inode *dir, int mode)
 {
 	struct inode *inode;
@@ -93,15 +102,13 @@ static int __omfs_write_inode(struct inode *inode, int wait)
 	struct omfs_inode *oi;
 	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
 	struct buffer_head *bh, *bh2;
-	unsigned int block;
 	u64 ctime;
 	int i;
 	int ret = -EIO;
 	int sync_failed = 0;
 
 	/* get current inode since we may have written sibling ptrs etc. */
-	block = clus_to_blk(sbi, inode->i_ino);
-	bh = sb_bread(inode->i_sb, block);
+	bh = omfs_bread(inode->i_sb, inode->i_ino);
 	if (!bh)
 		goto out;
 
@@ -140,8 +147,7 @@ static int __omfs_write_inode(struct inode *inode, int wait)
 
 	/* if mirroring writes, copy to next fsblock */
 	for (i = 1; i < sbi->s_mirrors; i++) {
-		bh2 = sb_bread(inode->i_sb, block + i *
-			(sbi->s_blocksize / sbi->s_sys_blocksize));
+		bh2 = omfs_bread(inode->i_sb, inode->i_ino + i);
 		if (!bh2)
 			goto out_brelse;
 
@@ -175,9 +181,13 @@ int omfs_sync_inode(struct inode *inode)
  * called when an entry is deleted, need to clear the bits in the
  * bitmaps.
  */
-static void omfs_delete_inode(struct inode *inode)
+static void omfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+
+	if (inode->i_nlink)
+		return;
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_size = 0;
@@ -185,7 +195,6 @@ static void omfs_delete_inode(struct inode *inode)
 	}
 
 	omfs_clear_range(inode->i_sb, inode->i_ino, 2);
-	clear_inode(inode);
 }
 
 struct inode *omfs_iget(struct super_block *sb, ino_t ino)
@@ -193,7 +202,6 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
 	struct omfs_sb_info *sbi = OMFS_SB(sb);
 	struct omfs_inode *oi;
 	struct buffer_head *bh;
-	unsigned int block;
 	u64 ctime;
 	unsigned long nsecs;
 	struct inode *inode;
@@ -204,8 +212,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
 	if (!(inode->i_state & I_NEW))
 		return inode;
 
-	block = clus_to_blk(sbi, ino);
-	bh = sb_bread(inode->i_sb, block);
+	bh = omfs_bread(inode->i_sb, ino);
 	if (!bh)
 		goto iget_failed;
 
@@ -284,7 +291,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 static const struct super_operations omfs_sops = {
 	.write_inode	= omfs_write_inode,
-	.delete_inode	= omfs_delete_inode,
+	.evict_inode	= omfs_evict_inode,
 	.put_super	= omfs_put_super,
 	.statfs		= omfs_statfs,
 	.show_options	= generic_show_options,
@@ -319,6 +326,9 @@ static int omfs_get_imap(struct super_block *sb)
 		goto nomem;
 
 	block = clus_to_blk(sbi, sbi->s_bitmap_ino);
+	if (block >= sbi->s_num_blocks)
+		goto nomem;
+
 	ptr = sbi->s_imap;
 	for (count = bitmap_size; count > 0; count -= sb->s_blocksize) {
 		bh = sb_bread(sb, block++);
@@ -417,7 +427,6 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 	struct omfs_root_block *omfs_rb;
 	struct omfs_sb_info *sbi;
 	struct inode *root;
-	sector_t start;
 	int ret = -EINVAL;
 
 	save_mount_options(sb, (char *) data);
@@ -486,8 +495,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) -
 		get_bitmask_order(sbi->s_sys_blocksize);
 
-	start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block));
-	bh2 = sb_bread(sb, start);
+	bh2 = omfs_bread(sb, be64_to_cpu(omfs_sb->s_root_block));
 	if (!bh2)
 		goto out_brelse_bh;
 
@@ -504,6 +512,22 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_brelse_bh2;
 	}
 
+	/* Reject out-of-range values before calling omfs_get_imap(). */
+	if (sbi->s_bitmap_ino != ~0ULL &&
+	    sbi->s_bitmap_ino > sbi->s_num_blocks) {
+		printk(KERN_ERR "omfs: free space bitmap location is corrupt "
+			"(%llx, total blocks %llx)\n",
+			(unsigned long long) sbi->s_bitmap_ino,
+			(unsigned long long) sbi->s_num_blocks);
+		goto out_brelse_bh2;
+	}
+	if (sbi->s_clustersize < 1 ||
+	    sbi->s_clustersize > OMFS_MAX_CLUSTER_SIZE) {
+		printk(KERN_ERR "omfs: cluster size out of range (%d)\n",
+			sbi->s_clustersize);
+		goto out_brelse_bh2;
+	}
+
 	ret = omfs_get_imap(sb);
 	if (ret)
 		goto out_brelse_bh2;
@@ -529,6 +552,8 @@ out_brelse_bh2:
 out_brelse_bh:
 	brelse(bh);
 end:
+	if (ret)
+		kfree(sbi);
 	return ret;
 }
 
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index ebe2fdbe535..7d414fef501 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -58,6 +58,7 @@ extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
 extern int omfs_shrink_inode(struct inode *inode);
 
 /* inode.c */
+extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block);
 extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
 extern struct inode *omfs_new_inode(struct inode *dir, int mode);
 extern int omfs_reserve_block(struct super_block *sb, sector_t block);
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h
index 12cca245d6e..ee5e4327de9 100644
--- a/fs/omfs/omfs_fs.h
+++ b/fs/omfs/omfs_fs.h
@@ -17,6 +17,7 @@
 #define OMFS_EXTENT_CONT 0x40
 #define OMFS_XOR_COUNT 19
 #define OMFS_MAX_BLOCK_SIZE 8192
+#define OMFS_MAX_CLUSTER_SIZE 8
 
 struct omfs_super_block {
 	char s_fill1[256];
diff --git a/fs/open.c b/fs/open.c
index 0d1fa3dc0ef..b715d06fbe3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -29,6 +29,7 @@
 #include <linux/falloc.h>
 #include <linux/fs_struct.h>
 #include <linux/ima.h>
+#include <linux/dnotify.h>
 
 #include "internal.h"
 
@@ -887,7 +888,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 				put_unused_fd(fd);
 				fd = PTR_ERR(f);
 			} else {
-				fsnotify_open(f->f_path.dentry);
+				fsnotify_open(f);
 				fd_install(fd, f);
 			}
 		}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69254a365ce..c806dfb24e0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -559,9 +559,19 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
 		return -EPERM;
 
 	error = inode_change_ok(inode, attr);
-	if (!error)
-		error = inode_setattr(inode, attr);
-	return error;
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static const struct inode_operations proc_def_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2791907744e..dd29f033766 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
@@ -258,17 +259,22 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 
 	error = inode_change_ok(inode, iattr);
 	if (error)
-		goto out;
+		return error;
 
-	error = inode_setattr(inode, iattr);
-	if (error)
-		goto out;
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, iattr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
 	
 	de->uid = inode->i_uid;
 	de->gid = inode->i_gid;
 	de->mode = inode->i_mode;
-out:
-	return error;
+	return 0;
 }
 
 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index aea8502e58a..23561cda724 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,11 +25,12 @@
 
 #include "internal.h"
 
-static void proc_delete_inode(struct inode *inode)
+static void proc_evict_inode(struct inode *inode)
 {
 	struct proc_dir_entry *de;
 
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 
 	/* Stop tracking associated processes */
 	put_pid(PROC_I(inode)->pid);
@@ -40,7 +41,6 @@ static void proc_delete_inode(struct inode *inode)
 		pde_put(de);
 	if (PROC_I(inode)->sysctl)
 		sysctl_head_put(PROC_I(inode)->sysctl);
-	clear_inode(inode);
 }
 
 struct vfsmount *proc_mnt;
@@ -91,7 +91,7 @@ static const struct super_operations proc_sops = {
 	.alloc_inode	= proc_alloc_inode,
 	.destroy_inode	= proc_destroy_inode,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= proc_delete_inode,
+	.evict_inode	= proc_evict_inode,
 	.statfs		= simple_statfs,
 };
 
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6ff9981f0a1..5be436ea088 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -329,10 +329,19 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 		return -EPERM;
 
 	error = inode_change_ok(inode, attr);
-	if (!error)
-		error = inode_setattr(inode, attr);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
 
-	return error;
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 277575ddc05..16829722be9 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -320,10 +320,19 @@ static int qnx4_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				qnx4_get_block,
 				&qnx4_inode->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
 {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ef72b169942..aad1316a977 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -898,7 +898,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 
 	spin_lock(&inode_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 #ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d532c20fc17..9eead2c796b 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -146,9 +146,8 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
 			return ret;
 	}
 
-	ret = simple_setsize(inode, newsize);
-
-	return ret;
+	truncate_setsize(inode, newsize);
+	return 0;
 }
 
 /*****************************************************************************/
@@ -183,7 +182,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
 		}
 	}
 
-	generic_setattr(inode, ia);
+	setattr_copy(inode, ia);
  out:
 	ia->ia_valid = old_ia_valid;
 	return ret;
diff --git a/fs/read_write.c b/fs/read_write.c
index 9c0485236e6..74e36586e4d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -311,7 +311,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 		else
 			ret = do_sync_read(file, buf, count, pos);
 		if (ret > 0) {
-			fsnotify_access(file->f_path.dentry);
+			fsnotify_access(file);
 			add_rchar(current, ret);
 		}
 		inc_syscr(current);
@@ -367,7 +367,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 		else
 			ret = do_sync_write(file, buf, count, pos);
 		if (ret > 0) {
-			fsnotify_modify(file->f_path.dentry);
+			fsnotify_modify(file);
 			add_wchar(current, ret);
 		}
 		inc_syscw(current);
@@ -675,9 +675,9 @@ out:
 		kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
-			fsnotify_access(file->f_path.dentry);
+			fsnotify_access(file);
 		else
-			fsnotify_modify(file->f_path.dentry);
+			fsnotify_modify(file);
 	}
 	return ret;
 }
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index b82cdd8a45d..6846371498b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -38,20 +38,28 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 
 	BUG_ON(!S_ISREG(inode->i_mode));
 
+	/*
+	 * Drop our opener reference without the mutex unless we might be
+	 * the last one; only the final closer goes on, under tailpack.
+	 */
+	if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
+		return 0;
+
+	mutex_lock(&(REISERFS_I(inode)->tailpack));
+
+	if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+		return 0;
+	}
+
 	/* fast out for when nothing needs to be done */
-	if ((atomic_read(&inode->i_count) > 1 ||
-	     !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
+	if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
 	     !tail_has_to_be_packed(inode)) &&
 	    REISERFS_I(inode)->i_prealloc_count <= 0) {
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
 		return 0;
 	}
 
-	mutex_lock(&inode->i_mutex);
-
-	mutex_lock(&(REISERFS_I(inode)->i_mmap));
-	if (REISERFS_I(inode)->i_flags & i_ever_mapped)
-		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
-
 	reiserfs_write_lock(inode->i_sb);
 	/* freeing preallocation only involves relogging blocks that
 	 * are already in the current transaction.  preallocation gets
@@ -94,9 +98,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 	if (!err)
 		err = jbegin_failure;
 
-	if (!err && atomic_read(&inode->i_count) <= 1 &&
+	if (!err &&
 	    (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
 	    tail_has_to_be_packed(inode)) {
+
 		/* if regular file is released by last holder and it has been
 		   appended (we append by unformatted node only) or its direct
 		   item(s) had to be converted, then it may have to be
@@ -104,27 +109,37 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 		err = reiserfs_truncate_file(inode, 0);
 	}
       out:
-	mutex_unlock(&(REISERFS_I(inode)->i_mmap));
-	mutex_unlock(&inode->i_mutex);
 	reiserfs_write_unlock(inode->i_sb);
+	mutex_unlock(&(REISERFS_I(inode)->tailpack));
 	return err;
 }
 
-static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+/*
+ * ->open: run dquot_file_open() and count this opener.
+ *
+ * If the opener count is currently zero, the increment is done under the
+ * tailpack mutex instead.  The result of dquot_file_open() is returned;
+ * note that the count is bumped even when that result is an error.
+ * NOTE(review): confirm ->release runs after a failed ->open, otherwise
+ * the count is never dropped again.
+ */
+static int reiserfs_file_open(struct inode *inode, struct file *file)
 {
-	struct inode *inode;
-
-	inode = file->f_path.dentry->d_inode;
-	mutex_lock(&(REISERFS_I(inode)->i_mmap));
-	REISERFS_I(inode)->i_flags |= i_ever_mapped;
-	mutex_unlock(&(REISERFS_I(inode)->i_mmap));
-
-	return generic_file_mmap(file, vma);
+	int err = dquot_file_open(inode, file);
+	if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
+		/* somebody might be tailpacking on final close; wait for it */
+		mutex_lock(&(REISERFS_I(inode)->tailpack));
+		atomic_inc(&REISERFS_I(inode)->openers);
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+	}
+	return err;
 }
 
 static void reiserfs_vfs_truncate_file(struct inode *inode)
 {
+	mutex_lock(&(REISERFS_I(inode)->tailpack));
 	reiserfs_truncate_file(inode, 1);
+	mutex_unlock(&(REISERFS_I(inode)->tailpack));
 }
 
 /* Sync a reiserfs file. */
@@ -288,8 +294,8 @@ const struct file_operations reiserfs_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
 #endif
-	.mmap = reiserfs_file_mmap,
-	.open = dquot_file_open,
+	.mmap = generic_file_mmap,
+	.open = reiserfs_file_open,
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
 	.aio_read = generic_file_aio_read,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 29db72203bd..ae35413dcbe 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -25,7 +25,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
 int reiserfs_prepare_write(struct file *f, struct page *page,
 			   unsigned from, unsigned to);
 
-void reiserfs_delete_inode(struct inode *inode)
+void reiserfs_evict_inode(struct inode *inode)
 {
 	/* We need blocks for transaction + (user+group) quota update (possibly delete) */
 	int jbegin_count =
@@ -35,10 +35,12 @@ void reiserfs_delete_inode(struct inode *inode)
 	int depth;
 	int err;
 
-	if (!is_bad_inode(inode))
+	if (!inode->i_nlink && !is_bad_inode(inode))
 		dquot_initialize(inode);
 
 	truncate_inode_pages(&inode->i_data, 0);
+	if (inode->i_nlink)
+		goto no_delete;
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
 
@@ -77,9 +79,16 @@ void reiserfs_delete_inode(struct inode *inode)
 		;
 	}
       out:
-	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
+	end_writeback(inode);	/* note this must go after the journal_end to prevent deadlock */
+	dquot_drop(inode);
 	inode->i_blocks = 0;
 	reiserfs_write_unlock_once(inode->i_sb, depth);
+	/* Teardown already done above; must not fall through to no_delete. */
+	return;
+
+no_delete:
+	end_writeback(inode);
+	dquot_drop(inode);
 }
 
 static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -1138,7 +1145,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
 	REISERFS_I(inode)->i_prealloc_count = 0;
 	REISERFS_I(inode)->i_trans_id = 0;
 	REISERFS_I(inode)->i_jl = NULL;
-	mutex_init(&(REISERFS_I(inode)->i_mmap));
 	reiserfs_init_xattr_rwsem(inode);
 
 	if (stat_data_v1(ih)) {
@@ -1841,7 +1847,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	REISERFS_I(inode)->i_attrs =
 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
-	mutex_init(&(REISERFS_I(inode)->i_mmap));
 	reiserfs_init_xattr_rwsem(inode);
 
 	/* key to search for correct place for new stat data */
@@ -2587,8 +2592,7 @@ static int reiserfs_write_begin(struct file *file,
 		old_ref = th->t_refcount;
 		th->t_refcount++;
 	}
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				reiserfs_get_block);
+	ret = __block_write_begin(page, pos, len, reiserfs_get_block);
 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
 		struct reiserfs_transaction_handle *th = current->journal_info;
 		/* this gets a little ugly.  If reiserfs_get_block returned an
@@ -3059,10 +3063,25 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs,
 				  reiserfs_get_blocks_direct_io, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -3072,6 +3091,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 	int depth;
 	int error;
 
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
 	/* must be turned off for recursive notify_change calls */
 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
 
@@ -3121,55 +3144,58 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 		goto out;
 	}
 
-	error = inode_change_ok(inode, attr);
-	if (!error) {
-		if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-		    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
-			error = reiserfs_chown_xattrs(inode, attr);
+	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+		struct reiserfs_transaction_handle th;
+		int jbegin_count =
+		    2 *
+		    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
+		     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
+		    2;
 
-			if (!error) {
-				struct reiserfs_transaction_handle th;
-				int jbegin_count =
-				    2 *
-				    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
-				     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
-				    2;
-
-				/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-				error =
-				    journal_begin(&th, inode->i_sb,
-						  jbegin_count);
-				if (error)
-					goto out;
-				error = dquot_transfer(inode, attr);
-				if (error) {
-					journal_end(&th, inode->i_sb,
-						    jbegin_count);
-					goto out;
-				}
-				/* Update corresponding info in inode so that everything is in
-				 * one transaction */
-				if (attr->ia_valid & ATTR_UID)
-					inode->i_uid = attr->ia_uid;
-				if (attr->ia_valid & ATTR_GID)
-					inode->i_gid = attr->ia_gid;
-				mark_inode_dirty(inode);
-				error =
-				    journal_end(&th, inode->i_sb, jbegin_count);
-			}
-		}
-		if (!error) {
-			/*
-			 * Relax the lock here, as it might truncate the
-			 * inode pages and wait for inode pages locks.
-			 * To release such page lock, the owner needs the
-			 * reiserfs lock
-			 */
-			reiserfs_write_unlock_once(inode->i_sb, depth);
-			error = inode_setattr(inode, attr);
-			depth = reiserfs_write_lock_once(inode->i_sb);
+		error = reiserfs_chown_xattrs(inode, attr);
+
+		if (error)
+			return error;
+
+		/* (user+group)*(old+new) structure - we count quota info and inode write (sb, inode) */
+		error = journal_begin(&th, inode->i_sb, jbegin_count);
+		if (error)
+			goto out;
+		error = dquot_transfer(inode, attr);
+		if (error) {
+			journal_end(&th, inode->i_sb, jbegin_count);
+			goto out;
 		}
+
+		/* Update corresponding info in inode so that everything is in
+		 * one transaction */
+		if (attr->ia_valid & ATTR_UID)
+			inode->i_uid = attr->ia_uid;
+		if (attr->ia_valid & ATTR_GID)
+			inode->i_gid = attr->ia_gid;
+		mark_inode_dirty(inode);
+		error = journal_end(&th, inode->i_sb, jbegin_count);
+		if (error)
+			goto out;
+	}
+
+	/*
+	 * Relax the lock here, as it might truncate the
+	 * inode pages and wait for inode pages locks.
+	 * To release such page lock, the owner needs the
+	 * reiserfs lock
+	 */
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode))
+		error = vmtruncate(inode, attr->ia_size);
+
+	if (!error) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
 	}
+	depth = reiserfs_write_lock_once(inode->i_sb);
 
 	if (!error && reiserfs_posixacl(inode->i_sb)) {
 		if (attr->ia_valid & ATTR_MODE)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9822fa15118..e15ff612002 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -525,6 +525,8 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 	    kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
+	atomic_set(&ei->openers, 0);
+	mutex_init(&ei->tailpack);
 	return &ei->vfs_inode;
 }
 
@@ -589,11 +591,6 @@ out:
 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
 }
 
-static void reiserfs_clear_inode(struct inode *inode)
-{
-	dquot_drop(inode);
-}
-
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
@@ -606,8 +603,7 @@ static const struct super_operations reiserfs_sops = {
 	.destroy_inode = reiserfs_destroy_inode,
 	.write_inode = reiserfs_write_inode,
 	.dirty_inode = reiserfs_dirty_inode,
-	.clear_inode = reiserfs_clear_inode,
-	.delete_inode = reiserfs_delete_inode,
+	.evict_inode = reiserfs_evict_inode,
 	.put_super = reiserfs_put_super,
 	.write_super = reiserfs_write_super,
 	.sync_fs = reiserfs_sync_fs,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 9551cb6f7fe..450c9194198 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -46,7 +46,7 @@
 
 #define SMB_TTL_DEFAULT 1000
 
-static void smb_delete_inode(struct inode *);
+static void smb_evict_inode(struct inode *);
 static void smb_put_super(struct super_block *);
 static int  smb_statfs(struct dentry *, struct kstatfs *);
 static int  smb_show_options(struct seq_file *, struct vfsmount *);
@@ -102,7 +102,7 @@ static const struct super_operations smb_sops =
 	.alloc_inode	= smb_alloc_inode,
 	.destroy_inode	= smb_destroy_inode,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= smb_delete_inode,
+	.evict_inode	= smb_evict_inode,
 	.put_super	= smb_put_super,
 	.statfs		= smb_statfs,
 	.show_options	= smb_show_options,
@@ -324,15 +324,15 @@ out:
  * All blocking cleanup operations need to go here to avoid races.
  */
 static void
-smb_delete_inode(struct inode *ino)
+smb_evict_inode(struct inode *ino)
 {
 	DEBUG1("ino=%ld\n", ino->i_ino);
 	truncate_inode_pages(&ino->i_data, 0);
+	end_writeback(ino);
 	lock_kernel();
 	if (smb_close(ino))
 		PARANOIA("could not close inode %ld\n", ino->i_ino);
 	unlock_kernel();
-	clear_inode(ino);
 }
 
 static struct option opts[] = {
@@ -714,9 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
 		error = server->ops->truncate(inode, attr->ia_size);
 		if (error)
 			goto out;
-		error = simple_setsize(inode, attr->ia_size);
-		if (error)
-			goto out;
+		truncate_setsize(inode, attr->ia_size);
 		refresh = 1;
 	}
 
diff --git a/fs/splice.c b/fs/splice.c
index efdbfece993..8f1dfaecc8f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -399,17 +399,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		 * If the page isn't uptodate, we may need to start io on it
 		 */
 		if (!PageUptodate(page)) {
-			/*
-			 * If in nonblock mode then dont block on waiting
-			 * for an in-flight io page
-			 */
-			if (flags & SPLICE_F_NONBLOCK) {
-				if (!trylock_page(page)) {
-					error = -EAGAIN;
-					break;
-				}
-			} else
-				lock_page(page);
+			lock_page(page);
 
 			/*
 			 * Page was truncated, or invalidated by the
@@ -597,7 +587,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	struct page *pages[PIPE_DEF_BUFFERS];
 	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
-	pgoff_t index;
 	ssize_t res;
 	size_t this_len;
 	int error;
@@ -621,7 +610,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 			goto shrink_ret;
 	}
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
diff --git a/fs/statfs.c b/fs/statfs.c
index 4ef021f3b61..30ea8c8a996 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -2,38 +2,83 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/statfs.h>
 #include <linux/security.h>
 #include <linux/uaccess.h>
 
-int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+static int flags_by_mnt(int mnt_flags)
 {
-	int retval = -ENODEV;
-
-	if (dentry) {
-		retval = -ENOSYS;
-		if (dentry->d_sb->s_op->statfs) {
-			memset(buf, 0, sizeof(*buf));
-			retval = security_sb_statfs(dentry);
-			if (retval)
-				return retval;
-			retval = dentry->d_sb->s_op->statfs(dentry, buf);
-			if (retval == 0 && buf->f_frsize == 0)
-				buf->f_frsize = buf->f_bsize;
-		}
-	}
+	int flags = 0;
+
+	if (mnt_flags & MNT_READONLY)
+		flags |= ST_RDONLY;
+	if (mnt_flags & MNT_NOSUID)
+		flags |= ST_NOSUID;
+	if (mnt_flags & MNT_NODEV)
+		flags |= ST_NODEV;
+	if (mnt_flags & MNT_NOEXEC)
+		flags |= ST_NOEXEC;
+	if (mnt_flags & MNT_NOATIME)
+		flags |= ST_NOATIME;
+	if (mnt_flags & MNT_NODIRATIME)
+		flags |= ST_NODIRATIME;
+	if (mnt_flags & MNT_RELATIME)
+		flags |= ST_RELATIME;
+	return flags;
+}
+
+static int flags_by_sb(int s_flags)
+{
+	int flags = 0;
+	if (s_flags & MS_SYNCHRONOUS)
+		flags |= ST_SYNCHRONOUS;
+	if (s_flags & MS_MANDLOCK)
+		flags |= ST_MANDLOCK;
+	return flags;
+}
+
+static int calculate_f_flags(struct vfsmount *mnt)
+{
+	return ST_VALID | flags_by_mnt(mnt->mnt_flags) |
+		flags_by_sb(mnt->mnt_sb->s_flags);
+}
+
+int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
+{
+	int retval;
+
+	if (!dentry->d_sb->s_op->statfs)
+		return -ENOSYS;
+
+	memset(buf, 0, sizeof(*buf));
+	retval = security_sb_statfs(dentry);
+	if (retval)
+		return retval;
+	retval = dentry->d_sb->s_op->statfs(dentry, buf);
+	if (retval == 0 && buf->f_frsize == 0)
+		buf->f_frsize = buf->f_bsize;
 	return retval;
 }
 
+int vfs_statfs(struct path *path, struct kstatfs *buf)
+{
+	int error;
+
+	error = statfs_by_dentry(path->dentry, buf);
+	if (!error)
+		buf->f_flags = calculate_f_flags(path->mnt);
+	return error;
+}
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
+static int do_statfs_native(struct path *path, struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(dentry, &st);
+	retval = vfs_statfs(path, &st);
 	if (retval)
 		return retval;
 
@@ -67,17 +112,18 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
 		buf->f_fsid = st.f_fsid;
 		buf->f_namelen = st.f_namelen;
 		buf->f_frsize = st.f_frsize;
+		buf->f_flags = st.f_flags;
 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
 	}
 	return 0;
 }
 
-static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
+static int do_statfs64(struct path *path, struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(dentry, &st);
+	retval = vfs_statfs(path, &st);
 	if (retval)
 		return retval;
 
@@ -94,6 +140,7 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
 		buf->f_fsid = st.f_fsid;
 		buf->f_namelen = st.f_namelen;
 		buf->f_frsize = st.f_frsize;
+		buf->f_flags = st.f_flags;
 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
 	}
 	return 0;
@@ -107,7 +154,7 @@ SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, b
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(path.dentry, &tmp);
+		error = do_statfs_native(&path, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_put(&path);
@@ -125,7 +172,7 @@ SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct stat
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(path.dentry, &tmp);
+		error = do_statfs64(&path, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_put(&path);
@@ -143,7 +190,7 @@ SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_path.dentry, &tmp);
+	error = do_statfs_native(&file->f_path, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -164,7 +211,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_path.dentry, &tmp);
+	error = do_statfs64(&file->f_path, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -183,7 +230,7 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
 	if (!s)
 		return -EINVAL;
 
-	err = vfs_statfs(s->s_root, &sbuf);
+	err = statfs_by_dentry(s->s_root, &sbuf);
 	drop_super(s);
 	if (err)
 		return err;
diff --git a/fs/super.c b/fs/super.c
index 938119ab8dc..9674ab2c871 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -305,8 +305,13 @@ retry:
 			if (s) {
 				up_write(&s->s_umount);
 				destroy_super(s);
+				s = NULL;
 			}
 			down_write(&old->s_umount);
+			if (unlikely(!(old->s_flags & MS_BORN))) {
+				deactivate_locked_super(old);
+				goto retry;
+			}
 			return old;
 		}
 	}
@@ -358,10 +363,10 @@ EXPORT_SYMBOL(drop_super);
  */
 void sync_supers(void)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_op->write_super && sb->s_dirt) {
@@ -374,11 +379,13 @@ void sync_supers(void)
 			up_read(&sb->s_umount);
 
 			spin_lock(&sb_lock);
-			/* lock was dropped, must reset next */
-			list_safe_reset_next(sb, n, s_list);
-			__put_super(sb);
+			if (p)
+				__put_super(p);
+			p = sb;
 		}
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 }
 
@@ -392,10 +399,10 @@ void sync_supers(void)
  */
 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -407,10 +414,12 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 		up_read(&sb->s_umount);
 
 		spin_lock(&sb_lock);
-		/* lock was dropped, must reset next */
-		list_safe_reset_next(sb, n, s_list);
-		__put_super(sb);
+		if (p)
+			__put_super(p);
+		p = sb;
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 }
 
@@ -572,10 +581,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 
 static void do_emergency_remount(struct work_struct *work)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -589,10 +598,12 @@ static void do_emergency_remount(struct work_struct *work)
 		}
 		up_write(&sb->s_umount);
 		spin_lock(&sb_lock);
-		/* lock was dropped, must reset next */
-		list_safe_reset_next(sb, n, s_list);
-		__put_super(sb);
+		if (p)
+			__put_super(p);
+		p = sb;
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 	kfree(work);
 	printk("Emergency Remount complete\n");
@@ -773,7 +784,16 @@ int get_sb_bdev(struct file_system_type *fs_type,
 			goto error_bdev;
 		}
 
+		/*
+		 * s_umount nests inside bd_mutex during
+		 * __invalidate_device().  close_bdev_exclusive()
+		 * acquires bd_mutex and can't be called under
+		 * s_umount.  Drop s_umount temporarily.  This is safe
+		 * as we're holding an active reference.
+		 */
+		up_write(&s->s_umount);
 		close_bdev_exclusive(bdev, mode);
+		down_write(&s->s_umount);
 	} else {
 		char b[BDEVNAME_SIZE];
 
@@ -909,6 +929,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 		goto out_free_secdata;
 	BUG_ON(!mnt->mnt_sb);
 	WARN_ON(!mnt->mnt_sb->s_bdi);
+	mnt->mnt_sb->s_flags |= MS_BORN;
 
 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
 	if (error)
diff --git a/fs/sync.c b/fs/sync.c
index 15aa6f03b2d..ba76b9623e7 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -128,31 +128,6 @@ void emergency_sync(void)
 	}
 }
 
-/*
- * Generic function to fsync a file.
- */
-int file_fsync(struct file *filp, int datasync)
-{
-	struct inode *inode = filp->f_mapping->host;
-	struct super_block * sb;
-	int ret, err;
-
-	/* sync the inode to buffers */
-	ret = write_inode_now(inode, 0);
-
-	/* sync the superblock to buffers */
-	sb = inode->i_sb;
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-
-	/* .. finally sync the buffers to disk */
-	err = sync_blockdev(sb->s_bdev);
-	if (!ret)
-		ret = err;
-	return ret;
-}
-EXPORT_SYMBOL(file_fsync);
-
 /**
  * vfs_fsync_range - helper to sync a range of data & metadata to disk
  * @file:		file to sync
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0835a3b70e0..cffb1fd8ba3 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -122,7 +122,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
 		goto out;
 
 	/* this ignores size changes */
-	generic_setattr(inode, iattr);
+	setattr_copy(inode, iattr);
 
 out:
 	mutex_unlock(&sysfs_mutex);
@@ -312,15 +312,15 @@ struct inode * sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd)
  * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
  * To prevent the sysfs inode numbers from being freed prematurely we take a
  * reference to sysfs_dirent from the sysfs inode.  A
- * super_operations.delete_inode() implementation is needed to drop that
+ * super_operations.evict_inode() implementation is needed to drop that
  * reference upon inode destruction.
  */
-void sysfs_delete_inode(struct inode *inode)
+void sysfs_evict_inode(struct inode *inode)
 {
 	struct sysfs_dirent *sd  = inode->i_private;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	end_writeback(inode);
 	sysfs_put(sd);
 }
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 281c0c9bc39..f2af22574c5 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -29,7 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
 static const struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= sysfs_delete_inode,
+	.evict_inode	= sysfs_evict_inode,
 };
 
 struct sysfs_dirent sysfs_root = {
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6a13105b559..d9be60a2e95 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -198,7 +198,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
  * inode.c
  */
 struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
-void sysfs_delete_inode(struct inode *inode);
+void sysfs_evict_inode(struct inode *inode);
 int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
 int sysfs_permission(struct inode *inode, int mask);
 int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 79941e4964a..a77c4215762 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -218,8 +218,7 @@ got_it:
 	pos = page_offset(page) +
 			(char*)de - (char*)page_address(page);
 	lock_page(page);
-	err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
 	if (err)
 		goto out_unlock;
 	memcpy (de->name, name, namelen);
@@ -239,15 +238,13 @@ out_unlock:
 
 int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = (struct inode*)mapping->host;
+	struct inode *inode = page->mapping->host;
 	char *kaddr = (char*)page_address(page);
 	loff_t pos = page_offset(page) + (char *)de - kaddr;
 	int err;
 
 	lock_page(page);
-	err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
 	BUG_ON(err);
 	de->inode = 0;
 	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
@@ -259,16 +256,14 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
 
 int sysv_make_empty(struct inode *inode, struct inode *dir)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct page *page = grab_cache_page(mapping, 0);
+	struct page *page = grab_cache_page(inode->i_mapping, 0);
 	struct sysv_dir_entry * de;
 	char *base;
 	int err;
 
 	if (!page)
 		return -ENOMEM;
-	err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, 0, 2 * SYSV_DIRSIZE);
 	if (err) {
 		unlock_page(page);
 		goto fail;
@@ -341,15 +336,13 @@ not_empty:
 void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
 	struct inode *inode)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *dir = mapping->host;
+	struct inode *dir = page->mapping->host;
 	loff_t pos = page_offset(page) +
 			(char *)de-(char*)page_address(page);
 	int err;
 
 	lock_page(page);
-	err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
 	BUG_ON(err);
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
 	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 750cc22349b..0a65939508e 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -30,7 +30,29 @@ const struct file_operations sysv_file_operations = {
 	.splice_read	= generic_file_splice_read,
 };
 
+static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
 const struct inode_operations sysv_file_inode_operations = {
 	.truncate	= sysv_truncate,
+	.setattr	= sysv_setattr,
 	.getattr	= sysv_getattr,
 };
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index fcc498ec9b3..0c96c98bd1d 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -113,7 +113,6 @@ void sysv_free_inode(struct inode * inode)
 		return;
 	}
 	raw_inode = sysv_raw_inode(sb, ino, &bh);
-	clear_inode(inode);
 	if (!raw_inode) {
 		printk("sysv_free_inode: unable to read inode block on device "
 		       "%s\n", inode->i_sb->s_id);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index d4a5380b566..de44d067b9e 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -71,8 +71,8 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)
 	lock_super(sb);
 	if (sbi->s_forced_ro)
 		*flags |= MS_RDONLY;
-	if (!(*flags & MS_RDONLY))
-		sb->s_dirt = 1;
+	if (*flags & MS_RDONLY)
+		sysv_write_super(sb);
 	unlock_super(sb);
 	return 0;
 }
@@ -308,12 +308,17 @@ int sysv_sync_inode(struct inode *inode)
 	return __sysv_write_inode(inode, 1);
 }
 
-static void sysv_delete_inode(struct inode *inode)
+static void sysv_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	sysv_truncate(inode);
-	sysv_free_inode(inode);
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		sysv_truncate(inode);
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+	if (!inode->i_nlink)
+		sysv_free_inode(inode);
 }
 
 static struct kmem_cache *sysv_inode_cachep;
@@ -344,7 +349,7 @@ const struct super_operations sysv_sops = {
 	.alloc_inode	= sysv_alloc_inode,
 	.destroy_inode	= sysv_destroy_inode,
 	.write_inode	= sysv_write_inode,
-	.delete_inode	= sysv_delete_inode,
+	.evict_inode	= sysv_evict_inode,
 	.put_super	= sysv_put_super,
 	.write_super	= sysv_write_super,
 	.sync_fs	= sysv_sync_fs,
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index f042eec464c..9ca66276315 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -459,20 +459,25 @@ static int sysv_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page,get_block);
 }
 
-int __sysv_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
+int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				get_block);
+	return __block_write_begin(page, pos, len, get_block);
 }
 
 static int sysv_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep, get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 5a903da5455..0e44a625335 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -347,7 +347,6 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
 		sb->s_flags |= MS_RDONLY;
 	if (sbi->s_truncate)
 		sb->s_root->d_op = &sysv_dentry_operations;
-	sb->s_dirt = 1;
 	return 1;
 }
 
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 94cb9b4d76c..bb55cdb394b 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -136,9 +136,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *);
 
 /* itree.c */
 extern void sysv_truncate(struct inode *);
-extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata);
+extern int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
 /* inode.c */
 extern struct inode *sysv_iget(struct super_block *, unsigned int);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 12f445cee9f..03ae894c45d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -967,14 +967,15 @@ static int do_writepage(struct page *page, int len)
  * the page locked, and it locks @ui_mutex. However, write-back does take inode
  * @i_mutex, which means other VFS operations may be run on this inode at the
  * same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'simple_setsize()', which first changes @inode->i_size, then
+ * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
  * drops the truncated pages. And while dropping the pages, it takes the page
- * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with
+ * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
  * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
  * means that @inode->i_size is changed while @ui_mutex is unlocked.
  *
- * XXX: with the new truncate the above is not true anymore, the simple_setsize
- * calls can be replaced with the individual components.
+ * XXX(truncate): with the new truncate sequence this is not true anymore,
+ * and the calls to truncate_setsize can be moved around freely.  They should
+ * be moved to the very end of the truncate sequence.
  *
  * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
  * inode size. How do we do this if @inode->i_size may became smaller while we
@@ -1128,9 +1129,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
 		budgeted = 0;
 	}
 
-	err = simple_setsize(inode, new_size);
-	if (err)
-		goto out_budg;
+	truncate_setsize(inode, new_size);
 
 	if (offset) {
 		pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
@@ -1217,16 +1216,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
 
 	if (attr->ia_valid & ATTR_SIZE) {
 		dbg_gen("size %lld -> %lld", inode->i_size, new_size);
-		err = simple_setsize(inode, new_size);
-		if (err)
-			goto out;
+		truncate_setsize(inode, new_size);
 	}
 
 	mutex_lock(&ui->ui_mutex);
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* Truncation changes inode [mc]time */
 		inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
-		/* 'simple_setsize()' changed @i_size, update @ui_size */
+		/* 'truncate_setsize()' changed @i_size, update @ui_size */
 		ui->ui_size = inode->i_size;
 	}
 
@@ -1248,10 +1245,6 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
 	if (IS_SYNC(inode))
 		err = inode->i_sb->s_op->write_inode(inode, NULL);
 	return err;
-
-out:
-	ubifs_release_budget(c, &req);
-	return err;
 }
 
 int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 5fc5a098897..cd5900b85d3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -327,7 +327,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return err;
 }
 
-static void ubifs_delete_inode(struct inode *inode)
+static void ubifs_evict_inode(struct inode *inode)
 {
 	int err;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -343,9 +343,12 @@ static void ubifs_delete_inode(struct inode *inode)
 
 	dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
 	ubifs_assert(!atomic_read(&inode->i_count));
-	ubifs_assert(inode->i_nlink == 0);
 
 	truncate_inode_pages(&inode->i_data, 0);
+
+	if (inode->i_nlink)
+		goto done;
+
 	if (is_bad_inode(inode))
 		goto out;
 
@@ -367,7 +370,8 @@ out:
 		c->nospace = c->nospace_rp = 0;
 		smp_wmb();
 	}
-	clear_inode(inode);
+done:
+	end_writeback(inode);
 }
 
 static void ubifs_dirty_inode(struct inode *inode)
@@ -1826,7 +1830,7 @@ const struct super_operations ubifs_super_operations = {
 	.destroy_inode = ubifs_destroy_inode,
 	.put_super     = ubifs_put_super,
 	.write_inode   = ubifs_write_inode,
-	.delete_inode  = ubifs_delete_inode,
+	.evict_inode   = ubifs_evict_inode,
 	.statfs        = ubifs_statfs,
 	.dirty_inode   = ubifs_dirty_inode,
 	.remount_fs    = ubifs_remount_fs,
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 04310878f44..0c9876b396d 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb {
  * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
  * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
  * make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock
+ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
  * with 'ubifs_writepage()' (see file.c). All the other inode fields are
  * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
  * could consider to rework locking and base it on "shadow" fields.
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 6e450e01a1b..66b9e7e7e4c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -227,6 +227,28 @@ const struct file_operations udf_file_operations = {
 	.llseek			= generic_file_llseek,
 };
 
+static int udf_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
 const struct inode_operations udf_file_inode_operations = {
+	.setattr		= udf_setattr,
 	.truncate		= udf_truncate,
 };
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 18cd7111185..75d9304d0dc 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -31,8 +31,6 @@ void udf_free_inode(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 
-	clear_inode(inode);
-
 	mutex_lock(&sbi->s_alloc_mutex);
 	if (sbi->s_lvid_bh) {
 		struct logicalVolIntegrityDescImpUse *lvidiu =
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 124852bcf6f..fc48f37aa2d 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -68,37 +68,23 @@ static void udf_update_extents(struct inode *,
 static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 
-void udf_delete_inode(struct inode *inode)
-{
-	truncate_inode_pages(&inode->i_data, 0);
-
-	if (is_bad_inode(inode))
-		goto no_delete;
-
-	inode->i_size = 0;
-	udf_truncate(inode);
-	lock_kernel();
-
-	udf_update_inode(inode, IS_SYNC(inode));
-	udf_free_inode(inode);
-
-	unlock_kernel();
-	return;
-
-no_delete:
-	clear_inode(inode);
-}
-
-/*
- * If we are going to release inode from memory, we truncate last inode extent
- * to proper length. We could use drop_inode() but it's called under inode_lock
- * and thus we cannot mark inode dirty there.  We use clear_inode() but we have
- * to make sure to write inode as it's not written automatically.
- */
-void udf_clear_inode(struct inode *inode)
+void udf_evict_inode(struct inode *inode)
 {
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	int want_delete = 0;
+
+	truncate_inode_pages(&inode->i_data, 0);
 
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
+		want_delete = 1;
+		inode->i_size = 0;
+		udf_truncate(inode);
+		lock_kernel();
+		udf_update_inode(inode, IS_SYNC(inode));
+		unlock_kernel();
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
 	    inode->i_size != iinfo->i_lenExtents) {
 		printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has "
@@ -108,9 +94,13 @@ void udf_clear_inode(struct inode *inode)
 			(unsigned long long)inode->i_size,
 			(unsigned long long)iinfo->i_lenExtents);
 	}
-
 	kfree(iinfo->i_ext.i_data);
 	iinfo->i_ext.i_data = NULL;
+	if (want_delete) {
+		lock_kernel();
+		udf_free_inode(inode);
+		unlock_kernel();
+	}
 }
 
 static int udf_writepage(struct page *page, struct writeback_control *wbc)
@@ -127,9 +117,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				udf_get_block);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t udf_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 12bb651e540..65412d84a45 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -175,8 +175,7 @@ static const struct super_operations udf_sb_ops = {
 	.alloc_inode	= udf_alloc_inode,
 	.destroy_inode	= udf_destroy_inode,
 	.write_inode	= udf_write_inode,
-	.delete_inode	= udf_delete_inode,
-	.clear_inode	= udf_clear_inode,
+	.evict_inode	= udf_evict_inode,
 	.put_super	= udf_put_super,
 	.sync_fs	= udf_sync_fs,
 	.statfs		= udf_statfs,
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 2bac0354891..6995ab1f430 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -139,8 +139,7 @@ extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
 extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
 extern void udf_truncate(struct inode *);
 extern void udf_read_inode(struct inode *);
-extern void udf_delete_inode(struct inode *);
-extern void udf_clear_inode(struct inode *);
+extern void udf_evict_inode(struct inode *);
 extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
 extern long udf_block_map(struct inode *, sector_t);
 extern int udf_extend_file(struct inode *, struct extent_position *,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index ec784756dc6..dbc90994715 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -95,8 +95,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
 	int err;
 
 	lock_page(page);
-	err = __ufs_write_begin(NULL, page->mapping, pos, len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, pos, len);
 	BUG_ON(err);
 
 	de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
@@ -381,8 +380,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
 got_it:
 	pos = page_offset(page) +
 			(char*)de - (char*)page_address(page);
-	err = __ufs_write_begin(NULL, page->mapping, pos, rec_len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, pos, rec_len);
 	if (err)
 		goto out_unlock;
 	if (de->d_ino) {
@@ -518,7 +516,6 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
 		     struct page * page)
 {
 	struct super_block *sb = inode->i_sb;
-	struct address_space *mapping = page->mapping;
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
 	unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
@@ -549,8 +546,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
 
 	pos = page_offset(page) + from;
 	lock_page(page);
-	err = __ufs_write_begin(NULL, mapping, pos, to - from,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, pos, to - from);
 	BUG_ON(err);
 	if (pde)
 		pde->d_reclen = cpu_to_fs16(sb, to - from);
@@ -577,8 +573,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
 	if (!page)
 		return -ENOMEM;
 
-	err = __ufs_write_begin(NULL, mapping, 0, chunk_size,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, 0, chunk_size);
 	if (err) {
 		unlock_page(page);
 		goto fail;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 594480e537d..428017e018f 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -94,8 +94,6 @@ void ufs_free_inode (struct inode * inode)
 
 	is_directory = S_ISDIR(inode->i_mode);
 
-	clear_inode (inode);
-
 	if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
 		ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino);
 	else {
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 73fe773aa03..2b251f2093a 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -558,20 +558,26 @@ static int ufs_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page,ufs_getfrag_block);
 }
 
-int __ufs_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
+int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				ufs_getfrag_block);
+	return __block_write_begin(page, pos, len, ufs_getfrag_block);
 }
 
 static int ufs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				ufs_getfrag_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
@@ -905,24 +911,33 @@ int ufs_sync_inode (struct inode *inode)
 	return ufs_update_inode (inode, 1);
 }
 
-void ufs_delete_inode (struct inode * inode)
+void ufs_evict_inode(struct inode * inode)
 {
-	loff_t old_i_size;
+	int want_delete = 0;
+
+	if (!inode->i_nlink && !is_bad_inode(inode))
+		want_delete = 1;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	if (is_bad_inode(inode))
-		goto no_delete;
-	/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
-	lock_kernel();
-	mark_inode_dirty(inode);
-	ufs_update_inode(inode, IS_SYNC(inode));
-	old_i_size = inode->i_size;
-	inode->i_size = 0;
-	if (inode->i_blocks && ufs_truncate(inode, old_i_size))
-		ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
-	ufs_free_inode (inode);
-	unlock_kernel();
-	return;
-no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	if (want_delete) {
+		loff_t old_i_size;
+		/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
+		lock_kernel();
+		mark_inode_dirty(inode);
+		ufs_update_inode(inode, IS_SYNC(inode));
+		old_i_size = inode->i_size;
+		inode->i_size = 0;
+		if (inode->i_blocks && ufs_truncate(inode, old_i_size))
+			ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
+		unlock_kernel();
+	}
+
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+
+	if (want_delete) {
+		lock_kernel();
+		ufs_free_inode (inode);
+		unlock_kernel();
+	}
 }
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3ec5a9eb6ef..d510c1b9181 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1440,7 +1440,7 @@ static const struct super_operations ufs_super_ops = {
 	.alloc_inode	= ufs_alloc_inode,
 	.destroy_inode	= ufs_destroy_inode,
 	.write_inode	= ufs_write_inode,
-	.delete_inode	= ufs_delete_inode,
+	.evict_inode	= ufs_evict_inode,
 	.put_super	= ufs_put_super,
 	.write_super	= ufs_write_super,
 	.sync_fs	= ufs_sync_fs,
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 589e01a465b..34d5cb13532 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -500,11 +500,6 @@ out:
 	return err;
 }
 
-/*
- * TODO:
- *	- truncate case should use proper ordering instead of using
- *	  simple_setsize
- */
 int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -518,14 +513,17 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
 		loff_t old_i_size = inode->i_size;
 
-		error = simple_setsize(inode, attr->ia_size);
-		if (error)
-			return error;
+		/* XXX(truncate): truncate_setsize should be called last */
+		truncate_setsize(inode, attr->ia_size);
+
 		error = ufs_truncate(inode, old_i_size);
 		if (error)
 			return error;
 	}
-	return inode_setattr(inode, attr);
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations ufs_file_inode_operations = {
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 179ae6b3180..c08782e1b48 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -108,7 +108,7 @@ extern struct inode * ufs_new_inode (struct inode *, int);
 extern struct inode *ufs_iget(struct super_block *, unsigned long);
 extern int ufs_write_inode (struct inode *, struct writeback_control *);
 extern int ufs_sync_inode (struct inode *);
-extern void ufs_delete_inode (struct inode *);
+extern void ufs_evict_inode (struct inode *);
 extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
 extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
 
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 23ceed8c8fb..0466036912f 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -257,9 +257,7 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
 
 extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
 extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
-extern int __ufs_write_begin(struct file *file, struct address_space *mapping,
-		loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, void **fsdata);
+extern int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
 /*
  * These functions manipulate ufs buffers
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d24e78f32f3..15412fe15c3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1478,22 +1478,38 @@ xfs_vm_direct_IO(
 	if (rw & WRITE) {
 		iocb->private = xfs_alloc_ioend(inode, IO_NEW);
 
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
-						    offset, nr_segs,
-						    xfs_get_blocks_direct,
-						    xfs_end_io_direct_write);
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct_write, NULL, 0);
 		if (ret != -EIOCBQUEUED && iocb->private)
 			xfs_destroy_ioend(iocb->private);
 	} else {
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
-						    offset, nr_segs,
-						    xfs_get_blocks_direct,
-						    NULL);
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    NULL, NULL, 0);
 	}
 
 	return ret;
 }
 
+STATIC void
+xfs_vm_write_failed(
+	struct address_space	*mapping,
+	loff_t			to)
+{
+	struct inode		*inode = mapping->host;
+
+	if (to > inode->i_size) {
+		struct iattr	ia = {
+			.ia_valid	= ATTR_SIZE | ATTR_FORCE,
+			.ia_size	= inode->i_size,
+		};
+		xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+	}
+}
+
 STATIC int
 xfs_vm_write_begin(
 	struct file		*file,
@@ -1504,9 +1520,31 @@ xfs_vm_write_begin(
 	struct page		**pagep,
 	void			**fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS,
-				 pagep, fsdata, xfs_get_blocks);
+	int			ret;
+
+	ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
+				pagep, xfs_get_blocks);
+	if (unlikely(ret))
+		xfs_vm_write_failed(mapping, pos + len);
+	return ret;
+}
+
+STATIC int
+xfs_vm_write_end(
+	struct file		*file,
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		copied,
+	struct page		*page,
+	void			*fsdata)
+{
+	int			ret;
+
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (unlikely(ret < len))
+		xfs_vm_write_failed(mapping, pos + len);
+	return ret;
 }
 
 STATIC sector_t
@@ -1551,7 +1589,7 @@ const struct address_space_operations xfs_address_space_operations = {
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,
-	.write_end		= generic_write_end,
+	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 536b81e63a3..68be25dcd30 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -80,7 +80,7 @@ xfs_mark_inode_dirty_sync(
 {
 	struct inode	*inode = VFS_I(ip);
 
-	if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
 		mark_inode_dirty_sync(inode);
 }
 
@@ -90,7 +90,7 @@ xfs_mark_inode_dirty(
 {
 	struct inode	*inode = VFS_I(ip);
 
-	if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
 		mark_inode_dirty(inode);
 }
 
@@ -540,21 +540,6 @@ xfs_vn_setattr(
 	return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
 }
 
-/*
- * block_truncate_page can return an error, but we can't propagate it
- * at all here. Leave a complaint + stack trace in the syslog because
- * this could be bad. If it is bad, we need to propagate the error further.
- */
-STATIC void
-xfs_vn_truncate(
-	struct inode	*inode)
-{
-	int	error;
-	error = block_truncate_page(inode->i_mapping, inode->i_size,
-							xfs_get_blocks);
-	WARN_ON(error);
-}
-
 STATIC long
 xfs_vn_fallocate(
 	struct inode	*inode,
@@ -694,7 +679,6 @@ xfs_vn_fiemap(
 
 static const struct inode_operations xfs_inode_operations = {
 	.check_acl		= xfs_check_acl,
-	.truncate		= xfs_vn_truncate,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
 	.setxattr		= generic_setxattr,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 998a9d7fb9c..2fa0bd9ebc7 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -156,8 +156,6 @@
  */
 #define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
 #define xfs_stack_trace()	dump_stack()
-#define xfs_itruncate_data(ip, off)	\
-	(-vmtruncate(VFS_I(ip), (off)))
 
 
 /* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 758df94690e..15c35b62ff1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1100,13 +1100,15 @@ xfs_fs_write_inode(
 }
 
 STATIC void
-xfs_fs_clear_inode(
+xfs_fs_evict_inode(
 	struct inode		*inode)
 {
 	xfs_inode_t		*ip = XFS_I(inode);
 
-	trace_xfs_clear_inode(ip);
+	trace_xfs_evict_inode(ip);
 
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	XFS_STATS_INC(vn_rele);
 	XFS_STATS_INC(vn_remove);
 	XFS_STATS_DEC(vn_active);
@@ -1622,7 +1624,7 @@ static const struct super_operations xfs_super_operations = {
 	.destroy_inode		= xfs_fs_destroy_inode,
 	.dirty_inode		= xfs_fs_dirty_inode,
 	.write_inode		= xfs_fs_write_inode,
-	.clear_inode		= xfs_fs_clear_inode,
+	.evict_inode		= xfs_fs_evict_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
 	.freeze_fs		= xfs_fs_freeze,
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c657cdca2cd..be5dffd282a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -581,7 +581,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
 DEFINE_INODE_EVENT(xfs_file_fsync);
 DEFINE_INODE_EVENT(xfs_destroy_inode);
 DEFINE_INODE_EVENT(xfs_write_inode);
-DEFINE_INODE_EVENT(xfs_clear_inode);
+DEFINE_INODE_EVENT(xfs_evict_inode);
 
 DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
 DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 3ac137dd531..66d585c6917 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -221,8 +221,11 @@ xfs_setattr(
 			 * transaction to modify the i_size.
 			 */
 			code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+			if (code)
+				goto error_return;
 		}
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		lock_flags &= ~XFS_ILOCK_EXCL;
 
 		/*
 		 * We are going to log the inode size change in this
@@ -236,36 +239,35 @@ xfs_setattr(
 		 * really care about here and prevents waiting for other data
 		 * not within the range we care about here.
 		 */
-		if (!code &&
-		    ip->i_size != ip->i_d.di_size &&
+		if (ip->i_size != ip->i_d.di_size &&
 		    iattr->ia_size > ip->i_d.di_size) {
 			code = xfs_flush_pages(ip,
 					ip->i_d.di_size, iattr->ia_size,
 					XBF_ASYNC, FI_NONE);
+			if (code)
+				goto error_return;
 		}
 
 		/* wait for all I/O to complete */
 		xfs_ioend_wait(ip);
 
-		if (!code)
-			code = xfs_itruncate_data(ip, iattr->ia_size);
-		if (code) {
-			ASSERT(tp == NULL);
-			lock_flags &= ~XFS_ILOCK_EXCL;
-			ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
+		code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+					    xfs_get_blocks);
+		if (code)
 			goto error_return;
-		}
+
 		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-		if ((code = xfs_trans_reserve(tp, 0,
-					     XFS_ITRUNCATE_LOG_RES(mp), 0,
-					     XFS_TRANS_PERM_LOG_RES,
-					     XFS_ITRUNCATE_LOG_COUNT))) {
-			xfs_trans_cancel(tp, 0);
-			if (need_iolock)
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-			return code;
-		}
+		code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+					 XFS_TRANS_PERM_LOG_RES,
+					 XFS_ITRUNCATE_LOG_COUNT);
+		if (code)
+			goto error_return;
+
+		truncate_setsize(inode, iattr->ia_size);
+
 		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+		lock_flags |= XFS_ILOCK_EXCL;
+
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 
 		xfs_trans_ijoin(tp, ip);
author	Jiri Kosina <jkosina@suse.cz>	2010-08-11 09:36:51 +0200
committer	Jiri Kosina <jkosina@suse.cz>	2010-08-11 09:36:51 +0200
commit	6396fc3b3ff3f6b942992b653a62df11dcef9bea (patch)
tree	db3c7cbe833b43c653adc99f70941431c5ff7c4e /fs
parent	4785879e4d340e24e54f6de2ccfc42728b912808 (diff)
parent	3d30701b58970425e1d45994d6cb82f828924fdd (diff)