aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2015-11-03 10:05:16 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2015-11-03 10:05:16 +1100
commitc1fb2e3f1c8009c829d9bdc660c53058fb9467ce (patch)
treea20d336f4d4267d38e911be2f2ff5a43b6b76820 /fs
parent01db28c52f9afb7ee64c99569d490bd2b3a9c584 (diff)
parent1bc483f959c892f3ad324f1957a9fe97e7873981 (diff)
Merge remote-tracking branch 'nfsd/nfsd-next'
Diffstat (limited to 'fs')
-rw-r--r--fs/file_table.c94
-rw-r--r--fs/lockd/host.c8
-rw-r--r--fs/lockd/mon.c125
-rw-r--r--fs/lockd/netns.h4
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/locks.c37
-rw-r--r--fs/namei.c74
-rw-r--r--fs/nfsd/Kconfig2
-rw-r--r--fs/nfsd/Makefile3
-rw-r--r--fs/nfsd/export.c14
-rw-r--r--fs/nfsd/filecache.c712
-rw-r--r--fs/nfsd/filecache.h44
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs3xdr.c6
-rw-r--r--fs/nfsd/nfs4layouts.c46
-rw-r--r--fs/nfsd/nfs4proc.c36
-rw-r--r--fs/nfsd/nfs4state.c267
-rw-r--r--fs/nfsd/nfs4xdr.c24
-rw-r--r--fs/nfsd/nfscache.c6
-rw-r--r--fs/nfsd/nfsctl.c10
-rw-r--r--fs/nfsd/nfsfh.c5
-rw-r--r--fs/nfsd/nfsfh.h20
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/nfssvc.c16
-rw-r--r--fs/nfsd/state.h53
-rw-r--r--fs/nfsd/trace.c2
-rw-r--r--fs/nfsd/trace.h137
-rw-r--r--fs/nfsd/vfs.c296
-rw-r--r--fs/nfsd/vfs.h15
-rw-r--r--fs/nfsd/xdr4.h17
-rw-r--r--fs/notify/group.c2
-rw-r--r--fs/notify/inode_mark.c1
-rw-r--r--fs/notify/mark.c70
35 files changed, 1520 insertions, 636 deletions
diff --git a/fs/file_table.c b/fs/file_table.c
index ad17e05ebf95..d214c45765e6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -227,10 +227,10 @@ static void __fput(struct file *file)
mntput(mnt);
}
-static LLIST_HEAD(delayed_fput_list);
-static void delayed_fput(struct work_struct *unused)
+static LLIST_HEAD(global_fput_list);
+static void global_fput(struct work_struct *unused)
{
- struct llist_node *node = llist_del_all(&delayed_fput_list);
+ struct llist_node *node = llist_del_all(&global_fput_list);
struct llist_node *next;
for (; node; node = next) {
@@ -244,23 +244,45 @@ static void ____fput(struct callback_head *work)
__fput(container_of(work, struct file, f_u.fu_rcuhead));
}
-/*
- * If kernel thread really needs to have the final fput() it has done
- * to complete, call this. The only user right now is the boot - we
- * *do* need to make sure our writes to binaries on initramfs has
- * not left us with opened struct file waiting for __fput() - execve()
- * won't work without that. Please, don't add more callers without
- * very good reasons; in particular, never call that with locks
- * held and never call that from a thread that might need to do
- * some work on any kind of umount.
+static DECLARE_DELAYED_WORK(global_fput_work, global_fput);
+
+/**
+ * fput_global_flush - ensure that all global_fput work is complete
+ *
+ * If a kernel thread really needs to have the final fput() it has done to
+ * complete, call this. One of the main users is the boot - we *do* need to
+ * make sure our writes to binaries on initramfs has not left us with opened
+ * struct file waiting for __fput() - execve() won't work without that.
+ *
+ * Please, don't add more callers without very good reasons; in particular,
+ * never call that with locks held and never from a thread that might need to
+ * do some work on any kind of umount.
*/
-void flush_delayed_fput(void)
+void fput_global_flush(void)
{
- delayed_fput(NULL);
+ flush_delayed_work(&global_fput_work);
}
+EXPORT_SYMBOL(fput_global_flush);
-static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
-
+/**
+ * fput - put a struct file reference
+ * @file: file of which to put the reference
+ *
+ * This function decrements the reference count for the struct file reference,
+ * and queues it up for destruction if the count goes to zero. In the case of
+ * most tasks we queue it to the task_work infrastructure, which will be run
+ * just before the task returns back to userspace. kthreads however never
+ * return to userspace, so for those we add them to a global list and schedule
+ * a delayed workqueue job to do the final cleanup work.
+ *
+ * Why not just do it synchronously? __fput can involve taking locks of all
+ * sorts, and doing it synchronously means that the callers must take extra care
+ * not to deadlock. That can be very difficult to ensure, so by deferring it
+ * until just before return to userland or to the workqueue, we sidestep that
+ * nastiness. Also, __fput can be quite stack intensive, so doing a final fput
+ * has the possibility of blowing up if we don't take steps to ensure that we
+ * have enough stack space to make it work.
+ */
void fput(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
@@ -277,14 +299,47 @@ void fput(struct file *file)
*/
}
- if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
- schedule_delayed_work(&delayed_fput_work, 1);
+ if (llist_add(&file->f_u.fu_llist, &global_fput_list))
+ schedule_delayed_work(&global_fput_work, 1);
}
}
+EXPORT_SYMBOL(fput);
+
+/**
+ * fput_global - do an fput without using task_work
+ * @file: file of which to put the reference
+ *
+ * When fput is called in the context of a userland process, it'll queue the
+ * actual work (__fput()) to be done just before returning to userland. In some
+ * cases however, we need to ensure that the __fput runs before that point.
+ *
+ * There is no safe way to flush work that has been queued via task_work_add
+ * however, so to do this we borrow the global_fput infrastructure that
+ * kthreads use. The userland process can use fput_global() on one or more
+ * struct files and then call fput_global_flush() to ensure that they are
+ * completely closed before proceeding.
+ *
+ * The main user is nfsd, which uses this to ensure that all cached but
+ * otherwise unused files are closed to allow a userland request for a lease
+ * to proceed.
+ *
+ * Returns true if the final fput was done, false otherwise. The caller can
+ * use this to determine whether to fput_global_flush afterward.
+ */
+bool fput_global(struct file *file)
+{
+ if (atomic_long_dec_and_test(&file->f_count)) {
+ if (llist_add(&file->f_u.fu_llist, &global_fput_list))
+ schedule_delayed_work(&global_fput_work, 1);
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL(fput_global);
/*
* synchronous analog of fput(); for kernel threads that might be needed
- * in some umount() (and thus can't use flush_delayed_fput() without
+ * in some umount() (and thus can't use fput_global_flush() without
* risking deadlocks), need to wait for completion of __fput() and know
* for this specific struct file it won't involve anything that would
* need them. Use only if you really need it - at the very least,
@@ -299,7 +354,6 @@ void __fput_sync(struct file *file)
}
}
-EXPORT_SYMBOL(fput);
void put_filp(struct file *file)
{
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 969d589c848d..d716c9993a26 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -116,7 +116,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
atomic_inc(&nsm->sm_count);
else {
host = NULL;
- nsm = nsm_get_handle(ni->sap, ni->salen,
+ nsm = nsm_get_handle(ni->net, ni->sap, ni->salen,
ni->hostname, ni->hostname_len);
if (unlikely(nsm == NULL)) {
dprintk("lockd: %s failed; no nsm handle\n",
@@ -161,6 +161,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_nsmhandle = nsm;
host->h_addrbuf = nsm->sm_addrbuf;
host->net = ni->net;
+ strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
out:
return host;
@@ -534,17 +535,18 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
/**
* nlm_host_rebooted - Release all resources held by rebooted host
+ * @net: network namespace
* @info: pointer to decoded results of NLM_SM_NOTIFY call
*
* We were notified that the specified host has rebooted. Release
* all resources held by that peer.
*/
-void nlm_host_rebooted(const struct nlm_reboot *info)
+void nlm_host_rebooted(const struct net *net, const struct nlm_reboot *info)
{
struct nsm_handle *nsm;
struct nlm_host *host;
- nsm = nsm_reboot_lookup(info);
+ nsm = nsm_reboot_lookup(net, info);
if (unlikely(nsm == NULL))
return;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 47a32b6d9b90..19166d4a8d31 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -42,7 +42,7 @@ struct nsm_args {
u32 proc;
char *mon_name;
- char *nodename;
+ const char *nodename;
};
struct nsm_res {
@@ -51,7 +51,6 @@ struct nsm_res {
};
static const struct rpc_program nsm_program;
-static LIST_HEAD(nsm_handles);
static DEFINE_SPINLOCK(nsm_lock);
/*
@@ -87,69 +86,18 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
return rpc_create(&args);
}
-static struct rpc_clnt *nsm_client_set(struct lockd_net *ln,
- struct rpc_clnt *clnt)
-{
- spin_lock(&ln->nsm_clnt_lock);
- if (ln->nsm_users == 0) {
- if (clnt == NULL)
- goto out;
- ln->nsm_clnt = clnt;
- }
- clnt = ln->nsm_clnt;
- ln->nsm_users++;
-out:
- spin_unlock(&ln->nsm_clnt_lock);
- return clnt;
-}
-
-static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename)
-{
- struct rpc_clnt *clnt, *new;
- struct lockd_net *ln = net_generic(net, lockd_net_id);
-
- clnt = nsm_client_set(ln, NULL);
- if (clnt != NULL)
- goto out;
-
- clnt = new = nsm_create(net, nodename);
- if (IS_ERR(clnt))
- goto out;
-
- clnt = nsm_client_set(ln, new);
- if (clnt != new)
- rpc_shutdown_client(new);
-out:
- return clnt;
-}
-
-static void nsm_client_put(struct net *net)
-{
- struct lockd_net *ln = net_generic(net, lockd_net_id);
- struct rpc_clnt *clnt = NULL;
-
- spin_lock(&ln->nsm_clnt_lock);
- ln->nsm_users--;
- if (ln->nsm_users == 0) {
- clnt = ln->nsm_clnt;
- ln->nsm_clnt = NULL;
- }
- spin_unlock(&ln->nsm_clnt_lock);
- if (clnt != NULL)
- rpc_shutdown_client(clnt);
-}
-
static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
- struct rpc_clnt *clnt)
+ const struct nlm_host *host)
{
int status;
+ struct rpc_clnt *clnt;
struct nsm_args args = {
.priv = &nsm->sm_priv,
.prog = NLM_PROGRAM,
.vers = 3,
.proc = NLMPROC_NSM_NOTIFY,
.mon_name = nsm->sm_mon_name,
- .nodename = clnt->cl_nodename,
+ .nodename = host->nodename,
};
struct rpc_message msg = {
.rpc_argp = &args,
@@ -158,6 +106,13 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
memset(res, 0, sizeof(*res));
+ clnt = nsm_create(host->net, host->nodename);
+ if (IS_ERR(clnt)) {
+ dprintk("lockd: failed to create NSM upcall transport, "
+ "status=%ld, net=%p\n", PTR_ERR(clnt), host->net);
+ return PTR_ERR(clnt);
+ }
+
msg.rpc_proc = &clnt->cl_procinfo[proc];
status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
if (status == -ECONNREFUSED) {
@@ -171,6 +126,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
status);
else
status = 0;
+
+ rpc_shutdown_client(clnt);
return status;
}
@@ -190,32 +147,19 @@ int nsm_monitor(const struct nlm_host *host)
struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status;
- struct rpc_clnt *clnt;
- const char *nodename = NULL;
dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name);
if (nsm->sm_monitored)
return 0;
- if (host->h_rpcclnt)
- nodename = host->h_rpcclnt->cl_nodename;
-
/*
* Choose whether to record the caller_name or IP address of
* this peer in the local rpc.statd's database.
*/
nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
- clnt = nsm_client_get(host->net, nodename);
- if (IS_ERR(clnt)) {
- status = PTR_ERR(clnt);
- dprintk("lockd: failed to create NSM upcall transport, "
- "status=%d, net=%p\n", status, host->net);
- return status;
- }
-
- status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt);
+ status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host);
if (unlikely(res.status != 0))
status = -EIO;
if (unlikely(status < 0)) {
@@ -247,11 +191,9 @@ void nsm_unmonitor(const struct nlm_host *host)
if (atomic_read(&nsm->sm_count) == 1
&& nsm->sm_monitored && !nsm->sm_sticky) {
- struct lockd_net *ln = net_generic(host->net, lockd_net_id);
-
dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
- status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt);
+ status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host);
if (res.status != 0)
status = -EIO;
if (status < 0)
@@ -259,38 +201,38 @@ void nsm_unmonitor(const struct nlm_host *host)
nsm->sm_name);
else
nsm->sm_monitored = 0;
-
- nsm_client_put(host->net);
}
}
-static struct nsm_handle *nsm_lookup_hostname(const char *hostname,
- const size_t len)
+static struct nsm_handle *nsm_lookup_hostname(const struct list_head *nsm_handles,
+ const char *hostname, const size_t len)
{
struct nsm_handle *nsm;
- list_for_each_entry(nsm, &nsm_handles, sm_link)
+ list_for_each_entry(nsm, nsm_handles, sm_link)
if (strlen(nsm->sm_name) == len &&
memcmp(nsm->sm_name, hostname, len) == 0)
return nsm;
return NULL;
}
-static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
+static struct nsm_handle *nsm_lookup_addr(const struct list_head *nsm_handles,
+ const struct sockaddr *sap)
{
struct nsm_handle *nsm;
- list_for_each_entry(nsm, &nsm_handles, sm_link)
+ list_for_each_entry(nsm, nsm_handles, sm_link)
if (rpc_cmp_addr(nsm_addr(nsm), sap))
return nsm;
return NULL;
}
-static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
+static struct nsm_handle *nsm_lookup_priv(const struct list_head *nsm_handles,
+ const struct nsm_private *priv)
{
struct nsm_handle *nsm;
- list_for_each_entry(nsm, &nsm_handles, sm_link)
+ list_for_each_entry(nsm, nsm_handles, sm_link)
if (memcmp(nsm->sm_priv.data, priv->data,
sizeof(priv->data)) == 0)
return nsm;
@@ -353,6 +295,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
/**
* nsm_get_handle - Find or create a cached nsm_handle
+ * @net: network namespace
* @sap: pointer to socket address of handle to find
* @salen: length of socket address
* @hostname: pointer to C string containing hostname to find
@@ -365,11 +308,13 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
* @hostname cannot be found in the handle cache. Returns NULL if
* an error occurs.
*/
-struct nsm_handle *nsm_get_handle(const struct sockaddr *sap,
+struct nsm_handle *nsm_get_handle(const struct net *net,
+ const struct sockaddr *sap,
const size_t salen, const char *hostname,
const size_t hostname_len)
{
struct nsm_handle *cached, *new = NULL;
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
if (printk_ratelimit()) {
@@ -384,9 +329,10 @@ retry:
spin_lock(&nsm_lock);
if (nsm_use_hostnames && hostname != NULL)
- cached = nsm_lookup_hostname(hostname, hostname_len);
+ cached = nsm_lookup_hostname(&ln->nsm_handles,
+ hostname, hostname_len);
else
- cached = nsm_lookup_addr(sap);
+ cached = nsm_lookup_addr(&ln->nsm_handles, sap);
if (cached != NULL) {
atomic_inc(&cached->sm_count);
@@ -400,7 +346,7 @@ retry:
}
if (new != NULL) {
- list_add(&new->sm_link, &nsm_handles);
+ list_add(&new->sm_link, &ln->nsm_handles);
spin_unlock(&nsm_lock);
dprintk("lockd: created nsm_handle for %s (%s)\n",
new->sm_name, new->sm_addrbuf);
@@ -417,19 +363,22 @@ retry:
/**
* nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle
+ * @net: network namespace
* @info: pointer to NLMPROC_SM_NOTIFY arguments
*
* Returns a matching nsm_handle if found in the nsm cache. The returned
* nsm_handle's reference count is bumped. Otherwise returns NULL if some
* error occurred.
*/
-struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info)
+struct nsm_handle *nsm_reboot_lookup(const struct net *net,
+ const struct nlm_reboot *info)
{
struct nsm_handle *cached;
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
spin_lock(&nsm_lock);
- cached = nsm_lookup_priv(&info->priv);
+ cached = nsm_lookup_priv(&ln->nsm_handles, &info->priv);
if (unlikely(cached == NULL)) {
spin_unlock(&nsm_lock);
dprintk("lockd: never saw rebooted peer '%.*s' before\n",
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 097bfa3adb1c..5426189406c1 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -12,9 +12,7 @@ struct lockd_net {
struct delayed_work grace_period_end;
struct lock_manager lockd_manager;
- spinlock_t nsm_clnt_lock;
- unsigned int nsm_users;
- struct rpc_clnt *nsm_clnt;
+ struct list_head nsm_handles;
};
extern int lockd_net_id;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index d678bcc3cbcb..5f31ebd96c06 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -592,7 +592,7 @@ static int lockd_init_net(struct net *net)
INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
INIT_LIST_HEAD(&ln->lockd_manager.list);
ln->lockd_manager.block_opens = false;
- spin_lock_init(&ln->nsm_clnt_lock);
+ INIT_LIST_HEAD(&ln->nsm_handles);
return 0;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b147d1ae71fd..09c576f26c7b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -421,7 +421,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
return rpc_system_err;
}
- nlm_host_rebooted(argp);
+ nlm_host_rebooted(SVC_NET(rqstp), argp);
return rpc_success;
}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 21171f0c6477..fb26b9f522e7 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -464,7 +464,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
return rpc_system_err;
}
- nlm_host_rebooted(argp);
+ nlm_host_rebooted(SVC_NET(rqstp), argp);
return rpc_success;
}
diff --git a/fs/locks.c b/fs/locks.c
index 2a54c800a223..add3eeb79ace 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1780,6 +1780,40 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(generic_setlease);
+#if IS_ENABLED(CONFIG_SRCU)
+/*
+ * Kernel subsystems can register to be notified on any attempt to set
+ * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
+ * to close files that it may have cached when there is an attempt to set a
+ * conflicting lease.
+ */
+struct srcu_notifier_head lease_notifier_chain;
+EXPORT_SYMBOL_GPL(lease_notifier_chain);
+
+static inline void
+lease_notifier_chain_init(void)
+{
+ srcu_init_notifier_head(&lease_notifier_chain);
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+ if (arg != F_UNLCK)
+ srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
+}
+#else /* !IS_ENABLED(CONFIG_SRCU) */
+static inline void
+lease_notifier_chain_init(void)
+{
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+}
+#endif /* IS_ENABLED(CONFIG_SRCU) */
+
/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer
@@ -1800,6 +1834,8 @@ EXPORT_SYMBOL(generic_setlease);
int
vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
{
+ if (lease)
+ setlease_notifier(arg, *lease);
if (filp->f_op->setlease)
return filp->f_op->setlease(filp, arg, lease, priv);
else
@@ -2696,6 +2732,7 @@ static int __init filelock_init(void)
for_each_possible_cpu(i)
INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
+ lease_notifier_chain_init();
return 0;
}
diff --git a/fs/namei.c b/fs/namei.c
index 33e9495a3129..5d4645e65376 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2282,6 +2282,8 @@ EXPORT_SYMBOL(vfs_path_lookup);
*
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code.
+ *
+ * The caller must hold base->i_mutex.
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
@@ -2325,6 +2327,78 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
}
EXPORT_SYMBOL(lookup_one_len);
+/**
+ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: maximum length @len should be interpreted to
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * Unlike lookup_one_len, it should be called without the parent
+ * i_mutex held, and will take the i_mutex itself if necessary.
+ */
+struct dentry *lookup_one_len_unlocked(const char *name,
+ struct dentry *base, int len)
+{
+ struct qstr this;
+ unsigned int c;
+ int err;
+ struct dentry *ret;
+
+ this.name = name;
+ this.len = len;
+ this.hash = full_name_hash(name, len);
+ if (!len)
+ return ERR_PTR(-EACCES);
+
+ if (unlikely(name[0] == '.')) {
+ if (len < 2 || (len == 2 && name[1] == '.'))
+ return ERR_PTR(-EACCES);
+ }
+
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ return ERR_PTR(-EACCES);
+ }
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_flags & DCACHE_OP_HASH) {
+ int err = base->d_op->d_hash(base, &this);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ err = inode_permission(base->d_inode, MAY_EXEC);
+ if (err)
+ return ERR_PTR(err);
+
+ ret = __d_lookup(base, &this);
+ if (ret)
+ return ret;
+ /*
+ * __d_lookup() is used to try to get a quick answer and avoid the
+ * mutex. A false-negative does no harm.
+ */
+ ret = __d_lookup(base, &this);
+ if (ret && ret->d_flags & DCACHE_OP_REVALIDATE) {
+ dput(ret);
+ ret = NULL;
+ }
+ if (ret)
+ return ret;
+
+ mutex_lock(&base->d_inode->i_mutex);
+ ret = __lookup_hash(&this, base, 0);
+ mutex_unlock(&base->d_inode->i_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(lookup_one_len_unlocked);
+
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
{
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index a0b77fc1bd39..95e0a91d41ef 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -6,6 +6,8 @@ config NFSD
select SUNRPC
select EXPORTFS
select NFS_ACL_SUPPORT if NFSD_V2_ACL
+ select SRCU
+ select FSNOTIFY
depends on MULTIUSER
help
Choose Y here if you want to allow other computers to access
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9a6028e120c6..8908bb467727 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,7 +10,8 @@ obj-$(CONFIG_NFSD) += nfsd.o
nfsd-y += trace.o
nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
- export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+ export.o auth.o lockd.o nfscache.o nfsxdr.o \
+ stats.o filecache.o
nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b4d84b579f20..4b504edff121 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -21,6 +21,7 @@
#include "nfsfh.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
@@ -231,6 +232,18 @@ static struct cache_head *expkey_alloc(void)
return NULL;
}
+static void
+expkey_flush(void)
+{
+ /*
+ * Take the nfsd_mutex here to ensure that the file cache is not
+ * destroyed while we're in the middle of flushing.
+ */
+ mutex_lock(&nfsd_mutex);
+ nfsd_file_cache_purge();
+ mutex_unlock(&nfsd_mutex);
+}
+
static struct cache_detail svc_expkey_cache_template = {
.owner = THIS_MODULE,
.hash_size = EXPKEY_HASHMAX,
@@ -243,6 +256,7 @@ static struct cache_detail svc_expkey_cache_template = {
.init = expkey_init,
.update = expkey_update,
.alloc = expkey_alloc,
+ .flush = expkey_flush,
};
static int
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 000000000000..3a4e423d2bb9
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,712 @@
+/*
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/seq_file.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "filecache.h"
+#include "trace.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_FH
+
+/* FIXME: dynamically size this for the machine somehow? */
+#define NFSD_FILE_HASH_BITS 12
+#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+struct nfsd_fcache_bucket {
+ struct hlist_head nfb_head;
+ spinlock_t nfb_lock;
+ unsigned int nfb_count;
+ unsigned int nfb_maxcount;
+};
+
+static struct kmem_cache *nfsd_file_slab;
+static struct kmem_cache *nfsd_file_mark_slab;
+static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
+static struct list_lru nfsd_file_lru;
+static struct fsnotify_group *nfsd_file_fsnotify_group;
+
+static void
+nfsd_file_slab_free(struct rcu_head *rcu)
+{
+ struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
+
+ kmem_cache_free(nfsd_file_slab, nf);
+}
+
+static void
+nfsd_file_mark_free(struct fsnotify_mark *mark)
+{
+ struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
+ nfm_mark);
+
+ kmem_cache_free(nfsd_file_mark_slab, nfm);
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_get(struct nfsd_file_mark *nfm)
+{
+ if (!atomic_inc_not_zero(&nfm->nfm_ref))
+ return NULL;
+ return nfm;
+}
+
+static void
+nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+{
+ if (atomic_dec_and_test(&nfm->nfm_ref))
+ fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
+{
+ int err;
+ struct fsnotify_mark *mark;
+ struct nfsd_file_mark *nfm = NULL, *new = NULL;
+
+ do {
+ mark = fsnotify_find_inode_mark(nfsd_file_fsnotify_group,
+ inode);
+ if (mark) {
+ nfm = nfsd_file_mark_get(container_of(mark,
+ struct nfsd_file_mark,
+ nfm_mark));
+ fsnotify_put_mark(mark);
+ if (likely(nfm))
+ break;
+ }
+
+ /* allocate a new nfm */
+ if (!new) {
+ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ fsnotify_init_mark(&new->nfm_mark, nfsd_file_mark_free);
+ atomic_set(&new->nfm_ref, 1);
+ }
+
+ err = fsnotify_add_mark(&new->nfm_mark,
+ nfsd_file_fsnotify_group, nf->nf_inode,
+ NULL, false);
+ if (likely(!err)) {
+ nfm = new;
+ new = NULL;
+ }
+ } while (unlikely(err == EEXIST));
+
+ if (new)
+ kmem_cache_free(nfsd_file_mark_slab, new);
+ return nfm;
+}
+
+static struct nfsd_file *
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval)
+{
+ struct nfsd_file *nf;
+
+ nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+ if (nf) {
+ INIT_HLIST_NODE(&nf->nf_node);
+ INIT_LIST_HEAD(&nf->nf_lru);
+ nf->nf_file = NULL;
+ nf->nf_flags = 0;
+ nf->nf_inode = inode;
+ nf->nf_hashval = hashval;
+ atomic_set(&nf->nf_ref, 1);
+ nf->nf_may = NFSD_FILE_MAY_MASK & may;
+ if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+ if (may & NFSD_MAY_WRITE)
+ __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+ if (may & NFSD_MAY_READ)
+ __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ }
+ nf->nf_mark = NULL;
+ trace_nfsd_file_alloc(nf);
+ }
+ return nf;
+}
+
+static void
+nfsd_file_put_final(struct nfsd_file *nf)
+{
+ trace_nfsd_file_put_final(nf);
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file)
+ fput(nf->nf_file);
+ call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+}
+
+static bool
+nfsd_file_put_final_delayed(struct nfsd_file *nf)
+{
+ bool flush = false;
+
+ trace_nfsd_file_put_final(nf);
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file)
+ flush = fput_global(nf->nf_file);
+ call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+ return flush;
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+ lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+ trace_nfsd_file_unhash(nf);
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+ clear_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+ hlist_del_rcu(&nf->nf_node);
+ list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+ return true;
+ }
+ return false;
+}
+
+static void
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+{
+ lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+ trace_nfsd_file_unhash_and_release_locked(nf);
+ if (!nfsd_file_unhash(nf))
+ return;
+ if (!atomic_dec_and_test(&nf->nf_ref))
+ return;
+
+ list_add(&nf->nf_lru, dispose);
+}
+
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+ trace_nfsd_file_put(nf);
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ smp_mb__after_atomic();
+ if (atomic_dec_and_test(&nf->nf_ref)) {
+ WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+ nfsd_file_put_final(nf);
+ }
+}
+
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+ if (likely(atomic_inc_not_zero(&nf->nf_ref)))
+ return nf;
+ return NULL;
+}
+
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
+ nfsd_file_put_final(nf);
+ }
+}
+
+static void
+nfsd_file_dispose_list_sync(struct list_head *dispose)
+{
+ bool flush = false;
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
+ if (nfsd_file_put_final_delayed(nf))
+ flush = true;
+ }
+ if (flush)
+ fput_global_flush();
+}
+
+static enum lru_status
+nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ spinlock_t *lock, void *arg)
+ __releases(lock)
+ __acquires(lock)
+{
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+ bool unhashed;
+
+ /*
+ * Do a lockless refcount check. The hashtable holds one reference, so
+ * we look to see if anything else has a reference, or if any have
+ * been put since the shrinker last ran. Those don't get unhashed and
+ * released.
+ *
+ * Note that in the put path, we set the flag and then decrement the
+ * counter. Here we check the counter and then test and clear the flag.
+ * That order is deliberate to ensure that we can do this locklessly.
+ */
+ if (atomic_read(&nf->nf_ref) > 1 ||
+ test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+ return LRU_ROTATE;
+
+ spin_unlock(lock);
+ spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+ unhashed = nfsd_file_unhash(nf);
+ spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+ if (unhashed)
+ nfsd_file_put(nf);
+ spin_lock(lock);
+ return unhashed ? LRU_REMOVED_RETRY : LRU_RETRY;
+}
+
+static unsigned long
+nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+{
+ return list_lru_count(&nfsd_file_lru);
+}
+
+static unsigned long
+nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ return list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, NULL);
+}
+
+static struct shrinker nfsd_file_shrinker = {
+ .scan_objects = nfsd_file_lru_scan,
+ .count_objects = nfsd_file_lru_count,
+ .seeks = 1,
+};
+
+static void
+__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+ struct list_head *dispose)
+{
+ struct nfsd_file *nf;
+ struct hlist_node *tmp;
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+ if (inode == nf->nf_inode)
+ nfsd_file_unhash_and_release_locked(nf, dispose);
+ }
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put. Also ensure that any of the
+ * fputs also have their final __fput done as well.
+ */
+void
+nfsd_file_close_inode_sync(struct inode *inode)
+{
+ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+ NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+ trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+ nfsd_file_dispose_list_sync(&dispose);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put.
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+ NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+ trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+ nfsd_file_dispose_list(&dispose);
+}
+
+static int
+nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+ void *data)
+{
+ struct file_lock *fl = data;
+
+ /* Only close files for F_SETLEASE leases */
+ if (fl->fl_flags & FL_LEASE)
+ nfsd_file_close_inode_sync(file_inode(fl->fl_file));
+ return 0;
+}
+
+static struct notifier_block nfsd_file_lease_notifier = {
+ .notifier_call = nfsd_file_lease_notifier_call,
+};
+
+static int
+nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
+ struct inode *inode,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *vfsmount_mark,
+ u32 mask, void *data, int data_type,
+ const unsigned char *file_name, u32 cookie)
+{
+ trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+ /* Should be no marks on non-regular files */
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ /* ...and we don't do anything with vfsmount marks */
+ BUG_ON(vfsmount_mark);
+
+ /* don't close files if this was not the last link */
+ if (mask & FS_ATTRIB) {
+ if (inode->i_nlink)
+ return 0;
+ }
+
+ nfsd_file_close_inode(inode);
+ return 0;
+}
+
+
+const static struct fsnotify_ops nfsd_file_fsnotify_ops = {
+ .handle_event = nfsd_file_fsnotify_handle_event,
+};
+
+int
+nfsd_file_cache_init(void)
+{
+ int ret = -ENOMEM;
+ unsigned int i;
+
+ if (nfsd_file_hashtbl)
+ return 0;
+
+ nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+ sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+ if (!nfsd_file_hashtbl) {
+ pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+ goto out_err;
+ }
+
+ nfsd_file_slab = kmem_cache_create("nfsd_file",
+ sizeof(struct nfsd_file), 0, 0, NULL);
+ if (!nfsd_file_slab) {
+ pr_err("nfsd: unable to create nfsd_file_slab\n");
+ goto out_err;
+ }
+
+ nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
+ sizeof(struct nfsd_file_mark), 0, 0, NULL);
+ if (!nfsd_file_mark_slab) {
+ pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
+ goto out_err;
+ }
+
+
+ ret = list_lru_init(&nfsd_file_lru);
+ if (ret) {
+ pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
+ goto out_err;
+ }
+
+ ret = register_shrinker(&nfsd_file_shrinker);
+ if (ret) {
+ pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
+ goto out_lru;
+ }
+
+ ret = srcu_notifier_chain_register(&lease_notifier_chain,
+ &nfsd_file_lease_notifier);
+ if (ret) {
+ pr_err("nfsd: unable to register lease notifier: %d\n", ret);
+ goto out_shrinker;
+ }
+
+ nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
+ if (IS_ERR(nfsd_file_fsnotify_group)) {
+ pr_err("nfsd: unable to create fsnotify group: %ld\n",
+ PTR_ERR(nfsd_file_fsnotify_group));
+ nfsd_file_fsnotify_group = NULL;
+ goto out_notifier;
+ }
+
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+ spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+ }
+out:
+ return ret;
+out_notifier:
+ srcu_notifier_chain_unregister(&lease_notifier_chain,
+ &nfsd_file_lease_notifier);
+out_shrinker:
+ unregister_shrinker(&nfsd_file_shrinker);
+out_lru:
+ list_lru_destroy(&nfsd_file_lru);
+out_err:
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ kfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+ goto out;
+}
+
+void
+nfsd_file_cache_purge(void)
+{
+ unsigned int i;
+ struct nfsd_file *nf;
+ LIST_HEAD(dispose);
+
+ if (!nfsd_file_hashtbl)
+ return;
+
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ spin_lock(&nfsd_file_hashtbl[i].nfb_lock);
+ while(!hlist_empty(&nfsd_file_hashtbl[i].nfb_head)) {
+ nf = hlist_entry(nfsd_file_hashtbl[i].nfb_head.first,
+ struct nfsd_file, nf_node);
+ nfsd_file_unhash_and_release_locked(nf, &dispose);
+ }
+ spin_unlock(&nfsd_file_hashtbl[i].nfb_lock);
+ nfsd_file_dispose_list(&dispose);
+ }
+}
+
+void
+nfsd_file_cache_shutdown(void)
+{
+ LIST_HEAD(dispose);
+
+ srcu_notifier_chain_unregister(&lease_notifier_chain,
+ &nfsd_file_lease_notifier);
+ unregister_shrinker(&nfsd_file_shrinker);
+ nfsd_file_cache_purge();
+ list_lru_destroy(&nfsd_file_lru);
+ rcu_barrier();
+ fsnotify_put_group(nfsd_file_fsnotify_group);
+ nfsd_file_fsnotify_group = NULL;
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ kfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+}
+
+/*
+ * Search nfsd_file_hashtbl[] for file. We hash on the filehandle and also on
+ * the NFSD_MAY_READ/WRITE flags. If the file is open for r/w, then it's usable
+ * for either.
+ */
+static struct nfsd_file *
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+ unsigned int hashval)
+{
+ struct nfsd_file *nf;
+ unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+
+ hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+ nf_node) {
+ if ((need & nf->nf_may) != need)
+ continue;
+ if (nf->nf_inode == inode)
+ return nfsd_file_get(nf);
+ }
+ return NULL;
+}
+
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ __be32 status;
+ struct nfsd_file *nf, *new = NULL;
+ struct inode *inode;
+ unsigned int hashval;
+
+ /* FIXME: skip this if fh_dentry is already set? */
+ status = fh_verify(rqstp, fhp, S_IFREG,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ if (status != nfs_ok)
+ return status;
+
+ inode = d_inode(fhp->fh_dentry);
+ hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+retry:
+ rcu_read_lock();
+ nf = nfsd_file_find_locked(inode, may_flags, hashval);
+ rcu_read_unlock();
+ if (nf)
+ goto wait_for_construction;
+
+ if (!new) {
+ new = nfsd_file_alloc(inode, may_flags, hashval);
+ if (!new) {
+ trace_nfsd_file_acquire(hashval, inode, may_flags, NULL,
+ nfserr_jukebox);
+ return nfserr_jukebox;
+ }
+ }
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ nf = nfsd_file_find_locked(inode, may_flags, hashval);
+ if (likely(nf == NULL)) {
+ /* Take reference for the hashtable */
+ atomic_inc(&new->nf_ref);
+ __set_bit(NFSD_FILE_HASHED, &new->nf_flags);
+ __set_bit(NFSD_FILE_PENDING, &new->nf_flags);
+ list_lru_add(&nfsd_file_lru, &new->nf_lru);
+ hlist_add_head_rcu(&new->nf_node,
+ &nfsd_file_hashtbl[hashval].nfb_head);
+ ++nfsd_file_hashtbl[hashval].nfb_count;
+ nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+ nfsd_file_hashtbl[hashval].nfb_count);
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ nf = new;
+ new = NULL;
+ goto open_file;
+ }
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+
+wait_for_construction:
+ wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+ /* Did construction of this file fail? */
+ if (!nf->nf_file) {
+ /*
+ * We can only take over construction for this nfsd_file if the
+ * MAY flags are equal. Otherwise, we put the reference and try
+ * again.
+ */
+ if ((may_flags & NFSD_FILE_MAY_MASK) != nf->nf_may) {
+ nfsd_file_put(nf);
+ goto retry;
+ }
+
+ /* try to take over construction for this file */
+ if (test_and_set_bit(NFSD_FILE_PENDING, &nf->nf_flags))
+ goto wait_for_construction;
+
+ /* sync up the BREAK_* flags with our may_flags */
+ if (may_flags & NFSD_MAY_NOT_BREAK_LEASE) {
+ if (may_flags & NFSD_MAY_WRITE)
+ set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+ if (may_flags & NFSD_MAY_READ)
+ set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ } else {
+ clear_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+ clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ }
+
+ goto open_file;
+ }
+
+ if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+ bool write = (may_flags & NFSD_MAY_WRITE);
+
+ if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+ (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+ status = nfserrno(nfsd_open_break_lease(
+ file_inode(nf->nf_file), may_flags));
+ if (status == nfs_ok) {
+ clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ if (write)
+ clear_bit(NFSD_FILE_BREAK_WRITE,
+ &nf->nf_flags);
+ }
+ }
+ }
+out:
+ if (status == nfs_ok) {
+ *pnf = nf;
+ } else {
+ nfsd_file_put(nf);
+ nf = NULL;
+ }
+
+ if (new)
+ nfsd_file_put(new);
+
+ trace_nfsd_file_acquire(hashval, inode, may_flags, nf, status);
+ return status;
+open_file:
+ if (!nf->nf_mark) {
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
+ if (!nf->nf_mark)
+ status = nfserr_jukebox;
+ }
+ /* FIXME: should we abort opening if the link count goes to 0? */
+ if (status == nfs_ok)
+ status = nfsd_open_verified(rqstp, fhp, S_IFREG, may_flags,
+ &nf->nf_file);
+ clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+ goto out;
+}
+
+/*
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+{
+ unsigned int i, count = 0, longest = 0;
+
+ /*
+ * No need for spinlocks here since we're not terribly interested in
+ * accuracy. We do take the nfsd_mutex simply to ensure that we
+ * don't end up racing with server shutdown
+ */
+ mutex_lock(&nfsd_mutex);
+ if (nfsd_file_hashtbl) {
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ count += nfsd_file_hashtbl[i].nfb_count;
+ longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+ }
+ }
+ mutex_unlock(&nfsd_mutex);
+
+ seq_printf(m, "total entries: %u\n", count);
+ seq_printf(m, "longest chain: %u\n", longest);
+ return 0;
+}
+
+int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, nfsd_file_cache_stats_show, NULL);
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644
index 000000000000..756aea5431da
--- /dev/null
+++ b/fs/nfsd/filecache.h
@@ -0,0 +1,44 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+
+#include <linux/fsnotify_backend.h>
+
+struct nfsd_file_mark {
+ struct fsnotify_mark nfm_mark;
+ atomic_t nfm_ref;
+};
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode pointer value. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only used for comparison.
+ */
+struct nfsd_file {
+ struct hlist_node nf_node;
+ struct list_head nf_lru;
+ struct rcu_head nf_rcu;
+ struct file *nf_file;
+#define NFSD_FILE_HASHED (0)
+#define NFSD_FILE_PENDING (1)
+#define NFSD_FILE_BREAK_READ (2)
+#define NFSD_FILE_BREAK_WRITE (3)
+#define NFSD_FILE_REFERENCED (4)
+ unsigned long nf_flags;
+ struct inode *nf_inode;
+ unsigned int nf_hashval;
+ atomic_t nf_ref;
+ unsigned char nf_may;
+ struct nfsd_file_mark *nf_mark;
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_purge(void);
+void nfsd_file_cache_shutdown(void);
+void nfsd_file_put(struct nfsd_file *nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+void nfsd_file_close_inode_sync(struct inode *inode);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+int nfsd_file_cache_stats_open(struct inode *, struct file *);
+#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7f785c..4e46ac511479 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -192,7 +192,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nfserr = nfsd_write(rqstp, &resp->fh, NULL,
+ nfserr = nfsd_write(rqstp, &resp->fh,
argp->offset,
rqstp->rq_vec, argp->vlen,
&cnt,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f6e7cbabac5a..2246454dec76 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -262,11 +262,11 @@ void fill_post_wcc(struct svc_fh *fhp)
err = fh_getattr(fhp, &fhp->fh_post_attr);
fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version;
if (err) {
- fhp->fh_post_saved = 0;
+ fhp->fh_post_saved = false;
/* Grab the ctime anyway - set_change_info might use it */
fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
} else
- fhp->fh_post_saved = 1;
+ fhp->fh_post_saved = true;
}
/*
@@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_one_len(name, dparent, namlen);
+ dchild = lookup_one_len_unlocked(name, dparent, namlen);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ebf90e487c75..1ee1881cf8d0 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -144,8 +144,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
list_del_init(&ls->ls_perfile);
spin_unlock(&fp->fi_lock);
- vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
- fput(ls->ls_file);
+ vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
+ nfsd_file_put(ls->ls_file);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -169,7 +169,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
fl->fl_end = OFFSET_MAX;
fl->fl_owner = ls;
fl->fl_pid = current->tgid;
- fl->fl_file = ls->ls_file;
+ fl->fl_file = ls->ls_file->nf_file;
status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
if (status) {
@@ -201,18 +201,19 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
INIT_LIST_HEAD(&ls->ls_perfile);
spin_lock_init(&ls->ls_lock);
INIT_LIST_HEAD(&ls->ls_layouts);
+ mutex_init(&ls->ls_mutex);
ls->ls_layout_type = layout_type;
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT);
if (parent->sc_type == NFS4_DELEG_STID)
- ls->ls_file = get_file(fp->fi_deleg_file);
+ ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
BUG_ON(!ls->ls_file);
if (nfsd4_layout_setlease(ls)) {
- fput(ls->ls_file);
+ nfsd_file_put(ls->ls_file);
put_nfs4_file(fp);
kmem_cache_free(nfs4_layout_stateid_cache, ls);
return NULL;
@@ -262,19 +263,23 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
status = nfserr_jukebox;
if (!ls)
goto out;
+ mutex_lock(&ls->ls_mutex);
} else {
ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
status = nfserr_bad_stateid;
+ mutex_lock(&ls->ls_mutex);
if (stateid->si_generation > stid->sc_stateid.si_generation)
- goto out_put_stid;
+ goto out_unlock_stid;
if (layout_type != ls->ls_layout_type)
- goto out_put_stid;
+ goto out_unlock_stid;
}
*lsp = ls;
return 0;
+out_unlock_stid:
+ mutex_unlock(&ls->ls_mutex);
out_put_stid:
nfs4_put_stid(stid);
out:
@@ -296,8 +301,6 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
trace_layout_recall(&ls->ls_stid.sc_stateid);
atomic_inc(&ls->ls_stid.sc_count);
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
nfsd4_run_cb(&ls->ls_recall);
out_unlock:
@@ -406,8 +409,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
list_add_tail(&new->lo_perstate, &ls->ls_layouts);
new = NULL;
done:
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&lgp->lg_sid, &ls->ls_stid);
spin_unlock(&ls->ls_lock);
out:
spin_unlock(&fp->fi_lock);
@@ -481,11 +483,8 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
}
}
if (!list_empty(&ls->ls_layouts)) {
- if (found) {
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
- sizeof(stateid_t));
- }
+ if (found)
+ nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid);
lrp->lrs_present = 1;
} else {
trace_layoutstate_unhash(&ls->ls_stid.sc_stateid);
@@ -494,6 +493,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
}
spin_unlock(&ls->ls_lock);
+ mutex_unlock(&ls->ls_mutex);
nfs4_put_stid(&ls->ls_stid);
nfsd4_free_layouts(&reaplist);
return nfs_ok;
@@ -598,7 +598,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = "/sbin/nfsd-recall-failed";
argv[1] = addr_str;
- argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+ argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
@@ -608,6 +608,16 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
}
}
+static void
+nfsd4_cb_layout_prepare(struct nfsd4_callback *cb)
+{
+ struct nfs4_layout_stateid *ls =
+ container_of(cb, struct nfs4_layout_stateid, ls_recall);
+
+ mutex_lock(&ls->ls_mutex);
+ nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid);
+}
+
static int
nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -649,12 +659,14 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
trace_layout_recall_release(&ls->ls_stid.sc_stateid);
+ mutex_unlock(&ls->ls_mutex);
nfsd4_return_all_layouts(ls, &reaplist);
nfsd4_free_layouts(&reaplist);
nfs4_put_stid(&ls->ls_stid);
}
static struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
+ .prepare = nfsd4_cb_layout_prepare,
.done = nfsd4_cb_layout_done,
.release = nfsd4_cb_layout_release,
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4ce6b97b31ad..7e21763a35f2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -758,7 +758,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
- read->rd_filp = NULL;
+ read->rd_nf = NULL;
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
@@ -775,7 +775,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
- RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+ RD_STATE, &read->rd_nf);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -921,7 +921,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
- &setattr->sa_stateid, WR_STATE, NULL, NULL);
+ &setattr->sa_stateid, WR_STATE, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -977,7 +977,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_write *write)
{
stateid_t *stateid = &write->wr_stateid;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
int nvecs;
@@ -986,7 +986,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
- &filp, NULL);
+ &nf);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
@@ -999,10 +999,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nvecs = fill_in_write_vector(rqstp->rq_vec, write);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
- status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
&write->wr_how_written);
- fput(filp);
+ nfsd_file_put(nf);
write->wr_bytes_written = cnt;
@@ -1014,21 +1014,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status = nfserr_notsupp;
- struct file *file;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&fallocate->falloc_stateid,
- WR_STATE, &file, NULL);
+ WR_STATE, &nf);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
- status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+ status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
fallocate->falloc_offset,
fallocate->falloc_length,
flags);
- fput(file);
+ nfsd_file_put(nf);
return status;
}
@@ -1053,11 +1053,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
int whence;
__be32 status;
- struct file *file;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&seek->seek_stateid,
- RD_STATE, &file, NULL);
+ RD_STATE, &nf);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
@@ -1079,14 +1079,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* Note: This call does change file->f_pos, but nothing in NFSD
* should ever file->f_pos.
*/
- seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+ seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
if (seek->seek_pos < 0)
status = nfserrno(seek->seek_pos);
- else if (seek->seek_pos >= i_size_read(file_inode(file)))
+ else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
seek->seek_eof = true;
out:
- fput(file);
+ nfsd_file_put(nf);
return status;
}
@@ -1309,6 +1309,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
nfserr = nfsd4_insert_layout(lgp, ls);
out_put_stid:
+ mutex_unlock(&ls->ls_mutex);
nfs4_put_stid(&ls->ls_stid);
out:
return nfserr;
@@ -1362,6 +1363,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
goto out;
}
+ /* LAYOUTCOMMIT does not require any serialization */
+ mutex_unlock(&ls->ls_mutex);
+
if (new_size > i_size_read(inode)) {
lcp->lc_size_chg = 1;
lcp->lc_newsize = new_size;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f1d5691b795..e3a10df44896 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -49,6 +49,7 @@
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -288,18 +289,18 @@ put_nfs4_file(struct nfs4_file *fi)
}
}
-static struct file *
+static struct nfsd_file *
__nfs4_get_fd(struct nfs4_file *f, int oflag)
{
if (f->fi_fds[oflag])
- return get_file(f->fi_fds[oflag]);
+ return nfsd_file_get(f->fi_fds[oflag]);
return NULL;
}
-static struct file *
+static struct nfsd_file *
find_writeable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
@@ -309,10 +310,10 @@ find_writeable_file_locked(struct nfs4_file *f)
return ret;
}
-static struct file *
+static struct nfsd_file *
find_writeable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_writeable_file_locked(f);
@@ -321,9 +322,10 @@ find_writeable_file(struct nfs4_file *f)
return ret;
}
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
@@ -333,10 +335,10 @@ static struct file *find_readable_file_locked(struct nfs4_file *f)
return ret;
}
-static struct file *
+static struct nfsd_file *
find_readable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_readable_file_locked(f);
@@ -345,10 +347,10 @@ find_readable_file(struct nfs4_file *f)
return ret;
}
-struct file *
+struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = __nfs4_get_fd(f, O_RDWR);
@@ -449,17 +451,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
might_lock(&fp->fi_lock);
if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
- struct file *f1 = NULL;
- struct file *f2 = NULL;
+ struct nfsd_file *f1 = NULL;
+ struct nfsd_file *f2 = NULL;
swap(f1, fp->fi_fds[oflag]);
if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
swap(f2, fp->fi_fds[O_RDWR]);
spin_unlock(&fp->fi_lock);
if (f1)
- fput(f1);
+ nfsd_file_put(f1);
if (f2)
- fput(f2);
+ nfsd_file_put(f2);
}
}
@@ -575,6 +577,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
/* Will be incremented before return to client: */
atomic_set(&stid->sc_count, 1);
+ spin_lock_init(&stid->sc_lock);
/*
* It shouldn't be a problem to reuse an opaque stateid value.
@@ -745,18 +748,30 @@ nfs4_put_stid(struct nfs4_stid *s)
put_nfs4_file(fp);
}
+void
+nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
+{
+ stateid_t *src = &stid->sc_stateid;
+
+ spin_lock(&stid->sc_lock);
+ if (unlikely(++src->si_generation == 0))
+ src->si_generation = 1;
+ memcpy(dst, src, sizeof(*dst));
+ spin_unlock(&stid->sc_lock);
+}
+
static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
spin_lock(&fp->fi_lock);
if (fp->fi_deleg_file && --fp->fi_delegees == 0)
- swap(filp, fp->fi_deleg_file);
+ swap(nf, fp->fi_deleg_file);
spin_unlock(&fp->fi_lock);
- if (filp) {
- vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp);
- fput(filp);
+ if (nf) {
+ vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&fp);
+ nfsd_file_put(nf);
}
}
@@ -1048,11 +1063,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
{
struct nfs4_ol_stateid *stp = openlockstateid(stid);
struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
- struct file *file;
+ struct nfsd_file *nf;
- file = find_any_file(stp->st_stid.sc_file);
- if (file)
- filp_close(file, (fl_owner_t)lo);
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
nfs4_free_ol_stateid(stid);
}
@@ -2256,15 +2274,20 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
clid->flags = new->cl_exchange_flags;
}
+static bool client_has_openowners(struct nfs4_client *clp)
+{
+ struct nfs4_openowner *oo;
+
+ list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) {
+ if (!list_empty(&oo->oo_owner.so_stateids))
+ return true;
+ }
+ return false;
+}
+
static bool client_has_state(struct nfs4_client *clp)
{
- /*
- * Note clp->cl_openowners check isn't quite right: there's no
- * need to count owners without stateid's.
- *
- * Also note we should probably be using this in 4.0 case too.
- */
- return !list_empty(&clp->cl_openowners)
+ return client_has_openowners(clp)
#ifdef CONFIG_NFSD_PNFS
|| !list_empty(&clp->cl_lo_states)
#endif
@@ -3049,7 +3072,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* Cases below refer to rfc 3530 section 14.2.33: */
spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&clname, nn);
- if (conf) {
+ if (conf && client_has_state(conf)) {
/* case 0: */
status = nfserr_clid_inuse;
if (clp_used_exchangeid(conf))
@@ -3136,6 +3159,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
} else { /* case 3: normal case; new or rebooted client */
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
+ status = nfserr_clid_inuse;
+ if (client_has_state(old)
+ && !same_creds(&unconf->cl_cred,
+ &old->cl_cred))
+ goto out;
status = mark_client_expired_locked(old);
if (status) {
old = NULL;
@@ -3360,6 +3388,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
+ init_rwsem(&stp->st_rwsem);
spin_lock(&oo->oo_owner.so_client->cl_lock);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -3827,7 +3856,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
struct nfsd4_open *open)
{
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status;
int oflag = nfs4_access_to_omode(open->op_share_access);
int access = nfs4_access_to_access(open->op_share_access);
@@ -3863,18 +3892,18 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+ status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
if (status)
goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
- fp->fi_fds[oflag] = filp;
- filp = NULL;
+ fp->fi_fds[oflag] = nf;
+ nf = NULL;
}
}
spin_unlock(&fp->fi_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
@@ -3949,21 +3978,21 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
struct file_lock *fl;
- struct file *filp;
+ struct nfsd_file *nf;
int status = 0;
fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
if (!fl)
return -ENOMEM;
- filp = find_readable_file(fp);
- if (!filp) {
+ nf = find_readable_file(fp);
+ if (!nf) {
/* We should always have a readable file here */
WARN_ON_ONCE(1);
locks_free_lock(fl);
return -EBADF;
}
- fl->fl_file = filp;
- status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
+ fl->fl_file = nf->nf_file;
+ status = vfs_setlease(nf->nf_file, fl->fl_type, &fl, NULL);
if (fl)
locks_free_lock(fl);
if (status)
@@ -3981,7 +4010,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
hash_delegation_locked(dp, fp);
goto out_unlock;
}
- fp->fi_deleg_file = filp;
+ fp->fi_deleg_file = nf;
fp->fi_delegees = 1;
hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
@@ -3991,7 +4020,7 @@ out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
out_fput:
- fput(filp);
+ nfsd_file_put(nf);
return status;
}
@@ -4187,15 +4216,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
+ down_read(&stp->st_rwsem);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
- if (status)
+ if (status) {
+ up_read(&stp->st_rwsem);
goto out;
+ }
} else {
stp = open->op_stp;
open->op_stp = NULL;
init_open_stateid(stp, fp, open);
+ down_read(&stp->st_rwsem);
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
+ up_read(&stp->st_rwsem);
release_open_stateid(stp);
goto out;
}
@@ -4205,8 +4239,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (stp->st_clnt_odstate == open->op_odstate)
open->op_odstate = NULL;
}
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
+ up_read(&stp->st_rwsem);
if (nfsd4_has_session(&resp->cstate)) {
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4597,7 +4631,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return nfs_ok;
}
-static struct file *
+static struct nfsd_file *
nfs4_find_file(struct nfs4_stid *s, int flags)
{
if (!s)
@@ -4607,7 +4641,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
case NFS4_DELEG_STID:
if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
return NULL;
- return get_file(s->sc_file->fi_deleg_file);
+ return nfsd_file_get(s->sc_file->fi_deleg_file);
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
if (flags & RD_STATE)
@@ -4633,32 +4667,29 @@ nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
static __be32
nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
- struct file **filpp, bool *tmp_file, int flags)
+ struct nfsd_file **nfp, int flags)
{
int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
- struct file *file;
+ struct nfsd_file *nf;
__be32 status;
- file = nfs4_find_file(s, flags);
- if (file) {
+ nf = nfs4_find_file(s, flags);
+ if (nf) {
status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE);
if (status) {
- fput(file);
- return status;
+ nfsd_file_put(nf);
+ goto out;
}
-
- *filpp = file;
} else {
- status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+ status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
if (status)
return status;
-
- if (tmp_file)
- *tmp_file = true;
}
- return 0;
+ *nfp = nf;
+out:
+ return status;
}
/*
@@ -4667,7 +4698,7 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filpp, bool *tmp_file)
+ int flags, struct nfsd_file **nfp)
{
struct svc_fh *fhp = &cstate->current_fh;
struct inode *ino = d_inode(fhp->fh_dentry);
@@ -4676,10 +4707,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfs4_stid *s = NULL;
__be32 status;
- if (filpp)
- *filpp = NULL;
- if (tmp_file)
- *tmp_file = false;
+ if (nfp)
+ *nfp = NULL;
if (grace_disallows_io(net, ino))
return nfserr_grace;
@@ -4716,8 +4745,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
status = nfs4_check_fh(fhp, s);
done:
- if (!status && filpp)
- status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+ if (status == nfs_ok && nfp)
+ status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
out:
if (s)
nfs4_put_stid(s);
@@ -4819,10 +4848,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* revoked delegations are kept only for free_stateid.
*/
return nfserr_bad_stateid;
+ down_write(&stp->st_rwsem);
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
- if (status)
- return status;
- return nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status == nfs_ok)
+ status = nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status != nfs_ok)
+ up_write(&stp->st_rwsem);
+ return status;
}
/*
@@ -4869,6 +4901,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
return status;
oo = openowner(stp->st_stateowner);
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
}
@@ -4899,11 +4932,13 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
- if (oo->oo_flags & NFS4_OO_CONFIRMED)
+ if (oo->oo_flags & NFS4_OO_CONFIRMED) {
+ up_write(&stp->st_rwsem);
goto put_stateid;
+ }
oo->oo_flags |= NFS4_OO_CONFIRMED;
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
+ up_write(&stp->st_rwsem);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
@@ -4975,13 +5010,11 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
goto put_stateid;
}
nfs4_stateid_downgrade(stp, od->od_share_access);
-
reset_union_bmap_deny(od->od_share_deny, stp);
-
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
status = nfs_ok;
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5033,8 +5066,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
+ up_write(&stp->st_rwsem);
nfsd4_close_open_stateid(stp);
@@ -5260,6 +5293,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
+ init_rwsem(&stp->st_rwsem);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -5387,7 +5421,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *lock_stp = NULL;
struct nfs4_ol_stateid *open_stp = NULL;
struct nfs4_file *fp;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
__be32 status = 0;
@@ -5428,6 +5462,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&open_stp, nn);
if (status)
goto out;
+ up_write(&open_stp->st_rwsem);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5435,6 +5470,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
+ if (status == nfs_ok)
+ down_write(&lock_stp->st_rwsem);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
@@ -5469,8 +5506,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NFS4_READ_LT:
case NFS4_READW_LT:
spin_lock(&fp->fi_lock);
- filp = find_readable_file_locked(fp);
- if (filp)
+ nf = find_readable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
spin_unlock(&fp->fi_lock);
file_lock->fl_type = F_RDLCK;
@@ -5478,8 +5515,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
spin_lock(&fp->fi_lock);
- filp = find_writeable_file_locked(fp);
- if (filp)
+ nf = find_writeable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
spin_unlock(&fp->fi_lock);
file_lock->fl_type = F_WRLCK;
@@ -5488,14 +5525,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_inval;
goto out;
}
- if (!filp) {
+ if (!nf) {
status = nfserr_openmode;
goto out;
}
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
+ file_lock->fl_file = nf->nf_file;
file_lock->fl_flags = FL_POSIX;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = lock->lk_offset;
@@ -5509,12 +5546,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
switch (-err) {
case 0: /* success! */
- update_stateid(&lock_stp->st_stid.sc_stateid);
- memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid,
- sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
status = 0;
break;
case (EAGAIN): /* conflock holds conflicting lock */
@@ -5531,8 +5566,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
}
out:
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
if (lock_stp) {
/* Bump seqid manually if the 4.0 replay owner is openowner */
if (cstate->replay_owner &&
@@ -5540,6 +5575,8 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
+ up_write(&lock_stp->st_rwsem);
+
/*
* If this is a new, never-before-used stateid, and we are
* returning an error, then just go ahead and release it.
@@ -5567,11 +5604,11 @@ out:
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
- struct file *file;
- __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ struct nfsd_file *nf;
+ __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
if (!err) {
- err = nfserrno(vfs_test_lock(file, lock));
- fput(file);
+ err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ nfsd_file_put(nf);
}
return err;
}
@@ -5657,7 +5694,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_locku *locku)
{
struct nfs4_ol_stateid *stp;
- struct file *filp = NULL;
+ struct nfsd_file *nf;
struct file_lock *file_lock = NULL;
__be32 status;
int err;
@@ -5675,8 +5712,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&stp, nn);
if (status)
goto out;
- filp = find_any_file(stp->st_stid.sc_file);
- if (!filp) {
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (!nf) {
status = nfserr_lock_range;
goto put_stateid;
}
@@ -5684,13 +5721,13 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
status = nfserr_jukebox;
- goto fput;
+ goto put_file;
}
file_lock->fl_type = F_UNLCK;
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
+ file_lock->fl_file = nf->nf_file;
file_lock->fl_flags = FL_POSIX;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = locku->lu_offset;
@@ -5699,16 +5736,16 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
locku->lu_length);
nfs4_transform_lock_offset(file_lock);
- err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
if (err) {
dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
goto out_nfserr;
}
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
-fput:
- fput(filp);
+ nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
+put_file:
+ nfsd_file_put(nf);
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5718,7 +5755,7 @@ out:
out_nfserr:
status = nfserrno(err);
- goto fput;
+ goto put_file;
}
/*
@@ -5731,17 +5768,17 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct file *filp = find_any_file(fp);
+ struct nfsd_file *nf = find_any_file(fp);
struct inode *inode;
struct file_lock_context *flctx;
- if (!filp) {
+ if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
return status;
}
- inode = file_inode(filp);
+ inode = file_inode(nf->nf_file);
flctx = inode->i_flctx;
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -5754,7 +5791,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
spin_unlock(&flctx->flc_lock);
}
- fput(filp);
+ nfsd_file_put(nf);
return status;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..92e5e8f884d0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -49,6 +49,7 @@
#include "cache.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
@@ -2838,14 +2839,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
if (d_really_is_negative(dentry)) {
/*
- * nfsd_buffered_readdir drops the i_mutex between
- * readdir and calling this callback, leaving a window
- * where this directory entry could have gone away.
+ * we're not holding the i_mutex here, so there's
+ * a window where this directory entry could have gone
+ * away.
*/
dput(dentry);
return nfserr_noent;
@@ -3460,14 +3461,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
{
unsigned long maxcount;
struct xdr_stream *xdr = &resp->xdr;
- struct file *file = read->rd_filp;
+ struct file *file;
int starting_len = xdr->buf->len;
- struct raparms *ra = NULL;
__be32 *p;
if (nfserr)
goto out;
+ file = read->rd_nf->nf_file;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3487,24 +3488,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
(xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
- if (read->rd_tmp_file)
- ra = nfsd_init_raparms(file);
-
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
- if (ra)
- nfsd_put_raparams(file, ra);
-
if (nfserr)
xdr_truncate_encode(xdr, starting_len);
-
out:
- if (file)
- fput(file);
+ if (read->rd_nf)
+ nfsd_file_put(read->rd_nf);
return nfserr;
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 46ec934f5dee..116940c739e1 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -217,10 +217,8 @@ void nfsd_reply_cache_shutdown(void)
drc_hashtbl = NULL;
drc_hashsize = 0;
- if (drc_slab) {
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
- }
+ kmem_cache_destroy(drc_slab);
+ drc_slab = NULL;
}
/*
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 9690cb4dd588..eff44a277f70 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
#include "state.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
/*
* We have a single directory with several nodes in it.
@@ -36,6 +37,7 @@ enum {
NFSD_Threads,
NFSD_Pool_Threads,
NFSD_Pool_Stats,
+ NFSD_File_Cache_Stats,
NFSD_Reply_Cache_Stats,
NFSD_Versions,
NFSD_Ports,
@@ -220,6 +222,13 @@ static const struct file_operations pool_stats_operations = {
.owner = THIS_MODULE,
};
+static struct file_operations file_cache_stats_operations = {
+ .open = nfsd_file_cache_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct file_operations reply_cache_stats_operations = {
.open = nfsd_reply_cache_stats_open,
.read = seq_read,
@@ -1138,6 +1147,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
+ [NFSD_File_Cache_Stats] = {"file_cache_stats", &file_cache_stats_operations, S_IRUGO},
[NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 350041a40fe5..c1681ce894c5 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -631,10 +631,7 @@ fh_put(struct svc_fh *fhp)
fh_unlock(fhp);
fhp->fh_dentry = NULL;
dput(dentry);
-#ifdef CONFIG_NFSD_V3
- fhp->fh_pre_saved = 0;
- fhp->fh_post_saved = 0;
-#endif
+ fh_clear_wcc(fhp);
}
fh_drop_write(fhp);
if (exp) {
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 1e90dad4926b..2087bae17582 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -26,16 +26,16 @@ static inline ino_t u32_to_ino_t(__u32 uino)
*/
typedef struct svc_fh {
struct knfsd_fh fh_handle; /* FH data */
+ int fh_maxsize; /* max size for fh_handle */
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
- int fh_maxsize; /* max size for fh_handle */
- unsigned char fh_locked; /* inode locked by us */
- unsigned char fh_want_write; /* remount protection taken */
+ bool fh_locked; /* inode locked by us */
+ bool fh_want_write; /* remount protection taken */
#ifdef CONFIG_NFSD_V3
- unsigned char fh_post_saved; /* post-op attrs saved */
- unsigned char fh_pre_saved; /* pre-op attrs saved */
+ bool fh_post_saved; /* post-op attrs saved */
+ bool fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
@@ -213,8 +213,8 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
static inline void
fh_clear_wcc(struct svc_fh *fhp)
{
- fhp->fh_post_saved = 0;
- fhp->fh_pre_saved = 0;
+ fhp->fh_post_saved = false;
+ fhp->fh_pre_saved = false;
}
/*
@@ -231,7 +231,7 @@ fill_pre_wcc(struct svc_fh *fhp)
fhp->fh_pre_ctime = inode->i_ctime;
fhp->fh_pre_size = inode->i_size;
fhp->fh_pre_change = inode->i_version;
- fhp->fh_pre_saved = 1;
+ fhp->fh_pre_saved = true;
}
}
@@ -267,7 +267,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
inode = d_inode(dentry);
mutex_lock_nested(&inode->i_mutex, subclass);
fill_pre_wcc(fhp);
- fhp->fh_locked = 1;
+ fhp->fh_locked = true;
}
static inline void
@@ -285,7 +285,7 @@ fh_unlock(struct svc_fh *fhp)
if (fhp->fh_locked) {
fill_post_wcc(fhp);
mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
- fhp->fh_locked = 0;
+ fhp->fh_locked = false;
}
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 4cd78ef4c95c..9893095cbee1 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -213,7 +213,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
+ nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
rqstp->rq_vec, argp->vlen,
&cnt,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ad4e2377dd63..d1034d119afb 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -22,6 +22,7 @@
#include "cache.h"
#include "vfs.h"
#include "netns.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_SVC
@@ -215,22 +216,17 @@ static int nfsd_startup_generic(int nrservs)
if (nfsd_users++)
return 0;
- /*
- * Readahead param cache - will no-op if it already exists.
- * (Note therefore results will be suboptimal if number of
- * threads is modified after nfsd start.)
- */
- ret = nfsd_racache_init(2*nrservs);
+ ret = nfsd_file_cache_init();
if (ret)
goto dec_users;
ret = nfs4_state_start();
if (ret)
- goto out_racache;
+ goto out_file_cache;
return 0;
-out_racache:
- nfsd_racache_shutdown();
+out_file_cache:
+ nfsd_file_cache_shutdown();
dec_users:
nfsd_users--;
return ret;
@@ -242,7 +238,7 @@ static void nfsd_shutdown_generic(void)
return;
nfs4_state_shutdown();
- nfsd_racache_shutdown();
+ nfsd_file_cache_shutdown();
}
static bool nfsd_needs_lockd(void)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 583ffc13cae2..732fbb3b5ef1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -84,7 +84,7 @@ struct nfsd4_callback_ops {
* fields that are of general use to any stateid.
*/
struct nfs4_stid {
- atomic_t sc_count;
+ atomic_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2
#define NFS4_DELEG_STID 4
@@ -94,11 +94,12 @@ struct nfs4_stid {
#define NFS4_REVOKED_DELEG_STID 16
#define NFS4_CLOSED_DELEG_STID 32
#define NFS4_LAYOUT_STID 64
- unsigned char sc_type;
- stateid_t sc_stateid;
- struct nfs4_client *sc_client;
- struct nfs4_file *sc_file;
- void (*sc_free)(struct nfs4_stid *);
+ unsigned char sc_type;
+ stateid_t sc_stateid;
+ spinlock_t sc_lock;
+ struct nfs4_client *sc_client;
+ struct nfs4_file *sc_file;
+ void (*sc_free)(struct nfs4_stid *);
};
/*
@@ -364,15 +365,6 @@ struct nfs4_client_reclaim {
char cr_recdir[HEXDIR_LEN]; /* recover dir */
};
-static inline void
-update_stateid(stateid_t *stateid)
-{
- stateid->si_generation++;
- /* Wraparound recommendation from 3530bis-13 9.1.3.2: */
- if (stateid->si_generation == 0)
- stateid->si_generation = 1;
-}
-
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
* The OPEN response, typically the largest, requires
* 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
@@ -499,7 +491,7 @@ struct nfs4_file {
};
struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
- struct file * fi_fds[3];
+ struct nfsd_file *fi_fds[3];
/*
* Each open or lock stateid contributes 0-4 to the counts
* below depending on which bits are set in st_access_bitmap:
@@ -509,7 +501,7 @@ struct nfs4_file {
*/
atomic_t fi_access[2];
u32 fi_share_deny;
- struct file *fi_deleg_file;
+ struct nfsd_file *fi_deleg_file;
int fi_delegees;
struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
@@ -534,15 +526,16 @@ struct nfs4_file {
* Better suggestions welcome.
*/
struct nfs4_ol_stateid {
- struct nfs4_stid st_stid; /* must be first field */
- struct list_head st_perfile;
- struct list_head st_perstateowner;
- struct list_head st_locks;
- struct nfs4_stateowner * st_stateowner;
- struct nfs4_clnt_odstate * st_clnt_odstate;
- unsigned char st_access_bmap;
- unsigned char st_deny_bmap;
- struct nfs4_ol_stateid * st_openstp;
+ struct nfs4_stid st_stid;
+ struct list_head st_perfile;
+ struct list_head st_perstateowner;
+ struct list_head st_locks;
+ struct nfs4_stateowner *st_stateowner;
+ struct nfs4_clnt_odstate *st_clnt_odstate;
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
+ struct nfs4_ol_stateid *st_openstp;
+ struct rw_semaphore st_rwsem;
};
static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
@@ -557,10 +550,11 @@ struct nfs4_layout_stateid {
spinlock_t ls_lock;
struct list_head ls_layouts;
u32 ls_layout_type;
- struct file *ls_file;
+ struct nfsd_file *ls_file;
struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid;
bool ls_recalled;
+ struct mutex ls_mutex;
};
static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
@@ -585,7 +579,7 @@ struct nfsd_net;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filp, bool *tmp_file);
+ int flags, struct nfsd_file **filp);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
@@ -593,6 +587,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
struct kmem_cache *slab);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
+void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
extern void nfs4_release_reclaim(struct nfsd_net *);
extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
@@ -620,7 +615,7 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
{
atomic_inc(&fi->fi_ref);
}
-struct file *find_any_file(struct nfs4_file *f);
+struct nfsd_file *find_any_file(struct nfs4_file *f);
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c
index 82f89070594c..90967466a1e5 100644
--- a/fs/nfsd/trace.c
+++ b/fs/nfsd/trace.c
@@ -1,5 +1,3 @@
-#include "state.h"
-
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c668520c344b..bbfa8aec762b 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,6 +9,10 @@
#include <linux/tracepoint.h>
+#include "state.h"
+#include "filecache.h"
+#include "vfs.h"
+
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
TP_ARGS(stp),
@@ -46,6 +50,139 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);
+#define show_nf_flags(val) \
+ __print_flags(val, "|", \
+ { 1 << NFSD_FILE_HASHED, "HASHED" }, \
+ { 1 << NFSD_FILE_PENDING, "PENDING" }, \
+ { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
+ { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" })
+
+/* FIXME: This should probably be fleshed out in the future. */
+#define show_nf_may(val) \
+ __print_flags(val, "|", \
+ { NFSD_MAY_READ, "READ" }, \
+ { NFSD_MAY_WRITE, "WRITE" }, \
+ { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
+
+DECLARE_EVENT_CLASS(nfsd_file_class,
+ TP_PROTO(struct nfsd_file *nf),
+ TP_ARGS(nf),
+ TP_STRUCT__entry(
+ __field(unsigned int, nf_hashval)
+ __field(void *, nf_inode)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned char, nf_may)
+ __field(struct file *, nf_file)
+ ),
+ TP_fast_assign(
+ __entry->nf_hashval = nf->nf_hashval;
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = atomic_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+ TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+ __entry->nf_hashval,
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nf_may(__entry->nf_may),
+ __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_EVENT(name) \
+DEFINE_EVENT(nfsd_file_class, name, \
+ TP_PROTO(struct nfsd_file *nf), \
+ TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+
+TRACE_EVENT(nfsd_file_acquire,
+ TP_PROTO(unsigned int hash, struct inode *inode,
+ unsigned int may_flags, struct nfsd_file *nf,
+ __be32 status),
+
+ TP_ARGS(hash, inode, may_flags, nf, status),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, hash)
+ __field(void *, inode)
+ __field(unsigned int, may_flags)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned char, nf_may)
+ __field(struct file *, nf_file)
+ __field(__be32, status)
+ ),
+
+ TP_fast_assign(
+ __entry->hash = hash;
+ __entry->inode = inode;
+ __entry->may_flags = may_flags;
+ __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
+ __entry->nf_flags = nf ? nf->nf_flags : 0;
+ __entry->nf_may = nf ? nf->nf_may : 0;
+ __entry->nf_file = nf ? nf->nf_file : NULL;
+ __entry->status = status;
+ ),
+
+ TP_printk("hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+ __entry->hash, __entry->inode,
+ show_nf_may(__entry->may_flags), __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nf_may(__entry->nf_may), __entry->nf_file,
+ be32_to_cpu(__entry->status))
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_search_class,
+ TP_PROTO(struct inode *inode, unsigned int hash, int found),
+ TP_ARGS(inode, hash, found),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+ __field(unsigned int, hash)
+ __field(int, found)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->hash = hash;
+ __entry->found = found;
+ ),
+ TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+ __entry->inode, __entry->found)
+);
+
+#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
+DEFINE_EVENT(nfsd_file_search_class, name, \
+ TP_PROTO(struct inode *inode, unsigned int hash, int found), \
+ TP_ARGS(inode, hash, found))
+
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+
+TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+ TP_PROTO(struct inode *inode, u32 mask),
+ TP_ARGS(inode, mask),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+ __field(unsigned int, nlink)
+ __field(umode_t, mode)
+ __field(u32, mask)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->nlink = inode->i_nlink;
+ __entry->mode = inode->i_mode;
+ __entry->mask = mask;
+ ),
+ TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+ __entry->nlink, __entry->mode, __entry->mask)
+);
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 45c04979e7b3..aafb9b00b767 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -42,37 +42,10 @@
#include "nfsd.h"
#include "vfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
-
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
- */
-struct raparms {
- struct raparms *p_next;
- unsigned int p_count;
- ino_t p_ino;
- dev_t p_dev;
- int p_set;
- struct file_ra_state p_ra;
- unsigned int p_hindex;
-};
-
-struct raparm_hbucket {
- struct raparms *pb_head;
- spinlock_t pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS 4
-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
-
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
* a mount point.
@@ -217,10 +190,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
host_err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_nfserr;
- /*
- * check if we have crossed a mount point ...
- */
if (nfsd_mountpoint(dentry, exp)) {
+ /*
+ * We don't need the i_mutex after all. It's
+ * still possible we could open this (regular
+ * files can be mountpoints too), but the
+ * i_mutex is just there to prevent renames of
+ * something that we might be about to delegate,
+ * and a mountpoint won't be renamed:
+ */
+ fh_unlock(fhp);
if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
dput(dentry);
goto out_nfserr;
@@ -617,7 +596,8 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
}
#endif /* CONFIG_NFSD_V3 */
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int
+nfsd_open_break_lease(struct inode *inode, int access)
{
unsigned int mode;
@@ -633,9 +613,9 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
* and additional flags.
* N.B. After this call fhp needs an fh_put
*/
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
- int may_flags, struct file **filp)
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
{
struct path path;
struct inode *inode;
@@ -644,24 +624,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
__be32 err;
int host_err = 0;
- validate_process_creds();
-
- /*
- * If we get here, then the client has already done an "open",
- * and (hopefully) checked permission - so allow OWNER_OVERRIDE
- * in case a chmod has now revoked permission.
- *
- * Arguably we should also allow the owner override for
- * directories, but we never have and it doesn't seem to have
- * caused anyone a problem. If we were to change this, note
- * also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
- */
- if (type == S_IFREG)
- may_flags |= NFSD_MAY_OWNER_OVERRIDE;
- err = fh_verify(rqstp, fhp, type, may_flags);
- if (err)
- goto out;
+ BUG_ON(!fhp->fh_dentry);
path.mnt = fhp->fh_export->ex_path.mnt;
path.dentry = fhp->fh_dentry;
@@ -716,67 +679,46 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
out_nfserr:
err = nfserrno(host_err);
out:
- validate_process_creds();
return err;
}
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
{
- struct inode *inode = file_inode(file);
- dev_t dev = inode->i_sb->s_dev;
- ino_t ino = inode->i_ino;
- struct raparms *ra, **rap, **frap = NULL;
- int depth = 0;
- unsigned int hash;
- struct raparm_hbucket *rab;
-
- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
- rab = &raparm_hash[hash];
-
- spin_lock(&rab->pb_lock);
- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
- if (ra->p_ino == ino && ra->p_dev == dev)
- goto found;
- depth++;
- if (ra->p_count == 0)
- frap = rap;
- }
- depth = nfsdstats.ra_size;
- if (!frap) {
- spin_unlock(&rab->pb_lock);
- return NULL;
- }
- rap = frap;
- ra = *frap;
- ra->p_dev = dev;
- ra->p_ino = ino;
- ra->p_set = 0;
- ra->p_hindex = hash;
-found:
- if (rap != &rab->pb_head) {
- *rap = ra->p_next;
- ra->p_next = rab->pb_head;
- rab->pb_head = ra;
- }
- ra->p_count++;
- nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&rab->pb_lock);
+ __be32 err;
- if (ra->p_set)
- file->f_ra = ra->p_ra;
- return ra;
+ validate_process_creds();
+ /*
+ * If we get here, then the client has already done an "open",
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ * in case a chmod has now revoked permission.
+ *
+ * Arguably we should also allow the owner override for
+ * directories, but we never have and it doesn't seem to have
+ * caused anyone a problem. If we were to change this, note
+ * also that our filldir callbacks would need a variant of
+ * lookup_one_len that doesn't check permissions.
+ */
+ if (type == S_IFREG)
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+ err = fh_verify(rqstp, fhp, type, may_flags);
+ if (!err)
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ validate_process_creds();
+ return err;
}
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
{
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+ __be32 err;
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
+ validate_process_creds();
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ validate_process_creds();
+ return err;
}
/*
@@ -979,20 +921,15 @@ out_nfserr:
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
- struct file *file;
- struct raparms *ra;
- __be32 err;
-
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
- if (err)
- return err;
-
- ra = nfsd_init_raparms(file);
- err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
- if (ra)
- nfsd_put_raparams(file, ra);
- fput(file);
-
+ __be32 err;
+ struct nfsd_file *nf;
+
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
+ if (err == nfs_ok) {
+ err = nfsd_vfs_read(rqstp, nf->nf_file, offset, vec, vlen,
+ count);
+ nfsd_file_put(nf);
+ }
return err;
}
@@ -1002,30 +939,18 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
* N.B. After this call fhp needs an fh_put
*/
__be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *cnt, int *stablep)
{
- __be32 err = 0;
-
- if (file) {
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
- stablep);
- } else {
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
- if (err)
- goto out;
-
- if (cnt)
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
- cnt, stablep);
- fput(file);
+ __be32 err;
+ struct nfsd_file *nf;
+
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
+ if (err == nfs_ok) {
+ err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
+ vlen, cnt, stablep);
+ nfsd_file_put(nf);
}
-out:
return err;
}
@@ -1043,9 +968,9 @@ __be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long count)
{
- struct file *file;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
+ struct nfsd_file *nf;
+ loff_t end = LLONG_MAX;
+ __be32 err = nfserr_inval;
if (offset < 0)
goto out;
@@ -1055,12 +980,12 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}
- err = nfsd_open(rqstp, fhp, S_IFREG,
- NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+ err = nfsd_file_acquire(rqstp, fhp,
+ NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = vfs_fsync_range(file, offset, end, 0);
+ int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
if (err2 != -EINVAL)
err = nfserrno(err2);
@@ -1068,7 +993,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notsupp;
}
- fput(file);
+ nfsd_file_put(nf);
out:
return err;
}
@@ -1631,7 +1556,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
/* cannot use fh_lock as we need deadlock protective ordering
* so do it by hand */
trap = lock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = 1;
+ ffhp->fh_locked = tfhp->fh_locked = true;
fill_pre_wcc(ffhp);
fill_pre_wcc(tfhp);
@@ -1681,7 +1606,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
fill_post_wcc(ffhp);
fill_post_wcc(tfhp);
unlock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = 0;
+ ffhp->fh_locked = tfhp->fh_locked = false;
fh_drop_write(ffhp);
out:
@@ -1809,7 +1734,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
offset = *offsetp;
while (1) {
- struct inode *dir_inode = file_inode(file);
unsigned int reclen;
cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1828,15 +1752,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
if (!size)
break;
- /*
- * Various filldir functions may end up calling back into
- * lookup_one_len() and the file system's ->lookup() method.
- * These expect i_mutex to be held, as it would within readdir.
- */
- host_err = mutex_lock_killable(&dir_inode->i_mutex);
- if (host_err)
- break;
-
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;
@@ -1853,7 +1768,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
- mutex_unlock(&dir_inode->i_mutex);
if (size > 0) /* We bailed out early */
break;
@@ -2016,63 +1930,3 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return err? nfserrno(err) : 0;
}
-
-void
-nfsd_racache_shutdown(void)
-{
- struct raparms *raparm, *last_raparm;
- unsigned int i;
-
- dprintk("nfsd: freeing readahead buffers.\n");
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- raparm = raparm_hash[i].pb_head;
- while(raparm) {
- last_raparm = raparm;
- raparm = raparm->p_next;
- kfree(last_raparm);
- }
- raparm_hash[i].pb_head = NULL;
- }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
- int i;
- int j = 0;
- int nperbucket;
- struct raparms **raparm = NULL;
-
-
- if (raparm_hash[0].pb_head)
- return 0;
- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
- nperbucket = max(2, nperbucket);
- cache_size = nperbucket * RAPARM_HASH_SIZE;
-
- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- spin_lock_init(&raparm_hash[i].pb_lock);
-
- raparm = &raparm_hash[i].pb_head;
- for (j = 0; j < nperbucket; j++) {
- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
- if (!*raparm)
- goto out_nomem;
- raparm = &(*raparm)->p_next;
- }
- *raparm = NULL;
- }
-
- nfsdstats.ra_size = cache_size;
- return 0;
-
-out_nomem:
- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
- nfsd_racache_shutdown();
- return -ENOMEM;
-}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fee2451ae248..1efccde4bf9a 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -39,8 +39,6 @@
typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
/* nfsd/vfs.c */
-int nfsd_racache_init(int);
-void nfsd_racache_shutdown(void);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -69,16 +67,18 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
loff_t, unsigned long);
#endif /* CONFIG_NFSD_V3 */
+int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-struct raparms;
+__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+ int, struct file **);
__be32 nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
+__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *,int, unsigned long *, int *);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
@@ -104,22 +104,19 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
-struct raparms *nfsd_init_raparms(struct file *file);
-void nfsd_put_raparams(struct file *file, struct raparms *ra);
-
static inline int fh_want_write(struct svc_fh *fh)
{
int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
if (!ret)
- fh->fh_want_write = 1;
+ fh->fh_want_write = true;
return ret;
}
static inline void fh_drop_write(struct svc_fh *fh)
{
if (fh->fh_want_write) {
- fh->fh_want_write = 0;
+ fh->fh_want_write = false;
mnt_drop_write(fh->fh_export->ex_path.mnt);
}
}
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 9f991007a578..c167d7a5b0e6 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -268,15 +268,14 @@ struct nfsd4_open_downgrade {
struct nfsd4_read {
- stateid_t rd_stateid; /* request */
- u64 rd_offset; /* request */
- u32 rd_length; /* request */
- int rd_vlen;
- struct file *rd_filp;
- bool rd_tmp_file;
+ stateid_t rd_stateid; /* request */
+ u64 rd_offset; /* request */
+ u32 rd_length; /* request */
+ int rd_vlen;
+ struct nfsd_file *rd_nf;
- struct svc_rqst *rd_rqstp; /* response */
- struct svc_fh * rd_fhp; /* response */
+ struct svc_rqst *rd_rqstp; /* response */
+ struct svc_fh *rd_fhp; /* response */
};
struct nfsd4_readdir {
@@ -632,7 +631,7 @@ static inline void
set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
{
BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = fhp->fh_post_saved;
+ cinfo->atomic = (u32)fhp->fh_post_saved;
cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
cinfo->before_change = fhp->fh_pre_change;
diff --git a/fs/notify/group.c b/fs/notify/group.c
index d16b62cb2854..295d08800126 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -81,6 +81,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
if (atomic_dec_and_test(&group->refcnt))
fsnotify_final_destroy_group(group);
}
+EXPORT_SYMBOL_GPL(fsnotify_put_group);
/*
* Create a new fsnotify_group and hold a reference for the group returned.
@@ -109,6 +110,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
return group;
}
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
int fsnotify_fasync(int fd, struct file *file, int on)
{
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index e785fd954c30..76e608b8894c 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -87,6 +87,7 @@ struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
return mark;
}
+EXPORT_SYMBOL_GPL(fsnotify_find_inode_mark);
/*
* If we are setting a mark mask on an inode mark we should pin the inode
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index fc0df4442f7b..00e7072d3c95 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -92,9 +92,6 @@
#include "fsnotify.h"
struct srcu_struct fsnotify_mark_srcu;
-static DEFINE_SPINLOCK(destroy_lock);
-static LIST_HEAD(destroy_list);
-static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
@@ -109,6 +106,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
mark->free_mark(mark);
}
}
+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
/* Calculate mask of events for a list of marks */
u32 fsnotify_recalc_mask(struct hlist_head *head)
@@ -168,10 +166,19 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
atomic_dec(&group->num_marks);
}
+static void
+fsnotify_mark_free_rcu(struct rcu_head *rcu)
+{
+ struct fsnotify_mark *mark;
+
+ mark = container_of(rcu, struct fsnotify_mark, g_rcu);
+ fsnotify_put_mark(mark);
+}
+
/*
- * Free fsnotify mark. The freeing is actually happening from a kthread which
- * first waits for srcu period end. Caller must have a reference to the mark
- * or be protected by fsnotify_mark_srcu.
+ * Free fsnotify mark. The freeing is actually happening from a call_srcu
+ * callback. Caller must have a reference to the mark or be protected by
+ * fsnotify_mark_srcu.
*/
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
@@ -186,10 +193,7 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
spin_unlock(&mark->lock);
- spin_lock(&destroy_lock);
- list_add(&mark->g_list, &destroy_list);
- spin_unlock(&destroy_lock);
- wake_up(&destroy_waitq);
+ call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
/*
* Some groups like to know that marks are being freed. This is a
@@ -208,6 +212,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
mutex_unlock(&group->mark_mutex);
fsnotify_free_mark(mark);
}
+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
{
@@ -385,11 +390,7 @@ err:
spin_unlock(&mark->lock);
- spin_lock(&destroy_lock);
- list_add(&mark->g_list, &destroy_list);
- spin_unlock(&destroy_lock);
- wake_up(&destroy_waitq);
-
+ call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
return ret;
}
@@ -402,6 +403,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
mutex_unlock(&group->mark_mutex);
return ret;
}
+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
/*
* Given a list of marks, find the mark associated with given group. If found
@@ -492,40 +494,4 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
atomic_set(&mark->refcnt, 1);
mark->free_mark = free_mark;
}
-
-static int fsnotify_mark_destroy(void *ignored)
-{
- struct fsnotify_mark *mark, *next;
- struct list_head private_destroy_list;
-
- for (;;) {
- spin_lock(&destroy_lock);
- /* exchange the list head */
- list_replace_init(&destroy_list, &private_destroy_list);
- spin_unlock(&destroy_lock);
-
- synchronize_srcu(&fsnotify_mark_srcu);
-
- list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
- list_del_init(&mark->g_list);
- fsnotify_put_mark(mark);
- }
-
- wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
- }
-
- return 0;
-}
-
-static int __init fsnotify_mark_init(void)
-{
- struct task_struct *thread;
-
- thread = kthread_run(fsnotify_mark_destroy, NULL,
- "fsnotify_mark");
- if (IS_ERR(thread))
- panic("unable to start fsnotify mark destruction thread.");
-
- return 0;
-}
-device_initcall(fsnotify_mark_init);
+EXPORT_SYMBOL_GPL(fsnotify_init_mark);