From dc7a08166f3a5f23e79e839a8a88849bd3397c32 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 Oct 2009 14:41:35 -0400 Subject: nfs: new subdir Documentation/filesystems/nfs We're adding enough nfs documentation that it may as well have its own subdirectory. Acked-by: Randy Dunlap Signed-off-by: J. Bruce Fields --- net/ipv4/Kconfig | 6 +++--- net/ipv4/ipconfig.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 70491d9035e..0c94a1ac294 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,7 +166,7 @@ config IP_PNP_DHCP If unsure, say Y. Note that if you want to use DHCP, a DHCP server must be operating on your network. Read - for details. + for details. config IP_PNP_BOOTP bool "IP: BOOTP support" @@ -181,7 +181,7 @@ config IP_PNP_BOOTP does BOOTP itself, providing all necessary information on the kernel command line, you can say N here. If unsure, say Y. Note that if you want to use BOOTP, a BOOTP server must be operating on your network. - Read for details. + Read for details. config IP_PNP_RARP bool "IP: RARP support" @@ -194,7 +194,7 @@ config IP_PNP_RARP older protocol which is being obsoleted by BOOTP and DHCP), say Y here. Note that if you want to use RARP, a RARP server must be operating on your network. Read - for details. + for details. # not yet ready.. # bool ' IP: ARP support' CONFIG_IP_PNP_ARP diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f8d04c25645..7dcbf470609 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1447,7 +1447,7 @@ late_initcall(ip_auto_config); /* * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel - * command line parameter. See Documentation/filesystems/nfsroot.txt. + * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt. */ static int __init ic_proto_name(char *name) { -- cgit v1.2.3 From dc83d6e27fa80babe31c80aa8568f125f72edf57 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Oct 2009 18:51:34 -0400 Subject: nfsd4: don't try to map gid's in generic rpc code For nfsd we provide users the option of mapping uid's to server-side supplementary group lists. That makes sense for nfsd, but not necessarily for other rpc users (such as the callback client). So move that lookup to svcauth_unix_set_client, which is a program-specific method. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 53 +++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 117f68a8aa4..97cc3de7432 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -655,23 +655,25 @@ static struct unix_gid *unix_gid_lookup(uid_t uid) return NULL; } -static int unix_gid_find(uid_t uid, struct group_info **gip, - struct svc_rqst *rqstp) +static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) { - struct unix_gid *ug = unix_gid_lookup(uid); + struct unix_gid *ug; + struct group_info *gi; + int ret; + + ug = unix_gid_lookup(uid); if (!ug) - return -EAGAIN; - switch (cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle)) { + return ERR_PTR(-EAGAIN); + ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle); + switch (ret) { case -ENOENT: - *gip = NULL; - return 0; + return ERR_PTR(-ENOENT); case 0: - *gip = ug->gi; - get_group_info(*gip); + gi = get_group_info(ug->gi); cache_put(&ug->h, &unix_gid_cache); - return 0; + return gi; default: - return -EAGAIN; + return ERR_PTR(-EAGAIN); } } @@ -681,6 +683,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) struct sockaddr_in *sin; struct sockaddr_in6 *sin6, sin6_storage; struct ip_map *ipm; + struct group_info *gi; + struct svc_cred *cred = &rqstp->rq_cred; switch (rqstp->rq_addr.ss_family) { case AF_INET: @@ -722,6 +726,17 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) ip_map_cached_put(rqstp, ipm); break; } + + gi = unix_gid_find(cred->cr_uid, rqstp); + switch (PTR_ERR(gi)) { + case -EAGAIN: + return SVC_DROP; + case -ENOENT: + break; + default: + put_group_info(cred->cr_group_info); + cred->cr_group_info = gi; + } return SVC_OK; } @@ -818,19 +833,11 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; - if (unix_gid_find(cred->cr_uid, &cred->cr_group_info, rqstp) - == -EAGAIN) + cred->cr_group_info = groups_alloc(slen); + if (cred->cr_group_info == NULL) return SVC_DROP; - if (cred->cr_group_info == NULL) { - cred->cr_group_info = groups_alloc(slen); - if (cred->cr_group_info == NULL) - return SVC_DROP; - for (i = 0; i < slen; i++) - GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); - } else { - for (i = 0; i < slen ; i++) - svc_getnl(argv); - } + for (i = 0; i < slen; i++) + GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; -- cgit v1.2.3 From 6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:06 -0800 Subject: RDMA/cm: fix loopback address support The RDMA CM is intended to support the use of a loopback address when establishing a connection; however, the behavior of the CM when loopback addresses are used is confusing and does not always work, depending on whether loopback was specified by the server, the client, or both. The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non- zero address. (ie they weren't just trying to reserve a port) Currently, if the loopback address is passed to rdam_bind_addr, no device is associated with the rdma_cm_id. Fix this. If a loopback address is specified by the client as the destination address for a connection, it will fail to establish a connection. This is true even if the server is listing across all addresses or on the loopback address itself. The issue is that the server tries to translate the IP address carried in the REQ message to a local net_device address, which fails. The translation is not needed in this case, since the REQ carries the actual HW address that should be used. Finally, cleanup loopback support to be more transport neutral. Replace separate calls to get/set the sgid and dgid from the device address to a single call that behaves correctly depending on the format of the device address. And support both IPv4 and IPv6 address formats. Signed-off-by: Sean Hefty [ Fixed RDS build by s/ib_addr_get/rdma_addr_get/ - Roland ] Signed-off-by: Roland Dreier --- net/rds/ib.c | 4 ++-- net/rds/iw.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rds/ib.c b/net/rds/ib.c index 536ebe5d3f6..3b899236104 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; diff --git a/net/rds/iw.c b/net/rds/iw.c index db224f7c293..b28fa8525b2 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; -- cgit v1.2.3 From 78c210efdefe07131f91ed512a3308b15bb14e2f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Aug 2009 15:41:34 -0400 Subject: Revert "knfsd: avoid overloading the CPU scheduler with enormous load averages" This reverts commit 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad. This helps in an entirely cached workload but not necessarily in workloads that require waiting on disk. Conflicts: include/linux/sunrpc/svc.h net/sunrpc/svc_xprt.c Reported-by: Simon Kirby Tested-by: Jesper Krogh Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index df124f78ee4..2c58b75a236 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -16,8 +16,6 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -#define SVC_MAX_WAKING 5 - static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); @@ -306,7 +304,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; - int thread_avail; if (!(xprt->xpt_flags & ((1<sp_lock); + if (!list_empty(&pool->sp_threads) && + !list_empty(&pool->sp_sockets)) + printk(KERN_ERR + "svc_xprt_enqueue: " + "threads and transports both waiting??\n"); + if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { /* Don't enqueue dead transports */ dprintk("svc: transport %p is dead, not enqueued\n", xprt); @@ -358,15 +361,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) } process: - /* Work out whether threads are available */ - thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */ - if (pool->sp_nwaking >= SVC_MAX_WAKING) { - /* too many threads are runnable and trying to wake up */ - thread_avail = 0; - pool->sp_stats.overloads_avoided++; - } - - if (thread_avail) { + if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, rq_list); @@ -381,8 +376,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); - rqstp->rq_waking = 1; - pool->sp_nwaking++; pool->sp_stats.threads_woken++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); @@ -651,11 +644,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) return -EINTR; spin_lock_bh(&pool->sp_lock); - if (rqstp->rq_waking) { - rqstp->rq_waking = 0; - pool->sp_nwaking--; - BUG_ON(pool->sp_nwaking < 0); - } xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; @@ -1204,16 +1192,15 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) struct svc_pool *pool = p; if (p == SEQ_START_TOKEN) { - seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n"); + seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n"); return 0; } - seq_printf(m, "%u %lu %lu %lu %lu %lu\n", + seq_printf(m, "%u %lu %lu %lu %lu\n", pool->sp_id, pool->sp_stats.packets, pool->sp_stats.sockets_queued, pool->sp_stats.threads_woken, - pool->sp_stats.overloads_avoided, pool->sp_stats.threads_timedout); return 0; -- cgit v1.2.3 From feb8ca37cc3d83c07fd042509ef1e176cfeb2cfa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: SUNRPC: Ensure that we honour autoclose before attempting to reconnect If the XPRT_CLOSE_WAIT flag is set, we need to ensure that we call xprt->ops->close() while holding xprt_lock_write() before we can start reconnecting. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fd46d42afa8..469de292c23 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -700,6 +700,10 @@ void xprt_connect(struct rpc_task *task) } if (!xprt_lock_write(xprt, task)) return; + + if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) + xprt->ops->close(xprt); + if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { -- cgit v1.2.3 From f0380f3d16df8f9e2fcd1d8c16fb0d94370bea99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: RPC: Fix two potential races in put_rpccred It is possible for rpcauth_destroy_credcache() to cause the rpc credentials to be unhashed while put_rpccred is waiting for the rpc_credcache_lock on another cpu. Should this happen, then we can end up calling hlist_del_rcu(&cred->cr_hash) a second time in put_rpccred, thus causing list corruption. Should the credential actually be hashed, it is also possible for rpcauth_lookup_credcache to find and reference it before we get round to unhashing it. In this case, the call to rpcauth_unhash_cred will fail, and so we should just exit without destroying the cred. Reported-by: Neil Brown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 54a4e042f10..7ee6f7eaddf 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -123,16 +123,19 @@ rpcauth_unhash_cred_locked(struct rpc_cred *cred) clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); } -static void +static int rpcauth_unhash_cred(struct rpc_cred *cred) { spinlock_t *cache_lock; + int ret; cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); - if (atomic_read(&cred->cr_count) == 0) + ret = atomic_read(&cred->cr_count) == 0; + if (ret) rpcauth_unhash_cred_locked(cred); spin_unlock(cache_lock); + return ret; } /* @@ -446,31 +449,35 @@ void put_rpccred(struct rpc_cred *cred) { /* Fast path for unhashed credentials */ - if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - goto need_lock; - - if (!atomic_dec_and_test(&cred->cr_count)) + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { + if (atomic_dec_and_test(&cred->cr_count)) + cred->cr_ops->crdestroy(cred); return; - goto out_destroy; -need_lock: + } + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; if (!list_empty(&cred->cr_lru)) { number_cred_unused--; list_del_init(&cred->cr_lru); } - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - rpcauth_unhash_cred(cred); if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { - cred->cr_expire = jiffies; - list_add_tail(&cred->cr_lru, &cred_unused); - number_cred_unused++; - spin_unlock(&rpc_credcache_lock); - return; + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + goto out_nodestroy; + } + if (!rpcauth_unhash_cred(cred)) { + /* We were hashed and someone looked us up... */ + goto out_nodestroy; + } } spin_unlock(&rpc_credcache_lock); -out_destroy: cred->cr_ops->crdestroy(cred); + return; +out_nodestroy: + spin_unlock(&rpc_credcache_lock); } EXPORT_SYMBOL_GPL(put_rpccred); -- cgit v1.2.3 From dd1fd90fe65e2e642f0e58e2ff4849f317a6c43d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Display compressed (shorthand) IPv6 presentation addresses Recent changes to snprintf() introduced the %pI6c formatter, which can display an IPv6 address with standard shorthanding. Using a shorthanded address can save us a few bytes of memory for each stored presentation address, or a few bytes on the wire when sending these in a universal address. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index c7450c8f0a7..6dcdd251781 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -55,16 +55,8 @@ static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, /* * RFC 4291, Section 2.2.1 - * - * To keep the result as short as possible, especially - * since we don't shorthand, we don't want leading zeros - * in each halfword, so avoid %pI6. */ - return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", - ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), - ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), - ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), - ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); + return snprintf(buf, buflen, "%pI6c", addr); } static size_t rpc_ntop6(const struct sockaddr *sap, -- cgit v1.2.3 From 206a134b4d8abf57cd34dffacf993869355b9aac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Check explicitly for tk_status == 0 in call_transmit_status() The success case, where task->tk_status == 0, is by far the most frequent case in call_transmit_status(). The default: arm of the switch statement in call_transmit_status() handles the 0 case. default: was moved close to the top of the switch statement in call_transmit_status() under the theory that the compiler places object code for the earliest arms of a switch statement first, making the CPU do less work. The default: arm of a switch statement, however, is executed only after all the other cases have been checked. Even if the compiler rearranges the object code, the default: arm is the "last resort", meaning all of the other cases have been explicitly exhausted. That makes the current arrangement about as inefficient as it gets for the common case. To fix this, add an explicit check for zero before the switch statement. That forces the compiler to do the zero check first, no matter what optimizations it might try to do to the switch statement. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 38829e20500..7bcd931e06e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1180,10 +1180,22 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; + + /* + * Common case: success. Force the compiler to put this + * test first. + */ + if (task->tk_status == 0) { + xprt_end_transmit(task); + rpc_task_force_reencode(task); + return; + } + switch (task->tk_status) { case -EAGAIN: break; default: + dprint_status(task); xprt_end_transmit(task); /* * Special cases: if we've been waiting on the -- cgit v1.2.3 From 09a21c4102c8f7893368553273d39c0cadedf9af Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Allow RPCs to fail quickly if the server is unreachable The kernel sometimes makes RPC calls to services that aren't running. Because the kernel's RPC client always assumes the hard retry semantic when reconnecting a connection-oriented RPC transport, the underlying reconnect logic takes a long while to time out, even though the remote may have responded immediately with ECONNREFUSED. In certain cases, like upcalls to our local rpcbind daemon, or for NFS mount requests, we'd like the kernel to fail immediately if the remote service isn't reachable. This allows another transport to be tried immediately, or the pending request can be abandoned quickly. Introduce a per-request flag which controls how call_transmit_status() behaves when request transmission fails because the server cannot be reached. We don't want soft connection semantics to apply to other errors. The default case of the switch statement in call_transmit_status() no longer falls through; the fall through code is copied to the default case, and a "break;" is added. The transport's connection re-establishment timeout is also ignored for such requests. We want the request to fail immediately, so the reconnect delay is skipped. Additionally, we don't want a connect failure here to further increase the reconnect timeout value, since this request will not be retried. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 11 +++++++++-- net/sunrpc/xprtsock.c | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7bcd931e06e..68a23583f44 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1197,6 +1197,8 @@ call_transmit_status(struct rpc_task *task) default: dprint_status(task); xprt_end_transmit(task); + rpc_task_force_reencode(task); + break; /* * Special cases: if we've been waiting on the * socket's write_space() callback, or if the @@ -1204,11 +1206,16 @@ call_transmit_status(struct rpc_task *task) * then hold onto the transport lock. */ case -ECONNREFUSED: - case -ECONNRESET: - case -ENOTCONN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) { + xprt_end_transmit(task); + rpc_exit(task, task->tk_status); + break; + } + case -ECONNRESET: + case -ENOTCONN: case -EPIPE: rpc_task_force_reencode(task); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475ba25..ff312f8b018 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2033,7 +2033,7 @@ static void xs_connect(struct rpc_task *task) if (xprt_test_and_set_connecting(xprt)) return; - if (transport->sock != NULL) { + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); -- cgit v1.2.3 From 5a46211540a83871196489247f57da2bdde58d87 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Simplify synopsis of rpcb_local_clnt() Clean up: At one point, rpcb_local_clnt() handled IPv6 loopback addresses too, but it doesn't any more; only IPv4 loopback is used now. Get rid of the @addr and @addrlen arguments to rpcb_local_clnt(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 830faf4d999..28f50da60ce 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -163,13 +163,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, - size_t addrlen, u32 version) +static struct rpc_clnt *rpcb_create_local(u32 version) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, - .address = addr, - .addrsize = addrlen, + .address = (struct sockaddr *)&rpcb_inaddr_loopback, + .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, .version = version, @@ -211,14 +210,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, static int rpcb_register_call(const u32 version, struct rpc_message *msg) { - struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; - size_t addrlen = sizeof(rpcb_inaddr_loopback); struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(addr, addrlen, version); + rpcb_clnt = rpcb_create_local(version); if (!IS_ERR(rpcb_clnt)) { error = rpc_call_sync(rpcb_clnt, msg, 0); rpc_shutdown_client(rpcb_clnt); -- cgit v1.2.3 From c526611dd631b2802b6b0221ffb306c5fa25c86c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use a cached RPC client and transport for rpcbind upcalls The kernel's rpcbind client creates and deletes an rpc_clnt and its underlying transport socket for every upcall to the local rpcbind daemon. When starting a typical NFS server on IPv4 and IPv6, the NFS service itself does three upcalls (one per version) times two upcalls (one per transport) times two upcalls (one per address family), making 12, plus another one for the initial call to unregister previous NFS services. Starting the NLM service adds an additional 13 upcalls, for similar reasons. (Currently the NFS service doesn't start IPv6 listeners, but it will soon enough). Instead, let's create an rpc_clnt for rpcbind upcalls during the first local rpcbind query, and cache it. This saves the overhead of creating and destroying an rpc_clnt and a socket for every upcall. The new logic also prevents the kernel from attempting an RPCB_SET or RPCB_UNSET if it knows from the start that the local portmapper does not support rpcbind protocol version 4. This will cut down on the number of rpcbind upcalls in legacy environments. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 93 +++++++++++++++++++++++++++++++++++++++--------- net/sunrpc/sunrpc_syms.c | 3 ++ 2 files changed, 80 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 28f50da60ce..116db74dd94 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -110,6 +111,9 @@ static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; +static struct rpc_clnt * rpcb_local_clnt; +static struct rpc_clnt * rpcb_local_clnt4; + struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -163,7 +167,13 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(u32 version) +static DEFINE_MUTEX(rpcb_create_local_mutex); + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, @@ -171,12 +181,46 @@ static struct rpc_clnt *rpcb_create_local(u32 version) .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, - .version = version, + .version = RPCBVERS_2, .authflavor = RPC_AUTH_UNIX, .flags = RPC_CLNT_CREATE_NOPING, }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create local rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } - return rpc_create(&args); + /* + * This results in an RPC ping. On systems running portmapper, + * the v4 ping will fail. Proceed anyway, but disallow rpcb + * v4 upcalls. + */ + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to create local rpcbind v4 " + "cleint (errno %ld).\n", PTR_ERR(clnt4)); + clnt4 = NULL; + } + + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; + +out: + mutex_unlock(&rpcb_create_local_mutex); + return result; } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, @@ -208,20 +252,13 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(const u32 version, struct rpc_message *msg) +static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) { - struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(version); - if (!IS_ERR(rpcb_clnt)) { - error = rpc_call_sync(rpcb_clnt, msg, 0); - rpc_shutdown_client(rpcb_clnt); - } else - error = PTR_ERR(rpcb_clnt); - + error = rpc_call_sync(clnt, msg, 0); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -276,6 +313,11 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -285,7 +327,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_2, &msg); + return rpcb_register_call(rpcb_local_clnt, &msg); } /* @@ -310,7 +352,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -337,7 +379,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -353,7 +395,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(RPCBVERS_4, msg); + return rpcb_register_call(rpcb_local_clnt4, msg); } /** @@ -411,6 +453,13 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; + if (rpcb_local_clnt4 == NULL) + return -EPROTONOSUPPORT; if (address == NULL) return rpcb_unregister_all_protofamilies(&msg); @@ -1024,3 +1073,15 @@ static struct rpc_program rpcb_program = { .version = rpcb_version, .stats = &rpcb_stats, }; + +/** + * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister + * + */ +void cleanup_rpcb_clnt(void) +{ + if (rpcb_local_clnt4) + rpc_shutdown_client(rpcb_local_clnt4); + if (rpcb_local_clnt) + rpc_shutdown_client(rpcb_local_clnt); +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8cce9218901..f438347d817 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -24,6 +24,8 @@ extern struct cache_detail ip_map_cache, unix_gid_cache; +extern void cleanup_rpcb_clnt(void); + static int __init init_sunrpc(void) { @@ -53,6 +55,7 @@ out: static void __exit cleanup_sunrpc(void) { + cleanup_rpcb_clnt(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); -- cgit v1.2.3 From 2a76b3bfa22993fc09166bd6a8db0dbe902b6813 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use TCP for local rpcbind upcalls Use TCP with the soft connect semantic for local rpcbind upcalls so the kernel can detect immediately if the local rpcbind daemon is not running. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 116db74dd94..401154094ee 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -176,7 +176,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex); static int rpcb_create_local(void) { struct rpc_create_args args = { - .protocol = XPRT_TRANSPORT_UDP, + .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", @@ -258,7 +258,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, 0); + error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); -- cgit v1.2.3 From 012da158f636347c4eb28fd1e1cca020fef5e54d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use soft connects for autobinding over TCP Autobinding is handled by the rpciod process, not in user processes that are generating regular RPC requests. Thus autobinding is usually not affected by signals targetting user processes, such as KILL or timer expiration events. In addition, an RPC request generated by a user process that has RPC_TASK_SOFTCONN set and needs to perform an autobind will hang if the remote rpcbind service is not available. For rpcbind queries on connection-oriented transports, let's use the new soft connect semantic to return control to the user's process quickly, if the kernel's rpcbind client can't connect to the remote rpcbind service. Logic is introduced in call_bind_status() to handle connection errors that occurred during an asynchronous rpcbind query. The logic abandons the rpcbind query if the RPC request has SOFTCONN set, and retries after a few seconds in the normal case. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 17 ++++++++++++++++- net/sunrpc/rpcb_clnt.c | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 68a23583f44..4b76ef9336c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1060,7 +1060,7 @@ call_bind_status(struct rpc_task *task) goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u remote rpcbind service unavailable\n", + dprintk("RPC: %5u unrecognized remote rpcbind service\n", task->tk_pid); break; case -EPROTONOSUPPORT: @@ -1069,6 +1069,21 @@ call_bind_status(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_bind; return; + case -ECONNREFUSED: /* connection problems */ + case -ECONNRESET: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EPIPE: + dprintk("RPC: %5u remote rpcbind unreachable: %d\n", + task->tk_pid, task->tk_status); + if (!RPC_IS_SOFTCONN(task)) { + rpc_delay(task, 5*HZ); + goto retry_timeout; + } + status = task->tk_status; + break; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 401154094ee..3e3772d8eb9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -537,7 +537,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi .rpc_message = &msg, .callback_ops = &rpcb_getport_ops, .callback_data = map, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN, }; return rpc_run_task(&task_setup_data); -- cgit v1.2.3 From caabea8a565fb4e16f8e58e16bb9d535e591b709 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use soft connect semantics when performing RPC ping Currently, if a remote RPC service is unreachable, an RPC ping will hang until the underlying transport connect attempt times out. A more desirable behavior might be to have the ping fail immediately so upper layers can recover appropriately. In the case of an NFS mount, for instance, this would mean the mount(2) system call could fail immediately if the server isn't listening, rather than hanging uninterruptibly for more than 3 minutes. Change rpc_ping() so that it fails immediately for connection-oriented transports. rpc_create() will then fail immediately for such transports if an RPC ping was requested. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4b76ef9336c..97931d9f857 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,7 +79,7 @@ static void call_connect_status(struct rpc_task *task); static __be32 *rpc_encode_header(struct rpc_task *task); static __be32 *rpc_verify_header(struct rpc_task *task); -static int rpc_ping(struct rpc_clnt *clnt, int flags); +static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -340,7 +340,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) return clnt; if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { - int err = rpc_ping(clnt, RPC_TASK_SOFT); + int err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); @@ -528,7 +528,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; - err = rpc_ping(clnt, RPC_TASK_SOFT); + err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); clnt = ERR_PTR(err); @@ -1709,14 +1709,14 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -static int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, }; int err; msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); - err = rpc_call_sync(clnt, &msg, flags); + err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); put_rpccred(msg.rpc_cred); return err; } -- cgit v1.2.3 From 3a28becc35e5c8f1fabb707bcd8a473712653de6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: soft connect semantics for UDP Introduce soft connect behavior for UDP transports. In this case, a major timeout returns ETIMEDOUT instead of EIO. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 97931d9f857..154034b675b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1380,6 +1380,10 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; + if (RPC_IS_SOFTCONN(task)) { + rpc_exit(task, -ETIMEDOUT); + return; + } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", -- cgit v1.2.3 From 480e3243df156e39eea6c91057e2ae612a6bbe19 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 8 Dec 2009 13:13:03 -0500 Subject: SUNRPC: IS_ERR/PTR_ERR confusion IS_ERR returns 1 or 0, PTR_ERR returns the error value. Signed-off-by: Roel Kluin Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fc6a43ccd95..129d75ea25d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -485,7 +485,7 @@ gss_refresh_upcall(struct rpc_task *task) dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); - if (IS_ERR(gss_msg) == -EAGAIN) { + if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); -- cgit v1.2.3 From 053e324f67b9921fe7de0c4cbc720d29cb4bf207 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Wed, 9 Dec 2009 23:15:22 +0530 Subject: rpc: remove unneeded function parameter in gss_add_msg() The pointer to struct gss_auth parameter in gss_add_msg is not really needed after commit 5b7ddd4a. Zap it. Signed-off-by: Suresh Jayaraman Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 129d75ea25d..3c3c50f38a1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -304,7 +304,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) * to that upcall instead of adding the new upcall. */ static inline struct gss_upcall_msg * -gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) +gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_inode *rpci = gss_msg->inode; struct inode *inode = &rpci->vfs_inode; @@ -445,7 +445,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred); if (IS_ERR(gss_new)) return gss_new; - gss_msg = gss_add_msg(gss_auth, gss_new); + gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { struct inode *inode = &gss_new->inode->vfs_inode; int res = rpc_queue_upcall(inode, &gss_new->msg); -- cgit v1.2.3 From 471452104b8520337ae2fb48c4e61cd4896e025d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 14 Dec 2009 18:00:08 -0800 Subject: const: constify remaining dev_pm_ops Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/iucv/af_iucv.c | 2 +- net/iucv/iucv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1e428863574..c18286a2167 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -221,7 +221,7 @@ static int afiucv_pm_restore_thaw(struct device *dev) return 0; } -static struct dev_pm_ops afiucv_pm_ops = { +static const struct dev_pm_ops afiucv_pm_ops = { .prepare = afiucv_pm_prepare, .complete = afiucv_pm_complete, .freeze = afiucv_pm_freeze, diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 3b1f5f5f8de..fd8b28361a6 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -93,7 +93,7 @@ static int iucv_pm_freeze(struct device *); static int iucv_pm_thaw(struct device *); static int iucv_pm_restore(struct device *); -static struct dev_pm_ops iucv_pm_ops = { +static const struct dev_pm_ops iucv_pm_ops = { .prepare = iucv_pm_prepare, .complete = iucv_pm_complete, .freeze = iucv_pm_freeze, -- cgit v1.2.3 From e7d2860b690d4f3bed6824757c540579638e3d1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Mon, 14 Dec 2009 18:01:06 -0800 Subject: tree-wide: convert open calls to remove spaces to skip_spaces() lib function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes use of skip_spaces() defined in lib/string.c for removing leading spaces from strings all over the tree. It decreases lib.a code size by 47 bytes and reuses the function tree-wide: text data bss dec hex filename 64688 584 592 65864 10148 (TOTALS-BEFORE) 64641 584 592 65817 10119 (TOTALS-AFTER) Also, while at it, if we see (*str && isspace(*str)), we can be sure to remove the first condition (*str) as the second one (isspace(*str)) also evaluates to 0 whenever *str == 0, making it redundant. In other words, "a char equals zero is never a space". Julia Lawall tried the semantic patch (http://coccinelle.lip6.fr) below, and found occurrences of this pattern on 3 more files: drivers/leds/led-class.c drivers/leds/ledtrig-timer.c drivers/video/output.c @@ expression str; @@ ( // ignore skip_spaces cases while (*str && isspace(*str)) { \(str++;\|++str;\) } | - *str && isspace(*str) ) Signed-off-by: André Goddard Rosa Cc: Julia Lawall Cc: Martin Schwidefsky Cc: Jeff Dike Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Richard Purdie Cc: Neil Brown Cc: Kyle McMartin Cc: Henrique de Moraes Holschuh Cc: David Howells Cc: Cc: Samuel Ortiz Cc: Patrick McHardy Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/irda/irnet/irnet.h | 1 + net/irda/irnet/irnet_ppp.c | 8 +++----- net/netfilter/xt_recent.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index b001c361ad3..4300df35d37 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -249,6 +249,7 @@ #include #include #include /* isspace() */ +#include /* skip_spaces() */ #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7dea882dbb7..156020d138b 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -76,9 +76,8 @@ irnet_ctrl_write(irnet_socket * ap, /* Look at the next command */ start = next; - /* Scrap whitespaces before the command */ - while(isspace(*start)) - start++; + /* Scrap whitespaces before the command */ + start = skip_spaces(start); /* ',' is our command separator */ next = strchr(start, ','); @@ -133,8 +132,7 @@ irnet_ctrl_write(irnet_socket * ap, char * endp; /* Scrap whitespaces before the command */ - while(isspace(*begp)) - begp++; + begp = skip_spaces(begp); /* Convert argument to a number (last arg is the base) */ addr = simple_strtoul(begp, &endp, 16); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index eb0ceb84652..fc70a49c0af 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -482,8 +482,7 @@ static ssize_t recent_old_proc_write(struct file *file, if (copy_from_user(buf, input, size)) return -EFAULT; - while (isspace(*c)) - c++; + c = skip_spaces(c); if (size - (c - buf) < 5) return c - buf; -- cgit v1.2.3 From 48f186124220794fce85ed1439fc32f16f69d3e2 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Mon, 14 Dec 2009 21:27:53 -0800 Subject: rpc: add rpc_queue_empty function Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cef74ba0666..89ea8e69ec7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -384,6 +384,20 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r __rpc_do_wake_up_task(queue, task); } +/* + * Tests whether rpc queue is empty + */ +int rpc_queue_empty(struct rpc_wait_queue *queue) +{ + int res; + + spin_lock_bh(&queue->lock); + res = queue->qlen; + spin_unlock_bh(&queue->lock); + return (res == 0); +} +EXPORT_SYMBOL_GPL(rpc_queue_empty); + /* * Wake up a task on a specific queue */ -- cgit v1.2.3 From 689cf5c15baf603a8041565ff0bd0d65d1634fd7 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Mon, 14 Dec 2009 21:27:56 -0800 Subject: nfs: enforce FIFO ordering of operations trying to acquire slot Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 89ea8e69ec7..aae6907fd54 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -210,6 +210,7 @@ void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qnam { __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY); } +EXPORT_SYMBOL_GPL(rpc_init_priority_wait_queue); void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) { -- cgit v1.2.3 From 28dfef8febe48f59cf1e7596e1992a6a1893ca24 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 15 Dec 2009 16:46:48 -0800 Subject: const: constify remaining pipe_buf_operations Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bfa3e7865a8..93c4e060c91 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -93,7 +93,7 @@ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, /* Pipe buffer operations for a socket. */ -static struct pipe_buf_operations sock_pipe_buf_ops = { +static const struct pipe_buf_operations sock_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, -- cgit v1.2.3 From 198de4d7ac3a0f1351c6377ff657950457ed0038 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Aug 2009 19:29:23 +0400 Subject: reorder alloc_fd/attach_fd in socketpair() Signed-off-by: Al Viro --- net/socket.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index b94c3dd7101..bf538bea8fb 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1396,23 +1396,30 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, goto out_release_both; } - fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); - if (unlikely(fd2 < 0)) { - err = fd2; + err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); + if (unlikely(err < 0)) { put_filp(newfile1); put_unused_fd(fd1); goto out_release_both; } - err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); - if (unlikely(err < 0)) { - goto out_fd2; + fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); + if (unlikely(fd2 < 0)) { + err = fd2; + fput(newfile1); + put_unused_fd(fd1); + sock_release(sock2); + goto out; } err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); if (unlikely(err < 0)) { + put_filp(newfile2); + put_unused_fd(fd2); fput(newfile1); - goto out_fd1; + put_unused_fd(fd1); + sock_release(sock2); + goto out; } audit_fd_pair(fd1, fd2); @@ -1438,16 +1445,6 @@ out_release_1: sock_release(sock1); out: return err; - -out_fd2: - put_filp(newfile1); - sock_release(sock1); -out_fd1: - put_filp(newfile2); - sock_release(sock2); - put_unused_fd(fd1); - put_unused_fd(fd2); - goto out; } /* -- cgit v1.2.3 From 7cbe66b6b53b6615f1033bd5b3dbad8162886373 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Aug 2009 19:59:08 +0400 Subject: merge sock_alloc_fd/sock_attach_fd into a new helper Signed-off-by: Al Viro --- net/socket.c | 80 +++++++++++++++++------------------------------------------- 1 file changed, 23 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index bf538bea8fb..dbb3802a764 100644 --- a/net/socket.c +++ b/net/socket.c @@ -355,32 +355,30 @@ static const struct dentry_operations sockfs_dentry_operations = { * but we take care of internal coherence yet. */ -static int sock_alloc_fd(struct file **filep, int flags) +static int sock_alloc_file(struct socket *sock, struct file **f, int flags) { + struct qstr name = { .name = "" }; + struct dentry *dentry; + struct file *file; int fd; fd = get_unused_fd_flags(flags); - if (likely(fd >= 0)) { - struct file *file = get_empty_filp(); + if (unlikely(fd < 0)) + return fd; - *filep = file; - if (unlikely(!file)) { - put_unused_fd(fd); - return -ENFILE; - } - } else - *filep = NULL; - return fd; -} + file = get_empty_filp(); -static int sock_attach_fd(struct socket *sock, struct file *file, int flags) -{ - struct dentry *dentry; - struct qstr name = { .name = "" }; + if (unlikely(!file)) { + put_unused_fd(fd); + return -ENFILE; + } dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!dentry)) + if (unlikely(!dentry)) { + put_filp(file); + put_unused_fd(fd); return -ENOMEM; + } dentry->d_op = &sockfs_dentry_operations; /* @@ -399,24 +397,18 @@ static int sock_attach_fd(struct socket *sock, struct file *file, int flags) file->f_pos = 0; file->private_data = sock; - return 0; + *f = file; + return fd; } int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_fd(&newfile, flags); - - if (likely(fd >= 0)) { - int err = sock_attach_fd(sock, newfile, flags); + int fd = sock_alloc_file(sock, &newfile, flags); - if (unlikely(err < 0)) { - put_filp(newfile); - put_unused_fd(fd); - return err; - } + if (likely(fd >= 0)) fd_install(fd, newfile); - } + return fd; } @@ -1390,20 +1382,13 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); + fd1 = sock_alloc_file(sock1, &newfile1, flags); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); - if (unlikely(err < 0)) { - put_filp(newfile1); - put_unused_fd(fd1); - goto out_release_both; - } - - fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); + fd2 = sock_alloc_file(sock2, &newfile2, flags); if (unlikely(fd2 < 0)) { err = fd2; fput(newfile1); @@ -1412,16 +1397,6 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, goto out; } - err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); - if (unlikely(err < 0)) { - put_filp(newfile2); - put_unused_fd(fd2); - fput(newfile1); - put_unused_fd(fd1); - sock_release(sock2); - goto out; - } - audit_fd_pair(fd1, fd2); fd_install(fd1, newfile1); fd_install(fd2, newfile2); @@ -1548,17 +1523,13 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); + newfd = sock_alloc_file(newsock, &newfile, flags); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } - err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); - if (err < 0) - goto out_fd_simple; - err = security_socket_accept(sock, newsock); if (err) goto out_fd; @@ -1588,11 +1559,6 @@ out_put: fput_light(sock->file, fput_needed); out: return err; -out_fd_simple: - sock_release(newsock); - put_filp(newfile); - put_unused_fd(newfd); - goto out_put; out_fd: fput(newfile); put_unused_fd(newfd); -- cgit v1.2.3 From 6b18662e239a032f908b7f6e164bdf7e2e0a32c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Aug 2009 02:02:43 +0400 Subject: 9p connect fixes * if we fail in p9_conn_create(), we shouldn't leak references to struct file. Logics in ->close() doesn't help - ->trans is already gone by the time it's called. * sock_create_kern() can fail. * use of sock_map_fd() is all fscked up; I'd fixed most of that, but the rest will have to wait for a bit more work in net/socket.c (we still are violating the basic rule of working with descriptor table: "once the reference is installed there, don't rely on finding it there again"). Signed-off-by: Al Viro --- net/9p/trans_fd.c | 112 ++++++++++++++++++++++-------------------------------- 1 file changed, 46 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 4dd873e3a1b..be1cb909d8c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -42,6 +42,8 @@ #include #include +#include /* killme */ + #define P9_PORT 564 #define MAX_SOCK_BUF (64*1024) #define MAXPOLLWADDR 2 @@ -788,24 +790,41 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) static int p9_socket_open(struct p9_client *client, struct socket *csocket) { - int fd, ret; + struct p9_trans_fd *p; + int ret, fd; + + p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); + if (!p) + return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; fd = sock_map_fd(csocket, 0); if (fd < 0) { P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); + sock_release(csocket); + kfree(p); return fd; } - ret = p9_fd_open(client, fd, fd); - if (ret < 0) { - P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n"); + get_file(csocket->file); + get_file(csocket->file); + p->wr = p->rd = csocket->file; + client->trans = p; + client->status = Connected; + + sys_close(fd); /* still racy */ + + p->rd->f_flags |= O_NONBLOCK; + + p->conn = p9_conn_create(client); + if (IS_ERR(p->conn)) { + ret = PTR_ERR(p->conn); + p->conn = NULL; + kfree(p); + sockfd_put(csocket); sockfd_put(csocket); return ret; } - - ((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK; - return 0; } @@ -883,7 +902,6 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; - struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ err = parse_opts(args, &opts); if (err < 0) @@ -897,12 +915,11 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - if (!csocket) { + if (err) { P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); - err = -EIO; - goto error; + return err; } err = csocket->ops->connect(csocket, @@ -912,30 +929,11 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem connecting socket to %s\n", addr); - goto error; - } - - err = p9_socket_open(client, csocket); - if (err < 0) - goto error; - - p = (struct p9_trans_fd *) client->trans; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - goto error; - } - - return 0; - -error: - if (csocket) sock_release(csocket); + return err; + } - kfree(p); - - return err; + return p9_socket_open(client, csocket); } static int @@ -944,49 +942,33 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) int err; struct socket *csocket; struct sockaddr_un sun_server; - struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ csocket = NULL; if (strlen(addr) > UNIX_PATH_MAX) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", addr); - err = -ENAMETOOLONG; - goto error; + return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + if (err < 0) { + P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); + return err; + } err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem connecting socket: %s: %d\n", addr, err); - goto error; - } - - err = p9_socket_open(client, csocket); - if (err < 0) - goto error; - - p = (struct p9_trans_fd *) client->trans; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - goto error; - } - - return 0; - -error: - if (csocket) sock_release(csocket); + return err; + } - kfree(p); - return err; + return p9_socket_open(client, csocket); } static int @@ -994,7 +976,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; struct p9_fd_opts opts; - struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */ + struct p9_trans_fd *p; parse_opts(args, &opts); @@ -1005,21 +987,19 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) - goto error; + return err; p = (struct p9_trans_fd *) client->trans; p->conn = p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; - goto error; + fput(p->rd); + fput(p->wr); + return err; } return 0; - -error: - kfree(p); - return err; } static struct p9_trans_module p9_tcp_trans = { -- cgit v1.2.3 From cc3808f8c354889982e7e323050f1e50ad99a009 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Aug 2009 09:43:59 +0400 Subject: switch sock_alloc_file() to alloc_file() Signed-off-by: Al Viro --- net/socket.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index dbb3802a764..eaaba3510e8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -366,16 +366,8 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - file = get_empty_filp(); - - if (unlikely(!file)) { - put_unused_fd(fd); - return -ENFILE; - } - dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); if (unlikely(!dentry)) { - put_filp(file); put_unused_fd(fd); return -ENOMEM; } @@ -388,11 +380,19 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) */ dentry->d_flags &= ~DCACHE_UNHASHED; d_instantiate(dentry, SOCK_INODE(sock)); + SOCK_INODE(sock)->i_fop = &socket_file_ops; - sock->file = file; - init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, + file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE, &socket_file_ops); - SOCK_INODE(sock)->i_fop = &socket_file_ops; + if (unlikely(!file)) { + /* drop dentry, keep inode */ + atomic_inc(&path.dentry->d_inode->i_count); + dput(dentry); + put_unused_fd(fd); + return -ENFILE; + } + + sock->file = file; file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_pos = 0; file->private_data = sock; -- cgit v1.2.3 From 2c48b9c45579a9b5e3e74694eebf3d2451f3dbd3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Aug 2009 00:52:35 +0400 Subject: switch alloc_file() to passing struct path ... and have the caller grab both mnt and dentry; kill leak in infiniband, while we are at it. Signed-off-by: Al Viro --- net/socket.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index eaaba3510e8..dbfdfa96d29 100644 --- a/net/socket.c +++ b/net/socket.c @@ -358,7 +358,7 @@ static const struct dentry_operations sockfs_dentry_operations = { static int sock_alloc_file(struct socket *sock, struct file **f, int flags) { struct qstr name = { .name = "" }; - struct dentry *dentry; + struct path path; struct file *file; int fd; @@ -366,28 +366,29 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!dentry)) { + path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; } + path.mnt = mntget(sock_mnt); - dentry->d_op = &sockfs_dentry_operations; + path.dentry->d_op = &sockfs_dentry_operations; /* * We dont want to push this dentry into global dentry hash table. * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED * This permits a working /proc/$pid/fd/XXX on sockets */ - dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(dentry, SOCK_INODE(sock)); + path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; - file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE, + file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); if (unlikely(!file)) { /* drop dentry, keep inode */ atomic_inc(&path.dentry->d_inode->i_count); - dput(dentry); + path_put(&path); put_unused_fd(fd); return -ENFILE; } -- cgit v1.2.3 From a3a065e3f13da8a3470ed09c7f38aad256083726 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Nov 2009 05:30:19 +0100 Subject: fs: no games with DCACHE_UNHASHED Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the fd link from being used if its dentry is not in the hash. Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear; that depends on the filesystem calling d_add or d_rehash. So delete the misleading comments and needless code. Acked-by: Miklos Szeredi Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- net/socket.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index dbfdfa96d29..769c386bd42 100644 --- a/net/socket.c +++ b/net/socket.c @@ -312,18 +312,6 @@ static struct file_system_type sock_fs_type = { .kill_sb = kill_anon_super, }; -static int sockfs_delete_dentry(struct dentry *dentry) -{ - /* - * At creation time, we pretended this dentry was hashed - * (by clearing DCACHE_UNHASHED bit in d_flags) - * At delete time, we restore the truth : not hashed. - * (so that dput() can proceed correctly) - */ - dentry->d_flags |= DCACHE_UNHASHED; - return 0; -} - /* * sockfs_dname() is called from d_path(). */ @@ -334,7 +322,6 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) } static const struct dentry_operations sockfs_dentry_operations = { - .d_delete = sockfs_delete_dentry, .d_dname = sockfs_dname, }; @@ -374,12 +361,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) path.mnt = mntget(sock_mnt); path.dentry->d_op = &sockfs_dentry_operations; - /* - * We dont want to push this dentry into global dentry hash table. - * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED - * This permits a working /proc/$pid/fd/XXX on sockets - */ - path.dentry->d_flags &= ~DCACHE_UNHASHED; d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; -- cgit v1.2.3 From 14ace024b1e16d2bb9445c8387494fbbd820a738 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Dec 2009 16:28:05 -0500 Subject: SUNRPC: Fix up an error return value in gss_import_sec_context_kerberos() If the context allocation fails, the function currently returns a random error code, since the variable 'p' still points to a valid memory location. Ensure that it returns ENOMEM... Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_mech.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index ef45eba2248..2deb0ed72ff 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -131,8 +131,10 @@ gss_import_sec_context_kerberos(const void *p, struct krb5_ctx *ctx; int tmp; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) { + p = ERR_PTR(-ENOMEM); goto out_err; + } p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); if (IS_ERR(p)) -- cgit v1.2.3 From b891e4a05ef6beac85465295a032431577c66b16 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Dec 2009 16:28:12 -0500 Subject: SUNRPC: Fix the return value in gss_import_sec_context() If the context allocation fails, it will return GSS_S_FAILURE, which is neither a valid error code, nor is it even negative. Return ENOMEM instead... Reported-by: Jeff Layton Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_mech_switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 6efbb0cd3c7..76e4c6f4ac3 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -252,7 +252,7 @@ gss_import_sec_context(const void *input_token, size_t bufsize, struct gss_ctx **ctx_id) { if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL))) - return GSS_S_FAILURE; + return -ENOMEM; (*ctx_id)->mech_type = gss_mech_get(mech); return mech->gm_ops -- cgit v1.2.3 From 486bad2e40e938cd68fd853b7a9fa3115a9d3a4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 18 Dec 2009 16:28:20 -0500 Subject: sunrpc: on successful gss error pipe write, don't return error When handling the gssd downcall, the kernel should distinguish between a successful downcall that contains an error code and a failed downcall (i.e. where the parsing failed or some other sort of problem occurred). In the former case, gss_pipe_downcall should be returning the number of bytes written to the pipe instead of an error. In the event of other errors, we generally want the initiating task to retry the upcall so we set msg.errno to -EAGAIN. An unexpected error code here is a bug however, so BUG() in that case. Signed-off-by: Jeff Layton Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3c3c50f38a1..66cb89c1834 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -644,7 +644,22 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); - gss_msg->msg.errno = (err == -EAGAIN) ? -EAGAIN : -EACCES; + switch (err) { + case -EACCES: + gss_msg->msg.errno = err; + err = mlen; + break; + case -EFAULT: + case -ENOMEM: + case -EINVAL: + case -ENOSYS: + gss_msg->msg.errno = -EAGAIN; + break; + default: + printk(KERN_CRIT "%s: bad return from " + "gss_fill_context: %ld\n", __func__, err); + BUG(); + } goto err_release_msg; } gss_msg->ctx = gss_get_ctx(ctx); -- cgit v1.2.3 From 45465487897a1c6d508b14b904dc5777f7ec7e04 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:26 -0800 Subject: kfifo: move struct kfifo in place This is a new generic kernel FIFO implementation. The current kernel fifo API is not very widely used, because it has to many constrains. Only 17 files in the current 2.6.31-rc5 used it. FIFO's are like list's a very basic thing and a kfifo API which handles the most use case would save a lot of development time and memory resources. I think this are the reasons why kfifo is not in use: - The API is to simple, important functions are missing - A fifo can be only allocated dynamically - There is a requirement of a spinlock whether you need it or not - There is no support for data records inside a fifo So I decided to extend the kfifo in a more generic way without blowing up the API to much. The new API has the following benefits: - Generic usage: For kernel internal use and/or device driver. - Provide an API for the most use case. - Slim API: The whole API provides 25 functions. - Linux style habit. - DECLARE_KFIFO, DEFINE_KFIFO and INIT_KFIFO Macros - Direct copy_to_user from the fifo and copy_from_user into the fifo. - The kfifo itself is an in place member of the using data structure, this save an indirection access and does not waste the kernel allocator. - Lockless access: if only one reader and one writer is active on the fifo, which is the common use case, no additional locking is necessary. - Remove spinlock - give the user the freedom of choice what kind of locking to use if one is required. - Ability to handle records. Three type of records are supported: - Variable length records between 0-255 bytes, with a record size field of 1 bytes. - Variable length records between 0-65535 bytes, with a record size field of 2 bytes. - Fixed size records, which no record size field. - Preserve memory resource. - Performance! - Easy to use! This patch: Since most users want to have the kfifo as part of another object, reorganize the code to allow including struct kfifo in another data structure. This requires changing the kfifo_alloc and kfifo_init prototypes so that we pass an existing kfifo pointer into them. This patch changes the implementation and all existing users. [akpm@linux-foundation.org: fix warning] Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index dc328425fa2..6230ceb0823 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -43,7 +43,7 @@ static int bufsize = 64 * 1024; static const char procname[] = "dccpprobe"; static struct { - struct kfifo *fifo; + struct kfifo fifo; spinlock_t lock; wait_queue_head_t wait; struct timespec tstart; @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(dccpw.fifo, tbuf, len); + kfifo_put(&dccpw.fifo, tbuf, len); wake_up(&dccpw.wait); } @@ -109,7 +109,7 @@ static struct jprobe dccp_send_probe = { static int dccpprobe_open(struct inode *inode, struct file *file) { - kfifo_reset(dccpw.fifo); + kfifo_reset(&dccpw.fifo); getnstimeofday(&dccpw.tstart); return 0; } @@ -131,11 +131,11 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(dccpw.fifo) != 0); + __kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; - cnt = kfifo_get(dccpw.fifo, tbuf, len); + cnt = kfifo_get(&dccpw.fifo, tbuf, len); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: @@ -156,10 +156,8 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); - if (IS_ERR(dccpw.fifo)) - return PTR_ERR(dccpw.fifo); - + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; @@ -172,14 +170,14 @@ static __init int dccpprobe_init(void) err1: proc_net_remove(&init_net, procname); err0: - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); return ret; } module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); -- cgit v1.2.3 From c1e13f25674ed564948ecb7dfe5f83e578892896 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:27 -0800 Subject: kfifo: move out spinlock Move the pointer to the spinlock out of struct kfifo. Most users in tree do not actually use a spinlock, so the few exceptions now have to call kfifo_{get,put}_locked, which takes an extra argument to a spinlock. Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 6230ceb0823..c6b50351aa7 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(&dccpw.fifo, tbuf, len); + kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get(&dccpw.fifo, tbuf, len); + cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: @@ -156,7 +156,7 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; -- cgit v1.2.3 From e64c026dd09b73faf20707711402fc5ed55a8e70 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: cleanup namespace change name of __kfifo_* functions to kfifo_*, because the prefix __kfifo should be reserved for internal functions only. Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index c6b50351aa7..9ef36849edd 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -131,7 +131,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(&dccpw.fifo) != 0); + kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; -- cgit v1.2.3 From 7acd72eb85f1c7a15e8b5eb554994949241737f1 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: rename kfifo_put... into kfifo_in... and kfifo_get... into kfifo_out... rename kfifo_put... into kfifo_in... to prevent miss use of old non in kernel-tree drivers ditto for kfifo_get... -> kfifo_out... Improve the prototypes of kfifo_in and kfifo_out to make the kerneldoc annotations more readable. Add mini "howto porting to the new API" in kfifo.h Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 9ef36849edd..a1362dc8abb 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: -- cgit v1.2.3 From 6c8530993e1fdf1d6af0403e796fe14d80b4b097 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Jan 2010 17:26:27 -0500 Subject: sunrpc: fix build-time warning Fix auth_gss printk format warning: net/sunrpc/auth_gss/auth_gss.c:660: warning: format '%ld' expects type 'long int', but argument 3 has type 'ssize_t' Signed-off-by: Randy Dunlap Acked-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 66cb89c1834..f7a7f8380e3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -657,7 +657,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) break; default: printk(KERN_CRIT "%s: bad return from " - "gss_fill_context: %ld\n", __func__, err); + "gss_fill_context: %zd\n", __func__, err); BUG(); } goto err_release_msg; -- cgit v1.2.3 From b292cf9ce70d221c3f04ff62db5ab13d9a249ca8 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 31 Dec 2009 10:52:36 +0800 Subject: sunrpc: fix peername failed on closed listener There're some warnings of "nfsd: peername failed (err 107)!" socket error -107 means Transport endpoint is not connected. This warning message was outputed by svc_tcp_accept() [net/sunrpc/svcsock.c], when kernel_getpeername returns -107. This means socket might be CLOSED. And svc_tcp_accept was called by svc_recv() [net/sunrpc/svc_xprt.c] if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { newxpt = xprt->xpt_ops->xpo_accept(xprt); So this might happen when xprt->xpt_flags has both XPT_LISTENER and XPT_CLOSE. Let's take a look at commit b0401d72, this commit has moved the close processing after do recvfrom method, but this commit also introduces this warnings, if the xpt_flags has both XPT_LISTENER and XPT_CLOSED, we should close it, not accpet then close. Signed-off-by: Xiaotian Feng Cc: J. Bruce Fields Cc: Neil Brown Cc: Trond Myklebust Cc: David S. Miller Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2c58b75a236..810ffe8a636 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -699,7 +699,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + if (test_bit(XPT_LISTENER, &xprt->xpt_flags) && + !test_bit(XPT_CLOSE, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { -- cgit v1.2.3