From 6eb7ae1223f75fe19de8e75df80ac78ab6b7c39d Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 18 Aug 2017 14:40:53 +0200 Subject: ipv6: accept 64k - 1 packet length in ip6_find_1stfragopt() [ Upstream commit 3de33e1ba0506723ab25734e098cf280ecc34756 ] A packet length of exactly IPV6_MAXPLEN is allowed, we should refuse parsing options only if the size is 64KiB or more. While at it, remove one extra variable and one assignment which were also introduced by the commit that introduced the size check. Checking the sum 'offset + len' and only later adding 'len' to 'offset' doesn't provide any advantage over directly summing to 'offset' and checking it. Fixes: 6399f1fae4ec ("ipv6: avoid overflow of offset in ip6_find_1stfragopt") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/output_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index f9f02581c4ca..f99a04674419 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -86,7 +86,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; - unsigned int len; switch (**nexthdr) { @@ -112,10 +111,9 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); - len = ipv6_optlen(exthdr); - if (len + offset >= IPV6_MAXPLEN) + offset += ipv6_optlen(exthdr); + if (offset > IPV6_MAXPLEN) return -EINVAL; - offset += len; *nexthdr = &exthdr->nexthdr; } -- cgit v1.2.3 From e51bf99be7cc95db97d9fa9031ab09ea037a5c7a Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 21 Aug 2017 09:47:10 -0700 Subject: ipv6: add rcu grace period before freeing fib6_node [ Upstream commit c5cff8561d2d0006e972bd114afd51f082fee77c ] We currently keep rt->rt6i_node pointing to the fib6_node for the route. And some functions make use of this pointer to dereference the fib6_node from rt structure, e.g. rt6_check(). However, as there is neither refcount nor rcu taken when dereferencing rt->rt6i_node, it could potentially cause crashes as rt->rt6i_node could be set to NULL by other CPUs when doing a route deletion. This patch introduces an rcu grace period before freeing fib6_node and makes sure the functions that dereference it takes rcu_read_lock(). Note: there is no "Fixes" tag because this bug was there in a very early stage. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_fib.h | 30 +++++++++++++++++++++++++++++- net/ipv6/ip6_fib.c | 20 ++++++++++++++++---- net/ipv6/route.c | 14 +++++++++++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index fb961a576abe..f57321248be2 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -68,6 +68,7 @@ struct fib6_node { __u16 fn_flags; int fn_sernum; struct rt6_info *rr_ptr; + struct rcu_head rcu; }; #ifndef CONFIG_IPV6_SUBTREES @@ -165,13 +166,40 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } +/* Function to safely get fn->sernum for passed in rt + * and store result in passed in cookie. + * Return true if we can get cookie safely + * Return false if not + */ +static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, + u32 *cookie) +{ + struct fib6_node *fn; + bool status = false; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + + if (fn) { + *cookie = fn->fn_sernum; + status = true; + } + + rcu_read_unlock(); + return status; +} + static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + u32 cookie = 0; + if (rt->rt6i_flags & RTF_PCPU || (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); - return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + rt6_get_cookie_safe(rt, &cookie); + + return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index aad8cdf15472..e766b5e3c61c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -150,11 +150,23 @@ static struct fib6_node *node_alloc(void) return fn; } -static void node_free(struct fib6_node *fn) +static void node_free_immediate(struct fib6_node *fn) +{ + kmem_cache_free(fib6_node_kmem, fn); +} + +static void node_free_rcu(struct rcu_head *head) { + struct fib6_node *fn = container_of(head, struct fib6_node, rcu); + kmem_cache_free(fib6_node_kmem, fn); } +static void node_free(struct fib6_node *fn) +{ + call_rcu(&fn->rcu, node_free_rcu); +} + static void rt6_rcu_free(struct rt6_info *rt) { call_rcu(&rt->dst.rcu_head, dst_rcu_free); @@ -588,9 +600,9 @@ insert_above: if (!in || !ln) { if (in) - node_free(in); + node_free_immediate(in); if (ln) - node_free(ln); + node_free_immediate(ln); return ERR_PTR(-ENOMEM); } @@ -1015,7 +1027,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, root, and then (in failure) stale node in main tree. */ - node_free(sfn); + node_free_immediate(sfn); err = PTR_ERR(sn); goto failure; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ef335070e98a..0e91fbf67200 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1248,7 +1248,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt) static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) { - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + u32 rt_cookie; + + if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -1316,8 +1318,14 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt->rt6i_flags & RTF_CACHE) { dst_hold(&rt->dst); ip6_del_rt(rt); - } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { - rt->rt6i_node->fn_sernum = -1; + } else { + struct fib6_node *fn; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + if (fn && (rt->rt6i_flags & RTF_DEFAULT)) + fn->fn_sernum = -1; + rcu_read_unlock(); } } } -- cgit v1.2.3 From 354d36b746c3fdde7397409ce79ca89a2da2fbce Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 25 Aug 2017 15:03:10 -0700 Subject: ipv6: fix sparse warning on rt6i_node [ Upstream commit 4e587ea71bf924f7dac621f1351653bd41e446cb ] Commit c5cff8561d2d adds rcu grace period before freeing fib6_node. This generates a new sparse warning on rt->rt6i_node related code: net/ipv6/route.c:1394:30: error: incompatible types in comparison expression (different address spaces) ./include/net/ip6_fib.h:187:14: error: incompatible types in comparison expression (different address spaces) This commit adds "__rcu" tag for rt6i_node and makes sure corresponding rcu API is used for it. After this fix, sparse no longer generates the above warning. Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_fib.h | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 11 +++++++---- net/ipv6/route.c | 3 ++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index f57321248be2..fa5e703a14ed 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -103,7 +103,7 @@ struct rt6_info { * the same cache line. */ struct fib6_table *rt6i_table; - struct fib6_node *rt6i_node; + struct fib6_node __rcu *rt6i_node; struct in6_addr rt6i_gateway; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 735b22b1b4ea..92174881844d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5152,7 +5152,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * our DAD process, so we don't need * to do it again */ - if (!(ifp->rt->rt6i_node)) + if (!rcu_access_pointer(ifp->rt->rt6i_node)) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e766b5e3c61c..c14f6038a061 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -869,7 +869,7 @@ add: rt->dst.rt6_next = iter; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info, 0); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; @@ -894,7 +894,7 @@ add: return err; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); @@ -1454,8 +1454,9 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, int fib6_del(struct rt6_info *rt, struct nl_info *info) { + struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); struct net *net = info->nl_net; - struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; #if RT6_DEBUG >= 2 @@ -1644,7 +1645,9 @@ static int fib6_clean_node(struct fib6_walker *w) if (res) { #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", - __func__, rt, rt->rt6i_node, res); + __func__, rt, + rcu_access_pointer(rt->rt6i_node), + res); #endif continue; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0e91fbf67200..48917437550e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1342,7 +1342,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); + (rt->rt6i_flags & RTF_PCPU || + rcu_access_pointer(rt->rt6i_node)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, -- cgit v1.2.3 From 6d8c8fd1c4c71fac291e7243becf2de11c82c216 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Aug 2017 15:59:49 +0200 Subject: qlge: avoid memcpy buffer overflow [ Upstream commit e58f95831e7468d25eb6e41f234842ecfe6f014f ] gcc-8.0.0 (snapshot) points out that we copy a variable-length string into a fixed length field using memcpy() with the destination length, and that ends up copying whatever follows the string: inlined from 'ql_core_dump' at drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:1106:2: drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:708:2: error: 'memcpy' reading 15 bytes from a region of size 14 [-Werror=stringop-overflow=] memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); Changing it to use strncpy() will instead zero-pad the destination, which seems to be the right thing to do here. The bug is probably harmless, but it seems like a good idea to address it in stable kernels as well, if only for the purpose of building with gcc-8 without warnings. Fixes: a61f80261306 ("qlge: Add ethtool register dump function.") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index 829be21f97b2..be258d90de9e 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -724,7 +724,7 @@ static void ql_build_coredump_seg_header( seg_hdr->cookie = MPI_COREDUMP_COOKIE; seg_hdr->segNum = seg_number; seg_hdr->segSize = seg_size; - memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); + strncpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); } /* -- cgit v1.2.3 From 081be8c9efd6003e1aa78679b3265732de4cec9b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 30 Aug 2017 17:49:29 -0700 Subject: Revert "net: phy: Correctly process PHY_HALTED in phy_stop_machine()" [ Upstream commit ebc8254aeae34226d0bc8fda309fd9790d4dccfe ] This reverts commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ("net: phy: Correctly process PHY_HALTED in phy_stop_machine()") because it is creating the possibility for a NULL pointer dereference. David Daney provide the following call trace and diagram of events: When ndo_stop() is called we call: phy_disconnect() +---> phy_stop_interrupts() implies: phydev->irq = PHY_POLL; +---> phy_stop_machine() | +---> phy_state_machine() | +----> queue_delayed_work(): Work queued. +--->phy_detach() implies: phydev->attached_dev = NULL; Now at a later time the queued work does: phy_state_machine() +---->netif_carrier_off(phydev->attached_dev): Oh no! It is NULL: CPU 12 Unable to handle kernel paging request at virtual address 0000000000000048, epc == ffffffff80de37ec, ra == ffffffff80c7c Oops[#1]: CPU: 12 PID: 1502 Comm: kworker/12:1 Not tainted 4.9.43-Cavium-Octeon+ #1 Workqueue: events_power_efficient phy_state_machine task: 80000004021ed100 task.stack: 8000000409d70000 $ 0 : 0000000000000000 ffffffff84720060 0000000000000048 0000000000000004 $ 4 : 0000000000000000 0000000000000001 0000000000000004 0000000000000000 $ 8 : 0000000000000000 0000000000000000 00000000ffff98f3 0000000000000000 $12 : 8000000409d73fe0 0000000000009c00 ffffffff846547c8 000000000000af3b $16 : 80000004096bab68 80000004096babd0 0000000000000000 80000004096ba800 $20 : 0000000000000000 0000000000000000 ffffffff81090000 0000000000000008 $24 : 0000000000000061 ffffffff808637b0 $28 : 8000000409d70000 8000000409d73cf0 80000000271bd300 ffffffff80c7804c Hi : 000000000000002a Lo : 000000000000003f epc : ffffffff80de37ec netif_carrier_off+0xc/0x58 ra : ffffffff80c7804c phy_state_machine+0x48c/0x4f8 Status: 14009ce3 KX SX UX KERNEL EXL IE Cause : 00800008 (ExcCode 02) BadVA : 0000000000000048 PrId : 000d9501 (Cavium Octeon III) Modules linked in: Process kworker/12:1 (pid: 1502, threadinfo=8000000409d70000, task=80000004021ed100, tls=0000000000000000) Stack : 8000000409a54000 80000004096bab68 80000000271bd300 80000000271c1e00 0000000000000000 ffffffff808a1708 8000000409a54000 80000000271bd300 80000000271bd320 8000000409a54030 ffffffff80ff0f00 0000000000000001 ffffffff81090000 ffffffff808a1ac0 8000000402182080 ffffffff84650000 8000000402182080 ffffffff84650000 ffffffff80ff0000 8000000409a54000 ffffffff808a1970 0000000000000000 80000004099e8000 8000000402099240 0000000000000000 ffffffff808a8598 0000000000000000 8000000408eeeb00 8000000409a54000 00000000810a1d00 0000000000000000 8000000409d73de8 8000000409d73de8 0000000000000088 000000000c009c00 8000000409d73e08 8000000409d73e08 8000000402182080 ffffffff808a84d0 8000000402182080 ... Call Trace: [] netif_carrier_off+0xc/0x58 [] phy_state_machine+0x48c/0x4f8 [] process_one_work+0x158/0x368 [] worker_thread+0x150/0x4c0 [] kthread+0xc8/0xe0 [] ret_from_kernel_thread+0x14/0x1c The original motivation for this change originated from Marc Gonzales indicating that his network driver did not have its adjust_link callback executing with phydev->link = 0 while he was expecting it. PHYLIB has never made any such guarantees ever because phy_stop() merely just tells the workqueue to move into PHY_HALTED state which will happen asynchronously. Reported-by: Geert Uytterhoeven Reported-by: David Daney Fixes: 7ad813f20853 ("net: phy: Correctly process PHY_HALTED in phy_stop_machine()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 49d9f0a789fe..7d0690433ee0 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -541,9 +541,6 @@ void phy_stop_machine(struct phy_device *phydev) if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); - - /* Now we can run the state machine synchronously */ - phy_state_machine(&phydev->state_queue.work); } /** -- cgit v1.2.3 From 611a98c8eca3098173309642df187056c17e0f65 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 18 May 2017 11:22:33 -0700 Subject: tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0 [ Upstream commit 499350a5a6e7512d9ed369ed63a4244b6536f4f8 ] When tcp_disconnect() is called, inet_csk_delack_init() sets icsk->icsk_ack.rcv_mss to 0. This could potentially cause tcp_recvmsg() => tcp_cleanup_rbuf() => __tcp_select_window() call path to have division by 0 issue. So this patch initializes rcv_mss to TCP_MIN_MSS instead of 0. Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0870a86e9d96..5597120c8ffd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2260,6 +2260,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); -- cgit v1.2.3 From 40bc5355e134af1d0ac05fe0dcb0aa55f9144bb4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:08 +0200 Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting" [ Upstream commit fb452a1aa3fd4034d7999e309c5466ff2d7005aa ] This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2. There is a bug in fragmentation codes use of the percpu_counter API, that can cause issues on systems with many CPUs. The frag_mem_limit() just reads the global counter (fbc->count), without considering other CPUs can have upto batch size (130K) that haven't been subtracted yet. Due to the 3MBytes lower thresh limit, this become dangerous at >=24 CPUs (3*1024*1024/130000=24). The correct API usage would be to use __percpu_counter_compare() which does the right thing, and takes into account the number of (online) CPUs and batch size, to account for this and call __percpu_counter_sum() when needed. We choose to revert the use of the lib/percpu_counter API for frag memory accounting for several reasons: 1) On systems with CPUs > 24, the heavier fully locked __percpu_counter_sum() is always invoked, which will be more expensive than the atomic_t that is reverted to. Given systems with more than 24 CPUs are becoming common this doesn't seem like a good option. To mitigate this, the batch size could be decreased and thresh be increased. 2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX CPU, before SKBs are pushed into sockets on remote CPUs. Given NICs can only hash on L2 part of the IP-header, the NIC-RXq's will likely be limited. Thus, a fair chance that atomic add+dec happen on the same CPU. Revert note that commit 1d6119baf061 ("net: fix percpu memory leaks") removed init_frag_mem_limit() and instead use inet_frags_init_net(). After this revert, inet_frags_uninit_net() becomes empty. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 36 +++++++++--------------------------- net/ipv4/inet_fragment.c | 4 +--- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index ac42bbb37b2d..0cc1e287e683 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,14 +1,9 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ -#include - struct netns_frags { - /* The percpu_counter "mem" need to be cacheline aligned. - * mem.count must not share cacheline with other writers - */ - struct percpu_counter mem ____cacheline_aligned_in_smp; - + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ int timeout; int high_thresh; @@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { - return percpu_counter_init(&nf->mem, 0, GFP_KERNEL); + atomic_set(&nf->mem, 0); + return 0; } static inline void inet_frags_uninit_net(struct netns_frags *nf) { - percpu_counter_destroy(&nf->mem); } void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); @@ -140,37 +135,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -/* The default percpu_counter batch size is not big enough to scale to - * fragmentation mem acct sizes. - * The mem size of a 64K fragment is approx: - * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes - */ -static unsigned int frag_percpu_counter_batch = 130000; - static inline int frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_read(&nf->mem); + return atomic_read(&nf->mem); } static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); + atomic_sub(i, &nf->mem); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); + atomic_add(i, &nf->mem); } -static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) +static inline int sum_frag_mem_limit(struct netns_frags *nf) { - unsigned int res; - - local_bh_disable(); - res = percpu_counter_sum_positive(&nf->mem); - local_bh_enable(); - - return res; + return atomic_read(&nf->mem); } /* RFC 3168 support : diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index fe144dae7372..c5fb2f694ed0 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -234,10 +234,8 @@ evict_again: cond_resched(); if (read_seqretry(&f->rnd_seqlock, seq) || - percpu_counter_sum(&nf->mem)) + sum_frag_mem_limit(nf)) goto evict_again; - - percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); -- cgit v1.2.3 From 5f529e0d78447e03a3acf125883a3f7826817c01 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:13 +0200 Subject: Revert "net: fix percpu memory leaks" [ Upstream commit 5a63643e583b6a9789d7a225ae076fb4e603991c ] This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb. After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") then here is no need for this fix-up patch. As percpu_counter is no longer used, it cannot memory leak it any-longer. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 7 +------ net/ieee802154/6lowpan/reassembly.c | 11 +++-------- net/ipv4/ip_fragment.c | 12 +++--------- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++--------- net/ipv6/reassembly.c | 12 +++--------- 5 files changed, 13 insertions(+), 41 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0cc1e287e683..c26a6e4dc306 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -103,15 +103,10 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) +static inline void inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); - return 0; } -static inline void inet_frags_uninit_net(struct netns_frags *nf) -{ -} - void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 6b437e8760d3..12e8cf4bda9f 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&ieee802154_lowpan->frags); - if (res) - return res; - res = lowpan_frags_ns_sysctl_register(net); - if (res) - inet_frags_uninit_net(&ieee802154_lowpan->frags); - return res; + inet_frags_init_net(&ieee802154_lowpan->frags); + + return lowpan_frags_ns_sysctl_register(net); } static void __net_exit lowpan_frags_exit_net(struct net *net) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b8a0607dab96..e2e162432aa3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -840,8 +840,6 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { - int res; - /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -865,13 +863,9 @@ static int __net_init ipv4_frags_init_net(struct net *net) */ net->ipv4.frags.timeout = IP_FRAG_TIME; - res = inet_frags_init_net(&net->ipv4.frags); - if (res) - return res; - res = ip4_frags_ns_ctl_register(net); - if (res) - inet_frags_uninit_net(&net->ipv4.frags); - return res; + inet_frags_init_net(&net->ipv4.frags); + + return ip4_frags_ns_ctl_register(net); } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bab4441ed4e4..eb2dc39f7066 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -649,18 +649,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); static int nf_ct_net_init(struct net *net) { - int res; - net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&net->nf_frag.frags); - if (res) - return res; - res = nf_ct_frag6_sysctl_register(net); - if (res) - inet_frags_uninit_net(&net->nf_frag.frags); - return res; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); } static void nf_ct_net_exit(struct net *net) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index a234552a7e3d..58f2139ebb5e 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -708,19 +708,13 @@ static void ip6_frags_sysctl_unregister(void) static int __net_init ipv6_frags_init_net(struct net *net) { - int res; - net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&net->ipv6.frags); - if (res) - return res; - res = ip6_frags_ns_sysctl_register(net); - if (res) - inet_frags_uninit_net(&net->ipv6.frags); - return res; + inet_frags_init_net(&net->ipv6.frags); + + return ip6_frags_ns_sysctl_register(net); } static void __net_exit ipv6_frags_exit_net(struct net *net) -- cgit v1.2.3 From 9b5e5d8a0045ca7c5dd195cba803eac6de6f589f Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Mon, 4 Sep 2017 10:45:28 +0300 Subject: gianfar: Fix Tx flow control deactivation [ Upstream commit 5d621672bc1a1e5090c1ac5432a18c79e0e13e03 ] The wrong register is checked for the Tx flow control bit, it should have been maccfg1 not maccfg2. This went unnoticed for so long probably because the impact is hardly visible, not to mention the tangled code from adjust_link(). First, link flow control (i.e. handling of Rx/Tx link level pause frames) is disabled by default (needs to be enabled via 'ethtool -A'). Secondly, maccfg2 always returns 0 for tx_flow_oldval (except for a few old boards), which results in Tx flow control remaining always on once activated. Fixes: 45b679c9a3ccd9e34f28e6ec677b812a860eb8eb ("gianfar: Implement PAUSE frame generation support") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/gianfar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4cd2a7d0124f..7923bfdc9b30 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -3676,7 +3676,7 @@ static noinline void gfar_update_link_state(struct gfar_private *priv) u32 tempval1 = gfar_read(®s->maccfg1); u32 tempval = gfar_read(®s->maccfg2); u32 ecntrl = gfar_read(®s->ecntrl); - u32 tx_flow_oldval = (tempval & MACCFG1_TX_FLOW); + u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW); if (phydev->duplex != priv->oldduplex) { if (!(phydev->duplex)) -- cgit v1.2.3 From 70479eafe3d974c60a71718530a46f8ad3ce9c3f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 8 Sep 2017 10:26:19 +0200 Subject: ipv6: fix memory leak with multiple tables during netns destruction [ Upstream commit ba1cc08d9488c94cb8d94f545305688b72a2a300 ] fib6_net_exit only frees the main and local tables. If another table was created with fib6_alloc_table, we leak it when the netns is destroyed. Fix this in the same way ip_fib_net_exit cleans up tables, by walking through the whole hashtable of fib6_table's. We can get rid of the special cases for local and main, since they're also part of the hashtable. Reproducer: ip netns add x ip -net x -6 rule add from 6003:1::/64 table 100 ip netns del x Reported-by: Jianlin Shi Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c14f6038a061..e03043dc1d78 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -203,6 +203,12 @@ static void rt6_release(struct rt6_info *rt) } } +static void fib6_free_table(struct fib6_table *table) +{ + inetpeer_invalidate_tree(&table->tb6_peers); + kfree(table); +} + static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -1885,15 +1891,22 @@ out_timer: static void fib6_net_exit(struct net *net) { + unsigned int i; + rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); - kfree(net->ipv6.fib6_local_tbl); -#endif - inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); - kfree(net->ipv6.fib6_main_tbl); + for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[i]; + struct hlist_node *tmp; + struct fib6_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) { + hlist_del(&tb->tb6_hlist); + fib6_free_table(tb); + } + } + kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); } -- cgit v1.2.3 From be9994817ad5717f64e07c19e5ec2f6b29aad4d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Sep 2017 15:48:47 -0700 Subject: ipv6: fix typo in fib6_net_exit() [ Upstream commit 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b ] IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ. Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e03043dc1d78..c23e02a7ccb0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1896,7 +1896,7 @@ static void fib6_net_exit(struct net *net) rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); - for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { struct hlist_head *head = &net->ipv6.fib_table_hash[i]; struct hlist_node *tmp; struct fib6_table *tb; -- cgit v1.2.3 From 53e5f7b8d41bb4af0666fe1e7887c13754b10094 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 12 Aug 2017 21:33:23 -0700 Subject: f2fs: check hot_data for roll-forward recovery commit 125c9fb1ccb53eb2ea9380df40f3c743f3fb2fed upstream. We need to check HOT_DATA to truncate any previous data block when doing roll-forward recovery. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cbf74f47cce8..e32f349f341b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -276,7 +276,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, return 0; /* Get the previous summary */ - for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); if (curseg->segno == segno) { sum = curseg->sum_blk->entries[blkoff]; -- cgit v1.2.3 From e21d66048d4db2206c12344af07a934fd68418e4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 1 Aug 2017 07:11:35 -0700 Subject: x86/fsgsbase/64: Report FSBASE and GSBASE correctly in core dumps commit 9584d98bed7a7a904d0702ad06bbcc94703cb5b4 upstream. In ELF_COPY_CORE_REGS, we're copying from the current task, so accessing thread.fsbase and thread.gsbase makes no sense. Just read the values from the CPU registers. In practice, the old code would have been correct most of the time simply because thread.fsbase and thread.gsbase usually matched the CPU registers. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chang Seok Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/elf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index bcd3d6199464..bb16a58cf7e4 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -204,6 +204,7 @@ void set_personality_ia32(bool); #define ELF_CORE_COPY_REGS(pr_reg, regs) \ do { \ + unsigned long base; \ unsigned v; \ (pr_reg)[0] = (regs)->r15; \ (pr_reg)[1] = (regs)->r14; \ @@ -226,8 +227,8 @@ do { \ (pr_reg)[18] = (regs)->flags; \ (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ - (pr_reg)[21] = current->thread.fs; \ - (pr_reg)[22] = current->thread.gs; \ + rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \ + rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \ asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ -- cgit v1.2.3 From d5c59ee8482042a0c63fa033c043989d00582525 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 24 Aug 2017 09:53:59 -0700 Subject: md/raid5: release/flush io in raid5_do_work() commit 9c72a18e46ebe0f09484cce8ebf847abdab58498 upstream. In raid5, there are scenarios where some ios are deferred to a later time, and some IO need a flush to complete. To make sure we make progress with these IOs, we need to call the following functions: flush_deferred_bios(conf); r5l_flush_stripe_to_raid(conf->log); Both of these functions are called in raid5d(), but missing in raid5_do_work(). As a result, these functions are not called when multi-threading (group_thread_cnt > 0) is enabled. This patch adds calls to these function to raid5_do_work(). Note for stable branches: r5l_flush_stripe_to_raid(conf->log) is need for 4.4+ flush_deferred_bios(conf) is only needed for 4.11+ Signed-off-by: Song Liu Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8f60520c8392..5eac08ffc697 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5822,6 +5822,8 @@ static void raid5_do_work(struct work_struct *work) spin_unlock_irq(&conf->device_lock); + r5l_flush_stripe_to_raid(conf->log); + async_tx_issue_pending_all(); blk_finish_plug(&plug); -- cgit v1.2.3 From 6ea627b20205fcf7e8191b28f9207c97a69bf58f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 29 Oct 2016 18:19:03 -0400 Subject: nfsd: Fix general protection fault in release_lock_stateid() commit f46c445b79906a9da55c13e0a6f6b6a006b892fe upstream. When I push NFSv4.1 / RDMA hard, (xfstests generic/089, for example), I get this crash on the server: Oct 28 22:04:30 klimt kernel: general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC Oct 28 22:04:30 klimt kernel: Modules linked in: cts rpcsec_gss_krb5 iTCO_wdt iTCO_vendor_support sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm btrfs irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd xor pcspkr raid6_pq i2c_i801 i2c_smbus lpc_ich mfd_core sg mei_me mei ioatdma shpchp wmi ipmi_si ipmi_msghandler rpcrdma ib_ipoib rdma_ucm acpi_power_meter acpi_pad ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c mlx4_ib mlx4_en ib_core sr_mod cdrom sd_mod ast drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel igb ahci libahci ptp mlx4_core pps_core dca libata i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod Oct 28 22:04:30 klimt kernel: CPU: 7 PID: 1558 Comm: nfsd Not tainted 4.9.0-rc2-00005-g82cd754 #8 Oct 28 22:04:30 klimt kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015 Oct 28 22:04:30 klimt kernel: task: ffff880835c3a100 task.stack: ffff8808420d8000 Oct 28 22:04:30 klimt kernel: RIP: 0010:[] [] release_lock_stateid+0x1f/0x60 [nfsd] Oct 28 22:04:30 klimt kernel: RSP: 0018:ffff8808420dbce0 EFLAGS: 00010246 Oct 28 22:04:30 klimt kernel: RAX: ffff88084e6660f0 RBX: ffff88084e667020 RCX: 0000000000000000 Oct 28 22:04:30 klimt kernel: RDX: 0000000000000007 RSI: 0000000000000000 RDI: ffff88084e667020 Oct 28 22:04:30 klimt kernel: RBP: ffff8808420dbcf8 R08: 0000000000000001 R09: 0000000000000000 Oct 28 22:04:30 klimt kernel: R10: ffff880835c3a100 R11: ffff880835c3aca8 R12: 6b6b6b6b6b6b6b6b Oct 28 22:04:30 klimt kernel: R13: ffff88084e6670d8 R14: ffff880835f546f0 R15: ffff880835f1c548 Oct 28 22:04:30 klimt kernel: FS: 0000000000000000(0000) GS:ffff88087bdc0000(0000) knlGS:0000000000000000 Oct 28 22:04:30 klimt kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Oct 28 22:04:30 klimt kernel: CR2: 00007ff020389000 CR3: 0000000001c06000 CR4: 00000000001406e0 Oct 28 22:04:30 klimt kernel: Stack: Oct 28 22:04:30 klimt kernel: ffff88084e667020 0000000000000000 ffff88084e6670d8 ffff8808420dbd20 Oct 28 22:04:30 klimt kernel: ffffffffa05ac80d ffff880835f54548 ffff88084e640008 ffff880835f545b0 Oct 28 22:04:30 klimt kernel: ffff8808420dbd70 ffffffffa059803d ffff880835f1c768 0000000000000870 Oct 28 22:04:30 klimt kernel: Call Trace: Oct 28 22:04:30 klimt kernel: [] nfsd4_free_stateid+0xfd/0x1b0 [nfsd] Oct 28 22:04:30 klimt kernel: [] nfsd4_proc_compound+0x40d/0x690 [nfsd] Oct 28 22:04:30 klimt kernel: [] nfsd_dispatch+0xd4/0x1d0 [nfsd] Oct 28 22:04:30 klimt kernel: [] svc_process_common+0x3d9/0x700 [sunrpc] Oct 28 22:04:30 klimt kernel: [] svc_process+0xf4/0x330 [sunrpc] Oct 28 22:04:30 klimt kernel: [] nfsd+0xfa/0x160 [nfsd] Oct 28 22:04:30 klimt kernel: [] ? nfsd_destroy+0x170/0x170 [nfsd] Oct 28 22:04:30 klimt kernel: [] kthread+0x10b/0x120 Oct 28 22:04:30 klimt kernel: [] ? kthread_stop+0x280/0x280 Oct 28 22:04:30 klimt kernel: [] ret_from_fork+0x2a/0x40 Oct 28 22:04:30 klimt kernel: Code: c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 48 8b 87 b0 00 00 00 48 89 fb 4c 8b a0 98 00 00 00 <49> 8b 44 24 20 48 8d b8 80 03 00 00 e8 10 66 1a e1 48 89 df e8 Oct 28 22:04:30 klimt kernel: RIP [] release_lock_stateid+0x1f/0x60 [nfsd] Oct 28 22:04:30 klimt kernel: RSP Oct 28 22:04:30 klimt kernel: ---[ end trace cf5d0b371973e167 ]--- Jeff Layton says: > Hm...now that I look though, this is a little suspicious: > > struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); > > I wonder if it's possible for the openstateid to have already been > destroyed at this point. > > We might be better off doing something like this to get the client pointer: > > stp->st_stid.sc_client; > > ...which should be more direct and less dependent on other stateids > staying valid. With the suggested change, I am no longer able to reproduce the above oops. v2: Fix unhash_lock_stateid() as well Fix-suggested-by: Jeff Layton Fixes: 42691398be08 ('nfsd: Fix race between FREE_STATEID and LOCK') Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Cc: Christian Theune Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c7f1ce41442a..9e5a6842346e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1145,9 +1145,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); - - lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); list_del_init(&stp->st_locks); nfs4_unhash_stid(&stp->st_stid); @@ -1156,12 +1154,12 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) static void release_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + struct nfs4_client *clp = stp->st_stid.sc_client; bool unhashed; - spin_lock(&oo->oo_owner.so_client->cl_lock); + spin_lock(&clp->cl_lock); unhashed = unhash_lock_stateid(stp); - spin_unlock(&oo->oo_owner.so_client->cl_lock); + spin_unlock(&clp->cl_lock); if (unhashed) nfs4_put_stid(&stp->st_stid); } -- cgit v1.2.3 From c576160ff3f31c1dd9536188520a41fdf8b5fc95 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 12 Dec 2016 16:42:08 -0800 Subject: mm: prevent double decrease of nr_reserved_highatomic commit 4855e4a7f29d6d10b0b9c84e189c770c9a94e91e upstream. There is race between page freeing and unreserved highatomic. CPU 0 CPU 1 free_hot_cold_page mt = get_pfnblock_migratetype set_pcppage_migratetype(page, mt) unreserve_highatomic_pageblock spin_lock_irqsave(&zone->lock) move_freepages_block set_pageblock_migratetype(page) spin_unlock_irqrestore(&zone->lock) free_pcppages_bulk __free_one_page(mt) <- mt is stale By above race, a page on CPU 0 could go non-highorderatomic free list since the pageblock's type is changed. By that, unreserve logic of highorderatomic can decrease reserved count on a same pageblock severak times and then it will make mismatch between nr_reserved_highatomic and the number of reserved pageblock. So, this patch verifies whether the pageblock is highatomic or not and decrease the count only if the pageblock is highatomic. Link: http://lkml.kernel.org/r/1476259429-18279-3-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Acked-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Joonsoo Kim Cc: Sangseok Lee Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Miles Chen Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 53286b2f5b1c..6b5421ae86c6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1748,13 +1748,25 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac) struct page, lru); /* - * It should never happen but changes to locking could - * inadvertently allow a per-cpu drain to add pages - * to MIGRATE_HIGHATOMIC while unreserving so be safe - * and watch for underflows. + * In page freeing path, migratetype change is racy so + * we can counter several free pages in a pageblock + * in this loop althoug we changed the pageblock type + * from highatomic to ac->migratetype. So we should + * adjust the count once. */ - zone->nr_reserved_highatomic -= min(pageblock_nr_pages, - zone->nr_reserved_highatomic); + if (get_pageblock_migratetype(page) == + MIGRATE_HIGHATOMIC) { + /* + * It should never happen but changes to + * locking could inadvertently allow a per-cpu + * drain to add pages to MIGRATE_HIGHATOMIC + * while unreserving so be safe and watch for + * underflows. + */ + zone->nr_reserved_highatomic -= min( + pageblock_nr_pages, + zone->nr_reserved_highatomic); + } /* * Convert to ac->migratetype and avoid the normal -- cgit v1.2.3 From e1e6620f042cd7a6b1846335c46ca7b9897bc823 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Jun 2017 23:10:41 +0200 Subject: tty: improve tty_insert_flip_char() fast path commit 979990c6284814617d8f2179d197f72ff62b5d85 upstream. kernelci.org reports a crazy stack usage for the VT code when CONFIG_KASAN is enabled: drivers/tty/vt/keyboard.c: In function 'kbd_keycode': drivers/tty/vt/keyboard.c:1452:1: error: the frame size of 2240 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] The problem is that tty_insert_flip_char() gets inlined many times into kbd_keycode(), and also into other functions, and each copy requires 128 bytes for stack redzone to check for a possible out-of-bounds access on the 'ch' and 'flags' arguments that are passed into tty_insert_flip_string_flags as a variable-length string. This introduces a new __tty_insert_flip_char() function for the slow path, which receives the two arguments by value. This completely avoids the problem and the stack usage goes back down to around 100 bytes. Without KASAN, this is also slightly better, as we don't have to spill the arguments to the stack but can simply pass 'ch' and 'flag' in registers, saving a few bytes in .text for each call site. This should be backported to linux-4.0 or later, which first introduced the stack sanitizer in the kernel. Fixes: c420f167db8c ("kasan: enable stack instrumentation") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 24 ++++++++++++++++++++++++ include/linux/tty_flip.h | 3 ++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index fb31eecb708d..c478da54fd45 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -361,6 +361,30 @@ int tty_insert_flip_string_flags(struct tty_port *port, } EXPORT_SYMBOL(tty_insert_flip_string_flags); +/** + * __tty_insert_flip_char - Add one character to the tty buffer + * @port: tty port + * @ch: character + * @flag: flag byte + * + * Queue a single byte to the tty buffering, with an optional flag. + * This is the slow path of tty_insert_flip_char. + */ +int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) +{ + struct tty_buffer *tb = port->buf.tail; + int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0; + + if (!tty_buffer_request_room(port, 1)) + return 0; + + *flag_buf_ptr(tb, tb->used) = flag; + *char_buf_ptr(tb, tb->used++) = ch; + + return 1; +} +EXPORT_SYMBOL(__tty_insert_flip_char); + /** * tty_schedule_flip - push characters to ldisc * @port: tty port to push from diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index c28dd523f96e..d43837f2ce3a 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -12,6 +12,7 @@ extern int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size); extern void tty_flip_buffer_push(struct tty_port *port); void tty_schedule_flip(struct tty_port *port); +int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) @@ -26,7 +27,7 @@ static inline int tty_insert_flip_char(struct tty_port *port, *char_buf_ptr(tb, tb->used++) = ch; return 1; } - return tty_insert_flip_string_flags(port, &ch, &flag, 1); + return __tty_insert_flip_char(port, ch, flag); } static inline int tty_insert_flip_string(struct tty_port *port, -- cgit v1.2.3 From 077933dcd5cabd45e82aceab45dec772ebecbd09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Jun 2017 23:10:42 +0200 Subject: tty: improve tty_insert_flip_char() slow path commit 065ea0a7afd64d6cf3464bdd1d8cd227527e2045 upstream. While working on improving the fast path of tty_insert_flip_char(), I noticed that by calling tty_buffer_request_room(), we needlessly move to the separate flag buffer mode for the tty, even when all characters use TTY_NORMAL as the flag. This changes the code to call __tty_buffer_request_room() with the correct flag, which will then allocate a regular buffer when it rounds out of space but no special flags have been used. I'm guessing that this is the behavior that Peter Hurley intended when he introduced the compacted flip buffers. Fixes: acc0f67f307f ("tty: Halve flip buffer GFP_ATOMIC memory consumption") Cc: Peter Hurley Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index c478da54fd45..e4d0ef844d9d 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -375,10 +375,11 @@ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) struct tty_buffer *tb = port->buf.tail; int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0; - if (!tty_buffer_request_room(port, 1)) + if (!__tty_buffer_request_room(port, 1, flags)) return 0; - *flag_buf_ptr(tb, tb->used) = flag; + if (~tb->flags & TTYB_NORMAL) + *flag_buf_ptr(tb, tb->used) = flag; *char_buf_ptr(tb, tb->used++) = ch; return 1; -- cgit v1.2.3 From c13c5c7e88d79cae57ac25c6a3946cb17418ae3f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 2 Aug 2017 13:11:39 +0200 Subject: tty: fix __tty_insert_flip_char regression commit 8a5a90a2a477b86a3dc2eaa5a706db9bfdd647ca upstream. Sergey noticed a small but fatal mistake in __tty_insert_flip_char, leading to an oops in an interrupt handler when using any serial port. The problem is that I accidentally took the tty_buffer pointer before calling __tty_buffer_request_room(), which replaces the buffer. This moves the pointer lookup to the right place after allocating the new buffer space. Fixes: 979990c62848 ("tty: improve tty_insert_flip_char() fast path") Reported-by: Sergey Senozhatsky Tested-by: Sergey Senozhatsky Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index e4d0ef844d9d..8f3566cde3eb 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -372,12 +372,13 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags); */ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) { - struct tty_buffer *tb = port->buf.tail; + struct tty_buffer *tb; int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0; if (!__tty_buffer_request_room(port, 1, flags)) return 0; + tb = port->buf.tail; if (~tb->flags & TTYB_NORMAL) *flag_buf_ptr(tb, tb->used) = flag; *char_buf_ptr(tb, tb->used++) = ch; -- cgit v1.2.3 From bf592dde1262c7c3a65d879d09272902d3fa7c6b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 15 Sep 2017 09:36:16 -0700 Subject: Input: i8042 - add Gigabyte P57 to the keyboard reset table commit 697c5d8a36768b36729533fb44622b35d56d6ad0 upstream. Similar to other Gigabyte laptops, the touchpad on P57 requires a keyboard reset to detect Elantech touchpad correctly. BugLink: https://bugs.launchpad.net/bugs/1594214 Signed-off-by: Kai-Heng Feng Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 5be14ad29d46..dbf09836ff30 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -904,6 +904,13 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "P34"), }, }, + { + /* Gigabyte P57 - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "P57"), + }, + }, { /* Schenker XMG C504 - Elantech touchpad */ .matches = { -- cgit v1.2.3 From b6c818d813c66ac86818f0548cf27e1001c0e0c0 Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Thu, 27 Jul 2017 18:08:48 +0200 Subject: MIPS: math-emu: .: Fix quiet NaN propagation commit e78bf0dc4789bdea1453595ae89e8db65918e22e upstream. Fix the value returned by . fd,fs,ft, if both inputs are quiet NaNs. The . specifications state that the returned value in such cases should be the quiet NaN contained in register fs. A relevant example: MAX.S fd,fs,ft: If fs contains qNaN1, and ft contains qNaN2, fd is going to contain qNaN1 (without this patch, it used to contain qNaN2). Fixes: a79f5f9ba508 ("MIPS: math-emu: Add support for the MIPS R6 MAX{, A} FPU instruction") Fixes: 4e9561b20e2f ("MIPS: math-emu: Add support for the MIPS R6 MIN{, A} FPU instruction") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16880/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/dp_fmax.c | 32 ++++++++++++++++++++++++++++---- arch/mips/math-emu/dp_fmin.c | 32 ++++++++++++++++++++++++++++---- arch/mips/math-emu/sp_fmax.c | 32 ++++++++++++++++++++++++++++---- arch/mips/math-emu/sp_fmin.c | 32 ++++++++++++++++++++++++++++---- 4 files changed, 112 insertions(+), 16 deletions(-) diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index fd71b8daaaf2..41bd6ed852b9 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -147,14 +159,26 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index c1072b0dfb95..53fb8c904e32 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -147,14 +159,26 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 4d000844e48e..d0d73c3226dc 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -147,14 +159,26 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 4eb1bb9e9dec..011692e326af 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -147,14 +159,26 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): -- cgit v1.2.3 From 6acd1d26c32ea6e9d38f4839ba921cc3780bb205 Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Thu, 27 Jul 2017 18:08:49 +0200 Subject: MIPS: math-emu: .: Fix cases of both inputs zero commit 15560a58bfd4ff82cdd16b2270d4ef9b06d2cc4d upstream. Fix the value returned by ., if both inputs are zeros. The right behavior in such cases is stated in instruction reference manual and is as follows: fs ft MAX MIN MAXA MINA --------------------------------------------- 0 0 0 0 0 0 0 -0 0 -0 0 -0 -0 0 0 -0 0 -0 -0 -0 -0 -0 -0 -0 Prior to this patch, some of the above cases were yielding correct results. However, for the sake of code consistency, all such cases are rewritten in this patch. A relevant example: MAX.S fd,fs,ft: If fs contains +0.0, and ft contains -0.0, fd is going to contain +0.0 (without this patch, it used to contain -0.0). Fixes: a79f5f9ba508 ("MIPS: math-emu: Add support for the MIPS R6 MAX{, A} FPU instruction") Fixes: 4e9561b20e2f ("MIPS: math-emu: Add support for the MIPS R6 MIN{, A} FPU instruction") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16881/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/dp_fmax.c | 8 ++------ arch/mips/math-emu/dp_fmin.c | 8 ++------ arch/mips/math-emu/sp_fmax.c | 8 ++------ arch/mips/math-emu/sp_fmin.c | 8 ++------ 4 files changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 41bd6ed852b9..31f091a7819b 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -92,9 +92,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) return ys ? x : y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -204,9 +202,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index 53fb8c904e32..e607d55208ad 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -92,9 +92,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) return ys ? y : x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -204,9 +202,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index d0d73c3226dc..3ca5b204e9d0 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -92,9 +92,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) return ys ? x : y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -204,9 +202,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 011692e326af..c982647df39a 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -92,9 +92,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) return ys ? y : x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -204,9 +202,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; -- cgit v1.2.3 From a83ffb581f2675250d0127d686a918446975b3e2 Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Thu, 27 Jul 2017 18:08:50 +0200 Subject: MIPS: math-emu: .: Fix cases of both inputs negative commit aabf5cf02e22ebc4e541adf835910f388b6c3e65 upstream. Fix the value returned by ., if both inputs are negative normal fp numbers. The previous logic did not take into account that if both inputs have the same sign, there should be separate treatment of the cases when both inputs are negative and when both inputs are positive. A relevant example: MAX.S fd,fs,ft: If fs contains -5.0, and ft contains -7.0, fd is going to contain -5.0 (without this patch, it used to contain -7.0). Fixes: a79f5f9ba508 ("MIPS: math-emu: Add support for the MIPS R6 MAX{, A} FPU instruction") Fixes: 4e9561b20e2f ("MIPS: math-emu: Add support for the MIPS R6 MIN{, A} FPU instruction") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16882/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/dp_fmax.c | 32 ++++++++++++++++++++++++-------- arch/mips/math-emu/dp_fmin.c | 32 ++++++++++++++++++++++++-------- arch/mips/math-emu/sp_fmax.c | 32 ++++++++++++++++++++++++-------- arch/mips/math-emu/sp_fmin.c | 32 ++++++++++++++++++++++++-------- 4 files changed, 96 insertions(+), 32 deletions(-) diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 31f091a7819b..0b53c7861101 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -116,16 +116,32 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) else if (xs < ys) return x; - /* Compare exponent */ - if (xe > ye) - return x; - else if (xe < ye) - return y; + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return y; - return x; + return x; + return y; } union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index e607d55208ad..099e6bd55353 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -116,16 +116,32 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) else if (xs < ys) return y; - /* Compare exponent */ - if (xe > ye) - return y; - else if (xe < ye) - return x; + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return x; - return y; + return y; + return x; } union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 3ca5b204e9d0..7efa7729bd85 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -116,16 +116,32 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) else if (xs < ys) return x; - /* Compare exponent */ - if (xe > ye) - return x; - else if (xe < ye) - return y; + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return y; - return x; + return x; + return y; } union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index c982647df39a..e2c554359f7b 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -116,16 +116,32 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) else if (xs < ys) return y; - /* Compare exponent */ - if (xe > ye) - return y; - else if (xe < ye) - return x; + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return x; - return y; + return y; + return x; } union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) -- cgit v1.2.3 From 322bf697bdc4ed16c9ec89d0253c3a01023e51f4 Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Thu, 27 Jul 2017 18:08:51 +0200 Subject: MIPS: math-emu: .: Fix cases of input values with opposite signs commit 1a41b3b441508ae63b1a9ec699ec94065739eb60 upstream. Fix the value returned by ., if the inputs are normal fp numbers of the same absolute value, but opposite signs. A relevant example: MAXA.S fd,fs,ft: If fs contains -3.0, and ft contains +3.0, fd is going to contain +3.0 (without this patch, it used to contain -3.0). Fixes: a79f5f9ba508 ("MIPS: math-emu: Add support for the MIPS R6 MAX{, A} FPU instruction") Fixes: 4e9561b20e2f ("MIPS: math-emu: Add support for the MIPS R6 MIN{, A} FPU instruction") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16883/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/dp_fmax.c | 8 ++++++-- arch/mips/math-emu/dp_fmin.c | 6 +++++- arch/mips/math-emu/sp_fmax.c | 8 ++++++-- arch/mips/math-emu/sp_fmin.c | 6 +++++- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 0b53c7861101..81d12bfa6d5e 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -243,7 +243,11 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) return y; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) return y; - return x; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; } diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index 099e6bd55353..4574f04b44ce 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -243,7 +243,11 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) return x; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) return x; return y; } diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 7efa7729bd85..fb4149762101 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -243,7 +243,11 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) return y; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) return y; - return x; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; } diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index e2c554359f7b..7915b9430f68 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -243,7 +243,11 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) return x; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) return x; return y; } -- cgit v1.2.3 From f4d77fc754f2be1db6815cd8ccf26c9c0514a96f Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Thu, 27 Jul 2017 18:08:52 +0200 Subject: MIPS: math-emu: .: Fix cases of both infinite inputs commit 3444c4eb534c20e44f0d6670b34263efaf8b531f upstream. Fix the value returned by . fd,fs,ft, if both inputs are infinite. The previous implementation returned always the value contained in ft in such cases. The correct behavior is specified in Mips instruction set manual and is as follows: fs ft MAXA MINA --------------------------------- inf inf inf inf inf -inf inf -inf -inf inf inf -inf -inf -inf -inf -inf A relevant example: MAXA.S fd,fs,ft: If fs contains +inf, and ft contains -inf, fd is going to contain +inf (without this patch, it used to contain -inf). Fixes: a79f5f9ba508 ("MIPS: math-emu: Add support for the MIPS R6 MAX{, A} FPU instruction") Fixes: 4e9561b20e2f ("MIPS: math-emu: Add support for the MIPS R6 MIN{, A} FPU instruction") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16884/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/dp_fmax.c | 4 +++- arch/mips/math-emu/dp_fmin.c | 4 +++- arch/mips/math-emu/sp_fmax.c | 4 +++- arch/mips/math-emu/sp_fmin.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 81d12bfa6d5e..5bec64f2884e 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -202,6 +202,9 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs & ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -209,7 +212,6 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index 4574f04b44ce..2495bd7333f5 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -202,6 +202,9 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs | ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -209,7 +212,6 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index fb4149762101..74a5a00d2f22 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -202,6 +202,9 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs & ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -209,7 +212,6 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 7915b9430f68..42ec4315e66c 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -202,6 +202,9 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs | ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -209,7 +212,6 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): -- cgit v1.2.3 From 9354f4d0beb05f17a9c06315b8c00b25d6c97095 Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Thu, 27 Jul 2017 18:08:53 +0200 Subject: MIPS: math-emu: MINA.: Fix some cases of infinity and zero inputs commit 304bfe473e70523e591fb1c9223289d355e0bdcb upstream. Fix following special cases for MINA>.: - if one of the inputs is zero, and the other is subnormal, normal, or infinity, the value of the former should be returned (that is, a zero). - if one of the inputs is infinity, and the other input is normal, or subnormal, the value of the latter should be returned. The previous implementation's logic for such cases was incorrect - it appears as if it implements MAXA, and not MINA instruction. A relevant example: MINA.S fd,fs,ft: If fs contains 100.0, and ft contains 0.0, fd is going to contain 0.0 (without this patch, it used to contain 100.0). Fixes: a79f5f9ba508 ("MIPS: math-emu: Add support for the MIPS R6 MAX{, A} FPU instruction") Fixes: 4e9561b20e2f ("MIPS: math-emu: Add support for the MIPS R6 MIN{, A} FPU instruction") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16885/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/dp_fmin.c | 4 ++-- arch/mips/math-emu/sp_fmin.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index 2495bd7333f5..a287b23818d8 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -210,14 +210,14 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - return x; + return y; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - return y; + return x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): return ieee754dp_zero(xs | ys); diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 42ec4315e66c..c51385f46b09 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -210,14 +210,14 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - return x; + return y; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - return y; + return x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): return ieee754sp_zero(xs | ys); -- cgit v1.2.3 From 5e9d28b003b0312bc1c17994edb84bbb9a4a060a Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 21 Sep 2017 10:16:53 +0200 Subject: crypto: AF_ALG - remove SGL terminator indicator when chaining Fixed differently upstream as commit 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code") The SGL is MAX_SGL_ENTS + 1 in size. The last SG entry is used for the chaining and is properly updated with the sg_chain invocation. During the filling-in of the initial SG entries, sg_mark_end is called for each SG entry. This is appropriate as long as no additional SGL is chained with the current SGL. However, when a new SGL is chained and the last SG entry is updated with sg_chain, the last but one entry still contains the end marker from the sg_mark_end. This end marker must be removed as otherwise a walk of the chained SGLs will cause a NULL pointer dereference at the last but one SG entry, because sg_next will return NULL. The patch only applies to all kernels up to and including 4.13. The patch 2d97591ef43d0587be22ad1b0d758d6df4999a0b added to 4.14-rc1 introduced a complete new code base which addresses this bug in a different way. Yet, that patch is too invasive for stable kernels and was therefore not marked for stable. Fixes: 8ff590903d5fc ("crypto: algif_skcipher - User-space interface for skcipher operations") Signed-off-by: Stephan Mueller Acked-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algif_skcipher.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index b3b0004ea8ac..d12782dc9683 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -143,8 +143,10 @@ static int skcipher_alloc_sgl(struct sock *sk) sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); sgl->cur = 0; - if (sg) + if (sg) { sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); + sg_unmark_end(sg + (MAX_SGL_ENTS - 1)); + } list_add_tail(&sgl->list, &ctx->tsgl); } -- cgit v1.2.3 From cd46241eb03ca90f58553ad15e484c4f86d6fd64 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Thu, 24 Aug 2017 15:19:39 -0400 Subject: ext4: fix incorrect quotaoff if the quota feature is enabled commit b0a5a9589decd07db755d6a8d9c0910d96ff7992 upstream. Current ext4 quota should always "usage enabled" if the quota feautre is enabled. But in ext4_orphan_cleanup(), it turn quotas off directly (used for the older journaled quota), so we cannot turn it on again via "quotaon" unless umount and remount ext4. Simple reproduce: mkfs.ext4 -O project,quota /dev/vdb1 mount -o prjquota /dev/vdb1 /mnt chattr -p 123 /mnt chattr +P /mnt touch /mnt/aa /mnt/bb exec 100<>/mnt/aa rm -f /mnt/aa sync echo c > /proc/sysrq-trigger #reboot and mount mount -o prjquota /dev/vdb1 /mnt #query status quotaon -Ppv /dev/vdb1 #output quotaon: Cannot find mountpoint for device /dev/vdb1 quotaon: No correct mountpoint specified. This patch add check for journaled quotas to avoid incorrect quotaoff when ext4 has quota feautre. Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 68345a9e59b8..079b4ed457cb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2243,7 +2243,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ + /* Turn on journaled quotas so that they are updated correctly */ for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); @@ -2309,9 +2309,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA - /* Turn quotas off */ + /* Turn off journaled quotas if they were enabled for orphan cleanup */ for (i = 0; i < EXT4_MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) + if (EXT4_SB(sb)->s_qf_names[i] && sb_dqopt(sb)->files[i]) dquot_quota_off(sb, i); } #endif -- cgit v1.2.3 From c53f01698f68a3d6880c27e24a4e98ca6cd9feb9 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Thu, 24 Aug 2017 15:21:50 -0400 Subject: ext4: fix quota inconsistency during orphan cleanup for read-only mounts commit 95f1fda47c9d8738f858c3861add7bf0a36a7c0b upstream. Quota does not get enabled for read-only mounts if filesystem has quota feature, so that quotas cannot updated during orphan cleanup, which will lead to quota inconsistency. This patch turn on quotas during orphan cleanup for this case, make sure quotas can be updated correctly. Reported-by: Jan Kara Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 079b4ed457cb..32941cd6d34b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2205,6 +2205,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; #ifdef CONFIG_QUOTA + int quota_update = 0; int i; #endif if (!es->s_last_orphan) { @@ -2243,14 +2244,32 @@ static void ext4_orphan_cleanup(struct super_block *sb, #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sb->s_flags |= MS_ACTIVE; - /* Turn on journaled quotas so that they are updated correctly */ + + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ + if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) { + int ret = ext4_enable_quotas(sb); + + if (!ret) + quota_update = 1; + else + ext4_msg(sb, KERN_ERR, + "Cannot turn on quotas: error %d", ret); + } + + /* Turn on journaled quotas used for old sytle */ for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); - if (ret < 0) + + if (!ret) + quota_update = 1; + else ext4_msg(sb, KERN_ERR, "Cannot turn on journaled " - "quota: error %d", ret); + "quota: type %d: error %d", i, ret); } } #endif @@ -2309,10 +2328,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA - /* Turn off journaled quotas if they were enabled for orphan cleanup */ - for (i = 0; i < EXT4_MAXQUOTAS; i++) { - if (EXT4_SB(sb)->s_qf_names[i] && sb_dqopt(sb)->files[i]) - dquot_quota_off(sb, i); + /* Turn off quotas if they were enabled for orphan cleanup */ + if (quota_update) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + if (sb_dqopt(sb)->files[i]) + dquot_quota_off(sb, i); + } } #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -5120,6 +5141,9 @@ static int ext4_enable_quotas(struct super_block *sb) err = ext4_quota_enable(sb, type, QFMT_VFS_V1, DQUOT_USAGE_ENABLED); if (err) { + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + ext4_warning(sb, "Failed to enable quota tracking " "(type=%d, err=%d). Please run " -- cgit v1.2.3 From a918d32583e0d4ed9d9aac49988c5dfa384f3ede Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Aug 2017 20:49:57 +1000 Subject: powerpc: Fix DAR reporting when alignment handler faults commit f9effe925039cf54489b5c04e0d40073bb3a123d upstream. Anton noticed that if we fault part way through emulating an unaligned instruction, we don't update the DAR to reflect that. The DAR value is eventually reported back to userspace as the address in the SEGV signal, and if userspace is using that value to demand fault then it can be confused by us not setting the value correctly. This patch is ugly as hell, but is intended to be the minimal fix and back ports easily. Signed-off-by: Michael Ellerman Reviewed-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/align.c | 119 +++++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 91e5c1758b5c..64e016abb2a5 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -236,6 +236,28 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) #define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz)) +#define __get_user_or_set_dar(_regs, _dest, _addr) \ + ({ \ + int rc = 0; \ + typeof(_addr) __addr = (_addr); \ + if (__get_user_inatomic(_dest, __addr)) { \ + _regs->dar = (unsigned long)__addr; \ + rc = -EFAULT; \ + } \ + rc; \ + }) + +#define __put_user_or_set_dar(_regs, _src, _addr) \ + ({ \ + int rc = 0; \ + typeof(_addr) __addr = (_addr); \ + if (__put_user_inatomic(_src, __addr)) { \ + _regs->dar = (unsigned long)__addr; \ + rc = -EFAULT; \ + } \ + rc; \ + }) + static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, unsigned int reg, unsigned int nb, unsigned int flags, unsigned int instr, @@ -264,9 +286,10 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, } else { unsigned long pc = regs->nip ^ (swiz & 4); - if (__get_user_inatomic(instr, - (unsigned int __user *)pc)) + if (__get_user_or_set_dar(regs, instr, + (unsigned int __user *)pc)) return -EFAULT; + if (swiz == 0 && (flags & SW)) instr = cpu_to_le32(instr); nb = (instr >> 11) & 0x1f; @@ -310,31 +333,31 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, ((nb0 + 3) / 4) * sizeof(unsigned long)); for (i = 0; i < nb; ++i, ++p) - if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz), - SWIZ_PTR(p))) + if (__get_user_or_set_dar(regs, REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; if (nb0 > 0) { rptr = ®s->gpr[0]; addr += nb; for (i = 0; i < nb0; ++i, ++p) - if (__get_user_inatomic(REG_BYTE(rptr, - i ^ bswiz), - SWIZ_PTR(p))) + if (__get_user_or_set_dar(regs, + REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; } } else { for (i = 0; i < nb; ++i, ++p) - if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz), - SWIZ_PTR(p))) + if (__put_user_or_set_dar(regs, REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; if (nb0 > 0) { rptr = ®s->gpr[0]; addr += nb; for (i = 0; i < nb0; ++i, ++p) - if (__put_user_inatomic(REG_BYTE(rptr, - i ^ bswiz), - SWIZ_PTR(p))) + if (__put_user_or_set_dar(regs, + REG_BYTE(rptr, i ^ bswiz), + SWIZ_PTR(p))) return -EFAULT; } } @@ -346,29 +369,32 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, * Only POWER6 has these instructions, and it does true little-endian, * so we don't need the address swizzling. */ -static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, - unsigned int flags) +static int emulate_fp_pair(struct pt_regs *regs, unsigned char __user *addr, + unsigned int reg, unsigned int flags) { char *ptr0 = (char *) ¤t->thread.TS_FPR(reg); char *ptr1 = (char *) ¤t->thread.TS_FPR(reg+1); - int i, ret, sw = 0; + int i, sw = 0; if (reg & 1) return 0; /* invalid form: FRS/FRT must be even */ if (flags & SW) sw = 7; - ret = 0; + for (i = 0; i < 8; ++i) { if (!(flags & ST)) { - ret |= __get_user(ptr0[i^sw], addr + i); - ret |= __get_user(ptr1[i^sw], addr + i + 8); + if (__get_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__get_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } else { - ret |= __put_user(ptr0[i^sw], addr + i); - ret |= __put_user(ptr1[i^sw], addr + i + 8); + if (__put_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__put_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } } - if (ret) - return -EFAULT; + return 1; /* exception handled and fixed up */ } @@ -378,24 +404,27 @@ static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr, { char *ptr0 = (char *)®s->gpr[reg]; char *ptr1 = (char *)®s->gpr[reg+1]; - int i, ret, sw = 0; + int i, sw = 0; if (reg & 1) return 0; /* invalid form: GPR must be even */ if (flags & SW) sw = 7; - ret = 0; + for (i = 0; i < 8; ++i) { if (!(flags & ST)) { - ret |= __get_user(ptr0[i^sw], addr + i); - ret |= __get_user(ptr1[i^sw], addr + i + 8); + if (__get_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__get_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } else { - ret |= __put_user(ptr0[i^sw], addr + i); - ret |= __put_user(ptr1[i^sw], addr + i + 8); + if (__put_user_or_set_dar(regs, ptr0[i^sw], addr + i)) + return -EFAULT; + if (__put_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8)) + return -EFAULT; } } - if (ret) - return -EFAULT; + return 1; /* exception handled and fixed up */ } #endif /* CONFIG_PPC64 */ @@ -688,9 +717,14 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, for (j = 0; j < length; j += elsize) { for (i = 0; i < elsize; ++i) { if (flags & ST) - ret |= __put_user(ptr[i^sw], addr + i); + ret = __put_user_or_set_dar(regs, ptr[i^sw], + addr + i); else - ret |= __get_user(ptr[i^sw], addr + i); + ret = __get_user_or_set_dar(regs, ptr[i^sw], + addr + i); + + if (ret) + return ret; } ptr += elsize; #ifdef __LITTLE_ENDIAN__ @@ -740,7 +774,7 @@ int fix_alignment(struct pt_regs *regs) unsigned int dsisr; unsigned char __user *addr; unsigned long p, swiz; - int ret, i; + int i; union data { u64 ll; double dd; @@ -923,7 +957,7 @@ int fix_alignment(struct pt_regs *regs) if (flags & F) { /* Special case for 16-byte FP loads and stores */ PPC_WARN_ALIGNMENT(fp_pair, regs); - return emulate_fp_pair(addr, reg, flags); + return emulate_fp_pair(regs, addr, reg, flags); } else { #ifdef CONFIG_PPC64 /* Special case for 16-byte loads and stores */ @@ -953,15 +987,12 @@ int fix_alignment(struct pt_regs *regs) } data.ll = 0; - ret = 0; p = (unsigned long)addr; for (i = 0; i < nb; i++) - ret |= __get_user_inatomic(data.v[start + i], - SWIZ_PTR(p++)); - - if (unlikely(ret)) - return -EFAULT; + if (__get_user_or_set_dar(regs, data.v[start + i], + SWIZ_PTR(p++))) + return -EFAULT; } else if (flags & F) { data.ll = current->thread.TS_FPR(reg); @@ -1031,15 +1062,13 @@ int fix_alignment(struct pt_regs *regs) break; } - ret = 0; p = (unsigned long)addr; for (i = 0; i < nb; i++) - ret |= __put_user_inatomic(data.v[start + i], - SWIZ_PTR(p++)); + if (__put_user_or_set_dar(regs, data.v[start + i], + SWIZ_PTR(p++))) + return -EFAULT; - if (unlikely(ret)) - return -EFAULT; } else if (flags & F) current->thread.TS_FPR(reg) = data.ll; else -- cgit v1.2.3 From 30e81e7fe197dd14d5b7653c75140ea75fe5c3d4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 17 Aug 2017 13:12:44 -0700 Subject: block: Relax a check in blk_start_queue() commit 4ddd56b003f251091a67c15ae3fe4a5c5c5e390a upstream. Calling blk_start_queue() from interrupt context with the queue lock held and without disabling IRQs, as the skd driver does, is safe. This patch avoids that loading the skd driver triggers the following warning: WARNING: CPU: 11 PID: 1348 at block/blk-core.c:283 blk_start_queue+0x84/0xa0 RIP: 0010:blk_start_queue+0x84/0xa0 Call Trace: skd_unquiesce_dev+0x12a/0x1d0 [skd] skd_complete_internal+0x1e7/0x5a0 [skd] skd_complete_other+0xc2/0xd0 [skd] skd_isr_completion_posted.isra.30+0x2a5/0x470 [skd] skd_isr+0x14f/0x180 [skd] irq_forced_thread_fn+0x2a/0x70 irq_thread+0x144/0x1a0 kthread+0x125/0x140 ret_from_fork+0x2a/0x40 Fixes: commit a038e2536472 ("[PATCH] blk_start_queue() must be called with irq disabled - add warning") Signed-off-by: Bart Van Assche Cc: Paolo 'Blaisorblade' Giarrusso Cc: Andrew Morton Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index ef083e7a37c5..119658534dfd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -233,7 +233,7 @@ EXPORT_SYMBOL(blk_start_queue_async); **/ void blk_start_queue(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON(!in_interrupt() && !irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); __blk_run_queue(q); -- cgit v1.2.3 From f05dafbd779112307e8dfe0ed8226910d29f0020 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 Aug 2017 10:23:25 +1000 Subject: md/bitmap: disable bitmap_resize for file-backed bitmaps. commit e8a27f836f165c26f867ece7f31eb5c811692319 upstream. bitmap_resize() does not work for file-backed bitmaps. The buffer_heads are allocated and initialized when the bitmap is read from the file, but resize doesn't read from the file, it loads from the internal bitmap. When it comes time to write the new bitmap, the bh is non-existent and we crash. The common case when growing an array involves making the array larger, and that normally means making the bitmap larger. Doing that inside the kernel is possible, but would need more code. It is probably easier to require people who use file-backed bitmaps to remove them and re-add after a reshape. So this patch disables the resizing of arrays which have file-backed bitmaps. This is better than crashing. Reported-by: Zhilong Liu Fixes: d60b479d177a ("md/bitmap: add bitmap_resize function to allow bitmap resizing.") Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/bitmap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 4f22e919787a..7a50728b9389 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1960,6 +1960,11 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, long pages; struct bitmap_page *new_bp; + if (bitmap->storage.file && !init) { + pr_info("md: cannot resize file-based bitmap\n"); + return -EINVAL; + } + if (chunksize == 0) { /* If there is enough space, leave the chunk size unchanged, * else increase by factor of two until there is enough space. -- cgit v1.2.3 From 0bcaf5178fe6cc3169d4ef47e92e84e938bf7b3c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 17 Aug 2017 13:12:45 -0700 Subject: skd: Avoid that module unloading triggers a use-after-free commit 7277cc67b3916eed47558c64f9c9c0de00a35cda upstream. Since put_disk() triggers a disk_release() call and since that last function calls blk_put_queue() if disk->queue != NULL, clear the disk->queue pointer before calling put_disk(). This avoids that unloading the skd kernel module triggers the following use-after-free: WARNING: CPU: 8 PID: 297 at lib/refcount.c:128 refcount_sub_and_test+0x70/0x80 refcount_t: underflow; use-after-free. CPU: 8 PID: 297 Comm: kworker/8:1 Not tainted 4.11.10-300.fc26.x86_64 #1 Workqueue: events work_for_cpu_fn Call Trace: dump_stack+0x63/0x84 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5a/0x80 refcount_sub_and_test+0x70/0x80 refcount_dec_and_test+0x11/0x20 kobject_put+0x1f/0x50 blk_put_queue+0x15/0x20 disk_release+0xae/0xf0 device_release+0x32/0x90 kobject_release+0x67/0x170 kobject_put+0x2b/0x50 put_disk+0x17/0x20 skd_destruct+0x5c/0x890 [skd] skd_pci_probe+0x124d/0x13a0 [skd] local_pci_probe+0x42/0xa0 work_for_cpu_fn+0x14/0x20 process_one_work+0x19e/0x470 worker_thread+0x1dc/0x4a0 kthread+0x125/0x140 ret_from_fork+0x25/0x30 Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/skd_main.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 586f9168ffa4..a1f8bf96d8b3 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -4679,15 +4679,16 @@ static void skd_free_disk(struct skd_device *skdev) { struct gendisk *disk = skdev->disk; - if (disk != NULL) { - struct request_queue *q = disk->queue; + if (disk && (disk->flags & GENHD_FL_UP)) + del_gendisk(disk); - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (q) - blk_cleanup_queue(q); - put_disk(disk); + if (skdev->queue) { + blk_cleanup_queue(skdev->queue); + skdev->queue = NULL; + disk->queue = NULL; } + + put_disk(disk); skdev->disk = NULL; } -- cgit v1.2.3 From 19978c50db689ab0691080a65d4a635aebd0f6a7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 17 Aug 2017 13:12:46 -0700 Subject: skd: Submit requests to firmware before triggering the doorbell commit 5fbd545cd3fd311ea1d6e8be4cedddd0ee5684c7 upstream. Ensure that the members of struct skd_msg_buf have been transferred to the PCIe adapter before the doorbell is triggered. This patch avoids that I/O fails sporadically and that the following error message is reported: (skd0:STM000196603:[0000:00:09.0]): Completion mismatch comp_id=0x0000 skreq=0x0400 new=0x0000 Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/skd_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index a1f8bf96d8b3..47d1e834f3f4 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2214,6 +2214,9 @@ static void skd_send_fitmsg(struct skd_device *skdev, */ qcmd |= FIT_QCMD_MSGSIZE_64; + /* Make sure skd_msg_buf is written before the doorbell is triggered. */ + smp_wmb(); + SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); } @@ -2260,6 +2263,9 @@ static void skd_send_special_fitmsg(struct skd_device *skdev, qcmd = skspcl->mb_dma_address; qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128; + /* Make sure skd_msg_buf is written before the doorbell is triggered. */ + smp_wmb(); + SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); } -- cgit v1.2.3 From cfc49967434db15f202204eae4306b2a78d9ea03 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:51 +0200 Subject: scsi: zfcp: fix queuecommand for scsi_eh commands when DIX enabled commit 71b8e45da51a7b64a23378221c0a5868bd79da4f upstream. Since commit db007fc5e20c ("[SCSI] Command protection operation"), scsi_eh_prep_cmnd() saves scmd->prot_op and temporarily resets it to SCSI_PROT_NORMAL. Other FCP LLDDs such as qla2xxx and lpfc shield their queuecommand() to only access any of scsi_prot_sg...() if (scsi_get_prot_op(cmd) != SCSI_PROT_NORMAL). Do the same thing for zfcp, which introduced DIX support with commit ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for DIF/DIX"). Otherwise, TUR SCSI commands as part of scsi_eh likely fail in zfcp, because the regular SCSI command with DIX protection data, that scsi_eh re-uses in scsi_send_eh_cmnd(), of course still has (scsi_prot_sg_count() != 0) and so zfcp sends down bogus requests to the FCP channel hardware. This causes scsi_eh_test_devices() to have (finish_cmds == 0) [not SCSI device is online or not scsi_eh_tur() failed] so regular SCSI commands, that caused / were affected by scsi_eh, are moved to work_q and scsi_eh_test_devices() itself returns false. In turn, it unnecessarily escalates in our case in scsi_eh_ready_devs() beyond host reset to finally scsi_eh_offline_sdevs() which sets affected SCSI devices offline with the following kernel message: "kernel: sd H:0:T:L: Device offlined - not ready after error recovery" Signed-off-by: Steffen Maier Fixes: ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for DIF/DIX") Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_fsf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 27ff38f839fc..4efdb7415b03 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2258,7 +2258,8 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd) fcp_cmnd = (struct fcp_cmnd *) &req->qtcb->bottom.io.fcp_cmnd; zfcp_fc_scsi_to_fcp(fcp_cmnd, scsi_cmnd, 0); - if (scsi_prot_sg_count(scsi_cmnd)) { + if ((scsi_get_prot_op(scsi_cmnd) != SCSI_PROT_NORMAL) && + scsi_prot_sg_count(scsi_cmnd)) { zfcp_qdio_set_data_div(qdio, &req->qdio_req, scsi_prot_sg_count(scsi_cmnd)); retval = zfcp_qdio_sbals_from_sg(qdio, &req->qdio_req, -- cgit v1.2.3 From d0c02c6f3e8589f9276f92b1633629721c13ad20 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 28 Jul 2017 12:30:52 +0200 Subject: scsi: zfcp: add handling for FCP_RESID_OVER to the fcp ingress path commit a099b7b1fc1f0418ab8d79ecf98153e1e134656e upstream. Up until now zfcp would just ignore the FCP_RESID_OVER flag in the FCP response IU. When this flag is set, it is possible, in regards to the FCP standard, that the storage-server processes the command normally, up to the point where data is missing and simply ignores those. In this case no CHECK CONDITION would be set, and because we ignored the FCP_RESID_OVER flag we resulted in at least a data loss or even -corruption as a follow-up error, depending on how the applications/layers on top behave. To prevent this, we now set the host-byte of the corresponding scsi_cmnd to DID_ERROR. Other storage-behaviors, where the same condition results in a CHECK CONDITION set in the answer, don't need to be changed as they are handled in the mid-layer already. Following is an example trace record decoded with zfcpdbf from the s390-tools package. We forcefully injected a fc_dl which is one byte too small: Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : rsl_err Request ID : 0x... SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00070000 ^^DID_ERROR SCSI retries : 0x.. SCSI allowed : 0x.. SCSI scribble : 0x... SCSI opcode : 2a000000 00000000 08000000 00000000 FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000400 00000001 ^^fr_flags==FCP_RESID_OVER ^^fr_status==SAM_STAT_GOOD ^^^^^^^^fr_resid 00000000 00000000 As of now, we don't actively handle to possibility that a response IU has both flags - FCP_RESID_OVER and FCP_RESID_UNDER - set at once. Reported-by: Luke M. Hopkins Reviewed-by: Steffen Maier Fixes: 553448f6c483 ("[SCSI] zfcp: Message cleanup") Fixes: ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter.") (tglx/history.git) Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_fc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fc.h b/drivers/s390/scsi/zfcp_fc.h index df2b541c8287..a2275825186f 100644 --- a/drivers/s390/scsi/zfcp_fc.h +++ b/drivers/s390/scsi/zfcp_fc.h @@ -4,7 +4,7 @@ * Fibre Channel related definitions and inline functions for the zfcp * device driver * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2017 */ #ifndef ZFCP_FC_H @@ -279,6 +279,10 @@ void zfcp_fc_eval_fcp_rsp(struct fcp_resp_with_ext *fcp_rsp, !(rsp_flags & FCP_SNS_LEN_VAL) && fcp_rsp->resp.fr_status == SAM_STAT_GOOD) set_host_byte(scsi, DID_ERROR); + } else if (unlikely(rsp_flags & FCP_RESID_OVER)) { + /* FCP_DL was not sufficient for SCSI data length */ + if (fcp_rsp->resp.fr_status == SAM_STAT_GOOD) + set_host_byte(scsi, DID_ERROR); } } -- cgit v1.2.3 From 52661717ee664d349ba789cac386e8b046f8ed79 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:53 +0200 Subject: scsi: zfcp: fix capping of unsuccessful GPN_FT SAN response trace records commit 975171b4461be296a35e83ebd748946b81cf0635 upstream. v4.9 commit aceeffbb59bb ("zfcp: trace full payload of all SAN records (req,resp,iels)") fixed trace data loss of 2.6.38 commit 2c55b750a884 ("[SCSI] zfcp: Redesign of the debug tracing for SAN records.") necessary for problem determination, e.g. to see the currently active zone set during automatic port scan. While it already saves space by not dumping any empty residual entries of the large successful GPN_FT response (4 pages), there are seldom cases where the GPN_FT response is unsuccessful and likely does not have FC_NS_FID_LAST set in fp_flags so we did not cap the trace record. We typically see such case for an initiator WWPN, which is not in any zone. Cap unsuccessful responses to at least the actual basic CT_IU response plus whatever fits the SAN trace record built-in "payload" buffer just in case there's trailing information of which we would at least see the existence and its beginning. In order not to erroneously cap successful responses, we need to swap calling the trace function and setting the CT / ELS status to success (0). Example trace record pair formatted with zfcpdbf: Timestamp : ... Area : SAN Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : fssct_1 Request ID : 0x Destination ID : 0x00fffffc SAN req short : 01000000 fc020000 01720ffc 00000000 00000008 SAN req length : 20 | Timestamp : ... Area : SAN Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 2 Tag : fsscth2 Request ID : 0x Destination ID : 0x00fffffc SAN resp short : 01000000 fc020000 80010000 00090700 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] SAN resp length: 16384 San resp info : 01000000 fc020000 80010000 00090700 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] 00000000 00000000 00000000 00000000 [trailing info] The fix saves all but one of the previously associated 64 PAYload trace record chunks of size 256 bytes each. Signed-off-by: Steffen Maier Fixes: aceeffbb59bb ("zfcp: trace full payload of all SAN records (req,resp,iels)") Fixes: 2c55b750a884 ("[SCSI] zfcp: Redesign of the debug tracing for SAN records.") Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.c | 10 +++++++++- drivers/s390/scsi/zfcp_fsf.c | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index d5bf36ec8a75..31d62ea5fdcd 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -3,7 +3,7 @@ * * Debug traces for zfcp. * - * Copyright IBM Corp. 2002, 2016 + * Copyright IBM Corp. 2002, 2017 */ #define KMSG_COMPONENT "zfcp" @@ -447,6 +447,7 @@ static u16 zfcp_dbf_san_res_cap_len_if_gpn_ft(char *tag, struct fc_ct_hdr *reqh = sg_virt(ct_els->req); struct fc_ns_gid_ft *reqn = (struct fc_ns_gid_ft *)(reqh + 1); struct scatterlist *resp_entry = ct_els->resp; + struct fc_ct_hdr *resph; struct fc_gpn_ft_resp *acc; int max_entries, x, last = 0; @@ -473,6 +474,13 @@ static u16 zfcp_dbf_san_res_cap_len_if_gpn_ft(char *tag, return len; /* not GPN_FT response so do not cap */ acc = sg_virt(resp_entry); + + /* cap all but accept CT responses to at least the CT header */ + resph = (struct fc_ct_hdr *)acc; + if ((ct_els->status) || + (resph->ct_cmd != cpu_to_be16(FC_FS_ACC))) + return max(FC_CT_HDR_LEN, ZFCP_DBF_SAN_MAX_PAYLOAD); + max_entries = (reqh->ct_mr_size * 4 / sizeof(struct fc_gpn_ft_resp)) + 1 /* zfcp_fc_scan_ports: bytes correct, entries off-by-one * to account for header as 1st pseudo "entry" */; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 4efdb7415b03..1964391db904 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -928,8 +928,8 @@ static void zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *req) switch (header->fsf_status) { case FSF_GOOD: - zfcp_dbf_san_res("fsscth2", req); ct->status = 0; + zfcp_dbf_san_res("fsscth2", req); break; case FSF_SERVICE_CLASS_NOT_SUPPORTED: zfcp_fsf_class_not_supp(req); @@ -1109,8 +1109,8 @@ static void zfcp_fsf_send_els_handler(struct zfcp_fsf_req *req) switch (header->fsf_status) { case FSF_GOOD: - zfcp_dbf_san_res("fsselh1", req); send_els->status = 0; + zfcp_dbf_san_res("fsselh1", req); break; case FSF_SERVICE_CLASS_NOT_SUPPORTED: zfcp_fsf_class_not_supp(req); -- cgit v1.2.3 From 1a847369487c31b405e3bda614a36612250905ac Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:54 +0200 Subject: scsi: zfcp: fix passing fsf_req to SCSI trace on TMF to correlate with HBA commit 9fe5d2b2fd30aa8c7827ec62cbbe6d30df4fe3e3 upstream. Without this fix we get SCSI trace records on task management functions which cannot be correlated to HBA trace records because all fields related to the FSF request are empty (zero). Also, the FCP_RSP_IU is missing as well as any sense data if available. This was caused by v2.6.14 commit 8a36e4532ea1 ("[SCSI] zfcp: enhancement of zfcp debug features") introducing trace records for TMFs but hard coding NULL for a possibly existing TMF FSF request. The scsi_cmnd scribble is also zero or unrelated for the TMF request so it also could not lookup a suitable FSF request from there. A broken example trace record formatted with zfcpdbf from the s390-tools package: Timestamp : ... Area : SCSI Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : lr_fail Request ID : 0x0000000000000000 ^^^^^^^^^^^^^^^^ no correlation to HBA record SCSI ID : 0x SCSI LUN : 0x SCSI result : 0x000e0000 SCSI retries : 0x00 SCSI allowed : 0x05 SCSI scribble : 0x0000000000000000 SCSI opcode : 2a000017 3bb80000 08000000 00000000 FCP rsp inf cod: 0x00 ^^ no TMF response FCP rsp IU : 00000000 00000000 00000000 00000000 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 00000000 00000000 ^^^^^^^^^^^^^^^^^ no interesting FCP_RSP_IU Sense len : ... ^^^^^^^^^^^^^^^^^^^^ no sense data length Sense info : ... ^^^^^^^^^^^^^^^^^^^^ no sense data content, even if present There are some true cases where we really do not have an FSF request: "rsl_fai" from zfcp_dbf_scsi_fail_send() called for early returns / completions in zfcp_scsi_queuecommand(), "abrt_or", "abrt_bl", "abrt_ru", "abrt_ar" from zfcp_scsi_eh_abort_handler() where we did not get as far, "lr_nres", "tr_nres" from zfcp_task_mgmt_function() where we're successful and do not need to do anything because adapter stopped. For these cases it's correct to pass NULL for fsf_req to _zfcp_dbf_scsi(). Signed-off-by: Steffen Maier Fixes: 8a36e4532ea1 ("[SCSI] zfcp: enhancement of zfcp debug features") Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.h | 7 ++++--- drivers/s390/scsi/zfcp_scsi.c | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index db186d44cfaf..776d1ac125ff 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -2,7 +2,7 @@ * zfcp device driver * debug feature declarations * - * Copyright IBM Corp. 2008, 2016 + * Copyright IBM Corp. 2008, 2017 */ #ifndef ZFCP_DBF_H @@ -401,7 +401,8 @@ void zfcp_dbf_scsi_abort(char *tag, struct scsi_cmnd *scmd, * @flag: indicates type of reset (Target Reset, Logical Unit Reset) */ static inline -void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) +void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag, + struct zfcp_fsf_req *fsf_req) { char tmp_tag[ZFCP_DBF_TAG_LEN]; @@ -411,7 +412,7 @@ void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) memcpy(tmp_tag, "lr_", 3); memcpy(&tmp_tag[3], tag, 4); - _zfcp_dbf_scsi(tmp_tag, 1, scmnd, NULL); + _zfcp_dbf_scsi(tmp_tag, 1, scmnd, fsf_req); } /** diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 07ffdbb5107f..ecce7e858af9 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2016 + * Copyright IBM Corp. 2002, 2017 */ #define KMSG_COMPONENT "zfcp" @@ -278,7 +278,7 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_RUNNING)) { - zfcp_dbf_scsi_devreset("nres", scpnt, tm_flags); + zfcp_dbf_scsi_devreset("nres", scpnt, tm_flags, NULL); return SUCCESS; } } @@ -288,10 +288,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) wait_for_completion(&fsf_req->completion); if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { - zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags); + zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags, fsf_req); retval = FAILED; } else { - zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags); + zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags, fsf_req); zfcp_scsi_forget_cmnds(zfcp_sdev, tm_flags); } -- cgit v1.2.3 From d0fbe221b8f13192afffb5fd44024dbfdfb8f656 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:55 +0200 Subject: scsi: zfcp: fix missing trace records for early returns in TMF eh handlers commit 1a5d999ebfc7bfe28deb48931bb57faa8e4102b6 upstream. For problem determination we need to see that we were in scsi_eh as well as whether and why we were successful or not. The following commits introduced new early returns without adding a trace record: v2.6.35 commit a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") on fc_block_scsi_eh() returning != 0 which is FAST_IO_FAIL, v2.6.30 commit 63caf367e1c9 ("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp") on not having gotten an FSF request after the maximum number of retry attempts and thus could not issue a TMF and has to return FAILED. Signed-off-by: Steffen Maier Fixes: a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") Fixes: 63caf367e1c9 ("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp") Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_scsi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index ecce7e858af9..9bd9b9a29dfc 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -273,8 +273,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) zfcp_erp_wait(adapter); ret = fc_block_scsi_eh(scpnt); - if (ret) + if (ret) { + zfcp_dbf_scsi_devreset("fiof", scpnt, tm_flags, NULL); return ret; + } if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_RUNNING)) { @@ -282,8 +284,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) return SUCCESS; } } - if (!fsf_req) + if (!fsf_req) { + zfcp_dbf_scsi_devreset("reqf", scpnt, tm_flags, NULL); return FAILED; + } wait_for_completion(&fsf_req->completion); -- cgit v1.2.3 From 7194822422f9ebada58f7fa7db824ee97b949fe2 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:56 +0200 Subject: scsi: zfcp: fix payload with full FCP_RSP IU in SCSI trace records commit 12c3e5754c8022a4f2fd1e9f00d19e99ee0d3cc1 upstream. If the FCP_RSP UI has optional parts (FCP_SNS_INFO or FCP_RSP_INFO) and thus does not fit into the fsp_rsp field built into a SCSI trace record, trace the full FCP_RSP UI with all optional parts as payload record instead of just FCP_SNS_INFO as payload and a 1 byte RSP_INFO_CODE part of FCP_RSP_INFO built into the SCSI record. That way we would also get the full FCP_SNS_INFO in case a target would ever send more than min(SCSI_SENSE_BUFFERSIZE==96, ZFCP_DBF_PAY_MAX_REC==256)==96. The mandatory part of FCP_RSP IU is only 24 bytes. PAYload costs at least one full PAY record of 256 bytes anyway. We cap to the hardware response size which is only FSF_FCP_RSP_SIZE==128. So we can just put the whole FCP_RSP IU with any optional parts into PAYload similarly as we do for SAN PAY since v4.9 commit aceeffbb59bb ("zfcp: trace full payload of all SAN records (req,resp,iels)"). This does not cause any additional trace records wasting memory. Decoded trace records were confusing because they showed a hard-coded sense data length of 96 even if the FCP_RSP_IU field FCP_SNS_LEN showed actually less. Since the same commit, we set pl_len for SAN traces to the full length of a request/response even if we cap the corresponding trace. In contrast, here for SCSI traces we set pl_len to the pre-computed length of FCP_RSP IU considering SNS_LEN or RSP_LEN if valid. Nonetheless we trace a hardcoded payload of length FSF_FCP_RSP_SIZE==128 if there were optional parts. This makes it easier for the zfcpdbf tool to format only the relevant part of the long FCP_RSP UI buffer. And any trailing information is still available in the payload trace record just in case. Rename the payload record tag from "fcp_sns" to "fcp_riu" to make the new content explicit to zfcpdbf which can then pick a suitable field name such as "FCP rsp IU all:" instead of "Sense info :" Also, the same zfcpdbf can still be backwards compatible with "fcp_sns". Old example trace record before this fix, formatted with the tool zfcpdbf from s390-tools: Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU id : .. Caller : 0x... Record id : 1 Tag : rsl_err Request id : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00000002 SCSI retries : 0x00 SCSI allowed : 0x05 SCSI scribble : 0x SCSI opcode : 00000000 00000000 00000000 00000000 FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000202 00000000 ^^==FCP_SNS_LEN_VALID 00000020 00000000 ^^^^^^^^==FCP_SNS_LEN==32 Sense len : 96 <==min(SCSI_SENSE_BUFFERSIZE,ZFCP_DBF_PAY_MAX_REC) Sense info : 70000600 00000018 00000000 29000000 00000400 00000000 00000000 00000000 00000000 00000000 00000000 00000000<==superfluous 00000000 00000000 00000000 00000000<==superfluous 00000000 00000000 00000000 00000000<==superfluous 00000000 00000000 00000000 00000000<==superfluous New example trace records with this fix: Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : rsl_err Request ID : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00000002 SCSI retries : 0x00 SCSI allowed : 0x03 SCSI scribble : 0x SCSI opcode : a30c0112 00000000 02000000 00000000 FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000a02 00000200 00000020 00000000 FCP rsp IU len : 56 FCP rsp IU all : 00000000 00000000 00000a02 00000200 ^^=FCP_RESID_UNDER|FCP_SNS_LEN_VALID 00000020 00000000 70000500 00000018 ^^^^^^^^==FCP_SNS_LEN ^^^^^^^^^^^^^^^^^ 00000000 240000cb 00011100 00000000 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 00000000 00000000 ^^^^^^^^^^^^^^^^^==FCP_SNS_INFO Timestamp : ... Area : SCSI Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : lr_okay Request ID : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00000000 SCSI retries : 0x00 SCSI allowed : 0x05 SCSI scribble : 0x SCSI opcode : FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000100 00000000 00000000 00000008 FCP rsp IU len : 32 FCP rsp IU all : 00000000 00000000 00000100 00000000 ^^==FCP_RSP_LEN_VALID 00000000 00000008 00000000 00000000 ^^^^^^^^==FCP_RSP_LEN ^^^^^^^^^^^^^^^^^==FCP_RSP_INFO Signed-off-by: Steffen Maier Fixes: 250a1352b95e ("[SCSI] zfcp: Redesign of the debug tracing for SCSI records.") Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 31d62ea5fdcd..c801f9782cb2 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -572,19 +572,32 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, if (fsf) { rec->fsf_req_id = fsf->req_id; + rec->pl_len = FCP_RESP_WITH_EXT; fcp_rsp = (struct fcp_resp_with_ext *) &(fsf->qtcb->bottom.io.fcp_rsp); + /* mandatory parts of FCP_RSP IU in this SCSI record */ memcpy(&rec->fcp_rsp, fcp_rsp, FCP_RESP_WITH_EXT); if (fcp_rsp->resp.fr_flags & FCP_RSP_LEN_VAL) { fcp_rsp_info = (struct fcp_resp_rsp_info *) &fcp_rsp[1]; rec->fcp_rsp_info = fcp_rsp_info->rsp_code; + rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_rsp_len); } if (fcp_rsp->resp.fr_flags & FCP_SNS_LEN_VAL) { - rec->pl_len = min((u16)SCSI_SENSE_BUFFERSIZE, - (u16)ZFCP_DBF_PAY_MAX_REC); - zfcp_dbf_pl_write(dbf, sc->sense_buffer, rec->pl_len, - "fcp_sns", fsf->req_id); + rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_sns_len); } + /* complete FCP_RSP IU in associated PAYload record + * but only if there are optional parts + */ + if (fcp_rsp->resp.fr_flags != 0) + zfcp_dbf_pl_write( + dbf, fcp_rsp, + /* at least one full PAY record + * but not beyond hardware response field + */ + min_t(u16, max_t(u16, rec->pl_len, + ZFCP_DBF_PAY_MAX_REC), + FSF_FCP_RSP_SIZE), + "fcp_riu", fsf->req_id); } debug_event(dbf->scsi, level, rec, sizeof(*rec)); -- cgit v1.2.3 From 1e6c640a75d09064a27d7e08524c0b2d8b17190c Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:57 +0200 Subject: scsi: zfcp: trace HBA FSF response by default on dismiss or timedout late response commit fdb7cee3b9e3c561502e58137a837341f10cbf8b upstream. At the default trace level, we only trace unsuccessful events including FSF responses. zfcp_dbf_hba_fsf_response() only used protocol status and FSF status to decide on an unsuccessful response. However, this is only one of multiple possible sources determining a failed struct zfcp_fsf_req. An FSF request can also "fail" if its response runs into an ERP timeout or if it gets dismissed because a higher level recovery was triggered [trace tags "erscf_1" or "erscf_2" in zfcp_erp_strategy_check_fsfreq()]. FSF requests with ERP timeout are: FSF_QTCB_EXCHANGE_CONFIG_DATA, FSF_QTCB_EXCHANGE_PORT_DATA, FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT or FSF_QTCB_CLOSE_PHYSICAL_PORT for target ports, FSF_QTCB_OPEN_LUN, FSF_QTCB_CLOSE_LUN. One example is slow queue processing which can cause follow-on errors, e.g. FSF_PORT_ALREADY_OPEN after FSF_QTCB_OPEN_PORT_WITH_DID timed out. In order to see the root cause, we need to see late responses even if the channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD. Example trace records formatted with zfcpdbf from the s390-tools package: Timestamp : ... Area : REC Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : ... Record ID : 1 Tag : fcegpf1 LUN : 0xffffffffffffffff WWPN : 0x D_ID : 0x00 Adapter status : 0x5400050b Port status : 0x41200000 LUN status : 0x00000000 Ready count : 0x00000001 Running count : 0x... ERP want : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT ERP need : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT | Timestamp : ... 30 seconds later Area : REC Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : ... Record ID : 2 Tag : erscf_2 LUN : 0xffffffffffffffff WWPN : 0x D_ID : 0x00 Adapter status : 0x5400050b Port status : 0x41200000 LUN status : 0x00000000 Request ID : 0x ERP status : 0x10000000 ZFCP_STATUS_ERP_TIMEDOUT ERP step : 0x0800 ZFCP_ERP_STEP_PORT_OPENING ERP action : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT ERP count : 0x00 | Timestamp : ... later than previous record Area : HBA Subarea : 00 Level : 5 > default level => 3 <= default level Exception : - CPU ID : 00 Caller : ... Record ID : 1 Tag : fs_qtcb => fs_rerr Request ID : 0x Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED | ZFCP_STATUS_FSFREQ_CLEANUP FSF cmnd : 0x00000005 FSF sequence no: 0x... FSF issued : ... > 30 seconds ago FSF stat : 0x00000000 FSF_GOOD FSF stat qual : 00000000 00000000 00000000 00000000 Prot stat : 0x00000001 FSF_PROT_GOOD Prot stat qual : 00000000 00000000 00000000 00000000 Port handle : 0x... LUN handle : 0x00000000 QTCB log length: ... QTCB log info : ... In case of problems detecting that new responses are waiting on the input queue, we sooner or later trigger adapter recovery due to an FSF request timeout (trace tag "fsrth_1"). FSF requests with FSF request timeout are: typically FSF_QTCB_ABORT_FCP_CMND; but theoretically also FSF_QTCB_EXCHANGE_CONFIG_DATA or FSF_QTCB_EXCHANGE_PORT_DATA via sysfs, FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT for WKA ports, FSF_QTCB_FCP_CMND for task management function (LUN / target reset). One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD because the channel filled in the response via DMA into the request's QTCB. In a theroretical case, inject code can create an erroneous FSF request on purpose. If data router is enabled, it uses deferred error reporting. A READ SCSI command can succeed with FSF_PROT_GOOD, FSF_GOOD, and SAM_STAT_GOOD. But on writing the read data to host memory via DMA, it can still fail, e.g. if an intentionally wrong scatter list does not provide enough space. Rather than getting an unsuccessful response, we get a QDIO activate check which in turn triggers adapter recovery. One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD because the channel filled in the response via DMA into the request's QTCB. Example trace records formatted with zfcpdbf from the s390-tools package: Timestamp : ... Area : HBA Subarea : 00 Level : 6 > default level => 3 <= default level Exception : - CPU ID : .. Caller : ... Record ID : 1 Tag : fs_norm => fs_rerr Request ID : 0x Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED | ZFCP_STATUS_FSFREQ_CLEANUP FSF cmnd : 0x00000001 FSF sequence no: 0x... FSF issued : ... FSF stat : 0x00000000 FSF_GOOD FSF stat qual : 00000000 00000000 00000000 00000000 Prot stat : 0x00000001 FSF_PROT_GOOD Prot stat qual : ........ ........ 00000000 00000000 Port handle : 0x... LUN handle : 0x... | Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU ID : .. Caller : ... Record ID : 1 Tag : rsl_err Request ID : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x000e0000 DID_TRANSPORT_DISRUPTED SCSI retries : 0x00 SCSI allowed : 0x05 SCSI scribble : 0x SCSI opcode : 28... Read(10) FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000000 00000000 ^^ SAM_STAT_GOOD 00000000 00000000 Only with luck in both above cases, we could see a follow-on trace record of an unsuccesful event following a successful but late FSF response with FSF_PROT_GOOD and FSF_GOOD. Typically this was the case for I/O requests resulting in a SCSI trace record "rsl_err" with DID_TRANSPORT_DISRUPTED [On ZFCP_STATUS_FSFREQ_DISMISSED, zfcp_fsf_protstatus_eval() sets ZFCP_STATUS_FSFREQ_ERROR seen by the request handler functions as failure]. However, the reason for this follow-on trace was invisible because the corresponding HBA trace record was missing at the default trace level (by default hidden records with tags "fs_norm", "fs_qtcb", or "fs_open"). On adapter recovery, after we had shut down the QDIO queues, we perform unsuccessful pseudo completions with flag ZFCP_STATUS_FSFREQ_DISMISSED for each pending FSF request in zfcp_fsf_req_dismiss_all(). In order to find the root cause, we need to see all pseudo responses even if the channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD. Therefore, check zfcp_fsf_req.status for ZFCP_STATUS_FSFREQ_DISMISSED or ZFCP_STATUS_FSFREQ_ERROR and trace with a new tag "fs_rerr". It does not matter that there are numerous places which set ZFCP_STATUS_FSFREQ_ERROR after the location where we trace an FSF response early. These cases are based on protocol status != FSF_PROT_GOOD or == FSF_PROT_FSF_STATUS_PRESENTED and are thus already traced by default as trace tag "fs_perr" or "fs_ferr" respectively. NB: The trace record with tag "fssrh_1" for status read buffers on dismiss all remains. zfcp_fsf_req_complete() handles this and returns early. All other FSF request types are handled separately and as described above. Signed-off-by: Steffen Maier Fixes: 8a36e4532ea1 ("[SCSI] zfcp: enhancement of zfcp debug features") Fixes: 2e261af84cdb ("[SCSI] zfcp: Only collect FSF/HBA debug data for matching trace levels") Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 776d1ac125ff..8e7f8e6037d2 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -323,7 +323,11 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) { struct fsf_qtcb *qtcb = req->qtcb; - if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && + if (unlikely(req->status & (ZFCP_STATUS_FSFREQ_DISMISSED | + ZFCP_STATUS_FSFREQ_ERROR))) { + zfcp_dbf_hba_fsf_resp("fs_rerr", 3, req); + + } else if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && (qtcb->prefix.prot_status != FSF_PROT_FSF_STATUS_PRESENTED)) { zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); -- cgit v1.2.3 From 4dd6cbbc2191587942c0bcd4630b191a1064487c Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:58 +0200 Subject: scsi: zfcp: trace high part of "new" 64 bit SCSI LUN commit 5d4a3d0a2ff23799b956e5962b886287614e7fad upstream. Complements debugging aspects of the otherwise functionally complete v3.17 commit 9cb78c16f5da ("scsi: use 64-bit LUNs"). While I don't have access to a target exporting 3 or 4 level LUNs, I did test it by explicitly attaching a non-existent fake 4 level LUN by means of zfcp sysfs attribute "unit_add". In order to see corresponding trace records of otherwise successful events, we had to increase the trace level of area SCSI and HBA to 6. $ echo 6 > /sys/kernel/debug/s390dbf/zfcp_0.0.1880_scsi/level $ echo 6 > /sys/kernel/debug/s390dbf/zfcp_0.0.1880_hba/level $ echo 0x4011402240334044 > \ /sys/bus/ccw/drivers/zfcp/0.0.1880/0x50050763031bd327/unit_add Example output formatted by an updated zfcpdbf from the s390-tools package interspersed with kernel messages at scsi_logging_level=4605: Timestamp : ... Area : REC Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : scsla_1 LUN : 0x4011402240334044 WWPN : 0x50050763031bd327 D_ID : 0x00...... Adapter status : 0x5400050b Port status : 0x54000001 LUN status : 0x41000000 Ready count : 0x00000001 Running count : 0x00000000 ERP want : 0x01 ERP need : 0x01 scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY pass 1 length 36 scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY successful with code 0x0 Timestamp : ... Area : HBA Subarea : 00 Level : 6 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : fs_norm Request ID : 0x Request status : 0x00000010 FSF cmnd : 0x00000001 FSF sequence no: 0x... FSF issued : ... FSF stat : 0x00000000 FSF stat qual : 00000000 00000000 00000000 00000000 Prot stat : 0x00000001 Prot stat qual : ........ ........ 00000000 00000000 Port handle : 0x... LUN handle : 0x... | Timestamp : ... Area : SCSI Subarea : 00 Level : 6 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : rsl_nor Request ID : 0x SCSI ID : 0x00000000 SCSI LUN : 0x40224011 SCSI LUN high : 0x40444033 <======================= SCSI result : 0x00000000 SCSI retries : 0x00 SCSI allowed : 0x03 SCSI scribble : 0x SCSI opcode : 12000000 a4000000 00000000 00000000 FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000000 00000000 00000000 00000000 scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY pass 2 length 164 scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY successful with code 0x0 scsi 2:0:0:4630896905707208721: scsi scan: peripheral device type of 31, \ no device added Signed-off-by: Steffen Maier Fixes: 9cb78c16f5da ("scsi: use 64-bit LUNs") Reviewed-by: Benjamin Block Reviewed-by: Jens Remus Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.c | 2 +- drivers/s390/scsi/zfcp_dbf.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index c801f9782cb2..34367d172961 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -563,8 +563,8 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, rec->scsi_retries = sc->retries; rec->scsi_allowed = sc->allowed; rec->scsi_id = sc->device->id; - /* struct zfcp_dbf_scsi needs to be updated to handle 64bit LUNs */ rec->scsi_lun = (u32)sc->device->lun; + rec->scsi_lun_64_hi = (u32)(sc->device->lun >> 32); rec->host_scribble = (unsigned long)sc->host_scribble; memcpy(rec->scsi_opcode, sc->cmnd, diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 8e7f8e6037d2..b60667c145fd 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -204,7 +204,7 @@ enum zfcp_dbf_scsi_id { * @id: unique number of recovery record type * @tag: identifier string specifying the location of initiation * @scsi_id: scsi device id - * @scsi_lun: scsi device logical unit number + * @scsi_lun: scsi device logical unit number, low part of 64 bit, old 32 bit * @scsi_result: scsi result * @scsi_retries: current retry number of scsi request * @scsi_allowed: allowed retries @@ -214,6 +214,7 @@ enum zfcp_dbf_scsi_id { * @host_scribble: LLD specific data attached to SCSI request * @pl_len: length of paload stored as zfcp_dbf_pay * @fsf_rsp: response for fsf request + * @scsi_lun_64_hi: scsi device logical unit number, high part of 64 bit */ struct zfcp_dbf_scsi { u8 id; @@ -230,6 +231,7 @@ struct zfcp_dbf_scsi { u64 host_scribble; u16 pl_len; struct fcp_resp_with_ext fcp_rsp; + u32 scsi_lun_64_hi; } __packed; /** -- cgit v1.2.3 From d9b8f1ccbb8c7acddbe2e2fe0dfff51b8c75d361 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Wed, 23 Aug 2017 04:47:01 -0700 Subject: scsi: megaraid_sas: Check valid aen class range to avoid kernel panic commit 91b3d9f0069c8307d0b3a4c6843b65a439183318 upstream. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Hannes Reinecke Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 17c440b9d086..42ddc4d592fc 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5094,6 +5094,14 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num, prev_aen.word = le32_to_cpu(instance->aen_cmd->frame->dcmd.mbox.w[1]); + if ((curr_aen.members.class < MFI_EVT_CLASS_DEBUG) || + (curr_aen.members.class > MFI_EVT_CLASS_DEAD)) { + dev_info(&instance->pdev->dev, + "%s %d out of range class %d send by application\n", + __func__, __LINE__, curr_aen.members.class); + return 0; + } + /* * A class whose enum value is smaller is inclusive of all * higher values. If a PROGRESS (= -1) was previously -- cgit v1.2.3 From b4730f456e21ee98cec3e19837ca3e32fe689c65 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Wed, 23 Aug 2017 04:47:04 -0700 Subject: scsi: megaraid_sas: Return pended IOCTLs with cmd_status MFI_STAT_WRONG_STATE in case adapter is dead commit eb3fe263a48b0d27b229c213929c4cb3b1b39a0f upstream. After a kill adapter, since the cmd_status is not set, the IOCTLs will be hung in driver resulting in application hang. Set cmd_status MFI_STAT_WRONG_STATE when completing pended IOCTLs. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Hannes Reinecke Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 42ddc4d592fc..6835bae33ec4 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1824,9 +1824,12 @@ static void megasas_complete_outstanding_ioctls(struct megasas_instance *instanc if (cmd_fusion->sync_cmd_idx != (u32)ULONG_MAX) { cmd_mfi = instance->cmd_list[cmd_fusion->sync_cmd_idx]; if (cmd_mfi->sync_cmd && - cmd_mfi->frame->hdr.cmd != MFI_CMD_ABORT) + (cmd_mfi->frame->hdr.cmd != MFI_CMD_ABORT)) { + cmd_mfi->frame->hdr.cmd_status = + MFI_STAT_WRONG_STATE; megasas_complete_cmd(instance, cmd_mfi, DID_OK); + } } } } else { -- cgit v1.2.3 From cf22210c66ca1a252633b8ad9055b082727dff67 Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 28 Aug 2017 17:43:59 -0700 Subject: scsi: storvsc: fix memory leak on ring buffer busy commit 0208eeaa650c5c866a3242201678a19e6dc4a14e upstream. When storvsc is sending I/O to Hyper-v, it may allocate a bigger buffer descriptor for large data payload that can't fit into a pre-allocated buffer descriptor. This bigger buffer is freed on return path. If I/O request to Hyper-v fails due to ring buffer busy, the storvsc allocated buffer descriptor should also be freed. [mkp: applied by hand] Fixes: be0cf6ca301c ("scsi: storvsc: Set the tablesize based on the information given by the host") Signed-off-by: Long Li Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index cd5c1c060481..6df2841cb7f9 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1511,6 +1511,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request); if (ret == -EAGAIN) { + if (payload_sz > sizeof(cmd_request->mpb)) + kfree(payload); /* no more space */ return SCSI_MLQUEUE_DEVICE_BUSY; } -- cgit v1.2.3 From 6b498ad144728628c541dbb703d14142c10311a4 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Apr 2017 09:34:13 +0200 Subject: scsi: sg: remove 'save_scat_len' commit 136e57bf43dc4babbfb8783abbf707d483cacbe3 upstream. Unused. Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 71325972e503..0e3a694f08fa 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -157,7 +157,6 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ int timeout; /* defaults to SG_DEFAULT_TIMEOUT */ int timeout_user; /* defaults to SG_DEFAULT_TIMEOUT_USER */ Sg_scatter_hold reserve; /* buffer held for this file descriptor */ - unsigned save_scat_len; /* original length of trunc. scat. element */ Sg_request *headrp; /* head of request slist, NULL->empty */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ @@ -2059,7 +2058,6 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) req_schp->pages = NULL; req_schp->page_order = 0; req_schp->sglist_len = 0; - sfp->save_scat_len = 0; srp->res_used = 0; /* Called without mutex lock to avoid deadlock */ sfp->res_in_use = 0; -- cgit v1.2.3 From 3682e0c61ffb4fccd1a86bd2af3cbdd23723b9ed Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Apr 2017 09:34:16 +0200 Subject: scsi: sg: use standard lists for sg_requests commit 109bade9c625c89bb5ea753aaa1a0a97e6fbb548 upstream. 'Sg_request' is using a private list implementation; convert it to standard lists. Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 147 ++++++++++++++++++++++-------------------------------- 1 file changed, 61 insertions(+), 86 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0e3a694f08fa..ff84f4094fca 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -133,7 +133,7 @@ struct sg_device; /* forward declarations */ struct sg_fd; typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ - struct sg_request *nextrp; /* NULL -> tail request (slist) */ + struct list_head entry; /* list entry */ struct sg_fd *parentfp; /* NULL -> not in use */ Sg_scatter_hold data; /* hold buffer, perhaps scatter list */ sg_io_hdr_t header; /* scsi command+info, see */ @@ -157,7 +157,7 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ int timeout; /* defaults to SG_DEFAULT_TIMEOUT */ int timeout_user; /* defaults to SG_DEFAULT_TIMEOUT_USER */ Sg_scatter_hold reserve; /* buffer held for this file descriptor */ - Sg_request *headrp; /* head of request slist, NULL->empty */ + struct list_head rq_list; /* head of request list */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ char low_dma; /* as in parent but possibly overridden to 1 */ @@ -950,7 +950,7 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) if (!access_ok(VERIFY_WRITE, ip, sizeof (int))) return -EFAULT; read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (srp = sfp->headrp; srp; srp = srp->nextrp) { + list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) { read_unlock_irqrestore(&sfp->rq_list_lock, iflags); @@ -963,7 +963,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) return 0; case SG_GET_NUM_WAITING: read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (val = 0, srp = sfp->headrp; srp; srp = srp->nextrp) { + val = 0; + list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) ++val; } @@ -1038,35 +1039,33 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (srp = sfp->headrp, val = 0; val < SG_MAX_QUEUE; - ++val, srp = srp ? srp->nextrp : srp) { + val = 0; + list_for_each_entry(srp, &sfp->rq_list, entry) { + if (val > SG_MAX_QUEUE) + break; memset(&rinfo[val], 0, SZ_SG_REQ_INFO); - if (srp) { - rinfo[val].req_state = srp->done + 1; - rinfo[val].problem = - srp->header.masked_status & - srp->header.host_status & - srp->header.driver_status; - if (srp->done) - rinfo[val].duration = - srp->header.duration; - else { - ms = jiffies_to_msecs(jiffies); - rinfo[val].duration = - (ms > srp->header.duration) ? - (ms - srp->header.duration) : 0; - } - rinfo[val].orphan = srp->orphan; - rinfo[val].sg_io_owned = - srp->sg_io_owned; - rinfo[val].pack_id = - srp->header.pack_id; - rinfo[val].usr_ptr = - srp->header.usr_ptr; + rinfo[val].req_state = srp->done + 1; + rinfo[val].problem = + srp->header.masked_status & + srp->header.host_status & + srp->header.driver_status; + if (srp->done) + rinfo[val].duration = + srp->header.duration; + else { + ms = jiffies_to_msecs(jiffies); + rinfo[val].duration = + (ms > srp->header.duration) ? + (ms - srp->header.duration) : 0; } + rinfo[val].orphan = srp->orphan; + rinfo[val].sg_io_owned = srp->sg_io_owned; + rinfo[val].pack_id = srp->header.pack_id; + rinfo[val].usr_ptr = srp->header.usr_ptr; + val++; } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - result = __copy_to_user(p, rinfo, + result = __copy_to_user(p, rinfo, SZ_SG_REQ_INFO * SG_MAX_QUEUE); result = result ? -EFAULT : 0; kfree(rinfo); @@ -1172,7 +1171,7 @@ sg_poll(struct file *filp, poll_table * wait) return POLLERR; poll_wait(filp, &sfp->read_wait, wait); read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (srp = sfp->headrp; srp; srp = srp->nextrp) { + list_for_each_entry(srp, &sfp->rq_list, entry) { /* if any read waiting, flag it */ if ((0 == res) && (1 == srp->done) && (!srp->sg_io_owned)) res = POLLIN | POLLRDNORM; @@ -2070,7 +2069,7 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id) unsigned long iflags; write_lock_irqsave(&sfp->rq_list_lock, iflags); - for (resp = sfp->headrp; resp; resp = resp->nextrp) { + list_for_each_entry(resp, &sfp->rq_list, entry) { /* look for requests that are ready + not SG_IO owned */ if ((1 == resp->done) && (!resp->sg_io_owned) && ((-1 == pack_id) || (resp->header.pack_id == pack_id))) { @@ -2088,70 +2087,45 @@ sg_add_request(Sg_fd * sfp) { int k; unsigned long iflags; - Sg_request *resp; Sg_request *rp = sfp->req_arr; write_lock_irqsave(&sfp->rq_list_lock, iflags); - resp = sfp->headrp; - if (!resp) { - memset(rp, 0, sizeof (Sg_request)); - rp->parentfp = sfp; - resp = rp; - sfp->headrp = resp; - } else { - if (0 == sfp->cmd_q) - resp = NULL; /* command queuing disallowed */ - else { - for (k = 0; k < SG_MAX_QUEUE; ++k, ++rp) { - if (!rp->parentfp) - break; - } - if (k < SG_MAX_QUEUE) { - memset(rp, 0, sizeof (Sg_request)); - rp->parentfp = sfp; - while (resp->nextrp) - resp = resp->nextrp; - resp->nextrp = rp; - resp = rp; - } else - resp = NULL; + if (!list_empty(&sfp->rq_list)) { + if (!sfp->cmd_q) + goto out_unlock; + + for (k = 0; k < SG_MAX_QUEUE; ++k, ++rp) { + if (!rp->parentfp) + break; } + if (k >= SG_MAX_QUEUE) + goto out_unlock; } - if (resp) { - resp->nextrp = NULL; - resp->header.duration = jiffies_to_msecs(jiffies); - } + memset(rp, 0, sizeof (Sg_request)); + rp->parentfp = sfp; + rp->header.duration = jiffies_to_msecs(jiffies); + list_add_tail(&rp->entry, &sfp->rq_list); write_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return resp; + return rp; +out_unlock: + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + return NULL; } /* Return of 1 for found; 0 for not found */ static int sg_remove_request(Sg_fd * sfp, Sg_request * srp) { - Sg_request *prev_rp; - Sg_request *rp; unsigned long iflags; int res = 0; - if ((!sfp) || (!srp) || (!sfp->headrp)) + if (!sfp || !srp || list_empty(&sfp->rq_list)) return res; write_lock_irqsave(&sfp->rq_list_lock, iflags); - prev_rp = sfp->headrp; - if (srp == prev_rp) { - sfp->headrp = prev_rp->nextrp; - prev_rp->parentfp = NULL; + if (!list_empty(&srp->entry)) { + list_del(&srp->entry); + srp->parentfp = NULL; res = 1; - } else { - while ((rp = prev_rp->nextrp)) { - if (srp == rp) { - prev_rp->nextrp = rp->nextrp; - rp->parentfp = NULL; - res = 1; - break; - } - prev_rp = rp; - } } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return res; @@ -2170,7 +2144,7 @@ sg_add_sfp(Sg_device * sdp) init_waitqueue_head(&sfp->read_wait); rwlock_init(&sfp->rq_list_lock); - + INIT_LIST_HEAD(&sfp->rq_list); kref_init(&sfp->f_ref); mutex_init(&sfp->f_mutex); sfp->timeout = SG_DEFAULT_TIMEOUT; @@ -2211,10 +2185,13 @@ sg_remove_sfp_usercontext(struct work_struct *work) { struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); struct sg_device *sdp = sfp->parentdp; + Sg_request *srp; /* Cleanup any responses which were never read(). */ - while (sfp->headrp) - sg_finish_rem_req(sfp->headrp); + while (!list_empty(&sfp->rq_list)) { + srp = list_first_entry(&sfp->rq_list, Sg_request, entry); + sg_finish_rem_req(srp); + } if (sfp->reserve.bufflen > 0) { SCSI_LOG_TIMEOUT(6, sg_printk(KERN_INFO, sdp, @@ -2617,7 +2594,7 @@ static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v) /* must be called while holding sg_index_lock */ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) { - int k, m, new_interface, blen, usg; + int k, new_interface, blen, usg; Sg_request *srp; Sg_fd *fp; const sg_io_hdr_t *hp; @@ -2637,13 +2614,11 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n", (int) fp->cmd_q, (int) fp->force_packid, (int) fp->keep_orphan); - for (m = 0, srp = fp->headrp; - srp != NULL; - ++m, srp = srp->nextrp) { + list_for_each_entry(srp, &fp->rq_list, entry) { hp = &srp->header; new_interface = (hp->interface_id == '\0') ? 0 : 1; if (srp->res_used) { - if (new_interface && + if (new_interface && (SG_FLAG_MMAP_IO & hp->flags)) cp = " mmap>> "; else @@ -2674,7 +2649,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) seq_printf(s, "ms sgat=%d op=0x%02x\n", usg, (int) srp->data.cmd_opcode); } - if (0 == m) + if (list_empty(&fp->rq_list)) seq_puts(s, " No requests active\n"); read_unlock(&fp->rq_list_lock); } -- cgit v1.2.3 From f0cd701d475038d3078867cad576f6530c065120 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Aug 2017 10:09:54 +0300 Subject: scsi: sg: off by one in sg_ioctl() commit bd46fc406b30d1db1aff8dabaff8d18bb423fdcf upstream. If "val" is SG_MAX_QUEUE then we are one element beyond the end of the "rinfo" array so the > should be >=. Fixes: 109bade9c625 ("scsi: sg: use standard lists for sg_requests") Signed-off-by: Dan Carpenter Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ff84f4094fca..f758f80f1c77 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1041,7 +1041,7 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) read_lock_irqsave(&sfp->rq_list_lock, iflags); val = 0; list_for_each_entry(srp, &sfp->rq_list, entry) { - if (val > SG_MAX_QUEUE) + if (val >= SG_MAX_QUEUE) break; memset(&rinfo[val], 0, SZ_SG_REQ_INFO); rinfo[val].req_state = srp->done + 1; -- cgit v1.2.3 From c04996ad58eefbef5d3aafd340ce64aa54661425 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 15 Sep 2017 14:05:15 +0200 Subject: scsi: sg: factor out sg_fill_request_table() commit 4759df905a474d245752c9dc94288e779b8734dd upstream. Factor out sg_fill_request_table() for better readability. [mkp: typos, applied by hand] Signed-off-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 61 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index f758f80f1c77..525937f9b536 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -839,6 +839,40 @@ static int max_sectors_bytes(struct request_queue *q) return max_sectors << 9; } +static void +sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) +{ + Sg_request *srp; + int val; + unsigned int ms; + + val = 0; + list_for_each_entry(srp, &sfp->rq_list, entry) { + if (val > SG_MAX_QUEUE) + break; + memset(&rinfo[val], 0, SZ_SG_REQ_INFO); + rinfo[val].req_state = srp->done + 1; + rinfo[val].problem = + srp->header.masked_status & + srp->header.host_status & + srp->header.driver_status; + if (srp->done) + rinfo[val].duration = + srp->header.duration; + else { + ms = jiffies_to_msecs(jiffies); + rinfo[val].duration = + (ms > srp->header.duration) ? + (ms - srp->header.duration) : 0; + } + rinfo[val].orphan = srp->orphan; + rinfo[val].sg_io_owned = srp->sg_io_owned; + rinfo[val].pack_id = srp->header.pack_id; + rinfo[val].usr_ptr = srp->header.usr_ptr; + val++; + } +} + static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) { @@ -1032,38 +1066,13 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) return -EFAULT; else { sg_req_info_t *rinfo; - unsigned int ms; rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, GFP_KERNEL); if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); - val = 0; - list_for_each_entry(srp, &sfp->rq_list, entry) { - if (val >= SG_MAX_QUEUE) - break; - memset(&rinfo[val], 0, SZ_SG_REQ_INFO); - rinfo[val].req_state = srp->done + 1; - rinfo[val].problem = - srp->header.masked_status & - srp->header.host_status & - srp->header.driver_status; - if (srp->done) - rinfo[val].duration = - srp->header.duration; - else { - ms = jiffies_to_msecs(jiffies); - rinfo[val].duration = - (ms > srp->header.duration) ? - (ms - srp->header.duration) : 0; - } - rinfo[val].orphan = srp->orphan; - rinfo[val].sg_io_owned = srp->sg_io_owned; - rinfo[val].pack_id = srp->header.pack_id; - rinfo[val].usr_ptr = srp->header.usr_ptr; - val++; - } + sg_fill_request_table(sfp, rinfo); read_unlock_irqrestore(&sfp->rq_list_lock, iflags); result = __copy_to_user(p, rinfo, SZ_SG_REQ_INFO * SG_MAX_QUEUE); -- cgit v1.2.3 From 72896ca30a7f6ceb5238714d5761e4ad4521ccc5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 15 Sep 2017 14:05:16 +0200 Subject: scsi: sg: fixup infoleak when using SG_GET_REQUEST_TABLE commit 3e0097499839e0fe3af380410eababe5a47c4cf9 upstream. When calling SG_GET_REQUEST_TABLE ioctl only a half-filled table is returned; the remaining part will then contain stale kernel memory information. This patch zeroes out the entire table to avoid this issue. Signed-off-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Eric Dumazet Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 525937f9b536..39e8b5dc23fa 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -850,7 +850,6 @@ sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) list_for_each_entry(srp, &sfp->rq_list, entry) { if (val > SG_MAX_QUEUE) break; - memset(&rinfo[val], 0, SZ_SG_REQ_INFO); rinfo[val].req_state = srp->done + 1; rinfo[val].problem = srp->header.masked_status & @@ -1067,8 +1066,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) else { sg_req_info_t *rinfo; - rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, - GFP_KERNEL); + rinfo = kzalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, + GFP_KERNEL); if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); -- cgit v1.2.3 From d8663aa2778965c75b5e75c7948b44f5de601a88 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Aug 2017 16:30:35 +0300 Subject: scsi: qla2xxx: Fix an integer overflow in sysfs code commit e6f77540c067b48dee10f1e33678415bfcc89017 upstream. The value of "size" comes from the user. When we add "start + size" it could lead to an integer overflow bug. It means we vmalloc() a lot more memory than we had intended. I believe that on 64 bit systems vmalloc() can succeed even if we ask it to allocate huge 4GB buffers. So we would get memory corruption and likely a crash when we call ha->isp_ops->write_optrom() and ->read_optrom(). Only root can trigger this bug. Link: https://bugzilla.kernel.org/show_bug.cgi?id=194061 Fixes: b7cc176c9eb3 ("[SCSI] qla2xxx: Allow region-based flash-part accesses.") Reported-by: shqking Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 1ed85dfc008d..ac12ee844bfc 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -404,6 +404,8 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, return -EINVAL; if (start > ha->optrom_size) return -EINVAL; + if (size > ha->optrom_size - start) + size = ha->optrom_size - start; mutex_lock(&ha->optrom_mutex); switch (val) { @@ -429,8 +431,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, } ha->optrom_region_start = start; - ha->optrom_region_size = start + size > ha->optrom_size ? - ha->optrom_size - start : size; + ha->optrom_region_size = start + size; ha->optrom_state = QLA_SREADING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); @@ -503,8 +504,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, } ha->optrom_region_start = start; - ha->optrom_region_size = start + size > ha->optrom_size ? - ha->optrom_size - start : size; + ha->optrom_region_size = start + size; ha->optrom_state = QLA_SWRITING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); -- cgit v1.2.3 From 753154fcfefe0d8be9c68096e7709326b9ede349 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 1 Sep 2017 12:04:09 -0400 Subject: ftrace: Fix selftest goto location on error commit 46320a6acc4fb58f04bcf78c4c942cc43b20f986 upstream. In the second iteration of trace_selftest_ops(), the error goto label is wrong in the case where trace_selftest_test_global_cnt is off. In the case of error, it leaks the dynamic ops that was allocated. Fixes: 95950c2e ("ftrace: Add self-tests for multiple function trace users") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index b0f86ea77881..ca70d11b8aa7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -272,7 +272,7 @@ static int trace_selftest_ops(struct trace_array *tr, int cnt) goto out_free; if (cnt > 1) { if (trace_selftest_test_global_cnt == 0) - goto out; + goto out_free; } if (trace_selftest_test_dyn_cnt == 0) goto out_free; -- cgit v1.2.3 From d28e96be7c6a2a4310c83c13054475836f6ffbae Mon Sep 17 00:00:00 2001 From: Baohong Liu Date: Tue, 5 Sep 2017 16:57:19 -0500 Subject: tracing: Apply trace_clock changes to instance max buffer commit 170b3b1050e28d1ba0700e262f0899ffa4fccc52 upstream. Currently trace_clock timestamps are applied to both regular and max buffers only for global trace. For instance trace, trace_clock timestamps are applied only to regular buffer. But, regular and max buffers can be swapped, for example, following a snapshot. So, for instance trace, bad timestamps can be seen following a snapshot. Let's apply trace_clock timestamps to instance max buffer as well. Link: http://lkml.kernel.org/r/ebdb168d0be042dcdf51f81e696b17fabe3609c1.1504642143.git.tom.zanussi@linux.intel.com Fixes: 277ba0446 ("tracing: Add interface to allow multiple trace buffers") Signed-off-by: Baohong Liu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d59ebd9d21df..4743066010c4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5237,7 +5237,7 @@ static int tracing_set_clock(struct trace_array *tr, const char *clockstr) tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) + if (tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); tracing_reset_online_cpus(&tr->max_buffer); #endif -- cgit v1.2.3 From 81306fc3dbb53b11f9c42d31403df3655d50f935 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 1 Sep 2017 17:00:23 +0100 Subject: ARC: Re-enable MMU upon Machine Check exception commit 1ee55a8f7f6b7ca4c0c59e0b4b4e3584a085c2d3 upstream. I recently came upon a scenario where I would get a double fault machine check exception tiriggered by a kernel module. However the ensuing crash stacktrace (ksym lookup) was not working correctly. Turns out that machine check auto-disables MMU while modules are allocated in kernel vaddr spapce. This patch re-enables the MMU before start printing the stacktrace making stacktracing of modules work upon a fatal exception. Signed-off-by: Jose Abreu Reviewed-by: Alexey Brodkin Signed-off-by: Vineet Gupta [vgupta: moved code into low level handler to avoid in 2 places] Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/entry.S | 6 ++++++ arch/arc/mm/tlb.c | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 2efb0625331d..db1eee5fe502 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -104,6 +104,12 @@ ENTRY(EV_MachineCheck) lr r0, [efa] mov r1, sp + ; hardware auto-disables MMU, re-enable it to allow kernel vaddr + ; access for say stack unwinding of modules for crash dumps + lr r3, [ARC_REG_PID] + or r3, r3, MMU_ENABLE + sr r3, [ARC_REG_PID] + lsr r3, r2, 8 bmsk r3, r3, 7 brne r3, ECR_C_MCHK_DUP_TLB, 1f diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index daf2bf52b984..97e9582dcf99 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -885,9 +885,6 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, local_irq_save(flags); - /* re-enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID)); - /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { -- cgit v1.2.3 From 7498bd6058405ba17df33be06046fefdb419fe00 Mon Sep 17 00:00:00 2001 From: Aleksandr Bezzubikov Date: Tue, 18 Jul 2017 17:12:25 +0300 Subject: PCI: shpchp: Enable bridge bus mastering if MSI is enabled commit 48b79a14505349a29b3e20f03619ada9b33c4b17 upstream. An SHPC may generate MSIs to notify software about slot or controller events (SHPC spec r1.0, sec 4.7). A PCI device can only generate an MSI if it has bus mastering enabled. Enable bus mastering if the bridge contains an SHPC that uses MSI for event notifications. Signed-off-by: Aleksandr Bezzubikov [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Reviewed-by: Marcel Apfelbaum Acked-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/shpchp_hpc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index 7d223e9080ef..77dddee2753a 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -1062,6 +1062,8 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev) if (rc) { ctrl_info(ctrl, "Can't get msi for the hotplug controller\n"); ctrl_info(ctrl, "Use INTx for the hotplug controller\n"); + } else { + pci_set_master(pdev); } rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED, -- cgit v1.2.3 From 04affe4e117169e75c4ff1f12dd30d74c9a629fc Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Wed, 2 Aug 2017 23:42:17 -0400 Subject: media: v4l2-compat-ioctl32: Fix timespec conversion commit 9c7ba1d7634cef490b85bc64c4091ff004821bfd upstream. Certain syscalls like recvmmsg support 64 bit timespec values for the X32 ABI. The helper function compat_put_timespec converts a timespec value to a 32 bit or 64 bit value depending on what ABI is used. The v4l2 compat layer, however, is not designed to support 64 bit timespec values and always uses 32 bit values. Hence, compat_put_timespec must not be used. Without this patch, user space will be provided with bad timestamp values from the VIDIOC_DQEVENT ioctl. Also, fields of the struct v4l2_event32 that come immediately after timestamp get overwritten, namely the field named id. Fixes: 81993e81a994 ("compat: Get rid of (get|put)_compat_time(val|spec)") Cc: H. Peter Anvin Cc: Laurent Pinchart Cc: Tiffany Lin Cc: Ricardo Ribalda Delgado Cc: Sakari Ailus Signed-off-by: Daniel Mentz Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 109f687d1cbd..4379b949bb93 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -773,7 +773,8 @@ static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *u copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || put_user(kp->pending, &up->pending) || put_user(kp->sequence, &up->sequence) || - compat_put_timespec(&kp->timestamp, &up->timestamp) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || put_user(kp->id, &up->id) || copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) return -EFAULT; -- cgit v1.2.3 From 4931578fbeb525e717a7aa96f83f4d85cf48d0b2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 8 Aug 2017 08:56:21 -0400 Subject: media: uvcvideo: Prevent heap overflow when accessing mapped controls commit 7e09f7d5c790278ab98e5f2c22307ebe8ad6e8ba upstream. The size of uvc_control_mapping is user controlled leading to a potential heap overflow in the uvc driver. This adds a check to verify the user provided size fits within the bounds of the defined buffer size. Originally-from: Richard Simmons Signed-off-by: Guenter Roeck Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_ctrl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 3e59b288b8a8..618e4e2b4207 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2001,6 +2001,13 @@ int uvc_ctrl_add_mapping(struct uvc_video_chain *chain, goto done; } + /* Validate the user-provided bit-size and offset */ + if (mapping->size > 32 || + mapping->offset + mapping->size > ctrl->info.size * 8) { + ret = -EINVAL; + goto done; + } + list_for_each_entry(map, &ctrl->info.mappings, list) { if (mapping->id == map->id) { uvc_trace(UVC_TRACE_CONTROL, "Can't add mapping '%s', " -- cgit v1.2.3 From 5025da3b532bc5a16ca89670353aa1c89f33be29 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Thu, 7 Sep 2017 01:28:53 +0800 Subject: bcache: initialize dirty stripes in flash_dev_run() commit 175206cf9ab63161dec74d9cd7f9992e062491f5 upstream. bcache uses a Proportion-Differentiation Controller algorithm to control writeback rate to cached devices. In the PD controller algorithm, dirty stripes of thin flash device should not be counted in, because flash only volumes never write back dirty data. Currently dirty stripe counter for thin flash device is not initialized when the thin flash device starts. Which means the following calculation in PD controller will reference an undefined dirty stripes number, and all cached devices attached to the same cache set where the thin flash device lies on may have an inaccurate writeback rate. This patch calles bch_sectors_dirty_init() in flash_dev_run(), to correctly initialize dirty stripe counter when the thin flash device starts to run. This patch also does following parameter data type change, -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); to call this function conveniently in flash_dev_run(). (Commit log is composed by Coly Li) Signed-off-by: Tang Junhui Reviewed-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 3 ++- drivers/md/bcache/writeback.c | 8 ++++---- drivers/md/bcache/writeback.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7b5880b8874c..5bf9da324b78 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1023,7 +1023,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(dc); + bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1227,6 +1227,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err; bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); add_disk(d->disk); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index b9346cd9cda1..5f5da2eeec34 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -488,17 +488,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 073a042aed24..357658b64794 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -85,7 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); void bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); -- cgit v1.2.3 From 093457f2bd329a2fdb253a68c4fffebb0e3d7786 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 14:25:51 +0800 Subject: bcache: Fix leak of bdev reference commit 4b758df21ee7081ab41448d21d60367efaa625b3 upstream. If blkdev_get_by_path() in register_bcache() fails, we try to lookup the block device using lookup_bdev() to detect which situation we are in to properly report error. However we never drop the reference returned to us from lookup_bdev(). Fix that. Signed-off-by: Jan Kara Acked-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 5bf9da324b78..46f4c7ee5560 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1960,6 +1960,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (!IS_ERR(bdev)) + bdput(bdev); if (attr == &ksysfs_register_quiet) goto out; } -- cgit v1.2.3 From 0471f58e18e60dcdbd89f882dc0fa6370b94cd48 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:53 +0800 Subject: bcache: do not subtract sectors_to_gc for bypassed IO commit 69daf03adef5f7bc13e0ac86b4b8007df1767aab upstream. Since bypassed IOs use no bucket, so do not subtract sectors_to_gc to trigger gc thread. Signed-off-by: tang.junhui Acked-by: Coly Li Reviewed-by: Eric Wheeler Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 2410df1c2a05..6c4c7caea693 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) - wake_up_gc(op->c); - if (op->bypass) return bch_data_invalidate(cl); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) + wake_up_gc(op->c); + /* * Journal writes are marked REQ_FLUSH; if the original write was a * flush, it'll wait on the journal write. -- cgit v1.2.3 From d9c6a28a6a1cf0e0854c8175386898288f7f3ddf Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:56 +0800 Subject: bcache: correct cache_dirty_target in __update_writeback_rate() commit a8394090a9129b40f9d90dcb7f4a49d60c727ca6 upstream. __update_write_rate() uses a Proportion-Differentiation Controller algorithm to control writeback rate. A dirty target number is used in this PD controller to control writeback rate. A larger target number will make the writeback rate smaller, on the versus, a smaller target number will make the writeback rate larger. bcache uses the following steps to calculate the target number, 1) cache_sectors = all-buckets-of-cache-set * buckets-size 2) cache_dirty_target = cache_sectors * cached-device-writeback_percent 3) target = cache_dirty_target * (sectors-of-cached-device/sectors-of-all-cached-devices-of-this-cache-set) The calculation at step 1) for cache_sectors is incorrect, which does not consider dirty blocks occupied by flash only volume. A flash only volume can be took as a bcache device without cached device. All data sectors allocated for it are persistent on cache device and marked dirty, they are not touched by bcache writeback and garbage collection code. So data blocks of flash only volume should be ignore when calculating cache_sectors of cache set. Current code does not subtract dirty sectors of flash only volume, which results a larger target number from the above 3 steps. And in sequence the cache device's writeback rate is smaller then a correct value, writeback speed is slower on all cached devices. This patch fixes the incorrect slower writeback rate by subtracting dirty sectors of flash only volumes in __update_writeback_rate(). (Commit log composed by Coly Li to pass checkpatch.pl checking) Signed-off-by: Tang Junhui Reviewed-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.c | 3 ++- drivers/md/bcache/writeback.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 5f5da2eeec34..8513c5e43458 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -21,7 +21,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 357658b64794..daec4fd782ea 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) +{ + uint64_t i, ret = 0; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + + if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) + continue; + ret += bcache_dev_sectors_dirty(d); + } + + mutex_unlock(&bch_register_lock); + + return ret; +} + static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { -- cgit v1.2.3 From a6c5e7a0cd0184d3cf4fe29b598931329b237569 Mon Sep 17 00:00:00 2001 From: Tony Asleson Date: Wed, 6 Sep 2017 14:25:57 +0800 Subject: bcache: Correct return value for sysfs attach errors commit 77fa100f27475d08a569b9d51c17722130f089e7 upstream. If you encounter any errors in bch_cached_dev_attach it will return a negative error code. The variable 'v' which stores the result is unsigned, thus user space sees a very large value returned for bytes written which can cause incorrect user space behavior. Utilize 1 signed variable to use throughout the function to preserve error return capability. Signed-off-by: Tony Asleson Acked-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3ff57d61dde..4fbb5532f24c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -191,7 +191,7 @@ STORE(__cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; + ssize_t v = size; struct cache_set *c; struct kobj_uevent_env *env; @@ -226,7 +226,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); + v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; -- cgit v1.2.3 From f522051a84e566b5552b3ba6127184d9cba54120 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:59 +0800 Subject: bcache: fix for gc and write-back race commit 9baf30972b5568d8b5bc8b3c46a6ec5b58100463 upstream. gc and write-back get raced (see the email "bcache get stucked" I sended before): gc thread write-back thread | |bch_writeback_thread() |bch_gc_thread() | | |==>read_dirty() |==>bch_btree_gc() | |==>btree_root() //get btree root | | //node write locker | |==>bch_btree_gc_root() | | |==>read_dirty_submit() | |==>write_dirty() | |==>continue_at(cl, | | write_dirty_finish, | | system_wq); | |==>write_dirty_finish()//excute | | //in system_wq | |==>bch_btree_insert() | |==>bch_btree_map_leaf_nodes() | |==>__bch_btree_map_nodes() | |==>btree_root //try to get btree | | //root node read | | //lock | |-----stuck here |==>bch_btree_set_root() |==>bch_journal_meta() |==>bch_journal() |==>journal_try_write() |==>journal_write_unlocked() //journal_full(&c->journal) | //condition satisfied |==>continue_at(cl, journal_write, system_wq); //try to excute | //journal_write in system_wq | //but work queue is excuting | //write_dirty_finish() |==>closure_sync(); //wait journal_write execute | //over and wake up gc, |-------------stuck here |==>release root node write locker This patch alloc a separate work-queue for write-back thread to avoid such race. (Commit log re-organized by Coly Li to pass checkpatch.pl checking) Signed-off-by: Tang Junhui Acked-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 2 ++ drivers/md/bcache/writeback.c | 9 +++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index c3ea03c9a1a8..02619cabda8b 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -333,6 +333,7 @@ struct cached_dev { /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; + struct workqueue_struct *writeback_write_wq; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 46f4c7ee5560..c5ceea9222ff 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1056,6 +1056,8 @@ static void cached_dev_free(struct closure *cl) cancel_delayed_work_sync(&dc->writeback_rate_update); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); + if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); mutex_lock(&bch_register_lock); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 8513c5e43458..bbb1dc9e1639 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -191,7 +191,7 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -211,7 +211,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -523,6 +523,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) -- cgit v1.2.3 From a069d0a43de42e009ec09e76855a1780fbf20938 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Wed, 6 Sep 2017 14:26:02 +0800 Subject: bcache: fix bch_hprint crash and improve output commit 9276717b9e297a62d1151a43d1cd286213f68eb7 upstream. Most importantly, solve a crash where %llu was used to format signed numbers. This would cause a buffer overflow when reading sysfs writeback_rate_debug, as only 20 bytes were allocated for this and %llu writes 20 characters plus a null. Always use the units mechanism rather than having different output paths for simplicity. Also, correct problems with display output where 1.10 was a larger number than 1.09, by multiplying by 10 and then dividing by 1024 instead of dividing by 100. (Remainders of >= 1000 would print as .10). Minor changes: Always display the decimal point instead of trying to omit it based on number of digits shown. Decide what units to use based on 1000 as a threshold, not 1024 (in other words, always print at most 3 digits before the decimal point). Signed-off-by: Michael Lyle Reported-by: Dmitry Yu Okunev Acked-by: Kent Overstreet Reviewed-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/util.c | 50 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index db3ae4c2b223..6c18e3ec3e48 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -73,24 +73,44 @@ STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +/** + * bch_hprint() - formats @v to human readable string for sysfs. + * + * @v - signed 64 bit integer + * @buf - the (at least 8 byte) buffer to format the result into. + * + * Returns the number of bytes used by format. + */ ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; - char dec[4] = ""; - int u, t = 0; - - for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); - v >>= 10; - } - - if (!u) - return sprintf(buf, "%llu", v); - - if (v < 100 && v > -100) - snprintf(dec, sizeof(dec), ".%i", t / 100); - - return sprintf(buf, "%lli%s%c", v, dec, units[u]); + int u = 0, t; + + uint64_t q; + + if (v < 0) + q = -v; + else + q = v; + + /* For as long as the number is more than 3 digits, but at least + * once, shift right / divide by 1024. Keep the remainder for + * a digit after the decimal point. + */ + do { + u++; + + t = q & ~(~0 << 10); + q >>= 10; + } while (q >= 1000); + + if (v < 0) + /* '-', up to 3 digits, '.', 1 digit, 1 character, null; + * yields 8 bytes. + */ + return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]); + else + return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], -- cgit v1.2.3 From ed1bf4397d2219d4b9ec2d5517416ba102186650 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 1 Sep 2017 12:18:28 -0400 Subject: ftrace: Fix memleak when unregistering dynamic ops when tracing disabled commit edb096e00724f02db5f6ec7900f3bbd465c6c76f upstream. If function tracing is disabled by the user via the function-trace option or the proc sysctl file, and a ftrace_ops that was allocated on the heap is unregistered, then the shutdown code exits out without doing the proper clean up. This was found via kmemleak and running the ftrace selftests, as one of the tests unregisters with function tracing disabled. # cat kmemleak unreferenced object 0xffffffffa0020000 (size 4096): comm "swapper/0", pid 1, jiffies 4294668889 (age 569.209s) hex dump (first 32 bytes): 55 ff 74 24 10 55 48 89 e5 ff 74 24 18 55 48 89 U.t$.UH...t$.UH. e5 48 81 ec a8 00 00 00 48 89 44 24 50 48 89 4c .H......H.D$PH.L backtrace: [] kmemleak_vmalloc+0x85/0xf0 [] __vmalloc_node_range+0x281/0x3e0 [] module_alloc+0x4f/0x90 [] arch_ftrace_update_trampoline+0x160/0x420 [] ftrace_startup+0xe7/0x300 [] register_ftrace_function+0x72/0x90 [] trace_selftest_ops+0x204/0x397 [] trace_selftest_startup_function+0x394/0x624 [] run_tracer_selftest+0x15c/0x1d7 [] init_trace_selftests+0x75/0x192 [] do_one_initcall+0x90/0x1e2 [] kernel_init_freeable+0x350/0x3fe [] kernel_init+0x13/0x122 [] ret_from_fork+0x2a/0x40 [] 0xffffffffffffffff Fixes: 12cce594fa ("ftrace/x86: Allow !CONFIG_PREEMPT dynamic ops to use allocated trampolines") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eba904bae48c..38d73a6e2857 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2667,13 +2667,14 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (!command || !ftrace_enabled) { /* - * If these are control ops, they still need their - * per_cpu field freed. Since, function tracing is + * If these are dynamic or control ops, they still + * need their data freed. Since, function tracing is * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & FTRACE_OPS_FL_CONTROL) - control_ops_free(ops); + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) + goto free_ops; + return 0; } @@ -2728,6 +2729,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { schedule_on_each_cpu(ftrace_sync); + free_ops: arch_ftrace_trampoline_free(ops); if (ops->flags & FTRACE_OPS_FL_CONTROL) -- cgit v1.2.3 From 10def3a6779924f7bb130200b8b940caf2914111 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Sep 2017 11:00:37 +0200 Subject: Linux 4.4.89 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 788d90a0051b..7e4c46b375b3 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 88 +SUBLEVEL = 89 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3