From c21fec188ab43661e5bf08d9d377ecafc7a8abf9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Apr 2016 23:01:30 -0400 Subject: decnet: Do not build routes to devices without decnet private data. [ Upstream commit a36a0d4008488fa545c74445d69eaf56377d5d4e ] In particular, make sure we check for decnet private presence for loopback devices. Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/decnet/dn_route.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 607a14f20d88..b1dc096d22f8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1034,10 +1034,13 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); + err = -EINVAL; dev_out = init_net.loopback_dev; + if (!dev_out->dn_ptr) + goto out; + err = -EADDRNOTAVAIL; dev_hold(dev_out); if (!fld.daddr) { fld.daddr = @@ -1110,6 +1113,8 @@ source_ok: if (dev_out == NULL) goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); + if (!dn_db) + goto e_inval; /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); @@ -1151,6 +1156,8 @@ select_source: dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); + if (!dev_out->dn_ptr) + goto e_inval; fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); -- cgit v1.2.3 From d0bfda58b414487f06ac0911674a9808f71d3ab1 Mon Sep 17 00:00:00 2001 From: Chris Friesen Date: Fri, 8 Apr 2016 15:21:30 -0600 Subject: route: do not cache fib route info on local routes with oif [ Upstream commit d6d5e999e5df67f8ec20b6be45e2229455ee3699 ] For local routes that require a particular output interface we do not want to cache the result. Caching the result causes incorrect behaviour when there are multiple source addresses on the interface. The end result being that if the intended recipient is waiting on that interface for the packet he won't receive it because it will be delivered on the loopback interface and the IP_PKTINFO ipi_ifindex will be set to the loopback interface as well. This can be tested by running a program such as "dhcp_release" which attempts to inject a packet on a particular interface so that it is received by another program on the same board. The receiving process should see an IP_PKTINFO ipi_ifndex value of the source interface (e.g., eth1) instead of the loopback interface (e.g., lo). The packet will still appear on the loopback interface in tcpdump but the important aspect is that the CMSG info is correct. Sample dhcp_release command line: dhcp_release eth1 192.168.204.222 02:11:33:22:44:66 Signed-off-by: Allain Legacy Signed off-by: Chris Friesen Reviewed-by: Julian Anastasov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 02c62299d717..b050cf980a57 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2045,6 +2045,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ if (fi && res->prefixlen < 4) fi = NULL; + } else if ((type == RTN_LOCAL) && (orig_oif != 0) && + (orig_oif != dev_out->ifindex)) { + /* For local routes that require a particular output interface + * we do not want to cache the result. Caching the result + * causes incorrect behaviour when there are multiple source + * addresses on the interface, the end result being that if the + * intended recipient is waiting on that interface for the + * packet he won't receive it because it will be delivered on + * the loopback interface and the IP_PKTINFO ipi_ifindex will + * be set to the loopback interface as well. + */ + fi = NULL; } fnhe = NULL; -- cgit v1.2.3 From a416c9483c719cc9160b4b9af09085dfe461383f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 10 Apr 2016 12:52:28 +0200 Subject: packet: fix heap info leak in PACKET_DIAG_MCLIST sock_diag interface [ Upstream commit 309cf37fe2a781279b7675d4bb7173198e532867 ] Because we miss to wipe the remainder of i->addr[] in packet_mc_add(), pdiag_put_mclist() leaks uninitialized heap bytes via the PACKET_DIAG_MCLIST netlink attribute. Fix this by explicitly memset(0)ing the remaining bytes in i->addr[]. Fixes: eea68e2f1a00 ("packet: Report socket mclist info via diag module") Signed-off-by: Mathias Krause Cc: Eric W. Biederman Cc: Pavel Emelyanov Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index da1ae0e13cb5..9cc7b512b472 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3436,6 +3436,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); + memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; -- cgit v1.2.3 From 5c136901ef17cd42d56b5c02135a0c67fb58424f Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Tue, 12 Apr 2016 08:45:52 +0200 Subject: net: sched: do not requeue a NULL skb [ Upstream commit 3dcd493fbebfd631913df6e2773cc295d3bf7d22 ] A failure in validate_xmit_skb_list() triggered an unconditional call to dev_requeue_skb with skb=NULL. This slowly grows the queue discipline's qlen count until all traffic through the queue stops. We take the optimistic approach and continue running the queue after a failure since it is unknown if later packets also will fail in the validate path. Fixes: 55a93b3ea780 ("qdisc: validate skb without holding lock") Signed-off-by: Lars Persson Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_generic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 16bc83b2842a..aa4725038f94 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, if (validate) skb = validate_xmit_skb_list(skb, dev); - if (skb) { + if (likely(skb)) { HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); + } else { + spin_lock(root_lock); + return qdisc_qlen(q); } spin_lock(root_lock); -- cgit v1.2.3 From 8427d5547d0b63beb70d3858127942f828400ad2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 12 Apr 2016 10:26:19 -0700 Subject: bpf/verifier: reject invalid LD_ABS | BPF_DW instruction [ Upstream commit d82bccc69041a51f7b7b9b4a36db0772f4cdba21 ] verifier must check for reserved size bits in instruction opcode and reject BPF_LD | BPF_ABS | BPF_DW and BPF_LD | BPF_IND | BPF_DW instructions, otherwise interpreter will WARN_RATELIMIT on them during execution. Fixes: ddd872bc3098 ("bpf: verifier: add checks for BPF_ABS | BPF_IND instructions") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2e7f7ab739e4..c21cb146086c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1348,6 +1348,7 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) } if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || + BPF_SIZE(insn->code) == BPF_DW || (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { verbose("BPF_LD_ABS uses reserved fields\n"); return -EINVAL; -- cgit v1.2.3 From 1d794379798b33797d9afe4a477bfd89ce399184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 12 Apr 2016 16:11:12 +0200 Subject: cdc_mbim: apply "NDP to end" quirk to all Huawei devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c5b5343cfbc9f46af65033fa4f407d7b7d98371d ] We now have a positive report of another Huawei device needing this quirk: The ME906s-158 (12d1:15c1). This is an m.2 form factor modem with no obvious relationship to the E3372 (12d1:157d) we already have a quirk entry for. This is reason enough to believe the quirk might be necessary for any number of current and future Huawei devices. Applying the quirk to all Huawei devices, since it is crucial to any device affected by the firmware bug, while the impact on non-affected devices is negligible. The quirk can if necessary be disabled per-device by writing N to /sys/class/net//cdc_ncm/ndp_to_end Reported-by: Andreas Fett Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_mbim.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index bdd83d95ec0a..96a5028621c8 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -617,8 +617,13 @@ static const struct usb_device_id mbim_devs[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x0bdb, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info, }, - /* Huawei E3372 fails unless NDP comes after the IP packets */ - { USB_DEVICE_AND_INTERFACE_INFO(0x12d1, 0x157d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + + /* Some Huawei devices, ME906s-158 (12d1:15c1) and E3372 + * (12d1:157d), are known to fail unless the NDP is placed + * after the IP packets. Applying the quirk to all Huawei + * devices is broader than necessary, but harmless. + */ + { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end, }, /* default entry */ -- cgit v1.2.3 From 79fdabe8706bd240c083a848f64081a4bc46d7af Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 20 Feb 2016 00:29:30 +0100 Subject: net: use skb_postpush_rcsum instead of own implementations [ Upstream commit 6b83d28a55a891a9d70fc61ccb1c138e47dcbe74 ] Replace individual implementations with the recently introduced skb_postpush_rcsum() helper. Signed-off-by: Daniel Borkmann Acked-by: Tom Herbert Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 4 +--- net/ipv6/reassembly.c | 6 ++---- net/openvswitch/actions.c | 8 +++----- net/openvswitch/vport-netdev.c | 2 +- net/openvswitch/vport.h | 7 ------- 5 files changed, 7 insertions(+), 20 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8616d1147c93..78abe110c7a5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4433,9 +4433,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) skb->mac_len += VLAN_HLEN; __skb_pull(skb, offset); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(skb->data - + (2 * ETH_ALEN), VLAN_HLEN, 0)); + skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 45f5ae51de65..a234552a7e3d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + skb_postpush_rcsum(head, skb_network_header(head), + skb_network_header_len(head)); rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c88d0f2d3e01..7aef0c8bd2c5 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, new_mpls_lse = (__be32 *)skb_mpls_header(skb); *new_mpls_lse = mpls->mpls_lse; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, - MPLS_HLEN, 0)); + skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; @@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, mask->eth_dst); - ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); @@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk /* Reconstruct the MAC header. */ skb_push(skb, data->l2_len); memcpy(skb->data, &data->l2_data, data->l2_len); - ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); ovs_vport_send(vport, skb); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6b0190b987ec..76fcaf1fd2a9 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); - ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 8ea3a96980ac..6e2b62f9d595 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -184,13 +184,6 @@ static inline struct vport *vport_from_priv(void *priv) int ovs_vport_receive(struct vport *, struct sk_buff *, const struct ip_tunnel_info *); -static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); -} - static inline const char *ovs_vport_name(struct vport *vport) { return vport->dev->name; -- cgit v1.2.3 From a66ce519a3e9aca63739da0d716555588df5cc48 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 16 Apr 2016 02:27:58 +0200 Subject: vlan: pull on __vlan_insert_tag error path and fix csum correction [ Upstream commit 9241e2df4fbc648a92ea0752918e05c26255649e ] When __vlan_insert_tag() fails from skb_vlan_push() path due to the skb_cow_head(), we need to undo the __skb_push() in the error path as well that was done earlier to move skb->data pointer to mac header. Moreover, I noticed that when in the non-error path the __skb_pull() is done and the original offset to mac header was non-zero, we fixup from a wrong skb->data offset in the checksum complete processing. So the skb_postpush_rcsum() really needs to be done before __skb_pull() where skb->data still points to the mac header start and thus operates under the same conditions as in __vlan_insert_tag(). Fixes: 93515d53b133 ("net: move vlan pop/push functions into common code") Signed-off-by: Daniel Borkmann Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 78abe110c7a5..9835d9a8a7a4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4427,13 +4427,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) + if (err) { + __skb_pull(skb, offset); return err; + } + skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; - __skb_pull(skb, offset); skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); + __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; -- cgit v1.2.3 From b5c9a73c501e8aed86dd578309813c7818ca248c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 20 Apr 2016 23:23:08 +0100 Subject: atl2: Disable unimplemented scatter/gather feature [ Upstream commit f43bfaeddc79effbf3d0fcb53ca477cca66f3db8 ] atl2 includes NETIF_F_SG in hw_features even though it has no support for non-linear skbs. This bug was originally harmless since the driver does not claim to implement checksum offload and that used to be a requirement for SG. Now that SG and checksum offload are independent features, if you explicitly enable SG *and* use one of the rare protocols that can use SG without checkusm offload, this potentially leaks sensitive information (before you notice that it just isn't working). Therefore this obscure bug has been designated CVE-2016-2117. Reported-by: Justin Yackoski Signed-off-by: Ben Hutchings Fixes: ec5f06156423 ("net: Kill link between CSUM and SG features.") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/atlx/atl2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 8f76f4558a88..2ff465848b65 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1412,7 +1412,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -EIO; - netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX; netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); /* Init PHY as early as possible due to power saving issue */ -- cgit v1.2.3 From 2a33f756ae1986a67d145f050bf4350e5ee17746 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 21 Apr 2016 11:49:15 +1000 Subject: openvswitch: use flow protocol when recalculating ipv6 checksums [ Upstream commit b4f70527f052b0c00be4d7cac562baa75b212df5 ] When using masked actions the ipv6_proto field of an action to set IPv6 fields may be zero rather than the prevailing protocol which will result in skipping checksum recalculation. This patch resolves the problem by relying on the protocol in the flow key rather than that in the set field action. Fixes: 83d2b9ba1abc ("net: openvswitch: Support masked set actions.") Cc: Jarno Rajahalme Signed-off-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/actions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 7aef0c8bd2c5..7cb8184ac165 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -461,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { - set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked, true); memcpy(&flow_key->ipv6.addr.src, masked, sizeof(flow_key->ipv6.addr.src)); @@ -483,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, NULL, &flags) != NEXTHDR_ROUTING); - set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked, recalc_csum); memcpy(&flow_key->ipv6.addr.dst, masked, sizeof(flow_key->ipv6.addr.dst)); -- cgit v1.2.3 From 3032b09874a4709f8529d620dd270e14a56eb61d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:03 +0300 Subject: net/mlx5e: Device's mtu field is u16 and not int [ Upstream commit 046339eaab26804f52f6604877f5674f70815b26 ] For set/query MTU port firmware commands the MTU field is 16 bits, here I changed all the "int mtu" parameters of the functions wrapping those firmware commands to be u16. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/port.c | 10 +++++----- include/linux/mlx5/driver.h | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 721d63f5b461..fd17443aeacd 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -405,8 +405,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *rep; - int max_mtu; - int oper_mtu; + u16 max_mtu; + u16 oper_mtu; int err; u8 ib_link_width_oper; u8 vl_hw_cap; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1203d892e842..d6a7213649f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1372,7 +1372,7 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - int hw_mtu; + u16 hw_mtu; int err; err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1); @@ -1896,7 +1896,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; bool was_opened; - int max_mtu; + u16 max_mtu; int err = 0; mlx5_query_port_max_mtu(mdev, &max_mtu, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a87e773e93f3..53a793bc2e3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -246,8 +246,8 @@ int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); -static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, - int *max_mtu, int *oper_mtu, u8 port) +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, + u16 *max_mtu, u16 *oper_mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -267,7 +267,7 @@ static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); } -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port) +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -282,14 +282,14 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port) } EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port) { mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); } EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port) { mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af3efd9157f0..412aa988c6ad 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -792,9 +792,9 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, -- cgit v1.2.3 From da465bd9391efca3649ab34c3575a2b27ac384e3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:04 +0300 Subject: net/mlx5e: Fix minimum MTU [ Upstream commit d8edd2469ace550db707798180d1c84d81f93bca ] Minimum MTU that can be set in Connectx4 device is 68. This fixes the case where a user wants to set invalid MTU, the driver will fail to satisfy this request and the interface will stay down. It is better to report an error and continue working with old mtu. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d6a7213649f6..cbd17e25beeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1891,22 +1891,27 @@ static int mlx5e_set_features(struct net_device *netdev, return err; } +#define MXL5_HW_MIN_MTU 64 +#define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN) + static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; bool was_opened; u16 max_mtu; + u16 min_mtu; int err = 0; mlx5_query_port_max_mtu(mdev, &max_mtu, 1); max_mtu = MLX5E_HW2SW_MTU(max_mtu); + min_mtu = MLX5E_HW2SW_MTU(MXL5E_MIN_MTU); - if (new_mtu > max_mtu) { + if (new_mtu > max_mtu || new_mtu < min_mtu) { netdev_err(netdev, - "%s: Bad MTU (%d) > (%d) Max\n", - __func__, new_mtu, max_mtu); + "%s: Bad MTU (%d), valid range is: [%d..%d]\n", + __func__, new_mtu, min_mtu, max_mtu); return -EINVAL; } -- cgit v1.2.3 From 0633185047d91174ee2cbc8f1bd59e75e3c45ccd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Apr 2016 22:23:31 +0200 Subject: ipv4/fib: don't warn when primary address is missing if in_dev is dead [ Upstream commit 391a20333b8393ef2e13014e6e59d192c5594471 ] After commit fbd40ea0180a ("ipv4: Don't do expensive useless work during inetdev destroy.") when deleting an interface, fib_del_ifaddr() can be executed without any primary address present on the dead interface. The above is safe, but triggers some "bug: prim == NULL" warnings. This commit avoids warning if the in_dev is dead Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8a9246deccfe..63566ec54794 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -904,7 +904,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (!prim) { - pr_warn("%s: bug: prim == NULL\n", __func__); + /* if the device has been deleted, we don't perform + * address promotion + */ + if (!in_dev->dead) + pr_warn("%s: bug: prim == NULL\n", __func__); return; } if (iprim && iprim != prim) { -- cgit v1.2.3 From 828255b591768b4e5a762df21a0133065ddcb44b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Apr 2016 11:35:46 -0700 Subject: net/mlx4_en: fix spurious timestamping callbacks [ Upstream commit fc96256c906362e845d848d0f6a6354450059e81 ] When multiple skb are TX-completed in a row, we might incorrectly keep a timestamp of a prior skb and cause extra work. Fixes: ec693d47010e8 ("net/mlx4_en: Add HW timestamping (TS) support") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4421bf5463f6..e4019a803a9c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -400,7 +400,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; - u64 timestamp = 0; int done = 0; int budget = priv->tx_work_limit; u32 last_nr_txbb; @@ -440,9 +439,12 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { + u64 timestamp = 0; + txbbs_skipped += last_nr_txbb; ring_index = (ring_index + last_nr_txbb) & size_mask; - if (ring->tx_info[ring_index].ts_requested) + + if (unlikely(ring->tx_info[ring_index].ts_requested)) timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ -- cgit v1.2.3 From 608d2c3c7a046c222cae2e857cf648a9f89e772b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 26 Apr 2016 22:26:26 +0200 Subject: bpf: fix double-fdput in replace_map_fd_with_map_ptr() [ Upstream commit 8358b02bf67d3a5d8a825070e1aa73f25fb2e4c7 ] When bpf(BPF_PROG_LOAD, ...) was invoked with a BPF program whose bytecode references a non-map file descriptor as a map file descriptor, the error handling code called fdput() twice instead of once (in __bpf_map_get() and in replace_map_fd_with_map_ptr()). If the file descriptor table of the current task is shared, this causes f_count to be decremented too much, allowing the struct file to be freed while it is still in use (use-after-free). This can be exploited to gain root privileges by an unprivileged user. This bug was introduced in commit 0246e64d9a5f ("bpf: handle pseudo BPF_LD_IMM64 insn"), but is only exploitable since commit 1be7f75d1668 ("bpf: enable non-root eBPF programs") because previously, CAP_SYS_ADMIN was required to reach the vulnerable code. (posted publicly according to request by maintainer) Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c21cb146086c..e3798cf7f49d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2004,7 +2004,6 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) if (IS_ERR(map)) { verbose("fd %d is not pointing to valid bpf_map\n", insn->imm); - fdput(f); return PTR_ERR(map); } -- cgit v1.2.3 From 3899251bdb9c2b31fc73d4cc132f52d3710101de Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Apr 2016 18:56:20 -0700 Subject: bpf: fix refcnt overflow [ Upstream commit 92117d8443bc5afacc8d5ba82e541946310f106e ] On a system with >32Gbyte of phyiscal memory and infinite RLIMIT_MEMLOCK, the malicious application may overflow 32-bit bpf program refcnt. It's also possible to overflow map refcnt on 1Tb system. Impose 32k hard limit which means that the same bpf program or map cannot be shared by more than 32k processes. Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 3 ++- kernel/bpf/inode.c | 7 ++++--- kernel/bpf/syscall.c | 24 ++++++++++++++++++++---- kernel/bpf/verifier.c | 11 +++++++---- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 83d1926c61e4..67bc2da5d233 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -165,12 +165,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -void bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5a8a797d50b7..d1a7646f79c5 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type) { switch (type) { case BPF_TYPE_PROG: - atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); + raw = bpf_prog_inc(raw); break; case BPF_TYPE_MAP: - bpf_map_inc(raw, true); + raw = bpf_map_inc(raw, true); break; default: WARN_ON_ONCE(1); @@ -277,7 +277,8 @@ static void *bpf_obj_do_get(const struct filename *pathname, goto out; raw = bpf_any_get(inode->i_private, *type); - touch_atime(&path); + if (!IS_ERR(raw)) + touch_atime(&path); path_put(&path); return raw; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3b39550d8485..4e32cc94edd9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -181,11 +181,18 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -void bpf_map_inc(struct bpf_map *map, bool uref) +/* prog's and map's refcnt limit */ +#define BPF_MAX_REFCNT 32768 + +struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) { - atomic_inc(&map->refcnt); + if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { + atomic_dec(&map->refcnt); + return ERR_PTR(-EBUSY); + } if (uref) atomic_inc(&map->usercnt); + return map; } struct bpf_map *bpf_map_get_with_uref(u32 ufd) @@ -197,7 +204,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) if (IS_ERR(map)) return map; - bpf_map_inc(map, true); + map = bpf_map_inc(map, true); fdput(f); return map; @@ -580,6 +587,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f) return f.file->private_data; } +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) { + atomic_dec(&prog->aux->refcnt); + return ERR_PTR(-EBUSY); + } + return prog; +} + /* called by sockets/tracing/seccomp before attaching program to an event * pairs with bpf_prog_put() */ @@ -592,7 +608,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) if (IS_ERR(prog)) return prog; - atomic_inc(&prog->aux->refcnt); + prog = bpf_prog_inc(prog); fdput(f); return prog; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e3798cf7f49d..5a615c188001 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2023,15 +2023,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) return -E2BIG; } - /* remember this map */ - env->used_maps[env->used_map_cnt++] = map; - /* hold the map. If the program is rejected by verifier, * the map will be released by release_maps() or it * will be used by the valid program until it's unloaded * and all maps are released in free_bpf_prog_info() */ - bpf_map_inc(map, false); + map = bpf_map_inc(map, false); + if (IS_ERR(map)) { + fdput(f); + return PTR_ERR(map); + } + env->used_maps[env->used_map_cnt++] = map; + fdput(f); next_insn: insn++; -- cgit v1.2.3 From bb10156f572f06f3b6cadd378e5a0ab3ed8da991 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Apr 2016 18:56:21 -0700 Subject: bpf: fix check_map_func_compatibility logic [ Upstream commit 6aff67c85c9e5a4bc99e5211c1bac547936626ca ] The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") introduced clever way to check bpf_helper<->map_type compatibility. Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adjusted the logic and inadvertently broke it. Get rid of the clever bool compare and go back to two-way check from map and from helper perspective. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 53 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5a615c188001..2cbfba78d3db 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -239,15 +239,6 @@ static const char * const reg_type_str[] = { [CONST_IMM] = "imm", }; -static const struct { - int map_type; - int func_id; -} func_limit[] = { - {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, -}; - static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -898,24 +889,44 @@ static int check_func_arg(struct verifier_env *env, u32 regno, static int check_map_func_compatibility(struct bpf_map *map, int func_id) { - bool bool_map, bool_func; - int i; - if (!map) return 0; - for (i = 0; i < ARRAY_SIZE(func_limit); i++) { - bool_map = (map->map_type == func_limit[i].map_type); - bool_func = (func_id == func_limit[i].func_id); - /* only when map & func pair match it can continue. - * don't allow any other map type to be passed into - * the special func; - */ - if (bool_func && bool_map != bool_func) - return -EINVAL; + /* We need a two way check, first is from map perspective ... */ + switch (map->map_type) { + case BPF_MAP_TYPE_PROG_ARRAY: + if (func_id != BPF_FUNC_tail_call) + goto error; + break; + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + if (func_id != BPF_FUNC_perf_event_read && + func_id != BPF_FUNC_perf_event_output) + goto error; + break; + default: + break; + } + + /* ... and second from the function itself. */ + switch (func_id) { + case BPF_FUNC_tail_call: + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + goto error; + break; + case BPF_FUNC_perf_event_read: + case BPF_FUNC_perf_event_output: + if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + goto error; + break; + default: + break; } return 0; +error: + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); + return -EINVAL; } static int check_call(struct verifier_env *env, int func_id) -- cgit v1.2.3 From 85256f78bff1c4a24ad8edac63726e8ca5ccedc1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Apr 2016 18:56:22 -0700 Subject: samples/bpf: fix trace_output example [ Upstream commit 569cc39d39385a74b23145496bca2df5ac8b2fb8 ] llvm cannot always recognize memset as builtin function and optimize it away, so just delete it. It was a leftover from testing of bpf_perf_event_output() with large data structures. Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- samples/bpf/trace_output_kern.c | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c index 8d8d1ec429eb..9b96f4fb8cea 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx) u64 cookie; } data; - memset(&data, 0, sizeof(data)); data.pid = bpf_get_current_pid_tgid(); data.cookie = 0x12345678; -- cgit v1.2.3 From 390d4b3e0d0f1f57ec5d8a0cf3e0d93444563e9c Mon Sep 17 00:00:00 2001 From: Tim Bingham Date: Fri, 29 Apr 2016 13:30:23 -0400 Subject: net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case [ Upstream commit 2c94b53738549d81dc7464a32117d1f5112c64d3 ] Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the implementation of net_dbg_ratelimited() was buggy for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases. The bug was that net_ratelimit() was being called and, despite returning true, nothing was being printed to the console. This resulted in messages like the following - "net_ratelimit: %d callbacks suppressed" with no other output nearby. After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the bug is fixed for the DEBUG case. However, there's no output at all for CONFIG_DYNAMIC_DEBUG case. This patch restores debug output (if enabled) for the CONFIG_DYNAMIC_DEBUG case. Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG case. The implementation takes care to check that dynamic debugging is enabled before calling net_ratelimit(). Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") Signed-off-by: Tim Bingham Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/net.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index 0b4ac7da583a..25ef630f1bd6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -245,7 +245,15 @@ do { \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) -#if defined(DEBUG) +#if defined(CONFIG_DYNAMIC_DEBUG) +#define net_dbg_ratelimited(fmt, ...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ + net_ratelimit()) \ + __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ +} while (0) +#elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else -- cgit v1.2.3 From c985780791efec375865c0fbd21794d631c68fce Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 29 Apr 2016 23:31:32 +0200 Subject: gre: do not pull header in ICMP error processing [ Upstream commit b7f8fe251e4609e2a437bd2c2dea01e61db6849c ] iptunnel_pull_header expects that IP header was already pulled; with this expectation, it pulls the tunnel header. This is not true in gre_err. Furthermore, ipv4_update_pmtu and ipv4_redirect expect that skb->data points to the IP header. We cannot pull the tunnel header in this path. It's just a matter of not calling iptunnel_pull_header - we don't need any of its effects. Fixes: bda7bb463436 ("gre: Allow multiple protocol listener for gre protocol.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 614521437e30..7dc962b89fa1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -180,6 +180,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags) return flags; } +/* Fills in tpi and returns header length to be pulled. */ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -239,7 +240,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto); + return hdr_len; } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -342,7 +343,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (parse_gre_header(skb, &tpi, &csum_err) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -420,6 +421,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -429,7 +431,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + hdr_len = parse_gre_header(skb, &tpi, &csum_err); + if (hdr_len < 0) + goto drop; + if (iptunnel_pull_header(skb, hdr_len, tpi.proto) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) -- cgit v1.2.3 From 1188e1403a772d9698c06bcc53c44783536ff09e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 25 Feb 2016 14:55:00 -0800 Subject: net_sched: introduce qdisc_replace() helper [ Upstream commit 86a7996cc8a078793670d82ed97d5a99bb4e8496 ] Remove nearly duplicated code and prepare for the following patch. Cc: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 17 +++++++++++++++++ net/sched/sch_cbq.c | 7 +------ net/sched/sch_drr.c | 6 +----- net/sched/sch_dsmark.c | 8 +------- net/sched/sch_hfsc.c | 6 +----- net/sched/sch_htb.c | 9 +-------- net/sched/sch_multiq.c | 8 +------- net/sched/sch_netem.c | 10 +--------- net/sched/sch_prio.c | 8 +------- net/sched/sch_qfq.c | 6 +----- net/sched/sch_red.c | 7 +------ net/sched/sch_sfb.c | 7 +------ net/sched/sch_tbf.c | 8 +------- 13 files changed, 29 insertions(+), 78 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b2a8e6338576..4dba2663eaed 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -698,6 +698,23 @@ static inline void qdisc_reset_queue(struct Qdisc *sch) sch->qstats.backlog = 0; } +static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, + struct Qdisc **pold) +{ + struct Qdisc *old; + + sch_tree_lock(sch); + old = *pold; + *pold = new; + if (old != NULL) { + qdisc_tree_decrease_qlen(old, old->q.qlen); + qdisc_reset(old); + } + sch_tree_unlock(sch); + + return old; +} + static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch, struct sk_buff_head *list) { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index c538d9e4a8f6..7f8474cdce32 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new->reshape_fail = cbq_reshape_fail; #endif } - sch_tree_lock(sch); - *old = cl->q; - cl->q = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->q); return 0; } diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index f26bdea875c1..c76cdd423b6f 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -226,11 +226,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - drr_purge_queue(cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index f357f34d02d2..cfddb1c635c3 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - *old = p->q; - p->q = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &p->q); return 0; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b7ebe2c87586..089f3b667d36 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1215,11 +1215,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; } - sch_tree_lock(sch); - hfsc_purge_queue(sch, cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 15ccd7f8fb2a..0efbcf358cd0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1163,14 +1163,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->common.classid)) == NULL) return -ENOBUFS; - sch_tree_lock(sch); - *old = cl->un.leaf.q; - cl->un.leaf.q = new; - if (*old != NULL) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - } - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->un.leaf.q); return 0; } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 4e904ca0af9d..a0103a138563 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -303,13 +303,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->queues[band]; - q->queues[band] = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->queues[band]); return 0; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5abd1d9de989..0a6ddaf7f561 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1037,15 +1037,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct netem_sched_data *q = qdisc_priv(sch); - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - if (*old) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - } - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index ba6487f2741f..1b4aaec64a24 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -268,13 +268,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->queues[band]; - q->queues[band] = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->queues[band]); return 0; } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 3dc3a6e56052..b5c52caf2e73 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -617,11 +617,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - qfq_purge_queue(cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 6c0534cc7758..d5abcee454d8 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -313,12 +313,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 5bbb6332ec57..0e74e55fda15 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -606,12 +606,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a4afde14e865..56a1aef3495f 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -502,13 +502,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } -- cgit v1.2.3 From ca375cf34a7186f5b8817082d2b594dcd8cedc8b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 25 Feb 2016 14:55:01 -0800 Subject: net_sched: update hierarchical backlog too [ Upstream commit 2ccccf5fb43ff62b2b96cc58d95fc0b3596516e4 ] When the bottom qdisc decides to, for example, drop some packet, it calls qdisc_tree_decrease_qlen() to update the queue length for all its ancestors, we need to update the backlog too to keep the stats on root qdisc accurate. Cc: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/codel.h | 4 ++++ include/net/sch_generic.h | 5 +++-- net/sched/sch_api.c | 8 +++++--- net/sched/sch_cbq.c | 5 +++-- net/sched/sch_choke.c | 6 ++++-- net/sched/sch_codel.c | 10 ++++++---- net/sched/sch_drr.c | 3 ++- net/sched/sch_fq.c | 4 +++- net/sched/sch_fq_codel.c | 17 ++++++++++++----- net/sched/sch_hfsc.c | 3 ++- net/sched/sch_hhf.c | 10 +++++++--- net/sched/sch_htb.c | 10 ++++++---- net/sched/sch_multiq.c | 8 +++++--- net/sched/sch_netem.c | 3 ++- net/sched/sch_pie.c | 5 +++-- net/sched/sch_prio.c | 7 ++++--- net/sched/sch_qfq.c | 3 ++- net/sched/sch_red.c | 3 ++- net/sched/sch_sfb.c | 3 ++- net/sched/sch_sfq.c | 16 +++++++++------- net/sched/sch_tbf.c | 7 +++++-- 21 files changed, 91 insertions(+), 49 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index 267e70210061..d168aca115cc 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -162,12 +162,14 @@ struct codel_vars { * struct codel_stats - contains codel shared variables and stats * @maxpacket: largest packet we've seen so far * @drop_count: temp count of dropped packets in dequeue() + * @drop_len: bytes of dropped packets in dequeue() * ecn_mark: number of packets we ECN marked instead of dropping * ce_mark: number of packets CE marked because sojourn time was above ce_threshold */ struct codel_stats { u32 maxpacket; u32 drop_count; + u32 drop_len; u32 ecn_mark; u32 ce_mark; }; @@ -308,6 +310,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, vars->rec_inv_sqrt); goto end; } + stats->drop_len += qdisc_pkt_len(skb); qdisc_drop(skb, sch); stats->drop_count++; skb = dequeue_func(vars, sch); @@ -330,6 +333,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { + stats->drop_len += qdisc_pkt_len(skb); qdisc_drop(skb, sch); stats->drop_count++; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4dba2663eaed..86df0835f6b5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -396,7 +396,8 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); void qdisc_destroy(struct Qdisc *qdisc); -void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); +void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, + unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops); struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, @@ -707,7 +708,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) { - qdisc_tree_decrease_qlen(old, old->q.qlen); + qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); qdisc_reset(old); } sch_tree_unlock(sch); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index af1acf009866..95b560f0b253 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return 0; } -void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) +void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, + unsigned int len) { const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; int drops; - if (n == 0) + if (n == 0 && len == 0) return; drops = max_t(int, n, 0); rcu_read_lock(); @@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) cops->put(sch, cl); } sch->q.qlen -= n; + sch->qstats.backlog -= len; __qdisc_qstats_drop(sch, drops); } rcu_read_unlock(); } -EXPORT_SYMBOL(qdisc_tree_decrease_qlen); +EXPORT_SYMBOL(qdisc_tree_reduce_backlog); static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 7f8474cdce32..baafddf229ce 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1909,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen; + unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; @@ -1917,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); qlen = cl->q->q.qlen; + backlog = cl->q->qstats.backlog; qdisc_reset(cl->q); - qdisc_tree_decrease_qlen(cl->q, qlen); + qdisc_tree_reduce_backlog(cl->q, qlen, backlog); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 5ffb8b8337c7..0a08c860eee4 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) choke_zap_tail_holes(q); qdisc_qstats_backlog_dec(sch, skb); + qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_drop(skb, sch); - qdisc_tree_decrease_qlen(sch, 1); --sch->q.qlen; } @@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) old = q->tab; if (old) { unsigned int oqlen = sch->q.qlen, tail = 0; + unsigned dropped = 0; while (q->head != q->tail) { struct sk_buff *skb = q->tab[q->head]; @@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) ntab[tail++] = skb; continue; } + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); --sch->q.qlen; qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped); q->head = 0; q->tail = tail; } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 535007d5f0b5..9b7e2980ee5c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); - /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, + /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round. */ if (q->stats.drop_count && sch->q.qlen) { - qdisc_tree_decrease_qlen(sch, q->stats.drop_count); + qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; + q->stats.drop_len = 0; } if (skb) qdisc_bstats_update(sch, skb); @@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) { struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CODEL_MAX + 1]; - unsigned int qlen; + unsigned int qlen, dropped = 0; int err; if (!opt) @@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = __skb_dequeue(&sch->q); + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index c76cdd423b6f..d6e3ad43cecb 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) static void drr_purge_queue(struct drr_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 109b2322778f..3c6a47d66a04 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) struct fq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_MAX + 1]; int err, drop_count = 0; + unsigned drop_len = 0; u32 fq_log; if (!opt) @@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (!skb) break; + drop_len += qdisc_pkt_len(skb); kfree_skb(skb); drop_count++; } - qdisc_tree_decrease_qlen(sch, drop_count); + qdisc_tree_reduce_backlog(sch, drop_count, drop_len); sch_tree_unlock(sch); return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4c834e93dafb..d3fc8f9dd3d4 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch) static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); - unsigned int idx; + unsigned int idx, prev_backlog; struct fq_codel_flow *flow; int uninitialized_var(ret); @@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; + prev_backlog = sch->qstats.backlog; q->drop_overlimit++; /* Return Congestion Notification only if we dropped a packet * from this flow. @@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this */ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); return NET_XMIT_SUCCESS; } @@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct fq_codel_flow *flow; struct list_head *head; u32 prev_drop_count, prev_ecn_mark; + unsigned int prev_backlog; begin: head = &q->new_flows; @@ -259,6 +261,7 @@ begin: prev_drop_count = q->cstats.drop_count; prev_ecn_mark = q->cstats.ecn_mark; + prev_backlog = sch->qstats.backlog; skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, dequeue); @@ -276,12 +279,14 @@ begin: } qdisc_bstats_update(sch, skb); flow->deficit -= qdisc_pkt_len(skb); - /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, + /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round. */ if (q->cstats.drop_count && sch->q.qlen) { - qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, + q->cstats.drop_len); q->cstats.drop_count = 0; + q->cstats.drop_len = 0; } return skb; } @@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_codel_dequeue(sch); + q->cstats.drop_len += qdisc_pkt_len(skb); kfree_skb(skb); q->cstats.drop_count++; } - qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); q->cstats.drop_count = 0; + q->cstats.drop_len = 0; sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 089f3b667d36..d783d7cc3348 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -895,9 +895,10 @@ static void hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static void diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 86b04e31e60b..13d6f83ec491 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct hhf_sched_data *q = qdisc_priv(sch); enum wdrr_bucket_idx idx; struct wdrr_bucket *bucket; + unsigned int prev_backlog; idx = hhf_classify(skb, sch); @@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; + prev_backlog = sch->qstats.backlog; q->drop_overlimit++; /* Return Congestion Notification only if we dropped a packet from this * bucket. @@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this. */ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); return NET_XMIT_SUCCESS; } @@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) { struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_HHF_MAX + 1]; - unsigned int qlen; + unsigned int qlen, prev_backlog; int err; u64 non_hh_quantum; u32 new_quantum = q->quantum; @@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) } qlen = sch->q.qlen; + prev_backlog = sch->qstats.backlog; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = hhf_dequeue(sch); kfree_skb(skb); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, + prev_backlog - sch->qstats.backlog); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0efbcf358cd0..846a7f98cef9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1265,7 +1265,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - unsigned int qlen; struct Qdisc *new_q = NULL; int last_child = 0; @@ -1285,9 +1284,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); if (!cl->level) { - qlen = cl->un.leaf.q->q.qlen; + unsigned int qlen = cl->un.leaf.q->q.qlen; + unsigned int backlog = cl->un.leaf.q->qstats.backlog; + qdisc_reset(cl->un.leaf.q); - qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); + qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog); } /* delete from hash and active; remainder in destroy_class */ @@ -1421,10 +1422,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; + unsigned int backlog = parent->un.leaf.q->qstats.backlog; /* turn parent into inner node */ qdisc_reset(parent->un.leaf.q); - qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); + qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog); qdisc_destroy(parent->un.leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index a0103a138563..bcdd54bb101c 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; - qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); qdisc_destroy(child); } } @@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) q->queues[i] = child; if (old != &noop_qdisc) { - qdisc_tree_decrease_qlen(old, - old->q.qlen); + qdisc_tree_reduce_backlog(old, + old->q.qlen, + old->qstats.backlog); qdisc_destroy(old); } sch_tree_unlock(sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0a6ddaf7f561..9640bb39a5d2 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -598,7 +598,8 @@ deliver: if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { qdisc_qstats_drop(sch); - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, + qdisc_pkt_len(skb)); } } goto tfifo_dequeue; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index b783a446d884..71ae3b9629f9 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) { struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_PIE_MAX + 1]; - unsigned int qlen; + unsigned int qlen, dropped = 0; int err; if (!opt) @@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = __skb_dequeue(&sch->q); + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 1b4aaec64a24..fee1b15506b2 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); qdisc_destroy(child); } } @@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) q->queues[i] = child; if (old != &noop_qdisc) { - qdisc_tree_decrease_qlen(old, - old->q.qlen); + qdisc_tree_reduce_backlog(old, + old->q.qlen, + old->qstats.backlog); qdisc_destroy(old); } sch_tree_unlock(sch); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index b5c52caf2e73..8d2d8d953432 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) static void qfq_purge_queue(struct qfq_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index d5abcee454d8..8c0508c0e287 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0e74e55fda15..c69611640fa5 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3abab534eb5c..498f0a2cb47f 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -346,7 +346,7 @@ static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; + unsigned int hash, dropped; sfq_index x, qlen; struct sfq_slot *slot; int uninitialized_var(ret); @@ -461,7 +461,7 @@ enqueue: return NET_XMIT_SUCCESS; qlen = slot->qlen; - sfq_drop(sch); + dropped = sfq_drop(sch); /* Return Congestion Notification only if we dropped a packet * from this flow. */ @@ -469,7 +469,7 @@ enqueue: return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this */ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, dropped); return NET_XMIT_SUCCESS; } @@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch) struct sfq_slot *slot; struct sk_buff_head list; int dropped = 0; + unsigned int drop_len = 0; __skb_queue_head_init(&list); @@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch) if (x >= SFQ_MAX_FLOWS) { drop: qdisc_qstats_backlog_dec(sch, skb); + drop_len += qdisc_pkt_len(skb); kfree_skb(skb); dropped++; continue; @@ -594,7 +596,7 @@ drop: } } sch->q.qlen -= dropped; - qdisc_tree_decrease_qlen(sch, dropped); + qdisc_tree_reduce_backlog(sch, dropped, drop_len); } static void sfq_perturbation(unsigned long arg) @@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = nla_data(opt); struct tc_sfq_qopt_v1 *ctl_v1 = NULL; - unsigned int qlen; + unsigned int qlen, dropped = 0; struct red_parms *p = NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) @@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) qlen = sch->q.qlen; while (sch->q.qlen > q->limit) - sfq_drop(sch); - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + dropped += sfq_drop(sch); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); del_timer(&q->perturb_timer); if (q->perturb_period) { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 56a1aef3495f..c2fbde742f37 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); + unsigned int len = 0, prev_len = qdisc_pkt_len(skb); int ret, nb; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); @@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) nskb = segs->next; segs->next = NULL; qdisc_skb_cb(segs)->pkt_len = segs->len; + len += segs->len; ret = qdisc_enqueue(segs, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) @@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) } sch->q.qlen += nb; if (nb > 1) - qdisc_tree_decrease_qlen(sch, 1 - nb); + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); consume_skb(skb); return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; } @@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); if (child) { - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; } -- cgit v1.2.3 From 67b014f957cfb29f02a71834facf9f3d7ef4b3fd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 25 Feb 2016 14:55:02 -0800 Subject: sch_htb: update backlog as well [ Upstream commit 431e3a8e36a05a37126f34b41aa3a5a6456af04e ] We saw qlen!=0 but backlog==0 on our production machine: qdisc htb 1: dev eth0 root refcnt 2 r2q 10 default 1 direct_packets_stat 0 ver 3.17 Sent 172680457356 bytes 222469449 pkt (dropped 0, overlimits 123575834 requeues 0) backlog 0b 72p requeues 0 The problem is we only count qlen for HTB qdisc but not backlog. We need to update backlog too when we update qlen, so that we can at least know the average packet length. Cc: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_htb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 846a7f98cef9..87b02ed3d5f2 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) htb_activate(q, cl); } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) ok: qdisc_bstats_update(sch, skb); qdisc_unthrottled(sch); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch) unsigned int len; if (cl->un.leaf.q->ops->drop && (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { + sch->qstats.backlog -= len; sch->q.qlen--; if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); @@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch) } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; - } } qdisc_watchdog_cancel(&q->watchdog); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; + sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); for (i = 0; i < TC_HTB_NUMPRIO; i++) -- cgit v1.2.3 From 5ecc98e1b8de36e2ac351d174a5f5d92b5085d15 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 25 Feb 2016 14:55:03 -0800 Subject: sch_dsmark: update backlog as well [ Upstream commit bdf17661f63a79c3cb4209b970b1cc39e34f7543 ] Similarly, we need to update backlog too when we update qlen. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_dsmark.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index cfddb1c635c3..d0dff0cd8186 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -258,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -280,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) return NULL; qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); @@ -395,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); + sch->qstats.backlog = 0; sch->q.qlen = 0; } -- cgit v1.2.3 From 71a783bce6dd0a39937322de683cf4bda2f3a1be Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 2 May 2016 12:20:15 -0400 Subject: netem: Segment GSO packets on enqueue [ Upstream commit 6071bd1aa13ed9e41824bafad845b7b7f4df5cfd ] This was recently reported to me, and reproduced on the latest net kernel, when attempting to run netperf from a host that had a netem qdisc attached to the egress interface: [ 788.073771] ---------------------[ cut here ]--------------------------- [ 788.096716] WARNING: at net/core/dev.c:2253 skb_warn_bad_offload+0xcd/0xda() [ 788.129521] bnx2: caps=(0x00000001801949b3, 0x0000000000000000) len=2962 data_len=0 gso_size=1448 gso_type=1 ip_summed=3 [ 788.182150] Modules linked in: sch_netem kvm_amd kvm crc32_pclmul ipmi_ssif ghash_clmulni_intel sp5100_tco amd64_edac_mod aesni_intel lrw gf128mul glue_helper ablk_helper edac_mce_amd cryptd pcspkr sg edac_core hpilo ipmi_si i2c_piix4 k10temp fam15h_power hpwdt ipmi_msghandler shpchp acpi_power_meter pcc_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ahci ata_generic pata_acpi ttm libahci crct10dif_pclmul pata_atiixp tg3 libata crct10dif_common drm crc32c_intel ptp serio_raw bnx2 r8169 hpsa pps_core i2c_core mii dm_mirror dm_region_hash dm_log dm_mod [ 788.465294] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G W ------------ 3.10.0-327.el7.x86_64 #1 [ 788.511521] Hardware name: HP ProLiant DL385p Gen8, BIOS A28 12/17/2012 [ 788.542260] ffff880437c036b8 f7afc56532a53db9 ffff880437c03670 ffffffff816351f1 [ 788.576332] ffff880437c036a8 ffffffff8107b200 ffff880633e74200 ffff880231674000 [ 788.611943] 0000000000000001 0000000000000003 0000000000000000 ffff880437c03710 [ 788.647241] Call Trace: [ 788.658817] [] dump_stack+0x19/0x1b [ 788.686193] [] warn_slowpath_common+0x70/0xb0 [ 788.713803] [] warn_slowpath_fmt+0x5c/0x80 [ 788.741314] [] ? ___ratelimit+0x93/0x100 [ 788.767018] [] skb_warn_bad_offload+0xcd/0xda [ 788.796117] [] skb_checksum_help+0x17c/0x190 [ 788.823392] [] netem_enqueue+0x741/0x7c0 [sch_netem] [ 788.854487] [] dev_queue_xmit+0x2a8/0x570 [ 788.880870] [] ip_finish_output+0x53d/0x7d0 ... The problem occurs because netem is not prepared to handle GSO packets (as it uses skb_checksum_help in its enqueue path, which cannot manipulate these frames). The solution I think is to simply segment the skb in a simmilar fashion to the way we do in __dev_queue_xmit (via validate_xmit_skb), with some minor changes. When we decide to corrupt an skb, if the frame is GSO, we segment it, corrupt the first segment, and enqueue the remaining ones. tested successfully by myself on the latest net kernel, to which this applies Signed-off-by: Neil Horman CC: Jamal Hadi Salim CC: "David S. Miller" CC: netem@lists.linux-foundation.org CC: eric.dumazet@gmail.com CC: stephen@networkplumber.org Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9640bb39a5d2..4befe97a9034 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) sch->q.qlen++; } +/* netem can't properly corrupt a megapacket (like we get from GSO), so instead + * when we statistically choose to corrupt one, we instead segment it, returning + * the first packet to be corrupted, and re-enqueue the remaining frames + */ +static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct sk_buff *segs; + netdev_features_t features = netif_skb_features(skb); + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) { + qdisc_reshape_fail(skb, sch); + return NULL; + } + consume_skb(skb); + return segs; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; + struct sk_buff *segs = NULL; + unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); + int nb = 0; int count = 1; + int rc = NET_XMIT_SUCCESS; /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) @@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it. */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (skb_is_gso(skb)) { + segs = netem_segment(skb, sch); + if (!segs) + return NET_XMIT_DROP; + } else { + segs = skb; + } + + skb = segs; + segs = segs->next; + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) - return qdisc_drop(skb, sch); + skb_checksum_help(skb))) { + rc = qdisc_drop(skb, sch); + goto finish_segs; + } skb->data[prandom_u32() % skb_headlen(skb)] ^= 1<<(prandom_u32() % 8); @@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.requeues++; } +finish_segs: + if (segs) { + while (segs) { + skb2 = segs->next; + segs->next = NULL; + qdisc_skb_cb(segs)->pkt_len = segs->len; + last_len = segs->len; + rc = qdisc_enqueue(segs, sch); + if (rc != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(rc)) + qdisc_qstats_drop(sch); + } else { + nb++; + len += last_len; + } + segs = skb2; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + } return NET_XMIT_SUCCESS; } -- cgit v1.2.3 From f27e1ed8d971a6649c0da8e8a8517fea56ad71f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 3 May 2016 16:38:53 +0200 Subject: net: fec: only clear a queue's work bit if the queue was emptied MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1c021bb717a70aaeaa4b25c91f43c2aeddd922de ] In the receive path a queue's work bit was cleared unconditionally even if fec_enet_rx_queue only read out a part of the available packets from the hardware. This resulted in not reading any packets in the next napi turn and so packets were delayed or lost. The obvious fix is to only clear a queue's bit when the queue was emptied. Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue") Signed-off-by: Uwe Kleine-König Reviewed-by: Lucas Stach Tested-by: Fugang Duan Acked-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index b2a32209ffbf..f6147ffc7fbc 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1557,9 +1557,15 @@ fec_enet_rx(struct net_device *ndev, int budget) struct fec_enet_private *fep = netdev_priv(ndev); for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) { - clear_bit(queue_id, &fep->work_rx); - pkt_received += fec_enet_rx_queue(ndev, + int ret; + + ret = fec_enet_rx_queue(ndev, budget - pkt_received, queue_id); + + if (ret < budget - pkt_received) + clear_bit(queue_id, &fep->work_rx); + + pkt_received += ret; } return pkt_received; } -- cgit v1.2.3 From 52f307b18b1f070f0442fc98515575616b21fa20 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:35:05 -0400 Subject: net: fix infoleak in llc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b8670c09f37bdf2847cc44f36511a53afc6161fd ] The stack object “info” has a total size of 12 bytes. Its last byte is padding which is not initialized and leaked via “put_cmsg”. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/af_llc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8dab4e569571..bb8edb9ef506 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; + memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); -- cgit v1.2.3 From e0c0313681aaa0c4514c6794635aba82691d2154 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:46:24 -0400 Subject: net: fix infoleak in rtnetlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5f8e44741f9f216e33736ea4ec65ca9ac03036e6 ] The stack object “map” has a total size of 32 bytes. Its last 4 bytes are padding generated by compiler. These padding bytes are not initialized and sent out via “nla_put”. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ca966f7de351..87b91ffbdec3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1175,14 +1175,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct rtnl_link_ifmap map; + + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) return -EMSGSIZE; -- cgit v1.2.3 From bcf3e33e962d83837a03ccc489d834e0e9d95d58 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 4 May 2016 15:00:33 +0300 Subject: net/mlx4_en: Fix endianness bug in IPV6 csum calculation [ Upstream commit 82d69203df634b4dfa765c94f60ce9482bcc44d6 ] Use htons instead of unconditionally byte swapping nexthdr. On a little endian systems shifting the byte is correct behavior, but it results in incorrect csums on big endian architectures. Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE') Signed-off-by: Daniel Jurgens Reviewed-by: Carol Soto Tested-by: Carol Soto Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e7a5000aa12c..bbff8ec6713e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -704,7 +704,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); csum_pseudo_hdr = csum_partial(&ipv6h->saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); -- cgit v1.2.3 From 67779d20fb540b719ca0781200b79831d3498fa4 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 May 2016 14:21:53 +0100 Subject: VSOCK: do not disconnect socket when peer has shutdown SEND only [ Upstream commit dedc58e067d8c379a15a8a183c5db318201295bb ] The peer may be expecting a reply having sent a request and then done a shutdown(SHUT_WR), so tearing down the whole socket at this point seems wrong and breaks for me with a client which does a SHUT_WR. Looking at other socket family's stream_recvmsg callbacks doing a shutdown here does not seem to be the norm and removing it does not seem to have had any adverse effects that I can see. I'm using Stefan's RFC virtio transport patches, I'm unsure of the impact on the vmci transport. Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: Stefan Hajnoczi Cc: Claudio Imbrenda Cc: Andy King Cc: Dmitry Torokhov Cc: Jorgen Hansen Cc: Adit Ranadive Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7fd1220fbfa0..9b5bd6d142dc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1794,27 +1794,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps - * if we actually copied something out of the queue pair - * instead of just peeking ahead. - */ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there - * is nothing more to read, then modify the socket - * state. - */ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out_wait: finish_wait(sk_sleep(sk), &wait); -- cgit v1.2.3 From 97c2160da468f71ba998f2c6c82ed33cdfbc7245 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 4 May 2016 16:18:45 +0200 Subject: net: bridge: fix old ioctl unlocked net device walk [ Upstream commit 31ca0458a61a502adb7ed192bf9716c6d05791a5 ] get_bridge_ifindices() is used from the old "deviceless" bridge ioctl calls which aren't called with rtnl held. The comment above says that it is called with rtnl but that is not really the case. Here's a sample output from a test ASSERT_RTNL() which I put in get_bridge_ifindices and executed "brctl show": [ 957.422726] RTNL: assertion failed at net/bridge//br_ioctl.c (30) [ 957.422925] CPU: 0 PID: 1862 Comm: brctl Tainted: G W O 4.6.0-rc4+ #157 [ 957.423009] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 957.423009] 0000000000000000 ffff880058adfdf0 ffffffff8138dec5 0000000000000400 [ 957.423009] ffffffff81ce8380 ffff880058adfe58 ffffffffa05ead32 0000000000000001 [ 957.423009] 00007ffec1a444b0 0000000000000400 ffff880053c19130 0000000000008940 [ 957.423009] Call Trace: [ 957.423009] [] dump_stack+0x85/0xc0 [ 957.423009] [] br_ioctl_deviceless_stub+0x212/0x2e0 [bridge] [ 957.423009] [] sock_ioctl+0x22b/0x290 [ 957.423009] [] do_vfs_ioctl+0x95/0x700 [ 957.423009] [] SyS_ioctl+0x79/0x90 [ 957.423009] [] entry_SYSCALL_64_fastpath+0x23/0xc1 Since it only reads bridge ifindices, we can use rcu to safely walk the net device list. Also remove the wrong rtnl comment above. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 263b4de4de57..60a3dbfca8a1 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; } -- cgit v1.2.3 From 5895701216ef9e4f89734433dcf64c85fc3d64a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 4 May 2016 17:25:02 +0200 Subject: bridge: fix igmp / mld query parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 856ce5d083e14571d051301fe3c65b32b8cbe321 ] With the newly introduced helper functions the skb pulling is hidden in the checksumming function - and undone before returning to the caller. The IGMP and MLD query parsing functions in the bridge still assumed that the skb is pointing to the beginning of the IGMP/MLD message while it is now kept at the beginning of the IPv4/6 header. If there is a querier somewhere else, then this either causes the multicast snooping to stay disabled even though it could be enabled. Or, if we have the querier enabled too, then this can create unnecessary IGMP / MLD query messages on the link. Fixing this by taking the offset between IP and IGMP/MLD header into account, too. Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") Reported-by: Simon Wunderlich Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 03661d97463c..ea9893743a0f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1270,6 +1270,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); __be32 group; int err = 0; @@ -1280,14 +1281,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == sizeof(*ih)) { + if (skb->len == offset + sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= sizeof(*ih3)) { + } else if (skb->len >= offset + sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1348,6 +1349,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; @@ -1357,8 +1359,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == sizeof(*mld)) { - if (!pskb_may_pull(skb, sizeof(*mld))) { + if (skb->len == offset + sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } @@ -1367,7 +1369,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else { - if (!pskb_may_pull(skb, sizeof(*mld2q))) { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } -- cgit v1.2.3 From 1575c095e444c927f0ebcdeb179c460c8c3b7f1f Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sun, 24 Apr 2016 17:45:00 +0200 Subject: uapi glibc compat: fix compile errors when glibc net/if.h included before linux/if.h MIME-Version: 1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4a91cb61bb995e5571098188092e296192309c77 ] glibc's net/if.h contains copies of definitions from linux/if.h and these conflict and cause build failures if both files are included by application source code. Changes in uapi headers, which fixed header file dependencies to include linux/if.h when it was needed, e.g. commit 1ffad83d, made the net/if.h and linux/if.h incompatibilities visible as build failures for userspace applications like iproute2 and xtables-addons. This patch fixes compile errors when glibc net/if.h is included before linux/if.h: ./linux/if.h:99:21: error: redeclaration of enumerator ‘IFF_NOARP’ ./linux/if.h:98:23: error: redeclaration of enumerator ‘IFF_RUNNING’ ./linux/if.h:97:26: error: redeclaration of enumerator ‘IFF_NOTRAILERS’ ./linux/if.h:96:27: error: redeclaration of enumerator ‘IFF_POINTOPOINT’ ./linux/if.h:95:24: error: redeclaration of enumerator ‘IFF_LOOPBACK’ ./linux/if.h:94:21: error: redeclaration of enumerator ‘IFF_DEBUG’ ./linux/if.h:93:25: error: redeclaration of enumerator ‘IFF_BROADCAST’ ./linux/if.h:92:19: error: redeclaration of enumerator ‘IFF_UP’ ./linux/if.h:252:8: error: redefinition of ‘struct ifconf’ ./linux/if.h:203:8: error: redefinition of ‘struct ifreq’ ./linux/if.h:169:8: error: redefinition of ‘struct ifmap’ ./linux/if.h:107:23: error: redeclaration of enumerator ‘IFF_DYNAMIC’ ./linux/if.h:106:25: error: redeclaration of enumerator ‘IFF_AUTOMEDIA’ ./linux/if.h:105:23: error: redeclaration of enumerator ‘IFF_PORTSEL’ ./linux/if.h:104:25: error: redeclaration of enumerator ‘IFF_MULTICAST’ ./linux/if.h:103:21: error: redeclaration of enumerator ‘IFF_SLAVE’ ./linux/if.h:102:22: error: redeclaration of enumerator ‘IFF_MASTER’ ./linux/if.h:101:24: error: redeclaration of enumerator ‘IFF_ALLMULTI’ ./linux/if.h:100:23: error: redeclaration of enumerator ‘IFF_PROMISC’ The cases where linux/if.h is included before net/if.h need a similar fix in the glibc side, or the order of include files can be changed userspace code as a workaround. This change was tested in x86 userspace on Debian unstable with scripts/headers_compile_test.sh: $ make headers_install && \ cd usr/include && ../../scripts/headers_compile_test.sh -l -k ... cc -Wall -c -nostdinc -I /usr/lib/gcc/i586-linux-gnu/5/include -I /usr/lib/gcc/i586-linux-gnu/5/include-fixed -I . -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH/i586-linux-gnu -o /dev/null ./linux/if.h_libc_before_kernel.h PASSED libc before kernel test: ./linux/if.h Reported-by: Jan Engelhardt Reported-by: Josh Boyer Reported-by: Stephen Hemminger Reported-by: Waldemar Brodkorb Cc: Gabriel Laskar Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/if.h | 28 +++++++++++++++++++++++++ include/uapi/linux/libc-compat.h | 44 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 9cf2394f0bcf..752f5dc040a5 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -19,14 +19,20 @@ #ifndef _LINUX_IF_H #define _LINUX_IF_H +#include /* for compatibility with glibc */ #include /* for "__kernel_caddr_t" et al */ #include /* for "struct sockaddr" et al */ #include /* for "__user" et al */ +#if __UAPI_DEF_IF_IFNAMSIZ #define IFNAMSIZ 16 +#endif /* __UAPI_DEF_IF_IFNAMSIZ */ #define IFALIASZ 256 #include +/* For glibc compatibility. An empty enum does not compile. */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \ + __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 /** * enum net_device_flags - &struct net_device flags * @@ -68,6 +74,8 @@ * @IFF_ECHO: echo sent packets. Volatile. */ enum net_device_flags { +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS IFF_UP = 1<<0, /* sysfs */ IFF_BROADCAST = 1<<1, /* volatile */ IFF_DEBUG = 1<<2, /* sysfs */ @@ -84,11 +92,17 @@ enum net_device_flags { IFF_PORTSEL = 1<<13, /* sysfs */ IFF_AUTOMEDIA = 1<<14, /* sysfs */ IFF_DYNAMIC = 1<<15, /* sysfs */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO IFF_LOWER_UP = 1<<16, /* volatile */ IFF_DORMANT = 1<<17, /* volatile */ IFF_ECHO = 1<<18, /* volatile */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ }; +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS #define IFF_UP IFF_UP #define IFF_BROADCAST IFF_BROADCAST #define IFF_DEBUG IFF_DEBUG @@ -105,9 +119,13 @@ enum net_device_flags { #define IFF_PORTSEL IFF_PORTSEL #define IFF_AUTOMEDIA IFF_AUTOMEDIA #define IFF_DYNAMIC IFF_DYNAMIC +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */ + +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define IFF_LOWER_UP IFF_LOWER_UP #define IFF_DORMANT IFF_DORMANT #define IFF_ECHO IFF_ECHO +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\ IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT) @@ -166,6 +184,8 @@ enum { * being very small might be worth keeping for clean configuration. */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFMAP struct ifmap { unsigned long mem_start; unsigned long mem_end; @@ -175,6 +195,7 @@ struct ifmap { unsigned char port; /* 3 bytes spare */ }; +#endif /* __UAPI_DEF_IF_IFMAP */ struct if_settings { unsigned int type; /* Type of physical device or protocol */ @@ -200,6 +221,8 @@ struct if_settings { * remainder may be interface specific. */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFREQ struct ifreq { #define IFHWADDRLEN 6 union @@ -223,6 +246,7 @@ struct ifreq { struct if_settings ifru_settings; } ifr_ifru; }; +#endif /* __UAPI_DEF_IF_IFREQ */ #define ifr_name ifr_ifrn.ifrn_name /* interface name */ #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ @@ -249,6 +273,8 @@ struct ifreq { * must know all networks accessible). */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFCONF struct ifconf { int ifc_len; /* size of buffer */ union { @@ -256,6 +282,8 @@ struct ifconf { struct ifreq __user *ifcu_req; } ifc_ifcu; }; +#endif /* __UAPI_DEF_IF_IFCONF */ + #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 7d024ceb075d..d5e38c73377c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -51,6 +51,40 @@ /* We have included glibc headers... */ #if defined(__GLIBC__) +/* Coordinate with glibc net/if.h header. */ +#if defined(_NET_IF_H) + +/* GLIBC headers included first so don't define anything + * that would already be defined. */ + +#define __UAPI_DEF_IF_IFCONF 0 +#define __UAPI_DEF_IF_IFMAP 0 +#define __UAPI_DEF_IF_IFNAMSIZ 0 +#define __UAPI_DEF_IF_IFREQ 0 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 0 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ + +#else /* _NET_IF_H */ + +/* Linux headers included first, and we must define everything + * we need. The expectation is that glibc will check the + * __UAPI_DEF_* defines and adjust appropriately. */ + +#define __UAPI_DEF_IF_IFCONF 1 +#define __UAPI_DEF_IF_IFMAP 1 +#define __UAPI_DEF_IF_IFNAMSIZ 1 +#define __UAPI_DEF_IF_IFREQ 1 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 + +#endif /* _NET_IF_H */ + /* Coordinate with glibc netinet/in.h header. */ #if defined(_NETINET_IN_H) @@ -117,6 +151,16 @@ * that we need. */ #else /* !defined(__GLIBC__) */ +/* Definitions for if.h */ +#define __UAPI_DEF_IF_IFCONF 1 +#define __UAPI_DEF_IF_IFMAP 1 +#define __UAPI_DEF_IF_IFNAMSIZ 1 +#define __UAPI_DEF_IF_IFREQ 1 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 + /* Definitions for in.h */ #define __UAPI_DEF_IN_ADDR 1 #define __UAPI_DEF_IN_IPPROTO 1 -- cgit v1.2.3 From 83857201758ead21e19e36d9ab5b2f87be03dfe2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sun, 8 May 2016 12:10:14 -0400 Subject: net: fix a kernel infoleak in x25 module [ Upstream commit 79e48650320e6fba48369fccf13fd045315b19b8 ] Stack object "dte_facilities" is allocated in x25_rx_call_request(), which is supposed to be initialized in x25_negotiate_facilities. However, 5 fields (8 bytes in total) are not initialized. This object is then copied to userland via copy_to_user, thus infoleak occurs. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/x25_facilities.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 7ecd04c21360..997ff7b2509b 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memset(&theirs, 0, sizeof(theirs)); memcpy(new, ours, sizeof(*new)); + memset(dte, 0, sizeof(*dte)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); if (len < 0) -- cgit v1.2.3 From a7ddb047796d072a3f19c55aac1d9bfe8cb4b15c Mon Sep 17 00:00:00 2001 From: "xypron.glpk@gmx.de" Date: Mon, 9 May 2016 00:46:18 +0200 Subject: net: thunderx: avoid exposing kernel stack [ Upstream commit 161de2caf68c549c266e571ffba8e2163886fb10 ] Reserved fields should be set to zero to avoid exposing bits from the kernel stack. Signed-off-by: Heinrich Schuchardt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 206b6a71a545..d1c217eaf417 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -550,6 +550,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, nicvf_config_vlan_stripping(nic, nic->netdev->features); /* Enable Receive queue */ + memset(&rq_cfg, 0, sizeof(struct rq_cfg)); rq_cfg.ena = 1; rq_cfg.tcp_ena = 0; nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg); @@ -582,6 +583,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, qidx, (u64)(cq->dmem.phys_base)); /* Enable Completion queue */ + memset(&cq_cfg, 0, sizeof(struct cq_cfg)); cq_cfg.ena = 1; cq_cfg.reset = 0; cq_cfg.caching = 0; @@ -630,6 +632,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, qidx, (u64)(sq->dmem.phys_base)); /* Enable send queue & set queue size */ + memset(&sq_cfg, 0, sizeof(struct sq_cfg)); sq_cfg.ena = 1; sq_cfg.reset = 0; sq_cfg.ldwb = 0; @@ -666,6 +669,7 @@ static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, /* Enable RBDR & set queue size */ /* Buffer size should be in multiples of 128 bytes */ + memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg)); rbdr_cfg.ena = 1; rbdr_cfg.reset = 0; rbdr_cfg.ldwb = 0; -- cgit v1.2.3 From 2cddc95adf3b1be879e6540187bb5aae24dd2689 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 May 2016 20:55:16 -0700 Subject: tcp: refresh skb timestamp at retransmit time [ Upstream commit 10a81980fc47e64ffac26a073139813d3f697b64 ] In the very unlikely case __tcp_retransmit_skb() can not use the cloning done in tcp_transmit_skb(), we need to refresh skb_mstamp before doing the copy and transmit, otherwise TCP TS val will be an exact copy of original transmit. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9bfc39ff2285..7c9883ab56e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2625,8 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(&skb->skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else { -- cgit v1.2.3 From 1c76c5d5ffbbaed1cb1829c1eb9b97648b4979fd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 May 2016 18:33:41 +0200 Subject: net/route: enforce hoplimit max value [ Upstream commit 626abd59e51d4d8c6367e03aae252a8aa759ac78 ] Currently, when creating or updating a route, no check is performed in both ipv4 and ipv6 code to the hoplimit value. The caller can i.e. set hoplimit to 256, and when such route will be used, packets will be sent with hoplimit/ttl equal to 0. This commit adds checks for the RTAX_HOPLIMIT value, in both ipv4 ipv6 route code, substituting any value greater than 255 with 255. This is consistent with what is currently done for ADVMSS and MTU in the ipv4 code. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 2 ++ net/ipv6/route.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d97268e8ff10..2b68418c7198 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; fi->fib_metrics[type - 1] = val; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3f164d3aaee2..5af2cca0a46d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1727,6 +1727,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) goto err; -- cgit v1.2.3 From 3cbabd4e83f63d5271c8ff74c059f7cbe85c4c1d Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 12 May 2016 15:42:15 -0700 Subject: ocfs2: revert using ocfs2_acl_chmod to avoid inode cluster lock hang commit 5ee0fbd50fdf1c1329de8bee35ea9d7c6a81a2e0 upstream. Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()") introduced this issue. ocfs2_setattr called by chmod command holds cluster wide inode lock when calling posix_acl_chmod. This latter function in turn calls ocfs2_iop_get_acl and ocfs2_iop_set_acl. These two are also called directly from vfs layer for getfacl/setfacl commands and therefore acquire the cluster wide inode lock. If a remote conversion request comes after the first inode lock in ocfs2_setattr, OCFS2_LOCK_BLOCKED will be set. And this will cause the second call to inode lock from the ocfs2_iop_get_acl() to block indefinetly. The deleted version of ocfs2_acl_chmod() calls __posix_acl_chmod() which does not call back into the filesystem. Therefore, we restore ocfs2_acl_chmod(), modify it slightly for locking as needed, and use that instead. Fixes: 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()") Signed-off-by: Tariq Saeed Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/acl.c | 24 ++++++++++++++++++++++++ fs/ocfs2/acl.h | 1 + fs/ocfs2/file.c | 4 ++-- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 0cdf497c91ef..749d3bc41232 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -322,3 +322,27 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) brelse(di_bh); return acl; } + +int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl; + int ret; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return 0; + + acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (ret) + return ret; + ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, + acl, NULL, NULL); + posix_acl_release(acl); + return ret; +} diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 3fce68d08625..035e5878db06 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -35,5 +35,6 @@ int ocfs2_set_acl(handle_t *handle, struct posix_acl *acl, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac); +extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); #endif /* OCFS2_ACL_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0e5b4515f92e..77d30cbd944d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1268,20 +1268,20 @@ bail_unlock_rw: if (size_change) ocfs2_rw_unlock(inode, 1); bail: - brelse(bh); /* Release quota pointers in case we acquired them */ for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++) dqput(transfer_to[qtype]); if (!status && attr->ia_valid & ATTR_MODE) { - status = posix_acl_chmod(inode, inode->i_mode); + status = ocfs2_acl_chmod(inode, bh); if (status < 0) mlog_errno(status); } if (inode_locked) ocfs2_inode_unlock(inode, 1); + brelse(bh); return status; } -- cgit v1.2.3 From dc3e6de00b00bb7b716d2653650fdd112e981578 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 12 May 2016 15:42:18 -0700 Subject: ocfs2: fix posix_acl_create deadlock commit c25a1e0671fbca7b2c0d0757d533bd2650d6dc0c upstream. Commit 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure") refactored code to use posix_acl_create. The problem with this function is that it is not mindful of the cluster wide inode lock making it unsuitable for use with ocfs2 inode creation with ACLs. For example, when used in ocfs2_mknod, this function can cause deadlock as follows. The parent dir inode lock is taken when calling posix_acl_create -> get_acl -> ocfs2_iop_get_acl which takes the inode lock again. This can cause deadlock if there is a blocked remote lock request waiting for the lock to be downconverted. And same deadlock happened in ocfs2_reflink. This fix is to revert back using ocfs2_init_acl. Fixes: 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure") Signed-off-by: Tariq Saeed Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/acl.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/acl.h | 4 ++++ fs/ocfs2/namei.c | 23 ++---------------- fs/ocfs2/refcounttree.c | 17 ++----------- fs/ocfs2/xattr.c | 14 ++++------- fs/ocfs2/xattr.h | 4 +--- 6 files changed, 77 insertions(+), 48 deletions(-) diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 749d3bc41232..2162434728c0 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -346,3 +346,66 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh) posix_acl_release(acl); return ret; } + +/* + * Initialize the ACLs of a new inode. If parent directory has default ACL, + * then clone to new inode. Called from ocfs2_mknod. + */ +int ocfs2_init_acl(handle_t *handle, + struct inode *inode, + struct inode *dir, + struct buffer_head *di_bh, + struct buffer_head *dir_bh, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl = NULL; + int ret = 0, ret2; + umode_t mode; + + if (!S_ISLNK(inode->i_mode)) { + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, + dir_bh); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) { + mode = inode->i_mode & ~current_umask(); + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret) { + mlog_errno(ret); + goto cleanup; + } + } + } + if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { + if (S_ISDIR(inode->i_mode)) { + ret = ocfs2_set_acl(handle, inode, di_bh, + ACL_TYPE_DEFAULT, acl, + meta_ac, data_ac); + if (ret) + goto cleanup; + } + mode = inode->i_mode; + ret = __posix_acl_create(&acl, GFP_NOFS, &mode); + if (ret < 0) + return ret; + + ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret2) { + mlog_errno(ret2); + ret = ret2; + goto cleanup; + } + if (ret > 0) { + ret = ocfs2_set_acl(handle, inode, + di_bh, ACL_TYPE_ACCESS, + acl, meta_ac, data_ac); + } + } +cleanup: + posix_acl_release(acl); + return ret; +} diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 035e5878db06..2783a75b3999 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -36,5 +36,9 @@ int ocfs2_set_acl(handle_t *handle, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac); extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); +extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, + struct buffer_head *, struct buffer_head *, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); #endif /* OCFS2_ACL_H */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3123408da935..62af9554541d 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir, struct ocfs2_dir_lookup_result lookup = { NULL, }; sigset_t oldset; int did_block_signals = 0; - struct posix_acl *default_acl = NULL, *acl = NULL; struct ocfs2_dentry_lock *dl = NULL; trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name, @@ -367,12 +366,6 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); - if (status) { - mlog_errno(status); - goto leave; - } - handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), xattr_credits)); @@ -421,16 +414,8 @@ static int ocfs2_mknod(struct inode *dir, inc_nlink(dir); } - if (default_acl) { - status = ocfs2_set_acl(handle, inode, new_fe_bh, - ACL_TYPE_DEFAULT, default_acl, - meta_ac, data_ac); - } - if (!status && acl) { - status = ocfs2_set_acl(handle, inode, new_fe_bh, - ACL_TYPE_ACCESS, acl, - meta_ac, data_ac); - } + status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh, + meta_ac, data_ac); if (status < 0) { mlog_errno(status); @@ -472,10 +457,6 @@ static int ocfs2_mknod(struct inode *dir, d_instantiate(dentry, inode); status = 0; leave: - if (default_acl) - posix_acl_release(default_acl); - if (acl) - posix_acl_release(acl); if (status < 0 && did_quota_inode) dquot_free_inode(inode); if (handle) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 252119860e6c..6a0c55d7dff0 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4248,20 +4248,12 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, struct inode *inode = d_inode(old_dentry); struct buffer_head *old_bh = NULL; struct inode *new_orphan_inode = NULL; - struct posix_acl *default_acl, *acl; - umode_t mode; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; - mode = inode->i_mode; - error = posix_acl_create(dir, &mode, &default_acl, &acl); - if (error) { - mlog_errno(error); - return error; - } - error = ocfs2_create_inode_in_orphan(dir, mode, + error = ocfs2_create_inode_in_orphan(dir, inode->i_mode, &new_orphan_inode); if (error) { mlog_errno(error); @@ -4300,16 +4292,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, /* If the security isn't preserved, we need to re-initialize them. */ if (!preserve) { error = ocfs2_init_security_and_acl(dir, new_orphan_inode, - &new_dentry->d_name, - default_acl, acl); + &new_dentry->d_name); if (error) mlog_errno(error); } out: - if (default_acl) - posix_acl_release(default_acl); - if (acl) - posix_acl_release(acl); if (!error) { error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, new_dentry); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index e9164f09841b..877830b05e12 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7197,12 +7197,10 @@ out: */ int ocfs2_init_security_and_acl(struct inode *dir, struct inode *inode, - const struct qstr *qstr, - struct posix_acl *default_acl, - struct posix_acl *acl) + const struct qstr *qstr) { - struct buffer_head *dir_bh = NULL; int ret = 0; + struct buffer_head *dir_bh = NULL; ret = ocfs2_init_security_get(inode, dir, qstr, NULL); if (ret) { @@ -7215,11 +7213,9 @@ int ocfs2_init_security_and_acl(struct inode *dir, mlog_errno(ret); goto leave; } - - if (!ret && default_acl) - ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); - if (!ret && acl) - ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS); + ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); + if (ret) + mlog_errno(ret); ocfs2_inode_unlock(dir, 0); brelse(dir_bh); diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index f10d5b93c366..1633cc15ea1f 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -94,7 +94,5 @@ int ocfs2_reflink_xattrs(struct inode *old_inode, bool preserve_security); int ocfs2_init_security_and_acl(struct inode *dir, struct inode *inode, - const struct qstr *qstr, - struct posix_acl *default_acl, - struct posix_acl *acl); + const struct qstr *qstr); #endif /* OCFS2_XATTR_H */ -- cgit v1.2.3 From 1d77f0a51cb6517d72146c6cb81a997b060b68b2 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 9 May 2016 16:28:49 -0700 Subject: zsmalloc: fix zs_can_compact() integer overflow commit 44f43e99fe70833058482d183e99fdfd11220996 upstream. zs_can_compact() has two race conditions in its core calculation: unsigned long obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) - zs_stat_get(class, OBJ_USED); 1) classes are not locked, so the numbers of allocated and used objects can change by the concurrent ops happening on other CPUs 2) shrinker invokes it from preemptible context Depending on the circumstances, thus, OBJ_ALLOCATED can become less than OBJ_USED, which can result in either very high or negative `total_scan' value calculated later in do_shrink_slab(). do_shrink_slab() has some logic to prevent those cases: vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-64 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 However, due to the way `total_scan' is calculated, not every shrinker->count_objects() overflow can be spotted and handled. To demonstrate the latter, I added some debugging code to do_shrink_slab() (x86_64) and the results were: vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615] vmscan: but total_scan > 0: 92679974445502 vmscan: resulting total_scan: 92679974445502 [..] vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615] vmscan: but total_scan > 0: 22634041808232578 vmscan: resulting total_scan: 22634041808232578 Even though shrinker->count_objects() has returned an overflowed value, the resulting `total_scan' is positive, and, what is more worrisome, it is insanely huge. This value is getting used later on in shrinker->scan_objects() loop: while (total_scan >= batch_size || total_scan >= freeable) { unsigned long ret; unsigned long nr_to_scan = min(batch_size, total_scan); shrinkctl->nr_to_scan = nr_to_scan; ret = shrinker->scan_objects(shrinker, shrinkctl); if (ret == SHRINK_STOP) break; freed += ret; count_vm_events(SLABS_SCANNED, nr_to_scan); total_scan -= nr_to_scan; cond_resched(); } `total_scan >= batch_size' is true for a very-very long time and 'total_scan >= freeable' is also true for quite some time, because `freeable < 0' and `total_scan' is large enough, for example, 22634041808232578. The only break condition, in the given scheme of things, is shrinker->scan_objects() == SHRINK_STOP test, which is a bit too weak to rely on, especially in heavy zsmalloc-usage scenarios. To fix the issue, take a pool stat snapshot and use it instead of racy zs_stat_get() calls. Link: http://lkml.kernel.org/r/20160509140052.3389-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index fc083996e40a..c1ea19478119 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1732,10 +1732,13 @@ static struct page *isolate_source_page(struct size_class *class) static unsigned long zs_can_compact(struct size_class *class) { unsigned long obj_wasted; + unsigned long obj_allocated = zs_stat_get(class, OBJ_ALLOCATED); + unsigned long obj_used = zs_stat_get(class, OBJ_USED); - obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) - - zs_stat_get(class, OBJ_USED); + if (obj_allocated <= obj_used) + return 0; + obj_wasted = obj_allocated - obj_used; obj_wasted /= get_maxobj_per_zspage(class->size, class->pages_per_zspage); -- cgit v1.2.3 From ce1bc448bac01edfccdc26d8318cfd39aa09e6e0 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 15 Apr 2016 16:38:40 +0200 Subject: s390/mm: fix asce_bits handling with dynamic pagetable levels commit 723cacbd9dc79582e562c123a0bacf8bfc69e72a upstream. There is a race with multi-threaded applications between context switch and pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and mm->context.asce_bits, w/o holding any locks. A concurrent mmap with a pagetable upgrade on another thread in crst_table_upgrade() could already have set new asce_bits, but not yet the new mm->pgd. This would result in a corrupt user_asce in switch_mm(), and eventually in a kernel panic from a translation exception. Fix this by storing the complete asce instead of just the asce_bits, which can then be read atomically from switch_mm(), so that it either sees the old value or the new value, but no mixture. Both cases are OK. Having the old value would result in a page fault on access to the higher level memory, but the fault handler would see the new mm->pgd, if it was a valid access after the mmap on the other thread has completed. So as worst-case scenario we would have a page fault loop for the racing thread until the next time slice. Also remove dead code and simplify the upgrade/downgrade path, there are no upgrades from 2 levels, and only downgrades from 3 levels for compat tasks. There are also no concurrent upgrades, because the mmap_sem is held with down_write() in do_mmap, so the flush and table checks during upgrade can be removed. Reported-by: Michael Munday Reviewed-by: Martin Schwidefsky Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/mmu.h | 2 +- arch/s390/include/asm/mmu_context.h | 28 +++++++++--- arch/s390/include/asm/pgalloc.h | 4 +- arch/s390/include/asm/processor.h | 2 +- arch/s390/include/asm/tlbflush.h | 9 ++-- arch/s390/mm/init.c | 3 +- arch/s390/mm/mmap.c | 6 +-- arch/s390/mm/pgtable.c | 85 ++++++++++++------------------------- 8 files changed, 62 insertions(+), 77 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index d29ad9545b41..081b2ad99d73 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -11,7 +11,7 @@ typedef struct { spinlock_t list_lock; struct list_head pgtable_list; struct list_head gmap_list; - unsigned long asce_bits; + unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; /* The mmu context allocates 4K page tables. */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index e485817f7b1a..22877c9440ea 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -26,12 +26,28 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - if (mm->context.asce_limit == 0) { + switch (mm->context.asce_limit) { + case 1UL << 42: + /* + * forked 3-level task, fall through to set new asce with new + * mm->pgd + */ + case 0: /* context created by exec, set asce limit to 4TB */ - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION3; mm->context.asce_limit = STACK_TOP_MAX; - } else if (mm->context.asce_limit == (1UL << 31)) { + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + break; + case 1UL << 53: + /* forked 4-level task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + break; + case 1UL << 31: + /* forked 2-level compat task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; + /* pgd_alloc() did not increase mm->nr_pmds */ mm_inc_nr_pmds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); @@ -42,7 +58,7 @@ static inline int init_new_context(struct task_struct *tsk, static inline void set_user_asce(struct mm_struct *mm) { - S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd); + S390_lowcore.user_asce = mm->context.asce; if (current->thread.mm_segment.ar4) __ctl_load(S390_lowcore.user_asce, 7, 7); set_cpu_flag(CIF_ASCE); @@ -71,7 +87,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); + S390_lowcore.user_asce = next->context.asce; if (prev == next) return; if (MACHINE_HAS_TLB_LC) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index d7cc79fb6191..5991cdcb5b40 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -56,8 +56,8 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) return _REGION2_ENTRY_EMPTY; } -int crst_table_upgrade(struct mm_struct *, unsigned long limit); -void crst_table_downgrade(struct mm_struct *, unsigned long limit); +int crst_table_upgrade(struct mm_struct *); +void crst_table_downgrade(struct mm_struct *); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b16c3d0a1b9f..c1ea67db8404 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -163,7 +163,7 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS]; regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ + crst_table_downgrade(current->mm); \ execve_tail(); \ } while (0) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index ca148f7c3eaa..a2e6ef32e054 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -110,8 +110,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_IDTE) - __tlb_flush_idte((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte(init_mm.context.asce); else __tlb_flush_global(); } @@ -133,8 +132,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_TLB_LC) - __tlb_flush_idte_local((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte_local(init_mm.context.asce); else __tlb_flush_local(); } @@ -148,8 +146,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * only ran on the local cpu. */ if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) - __tlb_flush_asce(mm, (unsigned long) mm->pgd | - mm->context.asce_bits); + __tlb_flush_asce(mm, mm->context.asce); else __tlb_flush_full(mm); } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index c722400c7697..feff9caf89b5 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -89,7 +89,8 @@ void __init paging_init(void) asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; pgd_type = _REGION3_ENTRY_EMPTY; } - S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + S390_lowcore.kernel_asce = init_mm.context.asce; clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index ea01477b4aa6..f2b6b1d9c804 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -174,7 +174,7 @@ int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) if (!(flags & MAP_FIXED)) addr = 0; if ((addr + len) >= TASK_SIZE) - return crst_table_upgrade(current->mm, 1UL << 53); + return crst_table_upgrade(current->mm); return 0; } @@ -191,7 +191,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr, return area; if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm, 1UL << 53); + rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); @@ -213,7 +213,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, return area; if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm, 1UL << 53); + rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area_topdown(filp, addr, len, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 54ef3bc01b43..471a370a527b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -49,81 +49,52 @@ static void __crst_table_upgrade(void *arg) __tlb_flush_local(); } -int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +int crst_table_upgrade(struct mm_struct *mm) { unsigned long *table, *pgd; - unsigned long entry; - int flush; - BUG_ON(limit > (1UL << 53)); - flush = 0; -repeat: + /* upgrade should only happen from 3 to 4 levels */ + BUG_ON(mm->context.asce_limit != (1UL << 42)); + table = crst_table_alloc(mm); if (!table) return -ENOMEM; + spin_lock_bh(&mm->page_table_lock); - if (mm->context.asce_limit < limit) { - pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit <= (1UL << 31)) { - entry = _REGION3_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - } else { - entry = _REGION2_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 53; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION2; - } - crst_table_init(table, entry); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->task_size = mm->context.asce_limit; - table = NULL; - flush = 1; - } + pgd = (unsigned long *) mm->pgd; + crst_table_init(table, _REGION2_ENTRY_EMPTY); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->context.asce_limit = 1UL << 53; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + mm->task_size = mm->context.asce_limit; spin_unlock_bh(&mm->page_table_lock); - if (table) - crst_table_free(mm, table); - if (mm->context.asce_limit < limit) - goto repeat; - if (flush) - on_each_cpu(__crst_table_upgrade, mm, 0); + + on_each_cpu(__crst_table_upgrade, mm, 0); return 0; } -void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +void crst_table_downgrade(struct mm_struct *mm) { pgd_t *pgd; + /* downgrade should only happen from 3 to 2 levels (compat only) */ + BUG_ON(mm->context.asce_limit != (1UL << 42)); + if (current->active_mm == mm) { clear_user_asce(); __tlb_flush_mm(mm); } - while (mm->context.asce_limit > limit) { - pgd = mm->pgd; - switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { - case _REGION_ENTRY_TYPE_R2: - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - break; - case _REGION_ENTRY_TYPE_R3: - mm->context.asce_limit = 1UL << 31; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_SEGMENT; - break; - default: - BUG(); - } - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->task_size = mm->context.asce_limit; - crst_table_free(mm, (unsigned long *) pgd); - } + + pgd = mm->pgd; + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->context.asce_limit = 1UL << 31; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + if (current->active_mm == mm) set_user_asce(mm); } -- cgit v1.2.3 From 6f1519425705094323912e55d8d6e174775b9caf Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 25 Apr 2016 07:32:19 -0700 Subject: crypto: qat - fix invalid pf2vf_resp_wq logic commit 9e209fcfb804da262e38e5cd2e680c47a41f0f95 upstream. The pf2vf_resp_wq is a global so it has to be created at init and destroyed at exit, instead of per device. Tested-by: Suresh Marikkannu Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_common/adf_common_drv.h | 2 ++ drivers/crypto/qat/qat_common/adf_ctl_drv.c | 6 ++++++ drivers/crypto/qat/qat_common/adf_sriov.c | 26 ++++++++++++++++---------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 3f76bd495bcb..b9178d0a3093 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -145,6 +145,8 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf); void adf_disable_aer(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); +int adf_init_pf_wq(void); +void adf_exit_pf_wq(void); int adf_init_admin_comms(struct adf_accel_dev *accel_dev); void adf_exit_admin_comms(struct adf_accel_dev *accel_dev); int adf_send_admin_init(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 473d36d91644..e7480f373532 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -469,12 +469,17 @@ static int __init adf_register_ctl_device_driver(void) if (adf_init_aer()) goto err_aer; + if (adf_init_pf_wq()) + goto err_pf_wq; + if (qat_crypto_register()) goto err_crypto_register; return 0; err_crypto_register: + adf_exit_pf_wq(); +err_pf_wq: adf_exit_aer(); err_aer: adf_chr_drv_destroy(); @@ -487,6 +492,7 @@ static void __exit adf_unregister_ctl_device_driver(void) { adf_chr_drv_destroy(); adf_exit_aer(); + adf_exit_pf_wq(); qat_crypto_unregister(); adf_clean_vf_map(false); mutex_destroy(&adf_ctl_lock); diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 1117a8b58280..38a0415e767d 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -119,11 +119,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) int i; u32 reg; - /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); - if (!pf2vf_resp_wq) - return -ENOMEM; - for (i = 0, vf_info = accel_dev->pf.vf_info; i < totalvfs; i++, vf_info++) { /* This ptr will be populated when VFs will be created */ @@ -216,11 +211,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) kfree(accel_dev->pf.vf_info); accel_dev->pf.vf_info = NULL; - - if (pf2vf_resp_wq) { - destroy_workqueue(pf2vf_resp_wq); - pf2vf_resp_wq = NULL; - } } EXPORT_SYMBOL_GPL(adf_disable_sriov); @@ -304,3 +294,19 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) return numvfs; } EXPORT_SYMBOL_GPL(adf_sriov_configure); + +int __init adf_init_pf_wq(void) +{ + /* Workqueue for PF2VF responses */ + pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + + return !pf2vf_resp_wq ? -ENOMEM : 0; +} + +void adf_exit_pf_wq(void) +{ + if (pf2vf_resp_wq) { + destroy_workqueue(pf2vf_resp_wq); + pf2vf_resp_wq = NULL; + } +} -- cgit v1.2.3 From 3cbc5f6ed2722a65b75de22fe2206471f093b6c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 May 2016 17:52:56 +0800 Subject: crypto: hash - Fix page length clamping in hash walk commit 13f4bb78cf6a312bbdec367ba3da044b09bf0e29 upstream. The crypto hash walk code is broken when supplied with an offset greater than or equal to PAGE_SIZE. This patch fixes it by adjusting walk->pg and walk->offset when this happens. Reported-by: Steffen Klassert Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index d19b52324cf5..dac1c24e9c3e 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -69,8 +69,9 @@ static int hash_walk_new_entry(struct crypto_hash_walk *walk) struct scatterlist *sg; sg = walk->sg; - walk->pg = sg_page(sg); walk->offset = sg->offset; + walk->pg = sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT); + walk->offset = offset_in_page(walk->offset); walk->entrylen = sg->length; if (walk->entrylen > walk->total) -- cgit v1.2.3 From 7c712000ba78d26f46d16fcc8ac3a9d827ddb242 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 May 2016 16:42:49 +0800 Subject: crypto: testmgr - Use kmalloc memory for RSA input commit df27b26f04ed388ff4cc2b5d8cfdb5d97678816f upstream. As akcipher uses an SG interface, you must not use vmalloc memory as input for it. This patch fixes testmgr to copy the vmalloc test vectors to kmalloc memory before running the test. This patch also removes a superfluous sg_virt call in do_test_rsa. Reported-by: Anatoly Pugachev Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/testmgr.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ae8c57fd8bc7..d4944318ca1f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1849,6 +1849,7 @@ static int alg_test_drbg(const struct alg_test_desc *desc, const char *driver, static int do_test_rsa(struct crypto_akcipher *tfm, struct akcipher_testvec *vecs) { + char *xbuf[XBUFSIZE]; struct akcipher_request *req; void *outbuf_enc = NULL; void *outbuf_dec = NULL; @@ -1857,9 +1858,12 @@ static int do_test_rsa(struct crypto_akcipher *tfm, int err = -ENOMEM; struct scatterlist src, dst, src_tab[2]; + if (testmgr_alloc_buf(xbuf)) + return err; + req = akcipher_request_alloc(tfm, GFP_KERNEL); if (!req) - return err; + goto free_xbuf; init_completion(&result.completion); @@ -1877,9 +1881,14 @@ static int do_test_rsa(struct crypto_akcipher *tfm, if (!outbuf_enc) goto free_req; + if (WARN_ON(vecs->m_size > PAGE_SIZE)) + goto free_all; + + memcpy(xbuf[0], vecs->m, vecs->m_size); + sg_init_table(src_tab, 2); - sg_set_buf(&src_tab[0], vecs->m, 8); - sg_set_buf(&src_tab[1], vecs->m + 8, vecs->m_size - 8); + sg_set_buf(&src_tab[0], xbuf[0], 8); + sg_set_buf(&src_tab[1], xbuf[0] + 8, vecs->m_size - 8); sg_init_one(&dst, outbuf_enc, out_len_max); akcipher_request_set_crypt(req, src_tab, &dst, vecs->m_size, out_len_max); @@ -1898,7 +1907,7 @@ static int do_test_rsa(struct crypto_akcipher *tfm, goto free_all; } /* verify that encrypted message is equal to expected */ - if (memcmp(vecs->c, sg_virt(req->dst), vecs->c_size)) { + if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) { pr_err("alg: rsa: encrypt test failed. Invalid output\n"); err = -EINVAL; goto free_all; @@ -1913,7 +1922,13 @@ static int do_test_rsa(struct crypto_akcipher *tfm, err = -ENOMEM; goto free_all; } - sg_init_one(&src, vecs->c, vecs->c_size); + + if (WARN_ON(vecs->c_size > PAGE_SIZE)) + goto free_all; + + memcpy(xbuf[0], vecs->c, vecs->c_size); + + sg_init_one(&src, xbuf[0], vecs->c_size); sg_init_one(&dst, outbuf_dec, out_len_max); init_completion(&result.completion); akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max); @@ -1940,6 +1955,8 @@ free_all: kfree(outbuf_enc); free_req: akcipher_request_free(req); +free_xbuf: + testmgr_free_buf(xbuf); return err; } -- cgit v1.2.3 From 2a5db188f3151be7d10c01c29deb6867b8bb2925 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Apr 2016 11:20:15 +0200 Subject: ALSA: usb-audio: Quirk for yet another Phoenix Audio devices (v2) commit 2d2c038a9999f423e820d89db2b5d7774b67ba49 upstream. Phoenix Audio MT202pcs (1de7:0114) and MT202exe (1de7:0013) need the same workaround as TMX320 for avoiding the firmware bug. It fixes the frequent error about the sample rate inquiries and the slow device probe as consequence. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117321 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 001fb4dc0722..93c076574def 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1139,7 +1139,9 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ + case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ return true; } -- cgit v1.2.3 From 7e8b58b0fa1c7f2da95307ab3fdf5704ca3f11db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 May 2016 17:48:00 +0200 Subject: ALSA: usb-audio: Yet another Phoneix Audio device quirk commit 84add303ef950b8d85f54bc2248c2bc73467c329 upstream. Phoenix Audio has yet another device with another id (even a different vendor id, 0556:0014) that requires the same quirk for the sample rate. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110221 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 93c076574def..db11ecf0b74d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1138,6 +1138,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ + case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ -- cgit v1.2.3 From 28c56446f563af16707e7945ce88f143f8fd6285 Mon Sep 17 00:00:00 2001 From: Yura Pakhuchiy Date: Sat, 7 May 2016 23:53:36 +0700 Subject: ALSA: hda - Fix subwoofer pin on ASUS N751 and N551 commit 3231e2053eaeee70bdfb216a78a30f11e88e2243 upstream. Subwoofer does not work out of the box on ASUS N751/N551 laptops. This patch fixes it. Patch tested on N751 laptop. N551 part is not tested, but according to [1] and [2] this laptop requires similar changes, so I included them in the patch. 1. https://github.com/honsiorovskyi/asus-n551-hda-fix 2. https://bugs.launchpad.net/ubuntu/+source/alsa-tools/+bug/1405691 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117781 Signed-off-by: Yura Pakhuchiy Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ac4490a96863..0b5fe30f626d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6426,6 +6426,7 @@ enum { ALC668_FIXUP_DELL_DISABLE_AAMIX, ALC668_FIXUP_DELL_XPS13, ALC662_FIXUP_ASUS_Nx50, + ALC668_FIXUP_ASUS_Nx51, }; static const struct hda_fixup alc662_fixups[] = { @@ -6672,6 +6673,15 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_BASS_1A }, + [ALC668_FIXUP_ASUS_Nx51] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + {0x1a, 0x90170151}, /* bass speaker */ + {} + }, + .chained = true, + .chain_id = ALC662_FIXUP_BASS_CHMAP, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -6699,6 +6709,8 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), + SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), -- cgit v1.2.3 From 28ff35e454b4e2688eb0456b0981d6003a19d122 Mon Sep 17 00:00:00 2001 From: Kaho Ng Date: Mon, 9 May 2016 00:27:49 +0800 Subject: ALSA: hda - Fix white noise on Asus UX501VW headset commit 2da2dc9ead232f25601404335cca13c0f722d41b upstream. For reducing the noise from the headset output on ASUS UX501VW, call the existing fixup, alc_fixup_headset_mode_alc668(), additionally. Thread: https://bbs.archlinux.org/viewtopic.php?id=209554 Signed-off-by: Kaho Ng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0b5fe30f626d..4918ffa5ba68 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6704,6 +6704,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), -- cgit v1.2.3 From 90204cbb4890422facac52ed197b169eb1f26c9f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 10 May 2016 10:24:02 +0200 Subject: ALSA: hda - Fix broken reconfig commit addacd801e1638f41d659cb53b9b73fc14322cb1 upstream. The HD-audio reconfig function got broken in the recent kernels, typically resulting in a failure like: snd_hda_intel 0000:00:1b.0: control 3:0:0:Playback Channel Map:0 is already present This is because of the code restructuring to move the PCM and control instantiation into the codec drive probe, by the commit [bcd96557bd0a: ALSA: hda - Build PCMs and controls at codec driver probe]. Although the commit above removed the calls of snd_hda_codec_build_pcms() and *_build_controls() at the controller driver probe, the similar calls in the reconfig were still left forgotten. This caused the conflicting and duplicated PCMs and controls. The fix is trivial: just remove these superfluous calls from reconfig_codec(). Fixes: bcd96557bd0a ('ALSA: hda - Build PCMs and controls at codec driver probe') Reported-by: Jochen Henneberg Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_sysfs.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index 64e0d1d81ca5..9739fce9e032 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -139,14 +139,6 @@ static int reconfig_codec(struct hda_codec *codec) goto error; } err = snd_hda_codec_configure(codec); - if (err < 0) - goto error; - /* rebuild PCMs */ - err = snd_hda_codec_build_pcms(codec); - if (err < 0) - goto error; - /* rebuild mixers */ - err = snd_hda_codec_build_controls(codec); if (err < 0) goto error; err = snd_card_register(codec->card); -- cgit v1.2.3 From ca100af95931ebf2610b47ff2c311fe2c5e43495 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 26 Apr 2016 10:08:26 +0300 Subject: spi: pxa2xx: Do not detect number of enabled chip selects on Intel SPT commit 66ec246eb9982e7eb8e15e1fc55f543230310dd0 upstream. Certain Intel Sunrisepoint PCH variants report zero chip selects in SPI capabilities register even they have one per port. Detection in pxa2xx_spi_probe() sets master->num_chipselect to 0 leading to -EINVAL from spi_register_master() where chip select count is validated. Fix this by not using SPI capabilities register on Sunrisepoint. They don't have more than one chip select so use the default value 1 instead of detection. Fixes: 8b136baa5892 ("spi: pxa2xx: Detect number of enabled Intel LPSS SPI chip select signals") Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index b25dc71b0ea9..73c8ea0b1360 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -111,7 +111,7 @@ static const struct lpss_config lpss_platforms[] = { .reg_general = -1, .reg_ssp = 0x20, .reg_cs_ctrl = 0x24, - .reg_capabilities = 0xfc, + .reg_capabilities = -1, .rx_threshold = 1, .tx_threshold_lo = 32, .tx_threshold_hi = 56, -- cgit v1.2.3 From 444189274bd802692e3e9334e23c1d9348ea87f6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2016 12:56:25 +0100 Subject: spi: spi-ti-qspi: Fix FLEN and WLEN settings if bits_per_word is overridden commit ea1b60fb085839a9544cb3a0069992991beabb7f upstream. Each transfer can specify 8, 16 or 32 bits per word independently of the default for the device being addressed. However, currently we calculate the number of words in the frame assuming that the word size is the device default. If multiple transfers in the same message have differing bits_per_word, we bitwise-or the different values in the WLEN register field. Fix both of these. Also rename 'frame_length' to 'frame_len_words' to make clear that it's not a byte count like spi_message::frame_length. Signed-off-by: Ben Hutchings Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ti-qspi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 64318fcfacf2..2137f2112804 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -94,6 +94,7 @@ struct ti_qspi { #define QSPI_FLEN(n) ((n - 1) << 0) #define QSPI_WLEN_MAX_BITS 128 #define QSPI_WLEN_MAX_BYTES 16 +#define QSPI_WLEN_MASK QSPI_WLEN(QSPI_WLEN_MAX_BITS) /* STATUS REGISTER */ #define BUSY 0x01 @@ -373,7 +374,7 @@ static int ti_qspi_start_transfer_one(struct spi_master *master, struct spi_device *spi = m->spi; struct spi_transfer *t; int status = 0, ret; - int frame_length; + unsigned int frame_len_words; /* setup device control reg */ qspi->dc = 0; @@ -385,21 +386,23 @@ static int ti_qspi_start_transfer_one(struct spi_master *master, if (spi->mode & SPI_CS_HIGH) qspi->dc |= QSPI_CSPOL(spi->chip_select); - frame_length = (m->frame_length << 3) / spi->bits_per_word; - - frame_length = clamp(frame_length, 0, QSPI_FRAME); + frame_len_words = 0; + list_for_each_entry(t, &m->transfers, transfer_list) + frame_len_words += t->len / (t->bits_per_word >> 3); + frame_len_words = min_t(unsigned int, frame_len_words, QSPI_FRAME); /* setup command reg */ qspi->cmd = 0; qspi->cmd |= QSPI_EN_CS(spi->chip_select); - qspi->cmd |= QSPI_FLEN(frame_length); + qspi->cmd |= QSPI_FLEN(frame_len_words); ti_qspi_write(qspi, qspi->dc, QSPI_SPI_DC_REG); mutex_lock(&qspi->list_lock); list_for_each_entry(t, &m->transfers, transfer_list) { - qspi->cmd |= QSPI_WLEN(t->bits_per_word); + qspi->cmd = ((qspi->cmd & ~QSPI_WLEN_MASK) | + QSPI_WLEN(t->bits_per_word)); ret = qspi_transfer_msg(qspi, t); if (ret) { -- cgit v1.2.3 From 1441dcbdcbe529fd7f0db6fd95b4bc9d867e04bb Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2016 12:58:14 +0100 Subject: spi: spi-ti-qspi: Handle truncated frames properly commit 1ff7760ff66b98ef244bf0e5e2bd5310651205ad upstream. We clamp frame_len_words to a maximum of 4096, but do not actually limit the number of words written or read through the DATA registers or the length added to spi_message::actual_length. This results in silent data corruption for commands longer than this maximum. Recalculate the length of each transfer, taking frame_len_words into account. Use this length in qspi_{read,write}_msg(), and to increment spi_message::actual_length. Signed-off-by: Ben Hutchings Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ti-qspi.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 2137f2112804..5044c6198332 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -225,16 +225,16 @@ static inline int ti_qspi_poll_wc(struct ti_qspi *qspi) return -ETIMEDOUT; } -static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t) +static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t, + int count) { - int wlen, count, xfer_len; + int wlen, xfer_len; unsigned int cmd; const u8 *txbuf; u32 data; txbuf = t->tx_buf; cmd = qspi->cmd | QSPI_WR_SNGL; - count = t->len; wlen = t->bits_per_word >> 3; /* in bytes */ xfer_len = wlen; @@ -294,9 +294,10 @@ static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t) return 0; } -static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t) +static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t, + int count) { - int wlen, count; + int wlen; unsigned int cmd; u8 *rxbuf; @@ -313,7 +314,6 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t) cmd |= QSPI_RD_SNGL; break; } - count = t->len; wlen = t->bits_per_word >> 3; /* in bytes */ while (count) { @@ -344,12 +344,13 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t) return 0; } -static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t) +static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t, + int count) { int ret; if (t->tx_buf) { - ret = qspi_write_msg(qspi, t); + ret = qspi_write_msg(qspi, t, count); if (ret) { dev_dbg(qspi->dev, "Error while writing\n"); return ret; @@ -357,7 +358,7 @@ static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t) } if (t->rx_buf) { - ret = qspi_read_msg(qspi, t); + ret = qspi_read_msg(qspi, t, count); if (ret) { dev_dbg(qspi->dev, "Error while reading\n"); return ret; @@ -374,7 +375,8 @@ static int ti_qspi_start_transfer_one(struct spi_master *master, struct spi_device *spi = m->spi; struct spi_transfer *t; int status = 0, ret; - unsigned int frame_len_words; + unsigned int frame_len_words, transfer_len_words; + int wlen; /* setup device control reg */ qspi->dc = 0; @@ -404,14 +406,20 @@ static int ti_qspi_start_transfer_one(struct spi_master *master, qspi->cmd = ((qspi->cmd & ~QSPI_WLEN_MASK) | QSPI_WLEN(t->bits_per_word)); - ret = qspi_transfer_msg(qspi, t); + wlen = t->bits_per_word >> 3; + transfer_len_words = min(t->len / wlen, frame_len_words); + + ret = qspi_transfer_msg(qspi, t, transfer_len_words * wlen); if (ret) { dev_dbg(qspi->dev, "transfer message failed\n"); mutex_unlock(&qspi->list_lock); return -EINVAL; } - m->actual_length += t->len; + m->actual_length += transfer_len_words * wlen; + frame_len_words -= transfer_len_words; + if (frame_len_words == 0) + break; } mutex_unlock(&qspi->list_lock); -- cgit v1.2.3 From 1fba064f789e053be80f1ee38c70abde71f584e2 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Tue, 19 Apr 2016 16:03:45 +0200 Subject: pinctrl: at91-pio4: fix pull-up/down logic commit 5305a7b7e860bb40ab226bc7d58019416073948a upstream. The default configuration of a pin is often with a value in the pull-up/down field at chip reset. So, even if the internal logic of the controller prevents writing a configuration with pull-up and pull-down at the same time, we must ensure explicitly this condition before writing the register. This was leading to a pull-down condition not taken into account for instance. Signed-off-by: Ludovic Desroches Fixes: 776180848b57 ("pinctrl: introduce driver for Atmel PIO4 controller") Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-at91-pio4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 33edd07d9149..b3235fd2950c 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -717,9 +717,11 @@ static int atmel_conf_pin_config_group_set(struct pinctrl_dev *pctldev, break; case PIN_CONFIG_BIAS_PULL_UP: conf |= ATMEL_PIO_PUEN_MASK; + conf &= (~ATMEL_PIO_PDEN_MASK); break; case PIN_CONFIG_BIAS_PULL_DOWN: conf |= ATMEL_PIO_PDEN_MASK; + conf &= (~ATMEL_PIO_PUEN_MASK); break; case PIN_CONFIG_DRIVE_OPEN_DRAIN: if (arg == 0) -- cgit v1.2.3 From 4e7e3f54cae7a6ea567c09d18c35bcc468b564c3 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 14 Apr 2016 23:37:26 -0700 Subject: regmap: spmi: Fix regmap_spmi_ext_read in multi-byte case commit dec8e8f6e6504aa3496c0f7cc10c756bb0e10f44 upstream. Specifically for the case of reads that use the Extended Register Read Long command, a multi-byte read operation is broken up into 8-byte chunks. However the call to spmi_ext_register_readl() is incorrectly passing 'val_size', which if greater than 8 will always fail. The argument should instead be 'len'. Fixes: c9afbb05a9ff ("regmap: spmi: support base and extended register spaces") Signed-off-by: Jack Pham Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-spmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-spmi.c b/drivers/base/regmap/regmap-spmi.c index 7e58f6560399..4a36e415e938 100644 --- a/drivers/base/regmap/regmap-spmi.c +++ b/drivers/base/regmap/regmap-spmi.c @@ -142,7 +142,7 @@ static int regmap_spmi_ext_read(void *context, while (val_size) { len = min_t(size_t, val_size, 8); - err = spmi_ext_register_readl(context, addr, val, val_size); + err = spmi_ext_register_readl(context, addr, val, len); if (err) goto err_out; -- cgit v1.2.3 From e54c41be42cd77cf11817bc50f91933cda3903d9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 10 May 2016 16:18:33 +0300 Subject: perf/core: Disable the event on a truncated AUX record commit 9f448cd3cbcec8995935e60b27802ae56aac8cc0 upstream. When the PMU driver reports a truncated AUX record, it effectively means that there is no more usable room in the event's AUX buffer (even though there may still be some room, so that perf_aux_output_begin() doesn't take action). At this point the consumer still has to be woken up and the event has to be disabled, otherwise the event will just keep spinning between perf_aux_output_begin() and perf_aux_output_end() until its context gets unscheduled. Again, for cpu-wide events this means never, so once in this condition, they will be forever losing data. Fix this by disabling the event and waking up the consumer in case of a truncated AUX record. Reported-by: Markus Metzger Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/1462886313-13660-3-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/ring_buffer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index adfdc0536117..014b69528194 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -347,6 +347,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, bool truncated) { struct ring_buffer *rb = handle->rb; + bool wakeup = truncated; unsigned long aux_head; u64 flags = 0; @@ -375,9 +376,16 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { - perf_output_wakeup(handle); + wakeup = true; local_add(rb->aux_watermark, &rb->aux_wakeup); } + + if (wakeup) { + if (truncated) + handle->event->pending_disable = 1; + perf_output_wakeup(handle); + } + handle->event = NULL; local_set(&rb->aux_nest, 0); -- cgit v1.2.3 From b0dac61d2454b392dbdb4ad565f9dc3dc76fce96 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 May 2016 01:16:37 +0200 Subject: vfs: add vfs_select_inode() helper commit 54d5ca871e72f2bb172ec9323497f01cd5091ec7 upstream. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 12 ++++-------- include/linux/dcache.h | 12 ++++++++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/open.c b/fs/open.c index 6a24f988d253..157b9940dd73 100644 --- a/fs/open.c +++ b/fs/open.c @@ -840,16 +840,12 @@ EXPORT_SYMBOL(file_path); int vfs_open(const struct path *path, struct file *file, const struct cred *cred) { - struct dentry *dentry = path->dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = vfs_select_inode(path->dentry, file->f_flags); - file->f_path = *path; - if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { - inode = dentry->d_op->d_select_inode(dentry, file->f_flags); - if (IS_ERR(inode)) - return PTR_ERR(inode); - } + if (IS_ERR(inode)) + return PTR_ERR(inode); + file->f_path = *path; return do_dentry_open(file, inode, NULL, cred); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f513dd855cb2..d81746d3b2da 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -592,4 +592,16 @@ static inline struct dentry *d_real(struct dentry *dentry) return dentry; } +static inline struct inode *vfs_select_inode(struct dentry *dentry, + unsigned open_flags) +{ + struct inode *inode = d_inode(dentry); + + if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE)) + inode = dentry->d_op->d_select_inode(dentry, open_flags); + + return inode; +} + + #endif /* __LINUX_DCACHE_H */ -- cgit v1.2.3 From 8e5bb3c5417fd98c8966807dc07cc1b687da97c4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 May 2016 01:16:37 +0200 Subject: vfs: rename: check backing inode being equal commit 9409e22acdfc9153f88d9b1ed2bd2a5b34d2d3ca upstream. If a file is renamed to a hardlink of itself POSIX specifies that rename(2) should do nothing and return success. This condition is checked in vfs_rename(). However it won't detect hard links on overlayfs where these are given separate inodes on the overlayfs layer. Overlayfs itself detects this condition and returns success without doing anything, but then vfs_rename() will proceed as if this was a successful rename (detach_mounts(), d_move()). The correct thing to do is to detect this condition before even calling into overlayfs. This patch does this by calling vfs_select_inode() to get the underlying inodes. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index d8ee4da93650..0202aebb9813 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4195,7 +4195,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; - if (source == target) + /* + * Check source == target. + * On overlayfs need to look at underlying inodes. + */ + if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0)) return 0; error = may_delete(old_dir, old_dentry, is_dir); -- cgit v1.2.3 From b66cb8c5137da3dda2f32b2bcc69d3937f6d4899 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 11 May 2016 11:00:02 +0200 Subject: ARM: dts: at91: sam9x5: Fix the memory range assigned to the PMC commit aab0a4c83ceb344d2327194bf354820e50607af6 upstream. The memory range assigned to the PMC (Power Management Controller) was not including the PMC_PCR register which are used to control peripheral clocks. This was working fine thanks to the page granularity of ioremap(), but started to fail when we switched to syscon/regmap, because regmap is making sure that all accesses are falling into the reserved range. Signed-off-by: Boris Brezillon Reported-by: Richard Genoud Tested-by: Richard Genoud Fixes: 863a81c3be1d ("clk: at91: make use of syscon to share PMC registers in several drivers") Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91sam9x5.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 0827d594b1f0..cd0cd5fd09a3 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -106,7 +106,7 @@ pmc: pmc@fffffc00 { compatible = "atmel,at91sam9x5-pmc", "syscon"; - reg = <0xfffffc00 0x100>; + reg = <0xfffffc00 0x200>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; interrupt-controller; #address-cells = <1>; -- cgit v1.2.3 From cf73d8ad76e4555a45ee399887b7c0361354d10f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 11 May 2016 17:55:18 +0800 Subject: workqueue: fix rebind bound workers warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f7c17d26f43d5cc1b7a6b896cd2fa24a079739b9 upstream. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 16 at kernel/workqueue.c:4559 rebind_workers+0x1c0/0x1d0 Modules linked in: CPU: 0 PID: 16 Comm: cpuhp/0 Not tainted 4.6.0-rc4+ #31 Hardware name: IBM IBM System x3550 M4 Server -[7914IUW]-/00Y8603, BIOS -[D7E128FUS-1.40]- 07/23/2013 0000000000000000 ffff881037babb58 ffffffff8139d885 0000000000000010 0000000000000000 0000000000000000 0000000000000000 ffff881037babba8 ffffffff8108505d ffff881037ba0000 000011cf3e7d6e60 0000000000000046 Call Trace: dump_stack+0x89/0xd4 __warn+0xfd/0x120 warn_slowpath_null+0x1d/0x20 rebind_workers+0x1c0/0x1d0 workqueue_cpu_up_callback+0xf5/0x1d0 notifier_call_chain+0x64/0x90 ? trace_hardirqs_on_caller+0xf2/0x220 ? notify_prepare+0x80/0x80 __raw_notifier_call_chain+0xe/0x10 __cpu_notify+0x35/0x50 notify_down_prepare+0x5e/0x80 ? notify_prepare+0x80/0x80 cpuhp_invoke_callback+0x73/0x330 ? __schedule+0x33e/0x8a0 cpuhp_down_callbacks+0x51/0xc0 cpuhp_thread_fun+0xc1/0xf0 smpboot_thread_fn+0x159/0x2a0 ? smpboot_create_threads+0x80/0x80 kthread+0xef/0x110 ? wait_for_completion+0xf0/0x120 ? schedule_tail+0x35/0xf0 ret_from_fork+0x22/0x50 ? __init_kthread_worker+0x70/0x70 ---[ end trace eb12ae47d2382d8f ]--- notify_down_prepare: attempt to take down CPU 0 failed This bug can be reproduced by below config w/ nohz_full= all cpus: CONFIG_BOOTPARAM_HOTPLUG_CPU0=y CONFIG_DEBUG_HOTPLUG_CPU0=y CONFIG_NO_HZ_FULL=y As Thomas pointed out: | If a down prepare callback fails, then DOWN_FAILED is invoked for all | callbacks which have successfully executed DOWN_PREPARE. | | But, workqueue has actually two notifiers. One which handles | UP/DOWN_FAILED/ONLINE and one which handles DOWN_PREPARE. | | Now look at the priorities of those callbacks: | | CPU_PRI_WORKQUEUE_UP = 5 | CPU_PRI_WORKQUEUE_DOWN = -5 | | So the call order on DOWN_PREPARE is: | | CB 1 | CB ... | CB workqueue_up() -> Ignores DOWN_PREPARE | CB ... | CB X ---> Fails | | So we call up to CB X with DOWN_FAILED | | CB 1 | CB ... | CB workqueue_up() -> Handles DOWN_FAILED | CB ... | CB X-1 | | So the problem is that the workqueue stuff handles DOWN_FAILED in the up | callback, while it should do it in the down callback. Which is not a good idea | either because it wants to be called early on rollback... | | Brilliant stuff, isn't it? The hotplug rework will solve this problem because | the callbacks become symetric, but for the existing mess, we need some | workaround in the workqueue code. The boot CPU handles housekeeping duty(unbound timers, workqueues, timekeeping, ...) on behalf of full dynticks CPUs. It must remain online when nohz full is enabled. There is a priority set to every notifier_blocks: workqueue_cpu_up > tick_nohz_cpu_down > workqueue_cpu_down So tick_nohz_cpu_down callback failed when down prepare cpu 0, and notifier_blocks behind tick_nohz_cpu_down will not be called any more, which leads to workers are actually not unbound. Then hotplug state machine will fallback to undo and online cpu 0 again. Workers will be rebound unconditionally even if they are not unbound and trigger the warning in this progress. This patch fix it by catching !DISASSOCIATED to avoid rebind bound workers. Cc: Tejun Heo Cc: Lai Jiangshan Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Frédéric Weisbecker Suggested-by: Lai Jiangshan Signed-off-by: Wanpeng Li Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0ec05948a97b..2c2f971f3e75 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4457,6 +4457,17 @@ static void rebind_workers(struct worker_pool *pool) pool->attrs->cpumask) < 0); spin_lock_irq(&pool->lock); + + /* + * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED + * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is + * being reworked and this can go away in time. + */ + if (!(pool->flags & POOL_DISASSOCIATED)) { + spin_unlock_irq(&pool->lock); + return; + } + pool->flags &= ~POOL_DISASSOCIATED; for_each_pool_worker(worker, pool) { -- cgit v1.2.3 From fc2d8c98f754993f0c66d373672418a503e6e299 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 28 Mar 2016 13:09:56 +0900 Subject: regulator: s2mps11: Fix invalid selector mask and voltages for buck9 commit 3b672623079bb3e5685b8549e514f2dfaa564406 upstream. The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff instead of 0x1f) thus reading entire register as buck9's voltage. This effectively caused regulator core to interpret values as higher voltages than they were and then to set real voltage much lower than intended. The buck9 provides power to other regulators, including LDO13 and LDO19 which supply the MMC2 (SD card). On Odroid XU3/XU4 the lower voltage caused SD card detection errors on Odroid XU3/XU4: mmc1: card never left busy state mmc1: error -110 whilst initialising SD card During driver probe the regulator core was checking whether initial voltage matches the constraints. With incorrect vsel_mask of 0xff and default value of 0x50, the core interpreted this as 5 V which is outside of constraints (3-3.775 V). Then the regulator core was adjusting the voltage to match the constraints. With incorrect vsel_mask this new voltage mapped to a vere low voltage in the driver. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/s2mps11.c | 28 ++++++++++++++++++++++------ include/linux/mfd/samsung/s2mps11.h | 2 ++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index 72fc3c32db49..b6d831b84e1d 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -305,7 +305,7 @@ static struct regulator_ops s2mps11_buck_ops = { .enable_mask = S2MPS11_ENABLE_MASK \ } -#define regulator_desc_s2mps11_buck6_10(num, min, step) { \ +#define regulator_desc_s2mps11_buck67810(num, min, step) { \ .name = "BUCK"#num, \ .id = S2MPS11_BUCK##num, \ .ops = &s2mps11_buck_ops, \ @@ -321,6 +321,22 @@ static struct regulator_ops s2mps11_buck_ops = { .enable_mask = S2MPS11_ENABLE_MASK \ } +#define regulator_desc_s2mps11_buck9 { \ + .name = "BUCK9", \ + .id = S2MPS11_BUCK9, \ + .ops = &s2mps11_buck_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = MIN_3000_MV, \ + .uV_step = STEP_25_MV, \ + .n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \ + .ramp_delay = S2MPS11_RAMP_DELAY, \ + .vsel_reg = S2MPS11_REG_B9CTRL2, \ + .vsel_mask = S2MPS11_BUCK9_VSEL_MASK, \ + .enable_reg = S2MPS11_REG_B9CTRL1, \ + .enable_mask = S2MPS11_ENABLE_MASK \ +} + static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_ldo(1, STEP_25_MV), regulator_desc_s2mps11_ldo(2, STEP_50_MV), @@ -365,11 +381,11 @@ static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_buck1_4(3), regulator_desc_s2mps11_buck1_4(4), regulator_desc_s2mps11_buck5, - regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV), - regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV), + regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck9, + regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV), }; static struct regulator_ops s2mps14_reg_ops; diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index b288965e8101..2c14eeca46f0 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -173,10 +173,12 @@ enum s2mps11_regulators { #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF +#define S2MPS11_BUCK9_VSEL_MASK 0x1F #define S2MPS11_ENABLE_MASK (0x03 << S2MPS11_ENABLE_SHIFT) #define S2MPS11_ENABLE_SHIFT 0x06 #define S2MPS11_LDO_N_VOLTAGES (S2MPS11_LDO_VSEL_MASK + 1) #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1) +#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1) #define S2MPS11_RAMP_DELAY 25000 /* uV/us */ #define S2MPS11_CTRL1_PWRHOLD_MASK BIT(4) -- cgit v1.2.3 From b657027854182f708d74424998fe95624f7c69c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 27 Apr 2016 15:59:27 +0200 Subject: regulator: axp20x: Fix axp22x ldo_io voltage ranges commit a2262e5a12e05389ab4c7fc5cf60016b041dd8dc upstream. The minium voltage of 1800mV is a copy and paste error from the axp20x regulator info. The correct minimum voltage for the ldo_io regulators on the axp22x is 700mV. Fixes: 1b82b4e4f954 ("regulator: axp20x: Add support for AXP22X regulators") Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index f2e1a39ce0f3..5cf4a97e0304 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -221,10 +221,10 @@ static const struct regulator_desc axp22x_regulators[] = { AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)), AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100, AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)), - AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 700, 3300, 100, AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), - AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 700, 3300, 100, AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), AXP_DESC_FIXED(AXP22X, RTC_LDO, "rtc_ldo", "ips", 3000), -- cgit v1.2.3 From 4549fc712830400a3af34292ffdaa025f7e0c36e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 01:11:55 -0400 Subject: atomic_open(): fix the handling of create_error commit 10c64cea04d3c75c306b3f990586ffb343b63287 upstream. * if we have a hashed negative dentry and either CREAT|EXCL on r/o filesystem, or CREAT|TRUNC on r/o filesystem, or CREAT|EXCL with failing may_o_create(), we should fail with EROFS or the error may_o_create() has returned, but not ENOENT. Which is what the current code ends up returning. * if we have CREAT|TRUNC hitting a regular file on a read-only filesystem, we can't fail with EROFS here. At the very least, not until we'd done follow_managed() - we might have a writable file (or a device, for that matter) bound on top of that one. Moreover, the code downstream will see that O_TRUNC and attempt to grab the write access (*after* following possible mount), so if we really should fail with EROFS, it will happen. No need to do that inside atomic_open(). The real logics is much simpler than what the current code is trying to do - if we decided to go for simple lookup, ended up with a negative dentry *and* had create_error set, fail with create_error. No matter whether we'd got that negative dentry from lookup_real() or had found it in dcache. Acked-by: Miklos Szeredi Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 0202aebb9813..209ca7737cb2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2906,22 +2906,10 @@ no_open: dentry = lookup_real(dir, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); - - if (create_error) { - int open_flag = op->open_flag; - - error = create_error; - if ((open_flag & O_EXCL)) { - if (!dentry->d_inode) - goto out; - } else if (!dentry->d_inode) { - goto out; - } else if ((open_flag & O_TRUNC) && - d_is_reg(dentry)) { - goto out; - } - /* will fail later, go on to get the right error */ - } + } + if (create_error && !dentry->d_inode) { + error = create_error; + goto out; } looked_up: path->dentry = dentry; -- cgit v1.2.3 From 4c127a3e21daa77e437828728d40084aa428415c Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 27 Apr 2016 10:48:52 +0200 Subject: qla1280: Don't allocate 512kb of host tags commit 2bcbc81421c511ef117cadcf0bee9c4340e68db0 upstream. The qla1280 driver sets the scsi_host_template's can_queue field to 0xfffff which results in an allocation failure when allocating the block layer tags for the driver's queues. This was introduced with the change for host wide tags in commit 64d513ac31b - "scsi: use host wide tags by default". Reduce can_queue to MAX_OUTSTANDING_COMMANDS (512) to solve the allocation error. Signed-off-by: Johannes Thumshirn Fixes: 64d513ac31b - "scsi: use host wide tags by default" Cc: Laura Abbott Cc: Michael Reed Reviewed-by: Laurence Oberman Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla1280.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 5d0ec42a9317..634254a52301 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -4214,7 +4214,7 @@ static struct scsi_host_template qla1280_driver_template = { .eh_bus_reset_handler = qla1280_eh_bus_reset, .eh_host_reset_handler = qla1280_eh_adapter_reset, .bios_param = qla1280_biosparam, - .can_queue = 0xfffff, + .can_queue = MAX_OUTSTANDING_COMMANDS, .this_id = -1, .sg_tablesize = SG_ALL, .use_clustering = ENABLE_CLUSTERING, -- cgit v1.2.3 From 35eb30c213ab718d1c66d44bcfda524184356250 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 May 2016 15:09:36 -0400 Subject: tools lib traceevent: Do not reassign parg after collapse_tree() commit 106b816cb46ebd87408b4ed99a2e16203114daa6 upstream. At the end of process_filter(), collapse_tree() was changed to update the parg parameter, but the reassignment after the call wasn't removed. What happens is that the "current_op" gets modified and freed and parg is assigned to the new allocated argument. But after the call to collapse_tree(), parg is assigned again to the just freed "current_op", and this causes the tool to crash. The current_op variable must also be assigned to NULL in case of error, otherwise it will cause it to be free()ed twice. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Fixes: 42d6194d133c ("tools lib traceevent: Refactor process_filter()") Link: http://lkml.kernel.org/r/20160511150936.678c18a1@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/lib/traceevent/parse-filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 0144b3d1bb77..88cccea3ca99 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1164,11 +1164,11 @@ process_filter(struct event_format *event, struct filter_arg **parg, current_op = current_exp; ret = collapse_tree(current_op, parg, error_str); + /* collapse_tree() may free current_op, and updates parg accordingly */ + current_op = NULL; if (ret < 0) goto fail; - *parg = current_op; - free(token); return 0; -- cgit v1.2.3 From 007796c01f0b293c68585397211af2b390bf126d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 5 May 2016 16:25:35 -0400 Subject: get_rock_ridge_filename(): handle malformed NM entries commit 99d825822eade8d827a1817357cbf3f889a552d6 upstream. Payloads of NM entries are not supposed to contain NUL. When we run into such, only the part prior to the first NUL goes into the concatenation (i.e. the directory entry name being encoded by a bunch of NM entries). We do stop when the amount collected so far + the claimed amount in the current NM entry exceed 254. So far, so good, but what we return as the total length is the sum of *claimed* sizes, not the actual amount collected. And that can grow pretty large - not unlimited, since you'd need to put CE entries in between to be able to get more than the maximum that could be contained in one isofs directory entry / continuation chunk and we are stop once we'd encountered 32 CEs, but you can get about 8Kb easily. And that's what will be passed to readdir callback as the name length. 8Kb __copy_to_user() from a buffer allocated by __get_free_page() Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/isofs/rock.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 735d7522a3a9..204659a5f6db 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -203,6 +203,8 @@ int get_rock_ridge_filename(struct iso_directory_record *de, int retnamlen = 0; int truncate = 0; int ret = 0; + char *p; + int len; if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; @@ -267,12 +269,17 @@ repeat: rr->u.NM.flags); break; } - if ((strlen(retname) + rr->len - 5) >= 254) { + len = rr->len - 5; + if (retnamlen + len >= 254) { truncate = 1; break; } - strncat(retname, rr->u.NM.name, rr->len - 5); - retnamlen += rr->len - 5; + p = memchr(rr->u.NM.name, '\0', len); + if (unlikely(p)) + len = p - rr->u.NM.name; + memcpy(retname + retnamlen, rr->u.NM.name, len); + retnamlen += len; + retname[retnamlen] = '\0'; break; case SIG('R', 'E'): kfree(rs.buffer); -- cgit v1.2.3 From 1abbf8044aa09aaa2c0810942a853bbf404f4198 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 9 May 2016 09:31:47 -0700 Subject: Input: max8997-haptic - fix NULL pointer dereference commit 6ae645d5fa385f3787bf1723639cd907fe5865e7 upstream. NULL pointer derefence happens when booting with DTB because the platform data for haptic device is not set in supplied data from parent MFD device. The MFD device creates only platform data (from Device Tree) for itself, not for haptic child. Unable to handle kernel NULL pointer dereference at virtual address 0000009c pgd = c0004000 [0000009c] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM (max8997_haptic_probe) from [] (platform_drv_probe+0x4c/0xb0) (platform_drv_probe) from [] (driver_probe_device+0x214/0x2c0) (driver_probe_device) from [] (__driver_attach+0xac/0xb0) (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) (bus_add_driver) from [] (driver_register+0x78/0xf8) (driver_register) from [] (do_one_initcall+0x90/0x1d8) (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) (kernel_init_freeable) from [] (kernel_init+0x8/0x114) (kernel_init) from [] (ret_from_fork+0x14/0x3c) Signed-off-by: Marek Szyprowski Fixes: 104594b01ce7 ("Input: add driver support for MAX8997-haptic") [k.kozlowski: Write commit message, add CC-stable] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/max8997_haptic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c index a806ba3818f7..8d6326d7e7be 100644 --- a/drivers/input/misc/max8997_haptic.c +++ b/drivers/input/misc/max8997_haptic.c @@ -255,12 +255,14 @@ static int max8997_haptic_probe(struct platform_device *pdev) struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent); const struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev); - const struct max8997_haptic_platform_data *haptic_pdata = - pdata->haptic_pdata; + const struct max8997_haptic_platform_data *haptic_pdata = NULL; struct max8997_haptic *chip; struct input_dev *input_dev; int error; + if (pdata) + haptic_pdata = pdata->haptic_pdata; + if (!haptic_pdata) { dev_err(&pdev->dev, "no haptic platform data\n"); return -EINVAL; -- cgit v1.2.3 From 9df2dc6cf4adb711545f48001b34f35fd3bb79ef Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 11 May 2016 13:09:34 -0300 Subject: Revert "[media] videobuf2-v4l2: Verify planes array in buffer dequeueing" commit 93f0750dcdaed083d6209b01e952e98ca730db66 upstream. This patch causes a Kernel panic when called on a DVB driver. This was also reported by David R : May 7 14:47:35 server kernel: [ 501.247123] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 May 7 14:47:35 server kernel: [ 501.247239] IP: [] __verify_planes_array.isra.3+0x1/0x80 [videobuf2_v4l2] May 7 14:47:35 server kernel: [ 501.247354] PGD cae6f067 PUD ca99c067 PMD 0 May 7 14:47:35 server kernel: [ 501.247426] Oops: 0000 [#1] SMP May 7 14:47:35 server kernel: [ 501.247482] Modules linked in: xfs tun xt_connmark xt_TCPMSS xt_tcpmss xt_owner xt_REDIRECT nf_nat_redirect xt_nat ipt_MASQUERADE nf_nat_masquerade_ipv4 ts_kmp ts_bm xt_string ipt_REJECT nf_reject_ipv4 xt_recent xt_conntrack xt_multiport xt_pkttype xt_tcpudp xt_mark nf_log_ipv4 nf_log_common xt_LOG xt_limit iptable_mangle iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_filter ip_tables ip6table_filter ip6_tables x_tables pppoe pppox dm_crypt ts2020 regmap_i2c ds3000 cx88_dvb dvb_pll cx88_vp3054_i2c mt352 videobuf2_dvb cx8800 cx8802 cx88xx pl2303 tveeprom videobuf2_dma_sg ppdev videobuf2_memops videobuf2_v4l2 videobuf2_core dvb_usb_digitv snd_hda_codec_via snd_hda_codec_hdmi snd_hda_codec_generic radeon dvb_usb snd_hda_intel amd64_edac_mod serio_raw snd_hda_codec edac_core fbcon k10temp bitblit softcursor snd_hda_core font snd_pcm_oss i2c_piix4 snd_mixer_oss tileblit drm_kms_helper syscopyarea snd_pcm snd_seq_dummy sysfillrect snd_seq_oss sysimgblt fb_sys_fops ttm snd_seq_midi r8169 snd_rawmidi drm snd_seq_midi_event e1000e snd_seq snd_seq_device snd_timer snd ptp pps_core i2c_algo_bit soundcore parport_pc ohci_pci shpchp tpm_tis tpm nfsd auth_rpcgss oid_registry hwmon_vid exportfs nfs_acl mii nfs bonding lockd grace lp sunrpc parport May 7 14:47:35 server kernel: [ 501.249564] CPU: 1 PID: 6889 Comm: vb2-cx88[0] Not tainted 4.5.3 #3 May 7 14:47:35 server kernel: [ 501.249644] Hardware name: System manufacturer System Product Name/M4A785TD-V EVO, BIOS 0211 07/08/2009 May 7 14:47:35 server kernel: [ 501.249767] task: ffff8800aebf3600 ti: ffff8801e07a0000 task.ti: ffff8801e07a0000 May 7 14:47:35 server kernel: [ 501.249861] RIP: 0010:[] [] __verify_planes_array.isra.3+0x1/0x80 [videobuf2_v4l2] May 7 14:47:35 server kernel: [ 501.250002] RSP: 0018:ffff8801e07a3de8 EFLAGS: 00010086 May 7 14:47:35 server kernel: [ 501.250071] RAX: 0000000000000283 RBX: ffff880210dc5000 RCX: 0000000000000283 May 7 14:47:35 server kernel: [ 501.250161] RDX: ffffffffa0222cf0 RSI: 0000000000000000 RDI: ffff880210dc5014 May 7 14:47:35 server kernel: [ 501.250251] RBP: ffff8801e07a3df8 R08: ffff8801e07a0000 R09: 0000000000000000 May 7 14:47:35 server kernel: [ 501.250348] R10: 0000000000000000 R11: 0000000000000001 R12: ffff8800cda2a9d8 May 7 14:47:35 server kernel: [ 501.250438] R13: ffff880210dc51b8 R14: 0000000000000000 R15: ffff8800cda2a828 May 7 14:47:35 server kernel: [ 501.250528] FS: 00007f5b77fff700(0000) GS:ffff88021fc40000(0000) knlGS:00000000adaffb40 May 7 14:47:35 server kernel: [ 501.250631] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b May 7 14:47:35 server kernel: [ 501.250704] CR2: 0000000000000004 CR3: 00000000ca19d000 CR4: 00000000000006e0 May 7 14:47:35 server kernel: [ 501.250794] Stack: May 7 14:47:35 server kernel: [ 501.250822] ffff8801e07a3df8 ffffffffa0222cfd ffff8801e07a3e70 ffffffffa0236beb May 7 14:47:35 server kernel: [ 501.250937] 0000000000000283 ffff8801e07a3e94 0000000000000000 0000000000000000 May 7 14:47:35 server kernel: [ 501.251051] ffff8800aebf3600 ffffffff8108d8e0 ffff8801e07a3e38 ffff8801e07a3e38 May 7 14:47:35 server kernel: [ 501.251165] Call Trace: May 7 14:47:35 server kernel: [ 501.251200] [] ? __verify_planes_array_core+0xd/0x10 [videobuf2_v4l2] May 7 14:47:35 server kernel: [ 501.251306] [] vb2_core_dqbuf+0x2eb/0x4c0 [videobuf2_core] May 7 14:47:35 server kernel: [ 501.251398] [] ? prepare_to_wait_event+0x100/0x100 May 7 14:47:35 server kernel: [ 501.251482] [] vb2_thread+0x1cb/0x220 [videobuf2_core] May 7 14:47:35 server kernel: [ 501.251569] [] ? vb2_core_qbuf+0x230/0x230 [videobuf2_core] May 7 14:47:35 server kernel: [ 501.251662] [] ? vb2_core_qbuf+0x230/0x230 [videobuf2_core] May 7 14:47:35 server kernel: [ 501.255982] [] kthread+0xc4/0xe0 May 7 14:47:35 server kernel: [ 501.260292] [] ? kthread_park+0x50/0x50 May 7 14:47:35 server kernel: [ 501.264615] [] ret_from_fork+0x3f/0x70 May 7 14:47:35 server kernel: [ 501.268962] [] ? kthread_park+0x50/0x50 May 7 14:47:35 server kernel: [ 501.273216] Code: 0d 01 74 16 48 8b 46 28 48 8b 56 30 48 89 87 d0 01 00 00 48 89 97 d8 01 00 00 5d c3 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 55 <8b> 46 04 48 89 e5 8d 50 f7 31 c0 83 fa 01 76 02 5d c3 48 83 7e May 7 14:47:35 server kernel: [ 501.282146] RIP [] __verify_planes_array.isra.3+0x1/0x80 [videobuf2_v4l2] May 7 14:47:35 server kernel: [ 501.286391] RSP May 7 14:47:35 server kernel: [ 501.290619] CR2: 0000000000000004 May 7 14:47:35 server kernel: [ 501.294786] ---[ end trace b2b354153ccad110 ]--- This reverts commit 2c1f6951a8a82e6de0d82b1158b5e493fc6c54ab. Cc: Sakari Ailus Cc: Hans Verkuil Fixes: 2c1f6951a8a8 ("[media] videobuf2-v4l2: Verify planes array in buffer dequeueing") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-v4l2.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index 6c441be8f893..502984c724ff 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -67,11 +67,6 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer return 0; } -static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb) -{ - return __verify_planes_array(vb, pb); -} - /** * __verify_length() - Verify that the bytesused value for each plane fits in * the plane length and that the data offset doesn't exceed the bytesused value. @@ -437,7 +432,6 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb, } static const struct vb2_buf_ops v4l2_buf_ops = { - .verify_planes_array = __verify_planes_array_core, .fill_user_buffer = __fill_v4l2_buffer, .fill_vb2_buffer = __fill_vb2_buffer, .set_timestamp = __set_timestamp, -- cgit v1.2.3 From 472f52f5639238f569696082e0effbfb2171ad1a Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 5 May 2016 10:16:44 -0400 Subject: drm/radeon: fix PLL sharing on DCE6.1 (v2) commit e3c00d87845ab375f90fa6e10a5e72a3a5778cd3 upstream. On DCE6.1 PPLL2 is exclusively available to UNIPHYA, so it should not be taken into consideration when looking for an already enabled PLL to be shared with other outputs. This fixes the broken VGA port (TRAVIS DP->VGA bridge) on my Richland based laptop, where the internal display is connected to UNIPHYA through a TRAVIS DP->LVDS bridge. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=78987 v2: agd: add check in radeon_get_shared_nondp_ppll as well, drop extra parameter. Signed-off-by: Lucas Stach Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_crtc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index dac78ad24b31..79bab6fd76bb 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1739,6 +1739,7 @@ static u32 radeon_get_pll_use_mask(struct drm_crtc *crtc) static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; struct drm_crtc *test_crtc; struct radeon_crtc *test_radeon_crtc; @@ -1748,6 +1749,10 @@ static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc) test_radeon_crtc = to_radeon_crtc(test_crtc); if (test_radeon_crtc->encoder && ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) { + /* PPLL2 is exclusive to UNIPHYA on DCE61 */ + if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) && + test_radeon_crtc->pll_id == ATOM_PPLL2) + continue; /* for DP use the same PLL for all */ if (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) return test_radeon_crtc->pll_id; @@ -1769,6 +1774,7 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; struct drm_crtc *test_crtc; struct radeon_crtc *test_radeon_crtc; u32 adjusted_clock, test_adjusted_clock; @@ -1784,6 +1790,10 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc) test_radeon_crtc = to_radeon_crtc(test_crtc); if (test_radeon_crtc->encoder && !ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) { + /* PPLL2 is exclusive to UNIPHYA on DCE61 */ + if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) && + test_radeon_crtc->pll_id == ATOM_PPLL2) + continue; /* check if we are already driving this connector with another crtc */ if (test_radeon_crtc->connector == radeon_crtc->connector) { /* if we are, return that pll */ -- cgit v1.2.3 From bf12e894e6b4ae0181af83ce5f6bb5e05c744660 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 3 May 2016 10:33:01 +0200 Subject: drm/i915: Bail out of pipe config compute loop on LPT commit 2700818ac9f935d8590715eecd7e8cadbca552b6 upstream. LPT is pch, so might run into the fdi bandwidth constraint (especially since it has only 2 lanes). But right now we just force pipe_bpp back to 24, resulting in a nice loop (which we bail out with a loud WARN_ON). Fix this. Cc: Chris Wilson Cc: Maarten Lankhorst References: https://bugs.freedesktop.org/show_bug.cgi?id=93477 Signed-off-by: Daniel Vetter Tested-by: Chris Wilson Signed-off-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1462264381-7573-1-git-send-email-daniel.vetter@ffwll.ch (cherry picked from commit f58a1acc7e4a1f37d26124ce4c875c647fbcc61f) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_crt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 6a2c76e367a5..97d1ed20418b 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -248,8 +248,14 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; /* LPT FDI RX only supports 8bpc. */ - if (HAS_PCH_LPT(dev)) + if (HAS_PCH_LPT(dev)) { + if (pipe_config->bw_constrained && pipe_config->pipe_bpp < 24) { + DRM_DEBUG_KMS("LPT only supports 24bpp\n"); + return false; + } + pipe_config->pipe_bpp = 24; + } /* FDI must always be 2.7 GHz */ if (HAS_DDI(dev)) { -- cgit v1.2.3 From bafa4fbc2b4ac51045fe7fb3de94bcd5560a56c7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 May 2016 15:54:19 +0300 Subject: drm/i915/bdw: Add missing delay during L3 SQC credit programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d6a862fe8c48229ba342648bcd535b2404724603 upstream. BSpec requires us to wait ~100 clocks before re-enabling clock gating, so make sure we do this. CC: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1462280061-1457-2-git-send-email-imre.deak@intel.com (cherry picked from commit 48e5d68d28f00c0cadac5a830980ff3222781abb) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f091ad12d694..0a68d2ec89dc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6620,6 +6620,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev) misccpctl = I915_READ(GEN7_MISCCPCTL); I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); + /* + * Wait at least 100 clocks before re-enabling clock gating. See + * the definition of L3SQCREG1 in BSpec. + */ + POSTING_READ(GEN8_L3SQCREG1); + udelay(1); I915_WRITE(GEN7_MISCCPCTL, misccpctl); /* -- cgit v1.2.3 From 62b68367b74b2456ee68deafab047067a6acae67 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Wed, 4 May 2016 23:39:59 +0530 Subject: drm/radeon: fix DP link training issue with second 4K monitor commit 1a738347df2ee4977459a8776fe2c62196bdcb1b upstream. There is an issue observed when we hotplug a second DP 4K monitor to the system. Sometimes, the link training fails for the second monitor after HPD interrupt generation. The issue happens when some queued or deferred transactions are already present on the AUX channel when we initiate a new transcation to (say) get DPCD or during link training. We set AUX_IGNORE_HPD_DISCON bit in the AUX_CONTROL register so that we can ignore any such deferred transactions when a new AUX transaction is initiated. Signed-off-by: Arindam Nath Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_dp_auxch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c index 3b0c229d7dcd..db64e0062689 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c +++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c @@ -105,7 +105,7 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg tmp &= AUX_HPD_SEL(0x7); tmp |= AUX_HPD_SEL(chan->rec.hpd); - tmp |= AUX_EN | AUX_LS_READ_EN; + tmp |= AUX_EN | AUX_LS_READ_EN | AUX_HPD_DISCON(0x1); WREG32(AUX_CONTROL + aux_offset[instance], tmp); -- cgit v1.2.3 From 6ff8315a4df67bfad96cffc406f91ceb6df70cde Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 14 May 2016 11:11:44 -0700 Subject: nf_conntrack: avoid kernel pointer value leak in slab name commit 31b0b385f69d8d5491a4bca288e25e63f1d945d0 upstream. The slab name ends up being visible in the directory structure under /sys, and even if you don't have access rights to the file you can see the filenames. Just use a 64-bit counter instead of the pointer to the 'net' structure to generate a unique name. This code will go away in 4.7 when the conntrack code moves to a single kmemcache, but this is the backportable simple solution to avoiding leaking kernel pointers to user space. Fixes: 5b3501faa874 ("netfilter: nf_conntrack: per netns nf_conntrack_cachep") Signed-off-by: Linus Torvalds Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3cb3cb831591..86a3c6f0c871 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1757,6 +1757,7 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { + static atomic64_t unique_id; int ret = -ENOMEM; int cpu; @@ -1779,7 +1780,8 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu", + (u64)atomic64_inc_return(&unique_id)); if (!net->ct.slabname) goto err_slabname; -- cgit v1.2.3 From 544ec5b08d007f184ab97abdbed87e613c8c0b83 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 18 May 2016 17:08:36 -0700 Subject: Linux 4.4.11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5b5f462f834c..aad86274b61b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 10 +SUBLEVEL = 11 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3