From 9bdbb8968e9e7e6b9619076d1867da1c0cfec2c7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 8 Mar 2015 21:53:42 +0100 Subject: ARM: dts: sunxi: Adjust touchscreen compatible for sun5i and later commit 8bf1b9b3d90194a174493febc731f7783f2adf1a upstream. The touchscreen controller in the A13 and later has a different temperature curve than the one in the original A10, change the compatible for the A13 and later so that the kernel will use the correct curve. Reported-by: Tong Zhang Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sun5i-a10s.dtsi | 2 +- arch/arm/boot/dts/sun5i-a13.dtsi | 2 +- arch/arm/boot/dts/sun7i-a20.dtsi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 2fd8988f310c..3794ca16499d 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -573,7 +573,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; #thermal-sensor-cells = <0>; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 883cb4873688..5098185abde6 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -555,7 +555,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; #thermal-sensor-cells = <0>; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index fdd181792b4b..2b4847c7cbd4 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -1042,7 +1042,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = ; #thermal-sensor-cells = <0>; -- cgit v1.2.3 
From 9d281572f2be11afa36e1534da43890cb9a130cc Mon Sep 17 00:00:00 2001 From: Sebastien Szymanski Date: Wed, 20 May 2015 16:30:37 +0200 Subject: ARM: clk-imx6q: refine sata's parent commit da946aeaeadcd24ff0cda9984c6fb8ed2bfd462a upstream. According to IMX6D/Q RM, table 18-3, sata clock's parent is ahb, not ipg. Signed-off-by: Sebastien Szymanski Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo [dirk.behme: Adjust moved file] Signed-off-by: Dirk Behme Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 469a150bf98f..a2e8ef3c0bd9 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -443,7 +443,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) clk[IMX6QDL_CLK_GPMI_IO] = imx_clk_gate2("gpmi_io", "enfc", base + 0x78, 28); clk[IMX6QDL_CLK_GPMI_APB] = imx_clk_gate2("gpmi_apb", "usdhc3", base + 0x78, 30); clk[IMX6QDL_CLK_ROM] = imx_clk_gate2("rom", "ahb", base + 0x7c, 0); - clk[IMX6QDL_CLK_SATA] = imx_clk_gate2("sata", "ipg", base + 0x7c, 4); + clk[IMX6QDL_CLK_SATA] = imx_clk_gate2("sata", "ahb", base + 0x7c, 4); clk[IMX6QDL_CLK_SDMA] = imx_clk_gate2("sdma", "ahb", base + 0x7c, 6); clk[IMX6QDL_CLK_SPBA] = imx_clk_gate2("spba", "ipg", base + 0x7c, 12); clk[IMX6QDL_CLK_SPDIF] = imx_clk_gate2("spdif", "spdif_podf", base + 0x7c, 14); -- cgit v1.2.3 From aa0b8c72f5c1e0208bfafb383b535bdc9d3b2c6b Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Thu, 11 Jun 2015 02:05:33 -0400 Subject: KVM: nSVM: Check for NRIPS support before updating control field commit f104765b4f81fd74d69e0eb161e89096deade2db upstream. If hardware doesn't support DecodeAssist - a feature that provides more information about the intercept in the VMCB, KVM decodes the instruction and then updates the next_rip vmcb control field. However, NRIP support itself depends on cpuid Fn8000_000A_EDX[NRIPS]. 
Since skip_emulated_instruction() doesn't verify nrip support before accepting control.next_rip as valid, avoid writing this field if support isn't present. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9afa233b5482..4911bf19122b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -511,8 +511,10 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->vmcb->control.next_rip != 0) + if (svm->vmcb->control.next_rip != 0) { + WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; + } if (!svm->next_rip) { if (emulate_instruction(vcpu, EMULTYPE_SKIP) != @@ -4317,7 +4319,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, break; } - vmcb->control.next_rip = info->next_rip; + /* TODO: Advertise NRIPS to guest hypervisor unconditionally */ + if (static_cpu_has(X86_FEATURE_NRIPS)) + vmcb->control.next_rip = info->next_rip; vmcb->control.exit_code = icpt_info.exit_code; vmexit = nested_svm_exit_handled(svm); -- cgit v1.2.3 From 1a2cddd17ebe5e76a796c25e11cdd5518310eabc Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sun, 21 Jun 2015 18:50:44 +0200 Subject: can: fix loss of CAN frames in raw_rcv commit 36c01245eb8046c16eee6431e7dbfbb302635fa8 upstream. As reported by Manfred Schlaegl here http://marc.info/?l=linux-netdev&m=143482089824232&w=2 commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. As net timestamping is influenced by several players (netstamp_needed and netdev_tstamp_prequeue) Manfred missed a proper timestamp which leads to CAN frame loss. 
As skb timestamping became now mandatory for CAN related skbs this patch makes sure that received CAN skbs always have a proper timestamp set. Maybe there's a better solution in the future but this patch fixes the CAN frame loss so far. Reported-by: Manfred Schlaegl Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 5 +++++ drivers/net/can/slcan.c | 1 + drivers/net/can/vcan.c | 3 +++ net/can/af_can.c | 6 +++++- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b0f69248cb71..e9b1810d319f 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,6 +440,9 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; + if (!(skb->tstamp.tv64)) + __net_timestamp(skb); + netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -575,6 +578,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; + __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -603,6 +607,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; + __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index c837eb91d43e..f64f5290d6f8 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,6 +207,7 @@ static void slc_bump(struct slcan *sl) if (!skb) return; + __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 674f367087c5..0ce868de855d 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,6 
+78,9 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!(skb->tstamp.tv64)) + __net_timestamp(skb); + netif_rx_ni(skb); } diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc..689c818ed007 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -310,8 +310,12 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) + if (newskb) { + if (!(newskb->tstamp.tv64)) + __net_timestamp(newskb); + netif_rx_ni(newskb); + } /* update statistics */ can_stats.tx_frames++; -- cgit v1.2.3 From bd0a0d20ebd08f250af9023530b5de4bc433ebaa Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 12 Jun 2015 10:16:41 -0300 Subject: sctp: fix ASCONF list handling [ Upstream commit 2d45a02d0166caf2627fe91897c6ffc3b19514c4 ] ->auto_asconf_splist is per namespace and mangled by functions like sctp_setsockopt_auto_asconf() which doesn't guarantee any serialization. Also, the call to inet_sk_copy_descendant() was backing up ->auto_asconf_list through the copy but was not honoring ->do_auto_asconf, which could lead to list corruption if it was different between both sockets. This commit thus fixes the list handling by using ->addr_wq_lock spinlock to protect the list. A special handling is done upon socket creation and destruction for that. Error handling on sctp_init_sock() will never return an error after having initialized asconf, so sctp_destroy_sock() can be called without addr_wq_lock. The lock will now be taken in sctp_close(), before locking the socket, so we don't do it in inverse order compared to sctp_addr_wq_timeout_handler(). Instead of taking the lock on sctp_sock_migrate() for copying and restoring the list values, it's preferred to avoid rewriting it by implementing sctp_copy_descendant(). 
Issue was found with a test application that kept flipping sysctl default_auto_asconf on and off, but one could trigger it by issuing simultaneous setsockopt() calls on multiple sockets or by creating/destroying sockets fast enough. This is only triggerable locally. Fixes: 9f7d653b67ae ("sctp: Add Auto-ASCONF support (core).") Reported-by: Ji Jianwen Suggested-by: Neil Horman Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/netns/sctp.h | 1 + include/net/sctp/structs.h | 4 ++++ net/sctp/socket.c | 43 ++++++++++++++++++++++++++++++++----------- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 3573a81815ad..8ba379f9e467 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -31,6 +31,7 @@ struct netns_sctp { struct list_head addr_waitq; struct timer_list addr_wq_timer; struct list_head auto_asconf_splist; + /* Lock that protects both addr_waitq and auto_asconf_splist */ spinlock_t addr_wq_lock; /* Lock that protects the local_addr_list writers */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2bb2fcf5b11f..495c87e367b3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -223,6 +223,10 @@ struct sctp_sock { atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; + + /* These must be the last fields, as they will skipped on copies, + * like on accept and peeloff operations + */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f09de7fac2e6..5f6c4e61325b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1528,8 +1528,10 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. 
+ * Also, sctp_destroy_sock() needs to be called with addr_wq_lock + * held and that should be grabbed before socket lock. */ - local_bh_disable(); + spin_lock_bh(&net->sctp.addr_wq_lock); bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() @@ -1539,7 +1541,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - local_bh_enable(); + spin_unlock_bh(&net->sctp.addr_wq_lock); sock_put(sk); @@ -3580,6 +3582,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) return 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); if (val == 0 && sp->do_auto_asconf) { list_del(&sp->auto_asconf_list); sp->do_auto_asconf = 0; @@ -3588,6 +3591,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); return 0; } @@ -4121,18 +4125,28 @@ static int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(net, sk->sk_prot, 1); + + /* Nothing can fail after this block, otherwise + * sctp_destroy_sock() will be called without addr_wq_lock held + */ if (net->sctp.default_auto_asconf) { + spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; - } else + spin_unlock(&sock_net(sk)->sctp.addr_wq_lock); + } else { sp->do_auto_asconf = 0; + } + local_bh_enable(); return 0; } -/* Cleanup any SCTP per socket resources. */ +/* Cleanup any SCTP per socket resources. 
Must be called with + * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true + */ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -7195,6 +7209,19 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_list = NULL; } +static inline void sctp_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + int ancestor_size = sizeof(struct inet_sock) + + sizeof(struct sctp_sock) - + offsetof(struct sctp_sock, auto_asconf_list); + + if (sk_from->sk_family == PF_INET6) + ancestor_size += sizeof(struct ipv6_pinfo); + + __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); +} + /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ @@ -7209,7 +7236,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; - struct list_head tmplist; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -7217,12 +7243,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - if (oldsp->do_auto_asconf) { - memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist)); - inet_sk_copy_descendant(newsk, oldsk); - memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist)); - } else - inet_sk_copy_descendant(newsk, oldsk); + sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. 
-- cgit v1.2.3 From 08be544ef5d8453b7778bd57f3da8eeebcf1cd65 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 15 Jun 2015 20:28:51 +0300 Subject: bridge: fix br_stp_set_bridge_priority race conditions [ Upstream commit 2dab80a8b486f02222a69daca6859519e05781d9 ] After the ->set() spinlocks were removed br_stp_set_bridge_priority was left running without any protection when used via sysfs. It can race with port add/del and could result in use-after-free cases and corrupted lists. Tested by running port add/del in a loop with stp enabled while setting priority in a loop, crashes are easily reproducible. The spinlocks around sysfs ->set() were removed in commit: 14f98f258f19 ("bridge: range check STP parameters") There's also a race condition in the netlink priority support that is fixed by this change, but it was introduced recently and the fixes tag covers it, just in case it's needed the commit is: af615762e972 ("bridge: add ageing_time, stp_state, priority over netlink") Signed-off-by: Nikolay Aleksandrov Fixes: 14f98f258f19 ("bridge: range check STP parameters") Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_ioctl.c | 2 -- net/bridge/br_stp_if.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index a9a4a1b7863d..8d423bc649b9 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -247,9 +247,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); br_stp_set_bridge_priority(br, args[1]); - spin_unlock_bh(&br->lock); return 0; case BRCTL_SET_PORT_PRIORITY: diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 41146872c1b4..7832d07f48f6 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -243,12 +243,13 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) return true; } -/* called under bridge lock */ +/* Acquires and releases bridge lock */ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) { struct net_bridge_port *p; int wasroot; + spin_lock_bh(&br->lock); wasroot = br_is_root_bridge(br); list_for_each_entry(p, &br->port_list, list) { @@ -266,6 +267,7 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); + spin_unlock_bh(&br->lock); } /* called under bridge lock */ -- cgit v1.2.3 From d7884e43677ba8618ae2f1ead7b96b215b409e20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Jun 2015 07:59:11 -0700 Subject: packet: read num_members once in packet_rcv_fanout() [ Upstream commit f98f4514d07871da7a113dd9e3e330743fd70ae4 ] We need to tell compiler it must not read f->num_members multiple times. 
Otherwise testing if num is not zero is flaky, and we could attempt an invalid divide by 0 in fanout_demux_cpu() Note bug was present in packet_rcv_fanout_hash() and packet_rcv_fanout_lb() but final 3.1 had a simple location after commit 95ec3eb417115fb ("packet: Add 'cpu' fanout policy.") Fixes: dc99f600698dc ("packet: Add fanout support.") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5989c6ee551..131545a06f05 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1353,7 +1353,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; - unsigned int num = f->num_members; + unsigned int num = READ_ONCE(f->num_members); struct packet_sock *po; unsigned int idx; -- cgit v1.2.3 From 2c330edb4b31fdf18c6a6c4d1d9b482632c3925b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 17 Jun 2015 15:59:34 -0400 Subject: packet: avoid out of bounds read in round robin fanout [ Upstream commit 468479e6043c84f5a65299cc07cb08a22a28c2b1 ] PACKET_FANOUT_LB computes f->rr_cur such that it is modulo f->num_members. It returns the old value unconditionally, but f->num_members may have changed since the last store. Ensure that the return value is always < num. When modifying the logic, simplify it further by replacing the loop with an unconditional atomic increment. Fixes: dc99f600698d ("packet: Add fanout support.") Suggested-by: Eric Dumazet Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 131545a06f05..fe1610ddeacf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1272,16 +1272,6 @@ static void packet_sock_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static int fanout_rr_next(struct packet_fanout *f, unsigned int num) -{ - int x = atomic_read(&f->rr_cur) + 1; - - if (x >= num) - x = 0; - - return x; -} - static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) @@ -1293,13 +1283,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - int cur, old; + unsigned int val = atomic_inc_return(&f->rr_cur); - cur = atomic_read(&f->rr_cur); - while ((old = atomic_cmpxchg(&f->rr_cur, cur, - fanout_rr_next(f, num))) != cur) - cur = old; - return cur; + return val % num; } static unsigned int fanout_demux_cpu(struct packet_fanout *f, -- cgit v1.2.3 From 914b0ef228c97dfea025091428f6c7809c9d38ad Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 16 Jun 2015 22:56:39 +0300 Subject: neigh: do not modify unlinked entries [ Upstream commit 2c51a97f76d20ebf1f50fef908b986cb051fdff9 ] The lockless lookups can return entry that is unlinked. Sometimes they get reference before last neigh_cleanup_and_release, sometimes they do not need reference. Later, any modification attempts may result in the following problems: 1. entry is not destroyed immediately because neigh_update can start the timer for dead entry, eg. on change to NUD_REACHABLE state. As result, entry lives for some time but is invisible and out of control. 2. __neigh_event_send can run in parallel with neigh_destroy while refcnt=0 but if timer is started and expired refcnt can reach 0 for second time leading to second neigh_destroy and possible crash. 
Thanks to Eric Dumazet and Ying Xue for their work and analysis on the __neigh_event_send change. Fixes: 767e97e1e0db ("neigh: RCU conversion of struct neighbour") Fixes: a263b3093641 ("ipv4: Make neigh lookups directly in output packet path.") Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().") Cc: Eric Dumazet Cc: Ying Xue Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3de654256028..2237c1b3cdd2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -957,6 +957,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; + if (neigh->dead) + goto out_dead; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + @@ -1013,6 +1015,13 @@ out_unlock_bh: write_unlock(&neigh->lock); local_bh_enable(); return rc; + +out_dead: + if (neigh->nud_state & NUD_STALE) + goto out_unlock_bh; + write_unlock_bh(&neigh->lock); + kfree_skb(skb); + return 1; } EXPORT_SYMBOL(__neigh_event_send); @@ -1076,6 +1085,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; + if (neigh->dead) + goto out; if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1225,6 +1236,8 @@ EXPORT_SYMBOL(neigh_update); */ void __neigh_set_probe_once(struct neighbour *neigh) { + if (neigh->dead) + return; neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; -- cgit v1.2.3 From 80b856db9f9e1dbaf20a24020f8ade95c14f7477 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2015 13:54:54 +0200 Subject: mac80211: fix locking in update_vlan_tailroom_need_count() [ Upstream commit 
51f458d9612177f69c2e2c437034ae15f93078e7 ] Unfortunately, Michal's change to fix AP_VLAN crypto tailroom caused a locking issue that was reported by lockdep, but only in a few cases - the issue was a classic ABBA deadlock caused by taking the mtx after the key_mtx, where normally they're taken the other way around. As the key mutex protects the field in question (I'm adding a few annotations to make that clear) only the iteration needs to be protected, but we can also iterate the interface list with just RCU protection while holding the key mutex. Fixes: f9dca80b98ca ("mac80211: fix AP_VLAN crypto tailroom calculation") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mac80211/key.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a907f2d5c12d..81e9785f38bc 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -66,12 +66,15 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) if (sdata->vif.type != NL80211_IFTYPE_AP) return; - mutex_lock(&sdata->local->mtx); + /* crypto_tx_tailroom_needed_cnt is protected by this */ + assert_key_lock(sdata->local); + + rcu_read_lock(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list) vlan->crypto_tx_tailroom_needed_cnt += delta; - mutex_unlock(&sdata->local->mtx); + rcu_read_unlock(); } static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) @@ -95,6 +98,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ + assert_key_lock(sdata->local); + update_vlan_tailroom_need_count(sdata, 1); if (!sdata->crypto_tx_tailroom_needed_cnt++) { @@ -109,6 +114,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) static void 
decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { + assert_key_lock(sdata->local); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); update_vlan_tailroom_need_count(sdata, -delta); -- cgit v1.2.3 From 1bc31b1e80a716b903cf40ae4d679c820f71d518 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 18 Jun 2015 18:36:03 +0300 Subject: mvneta: add forgotten initialization of autonegotiation bits [ Upstream commit 538761b794c1542f1c6e31eadd9d7aae118889f7 ] The commit 898b2970e2c9 ("mvneta: implement SGMII-based in-band link state signaling") changed mvneta_adjust_link() so that it does not clear the auto-negotiation bits in MVNETA_GMAC_AUTONEG_CONFIG register. This was necessary for auto-negotiation mode to work. Unfortunately I haven't checked if these bits are ever initialized. It appears they are not. This patch adds the missing initialization of the auto-negotiation bits in the MVNETA_GMAC_AUTONEG_CONFIG register. It fixes the following regression: https://www.mail-archive.com/netdev@vger.kernel.org/msg67928.html Since the patch was tested to fix a regression, it should be applied to stable tree. Tested-by: Arnaud Ebalard CC: Thomas Petazzoni CC: Florian Fainelli CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Stas Sergeev Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ce5f7f9cff06..74176ec4f39d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1013,6 +1013,12 @@ static void mvneta_defaults_set(struct mvneta_port *pp) val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } mvneta_set_ucast_table(pp, -1); -- cgit v1.2.3 From c31967d447989b85b631dda39487a319df21e03a Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Thu, 18 Jun 2015 09:15:34 -0700 Subject: tcp: Do not call tcp_fastopen_reset_cipher from interrupt context [ Upstream commit dfea2aa654243f70dc53b8648d0bbdeec55a7df1 ] tcp_fastopen_reset_cipher really cannot be called from interrupt context. It allocates the tcp_fastopen_context with GFP_KERNEL and calls crypto_alloc_cipher, which allocates all kind of stuff with GFP_KERNEL. 
Thus, we might sleep when the key-generation is triggered by an incoming TFO cookie-request which would then happen in interrupt- context, as shown by enabling CONFIG_DEBUG_ATOMIC_SLEEP: [ 36.001813] BUG: sleeping function called from invalid context at mm/slub.c:1266 [ 36.003624] in_atomic(): 1, irqs_disabled(): 0, pid: 1016, name: packetdrill [ 36.004859] CPU: 1 PID: 1016 Comm: packetdrill Not tainted 4.1.0-rc7 #14 [ 36.006085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 36.008250] 00000000000004f2 ffff88007f8838a8 ffffffff8171d53a ffff880075a084a8 [ 36.009630] ffff880075a08000 ffff88007f8838c8 ffffffff810967d3 ffff88007f883928 [ 36.011076] 0000000000000000 ffff88007f8838f8 ffffffff81096892 ffff88007f89be00 [ 36.012494] Call Trace: [ 36.012953] [] dump_stack+0x4f/0x6d [ 36.014085] [] ___might_sleep+0x103/0x170 [ 36.015117] [] __might_sleep+0x52/0x90 [ 36.016117] [] kmem_cache_alloc_trace+0x47/0x190 [ 36.017266] [] ? tcp_fastopen_reset_cipher+0x42/0x130 [ 36.018485] [] tcp_fastopen_reset_cipher+0x42/0x130 [ 36.019679] [] tcp_fastopen_init_key_once+0x61/0x70 [ 36.020884] [] __tcp_fastopen_cookie_gen+0x1c/0x60 [ 36.022058] [] tcp_try_fastopen+0x58f/0x730 [ 36.023118] [] tcp_conn_request+0x3e8/0x7b0 [ 36.024185] [] ? __module_text_address+0x12/0x60 [ 36.025327] [] tcp_v4_conn_request+0x51/0x60 [ 36.026410] [] tcp_rcv_state_process+0x190/0xda0 [ 36.027556] [] ? __inet_lookup_established+0x47/0x170 [ 36.028784] [] tcp_v4_do_rcv+0x16d/0x3d0 [ 36.029832] [] ? security_sock_rcv_skb+0x16/0x20 [ 36.030936] [] tcp_v4_rcv+0x77a/0x7b0 [ 36.031875] [] ? iptable_filter_hook+0x33/0x70 [ 36.032953] [] ip_local_deliver_finish+0x92/0x1f0 [ 36.034065] [] ip_local_deliver+0x9a/0xb0 [ 36.035069] [] ? 
ip_rcv+0x3d0/0x3d0 [ 36.035963] [] ip_rcv_finish+0x119/0x330 [ 36.036950] [] ip_rcv+0x2e7/0x3d0 [ 36.037847] [] __netif_receive_skb_core+0x552/0x930 [ 36.038994] [] __netif_receive_skb+0x27/0x70 [ 36.040033] [] process_backlog+0xd2/0x1f0 [ 36.041025] [] net_rx_action+0x122/0x310 [ 36.042007] [] __do_softirq+0x103/0x2f0 [ 36.042978] [] do_softirq_own_stack+0x1c/0x30 This patch moves the call to tcp_fastopen_init_key_once to the places where a listener socket creates its TFO-state, which always happens in user-context (either from the setsockopt, or implicitly during the listen()-call) Cc: Eric Dumazet Cc: Hannes Frederic Sowa Fixes: 222e83d2e0ae ("tcp: switch tcp_fastopen key generation to net_get_random_once") Signed-off-by: Christoph Paasch Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 7 +++++-- net/ipv4/tcp_fastopen.c | 2 -- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b47a4d79d04..a5aa54ea6533 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -228,6 +228,8 @@ int inet_listen(struct socket *sock, int backlog) err = 0; if (err) goto out; + + tcp_fastopen_init_key_once(true); } err = inet_csk_listen_start(sk, backlog); if (err) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1377f2a0472..bb2ce74f6004 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2545,10 +2545,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | - TCPF_LISTEN))) + TCPF_LISTEN))) { + tcp_fastopen_init_key_once(true); + err = fastopen_init_queue(sk, val); - else + } else { err = -EINVAL; + } break; case TCP_TIMESTAMP: if (!tp->repair) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 46b087a27503..f9c0fb84e435 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -78,8 +78,6 @@ static bool 
__tcp_fastopen_cookie_gen(const void *path, struct tcp_fastopen_context *ctx; bool ok = false; - tcp_fastopen_init_key_once(true); - rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { -- cgit v1.2.3 From 6fc8b947b364ceb6d91e5b6f3e3d22cd9a013ac0 Mon Sep 17 00:00:00 2001 From: "Palik, Imre" Date: Fri, 19 Jun 2015 14:21:51 +0200 Subject: xen-netback: fix a BUG() during initialization [ Upstream commit 12b322ac85208de564ecf23aa754d796a91de21f ] Commit edafc132baac ("xen-netback: making the bandwidth limiter runtime settable") introduced the capability to change the bandwidth rate limit at runtime. But it also introduced a possible crashing bug. If netback receives two XenbusStateConnected without getting the hotplug-status watch firing in between, then it will try to register the watches for the rate limiter again. But this triggers a BUG() in the watch registration code. The fix modifies connect() to remove the possibly existing packet-rate watches before trying to install those watches. This behaviour is in line with how connect() deals with the hotplug-status watch. Signed-off-by: Imre Palik Cc: Matt Wilson Acked-by: Wei Liu Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/xenbus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 968787abf78d..ec383b0f5443 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -681,6 +681,9 @@ static int xen_register_watchers(struct xenbus_device *dev, struct xenvif *vif) char *node; unsigned maxlen = strlen(dev->nodename) + sizeof("/rate"); + if (vif->credit_watch.node) + return -EADDRINUSE; + node = kmalloc(maxlen, GFP_KERNEL); if (!node) return -ENOMEM; @@ -770,6 +773,7 @@ static void connect(struct backend_info *be) } xen_net_read_rate(dev, &credit_bytes, &credit_usec); + xen_unregister_watchers(be->vif); xen_register_watchers(dev, be->vif); read_xenbus_vif_flags(be); -- cgit v1.2.3 From 66634bb1c4f1eda70583eddaf8c5e980f05a8fb9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 23 Jun 2015 08:34:39 +0300 Subject: ip: report the original address of ICMP messages [ Upstream commit 34b99df4e6256ddafb663c6de0711dceceddfe0e ] ICMP messages can trigger ICMP and local errors. In this case serr->port is 0 and starting from Linux 4.0 we do not return the original target address to the error queue readers. Add function to define which errors provide addr_offset. With this fix my ping command is not silent anymore. Fixes: c247f0534cc5 ("ip: fix error queue empty skb handling") Signed-off-by: Julian Anastasov Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 11 ++++++++++- net/ipv6/datagram.c | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7cfb0893f263..6ddde89996f4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -432,6 +432,15 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv4 supports cmsg on all imcp errors and some timestamps * * Timestamp code paths do not initialize the fields expected by cmsg: @@ -498,7 +507,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv4_datagram_support_addr(serr)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 762a58c772b8..62d908e64eeb 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 || + serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL. 
* * At one point, excluding local errors was a quick test to identify icmp/icmp6 @@ -389,7 +399,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv6_datagram_support_addr(serr)) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; -- cgit v1.2.3 From f3f6617f6b90f2e27ee7362f8a2f4063c7eac6a7 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 25 Jun 2015 11:29:41 +0300 Subject: net/mlx4_en: Release TX QP when destroying TX ring [ Upstream commit 0eb08514fdbdcd16fd6870680cd638f203662e9d ] TX ring QP wasn't released at mlx4_en_destroy_tx_ring. Instead, the code used the deprecated base_tx_qpn field. Move TX QP release to mlx4_en_destroy_tx_ring and remove the base_tx_qpn field. Fixes: ddae0349fdb7 ('net/mlx4: Change QP allocation scheme') Signed-off-by: Eran Ben Elisha Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ---- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 1 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index cf467a9f6cc7..a5a0b8420d26 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1973,10 +1973,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } - if (priv->base_tx_qpn) { - mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num); - priv->base_tx_qpn = 0; - } } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 7bed3a88579f..0ab298f036a8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c 
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -180,6 +180,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); + mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d021f079f181..9a4b3807eb0a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -579,7 +579,6 @@ struct mlx4_en_priv { int vids[128]; bool wol; struct device *ddev; - int base_tx_qpn; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; struct hwtstamp_config hwtstamp_config; -- cgit v1.2.3 From 7a9aa8ab0c706b3d2770a3996c9e63f08074c855 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 25 Jun 2015 11:29:42 +0300 Subject: net/mlx4_en: Wake TX queues only when there's enough room [ Upstream commit 488a9b48e398b157703766e2cd91ea45ac6997c5 ] Indication of a single completed packet, marked by txbbs_skipped being bigger than zero, is not enough to wake up a stopped TX queue. The completed packet may contain a single TXBB, while the next packet to be sent (after the wake up) may have multiple TXBBs (LSO/TSO packets for example), causing overflow in queue followed by WQE corruption and TX queue timeout. Instead, wake the stopped queue only when there's enough room for the worst case (maximum sized WQE) packet that we should need to handle after the queue is opened again. Also created a helper routine - mlx4_en_is_tx_ring_full, which checks if the current TX ring is full or not. It provides better code readability and removes code duplication. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 19 +++++++++++-------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 0ab298f036a8..c10d98f6ad96 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -66,6 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->size = size; ring->size_mask = size - 1; ring->stride = stride; + ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node); @@ -232,6 +233,11 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } +static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring) +{ + return ring->prod - ring->cons > ring->full_size; +} + static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner) @@ -474,11 +480,10 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, netdev_tx_completed_queue(ring->tx_queue, packets, bytes); - /* - * Wakeup Tx queue if this stopped, and at least 1 packet - * was completed + /* Wakeup Tx queue if this stopped, and ring is not full. 
*/ - if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) { + if (netif_tx_queue_stopped(ring->tx_queue) && + !mlx4_en_is_tx_ring_full(ring)) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } @@ -922,8 +927,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); /* Check available TXBBs And 2K spare for prefetch */ - stop_queue = (int)(ring->prod - ring_cons) > - ring->size - HEADROOM - MAX_DESC_TXBBS; + stop_queue = mlx4_en_is_tx_ring_full(ring); if (unlikely(stop_queue)) { netif_tx_stop_queue(ring->tx_queue); ring->queue_stopped++; @@ -992,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) smp_rmb(); ring_cons = ACCESS_ONCE(ring->cons); - if (unlikely(((int)(ring->prod - ring_cons)) <= - ring->size - HEADROOM - MAX_DESC_TXBBS)) { + if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9a4b3807eb0a..909fcf803c54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -279,6 +279,7 @@ struct mlx4_en_tx_ring { u32 size; /* number of TXBBs */ u32 size_mask; u16 stride; + u32 full_size; u16 cqn; /* index of port CQ associated with this ring */ u32 buf_size; __be32 doorbell_qpn; -- cgit v1.2.3 From 1b74080050336e29a82abc2a9b2e1f901485a362 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 25 Jun 2015 11:29:43 +0300 Subject: net/mlx4_en: Fix wrong csum complete report when rxvlan offload is disabled [ Upstream commit 79a258526ce1051cb9684018c25a89d51ac21be8 ] The check_csum() function relied on hwtstamp_rx_filter to know if rxvlan offload is disabled. This is wrong since rxvlan offload can be switched on/off regardless of hwtstamp_rx_filter. 
Also moved check_csum to query CQE information to identify VLAN packets and removed the check of IP packets, since it has been validated before. Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE') Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2a77a6b19121..eab4e080ebd2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -723,7 +723,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, - int hwtstamp_rx_filter) + netdev_features_t dev_features) { __wsum hw_checksum = 0; @@ -731,14 +731,8 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, hw_checksum = csum_unfold((__force __sum16)cqe->checksum); - if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) && - hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) { - /* next protocol non IPv4 or IPv6 */ - if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IP) && - ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IPV6)) - return -1; + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) && + !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); hdr += sizeof(struct vlan_hdr); } @@ -901,7 +895,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (ip_summed == CHECKSUM_COMPLETE) { void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); - if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, gro_skb, va, + dev->features)) { ip_summed = CHECKSUM_NONE; 
ring->csum_none++; ring->csum_complete--; @@ -956,7 +951,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud } if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, skb, skb->data, dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_complete--; ring->csum_none++; -- cgit v1.2.3 From 62a9ad17a245002cc611fc4667c2919ef422d8ee Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 25 Jun 2015 11:29:44 +0300 Subject: mlx4: Disable HA for SRIOV PF RoCE devices [ Upstream commit 7254acffeeec3c0a75b9c5364c29a6eb00014930 ] When in HA mode, the driver exposes an IB (RoCE) device instance with only one port. Under SRIOV, the existing implementation doesn't go well with the PF RoCE driver's role of Special QPs Para-Virtualization, etc. As such, disable HA for the mlx4 PF RoCE device in SRIOV mode. Fixes: a57500903093 ('IB/mlx4: Add port aggregation support') Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/intf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 6fce58718837..0d80aed59043 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -93,8 +93,14 @@ int mlx4_register_interface(struct mlx4_interface *intf) mutex_lock(&intf_mutex); list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &dev_list, dev_list) + list_for_each_entry(priv, &dev_list, dev_list) { + if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) { + mlx4_dbg(&priv->dev, + "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol); + intf->flags &= ~MLX4_INTFF_BONDING; + } mlx4_add_device(intf, priv); + } mutex_unlock(&intf_mutex); -- cgit v1.2.3 From 6c10c84170c40bf3d4a9953ae5a2ffe59ad736d4 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Thu, 25 Jun 2015 22:21:02 +0530 Subject: net: phy: fix phy link up when limiting speed via device tree [ Upstream commit eb686231fce3770299760f24fdcf5ad041f44153 ] When limiting the phy link speed to 100 Mbps or less using "max-speed" on a gigabit phy, the phy never completes auto-negotiation and the phy state machine is held in PHY_AN. Fix this issue by comparing the gigabit advertisement even when phydev->supported doesn't have it but the phy has BMSR_ESTATEN set, so that auto-negotiation is restarted when the old and new advertisements are different and the link comes up fine. Signed-off-by: Mugunthan V N Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index bdfe51fc3a65..d551df62e61a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -796,10 +796,11 @@ static int genphy_config_advert(struct phy_device *phydev) if (phydev->supported & (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) { adv |= ethtool_adv_to_mii_ctrl1000_t(advertise); - if (adv != oldadv) - changed = 1; } + if (adv != oldadv) + changed = 1; + err = phy_write(phydev, MII_CTRL1000, adv); if (err < 0) return err; -- cgit v1.2.3 From 7e2a3d667c4c9ca494c73f5930365240a31c4eb0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Jun 2015 07:32:29 +0200 Subject: bnx2x: fix lockdep splat [ Upstream commit d53c66a5b80698620f7c9ba2372fff4017e987b8 ] Michel reported following lockdep splat [ 44.718117] INFO: trying to register non-static key. [ 44.723081] the code is fine but needs lockdep annotation. [ 44.728559] turning off the locking correctness validator. [ 44.734036] CPU: 8 PID: 5483 Comm: ethtool Not tainted 4.1.0 [ 44.770289] Call Trace: [ 44.772741] [] dump_stack+0x4c/0x65 [ 44.777879] [] ? console_unlock+0x1f1/0x510 [ 44.783708] [] __lock_acquire+0x1d05/0x1f10 [ 44.789538] [] ? mark_held_locks+0x6a/0x90 [ 44.795276] [] ? trace_hardirqs_on_caller+0x105/0x1d0 [ 44.801967] [] ? trace_hardirqs_on+0xd/0x10 [ 44.807793] [] ? hrtimer_try_to_cancel+0x4a/0x250 [ 44.814142] [] lock_acquire+0xb6/0x290 [ 44.819537] [] ? flush_work+0x5/0x280 [ 44.824844] [] flush_work+0x3d/0x280 [ 44.830061] [] ? flush_work+0x5/0x280 [ 44.835366] [] ? schedule_hrtimeout_range+0x13/0x20 [ 44.841889] [] ? usleep_range+0x4b/0x50 [ 44.847365] [] ? mark_held_locks+0x6a/0x90 [ 44.853102] [] ? __cancel_work_timer+0x105/0x1c0 [ 44.859359] [] ? trace_hardirqs_on_caller+0x105/0x1d0 [ 44.866045] [] __cancel_work_timer+0x9f/0x1c0 [ 44.872048] [] ? 
bnx2x_func_stop+0x42/0x90 [bnx2x] [ 44.878481] [] cancel_work_sync+0x10/0x20 [ 44.884134] [] bnx2x_chip_cleanup+0x245/0x730 [bnx2x] [ 44.890829] [] ? up+0x32/0x50 [ 44.895439] [] ? del_timer_sync+0x5/0xd0 [ 44.901005] [] bnx2x_nic_unload+0x20d/0x8e0 [bnx2x] [ 44.907527] [] ? might_fault+0x5f/0xb0 [ 44.912921] [] bnx2x_reload_if_running+0x2c/0x50 [bnx2x] [ 44.919879] [] bnx2x_set_ringparam+0x2b5/0x460 [bnx2x] [ 44.926664] [] dev_ethtool+0x55b/0x1c40 [ 44.932148] [] ? rtnl_lock+0x17/0x20 [ 44.937364] [] dev_ioctl+0x17b/0x630 [ 44.942582] [] sock_do_ioctl+0x5d/0x70 [ 44.947972] [] sock_ioctl+0x73/0x280 [ 44.953192] [] do_vfs_ioctl+0x88/0x5b0 [ 44.958587] [] ? up_read+0x23/0x40 [ 44.963631] [] ? __fget_light+0x6c/0xa0 [ 44.969105] [] SyS_ioctl+0x91/0xb0 [ 44.974149] [] system_call_fastpath+0x12/0x6f As bnx2x_init_ptp() is only called if bp->flags contains PTP_SUPPORTED, we also need to guard bnx2x_stop_ptp() with same condition, otherwise ptp_task workqueue is not initialized and kernel barfs on cancel_work_sync() Fixes: eeed018cbfa30 ("bnx2x: Add timestamping and PTP hardware clock support") Reported-by: Michel Lespinasse Signed-off-by: Eric Dumazet Cc: Michal Kalderon Cc: Ariel Elior Cc: Yuval Mintz Cc: David Decotigny Acked-by: Sony Chacko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 33501bcddc48..8a97d28f3d65 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9323,7 +9323,8 @@ unload_error: * function stop ramrod is sent, since as part of this ramrod FW access * PTP registers. 
*/ - bnx2x_stop_ptp(bp); + if (bp->flags & PTP_SUPPORTED) + bnx2x_stop_ptp(bp); /* Disable HW interrupts, NAPI */ bnx2x_netif_stop(bp, 1); -- cgit v1.2.3 From 67866a8c30f4707cef895635d5d08cb8105d2f5e Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 29 Jun 2015 10:41:03 +0200 Subject: sctp: Fix race between OOTB responce and route removal [ Upstream commit 29c4afc4e98f4dc0ea9df22c631841f9c220b944 ] There is a NULL pointer dereference possible during statistics update if the route used for the OOTB response is removed at an unfortunate time. If the route exists when we receive an OOTB packet and we finally jump into sctp_packet_transmit() to send ABORT, but in the meantime the route is removed under our feet, we take the "no_route" path and try to update stats with IP_INC_STATS(sock_net(asoc->base.sk), ...). But sctp_ootb_pkt_new() used to prepare the response packet doesn't call sctp_transport_set_owner() and therefore there is no asoc associated with this packet. Probably a temporary asoc just for OOTB responses is overkill, so just introduce a check like in all other places in sctp_packet_transmit(), where "asoc" is dereferenced. To reproduce this, one needs to 0. ensure that sctp module is loaded (otherwise ABORT is not generated) 1. remove default route on the machine 2. while true; do ip route del [interface-specific route] ip route add [interface-specific route] done 3. send enough OOTB packets (i.e. HB REQs) from another host to trigger ABORT response On x86_64 the crash looks like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] sctp_packet_transmit+0x63c/0x730 [sctp] PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: ... CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.0.5-1-ARCH #1 Hardware name: ... 
task: ffffffff818124c0 ti: ffffffff81800000 task.ti: ffffffff81800000 RIP: 0010:[] [] sctp_packet_transmit+0x63c/0x730 [sctp] RSP: 0018:ffff880127c037b8 EFLAGS: 00010296 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000015ff66b480 RDX: 00000015ff66b400 RSI: ffff880127c17200 RDI: ffff880123403700 RBP: ffff880127c03888 R08: 0000000000017200 R09: ffffffff814625af R10: ffffea00047e4680 R11: 00000000ffffff80 R12: ffff8800b0d38a28 R13: ffff8800b0d38a28 R14: ffff8800b3e88000 R15: ffffffffa05f24e0 FS: 0000000000000000(0000) GS:ffff880127c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000020 CR3: 00000000c855b000 CR4: 00000000000007f0 Stack: ffff880127c03910 ffff8800b0d38a28 ffffffff8189d240 ffff88011f91b400 ffff880127c03828 ffffffffa05c94c5 0000000000000000 ffff8800baa1c520 0000000000000000 0000000000000001 0000000000000000 0000000000000000 Call Trace: [] ? sctp_sf_tabort_8_4_8.isra.20+0x85/0x140 [sctp] [] ? sctp_transport_put+0x52/0x80 [sctp] [] sctp_do_sm+0xb8c/0x19a0 [sctp] [] ? trigger_load_balance+0x90/0x210 [] ? update_process_times+0x59/0x60 [] ? timerqueue_add+0x60/0xb0 [] ? enqueue_hrtimer+0x29/0xa0 [] ? read_tsc+0x9/0x10 [] ? put_page+0x55/0x60 [] ? clockevents_program_event+0x6d/0x100 [] ? skb_free_head+0x58/0x80 [] ? chksum_update+0x1b/0x27 [crc32c_generic] [] ? crypto_shash_update+0xce/0xf0 [] sctp_endpoint_bh_rcv+0x113/0x280 [sctp] [] sctp_inq_push+0x46/0x60 [sctp] [] sctp_rcv+0x880/0x910 [sctp] [] ? sctp_packet_transmit_chunk+0xb0/0xb0 [sctp] [] ? sctp_csum_update+0x20/0x20 [sctp] [] ? ip_route_input_noref+0x235/0xd30 [] ? 
ack_ioapic_level+0x7b/0x150 [] ip_local_deliver_finish+0xae/0x210 [] ip_local_deliver+0x35/0x90 [] ip_rcv_finish+0xf5/0x370 [] ip_rcv+0x2b8/0x3a0 [] __netif_receive_skb_core+0x763/0xa50 [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xd0 [] napi_gro_receive+0xe8/0x120 [] rtl8169_poll+0x2da/0x660 [r8169] [] net_rx_action+0x21a/0x360 [] __do_softirq+0xe1/0x2d0 [] irq_exit+0xad/0xb0 [] do_IRQ+0x58/0xf0 [] common_interrupt+0x6d/0x6d [] ? hrtimer_start+0x18/0x20 [] ? sctp_transport_destroy_rcu+0x29/0x30 [sctp] [] ? mwait_idle+0x60/0xa0 [] arch_cpu_idle+0xf/0x20 [] cpu_startup_entry+0x3ec/0x480 [] rest_init+0x85/0x90 [] start_kernel+0x48b/0x4ac [] ? early_idt_handlers+0x120/0x120 [] x86_64_start_reservations+0x2a/0x2c [] x86_64_start_kernel+0x161/0x184 Code: 90 48 8b 80 b8 00 00 00 48 89 85 70 ff ff ff 48 83 bd 70 ff ff ff 00 0f 85 cd fa ff ff 48 89 df 31 db e8 18 63 e7 e0 48 8b 45 80 <48> 8b 40 20 48 8b 40 30 48 8b 80 68 01 00 00 65 48 ff 40 78 e9 RIP [] sctp_packet_transmit+0x63c/0x730 [sctp] RSP CR2: 0000000000000020 ---[ end trace 5aec7fd2dc983574 ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff) drm_kms_helper: panic occurred, switching back to text console ---[ end Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Alexander Sverdlin Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index fc5e45b8a832..abe7c2db2412 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -599,7 +599,9 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); + + if (asoc) + IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the -- cgit v1.2.3 From 8c6e5415f83cb5ba0da1321ec743b650c8a16764 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 29 Jun 2015 11:22:12 -0500 Subject: amd-xgbe: Add the __GFP_NOWARN flag to Rx buffer allocation [ Upstream commit 472cfe7127760d68b819cf35a26e5a1b44b30f4e ] When allocating Rx related buffers, alloc_pages is called using an order number that is decreased until successful. A system under stress can experience failures during this allocation process resulting in a warning being issued. This message can be of concern to end users even though the failure is not fatal. Since the failure is not fatal and can occur multiple times, the driver should include the __GFP_NOWARN flag to suppress the warning message from being issued. Signed-off-by: Tom Lendacky Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index d81fc6bd4759..5c92fb71b37e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -263,7 +263,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata, int ret; /* Try to obtain pages, decreasing order if necessary */ - gfp |= __GFP_COLD | __GFP_COMP; + gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; while (order >= 0) { pages = alloc_pages(gfp, order); if (pages) -- cgit v1.2.3 From b5aded8311788994641bd1d04b4b922f9c202b8f Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 30 Jun 2015 16:20:20 +0200 Subject: net: mvneta: introduce compatible string "marvell, armada-xp-neta" [ Upstream commit f522a975a8101895a85354b9c143f41b8248e71a ] The mvneta driver supports the Ethernet IP found in the Armada 370, XP, 380 and 385 SoCs. Since at least one more hardware feature is available for the Armada XP SoCs then a way to identify them is needed. This patch introduces a new compatible string "marvell,armada-xp-neta". Signed-off-by: Simon Guinot Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Cc: # v3.8+ Acked-by: Gregory CLEMENT Acked-by: Thomas Petazzoni Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt | 2 +- drivers/net/ethernet/marvell/mvneta.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index 750d577e8083..f5a8ca29aff0 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -1,7 +1,7 @@ * Marvell Armada 370 / Armada XP Ethernet Controller (NETA) Required properties: -- compatible: should be "marvell,armada-370-neta". +- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta". - reg: address and length of the register set for the device. - interrupts: interrupt for the device - phy: See ethernet.txt file in the same directory. diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 74176ec4f39d..4fb27eac6ce5 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3185,6 +3185,7 @@ static int mvneta_remove(struct platform_device *pdev) static const struct of_device_id mvneta_match[] = { { .compatible = "marvell,armada-370-neta" }, + { .compatible = "marvell,armada-xp-neta" }, { } }; MODULE_DEVICE_TABLE(of, mvneta_match); -- cgit v1.2.3 From 5c40e8bf8f8239a5e1fe85ceeb8c6d930f28ef8e Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 30 Jun 2015 16:20:21 +0200 Subject: ARM: mvebu: update Ethernet compatible string for Armada XP [ Upstream commit ea3b55fe83b5fcede82d183164b9d6831b26e33b ] This patch updates the Ethernet DT nodes for Armada XP SoCs with the compatible string "marvell,armada-xp-neta". Signed-off-by: Simon Guinot Fixes: 77916519cba3 ("arm: mvebu: Armada XP MV78230 has only three Ethernet interfaces") Cc: # v3.8+ Acked-by: Gregory CLEMENT Reviewed-by: Thomas Petazzoni Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-370-xp.dtsi | 2 -- arch/arm/boot/dts/armada-370.dtsi | 8 ++++++++ arch/arm/boot/dts/armada-xp-mv78260.dtsi | 2 +- arch/arm/boot/dts/armada-xp-mv78460.dtsi | 2 +- arch/arm/boot/dts/armada-xp.dtsi | 10 +++++++++- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index ec96f0b36346..06a2f2ae9d1e 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -270,7 +270,6 @@ }; eth0: ethernet@70000 { - compatible = "marvell,armada-370-neta"; reg = <0x70000 0x4000>; interrupts = <8>; clocks = <&gateclk 4>; @@ -286,7 +285,6 @@ }; eth1: ethernet@74000 { - compatible = "marvell,armada-370-neta"; reg = <0x74000 0x4000>; interrupts = <10>; clocks = <&gateclk 3>; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 00b50db57c9c..ca4257b2f77d 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -307,6 +307,14 @@ dmacap,memset; }; }; + + ethernet@70000 { + compatible = "marvell,armada-370-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-370-neta"; + }; }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi index 8479fdc9e9c2..c5fdc99f0dbe 100644 --- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi @@ -318,7 +318,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi index 661d54c81580..0e24f1a38540 100644 --- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi @@ -356,7 +356,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + 
compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 013d63f69e36..8fdd6d7c0ab1 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -177,7 +177,7 @@ }; eth2: ethernet@30000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x30000 0x4000>; interrupts = <12>; clocks = <&gateclk 2>; @@ -220,6 +220,14 @@ }; }; + ethernet@70000 { + compatible = "marvell,armada-xp-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-xp-neta"; + }; + xor@f0900 { compatible = "marvell,orion-xor"; reg = <0xF0900 0x100 -- cgit v1.2.3 From bfa06e6258be556d44aad030fe1babeed4f92240 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 30 Jun 2015 16:20:22 +0200 Subject: net: mvneta: disable IP checksum with jumbo frames for Armada 370 [ Upstream commit b65657fc240ae6c1d2a1e62db9a0e61ac9631d7a ] The Ethernet controller found in the Armada 370, 380 and 385 SoCs don't support TCP/IP checksumming with frame sizes larger than 1600 bytes. This patch fixes the issue by disabling the features NETIF_F_IP_CSUM and NETIF_F_TSO for the Armada 370 and compatibles SoCs when the MTU is set to a value greater than 1600 bytes. Signed-off-by: Simon Guinot Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Cc: # v3.8+ Acked-by: Thomas Petazzoni Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 4fb27eac6ce5..74d0389bf233 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -310,6 +310,7 @@ struct mvneta_port { unsigned int link; unsigned int duplex; unsigned int speed; + unsigned int tx_csum_limit; int use_inband_status:1; }; @@ -2508,8 +2509,10 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) dev->mtu = mtu; - if (!netif_running(dev)) + if (!netif_running(dev)) { + netdev_update_features(dev); return 0; + } /* The interface is running, so we have to force a * reallocation of the queues @@ -2538,9 +2541,26 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) mvneta_start_dev(pp); mvneta_port_up(pp); + netdev_update_features(dev); + return 0; } +static netdev_features_t mvneta_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct mvneta_port *pp = netdev_priv(dev); + + if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) { + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + netdev_info(dev, + "Disable IP checksum for MTU greater than %dB\n", + pp->tx_csum_limit); + } + + return features; +} + /* Get mac address */ static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr) { @@ -2862,6 +2882,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_set_rx_mode = mvneta_set_rx_mode, .ndo_set_mac_address = mvneta_set_mac_addr, .ndo_change_mtu = mvneta_change_mtu, + .ndo_fix_features = mvneta_fix_features, .ndo_get_stats64 = mvneta_get_stats64, .ndo_do_ioctl = mvneta_ioctl, }; @@ -3107,6 +3128,9 @@ static int mvneta_probe(struct platform_device *pdev) } } + if (of_device_is_compatible(dn, "marvell,armada-370-neta")) + pp->tx_csum_limit = 1600; + pp->tx_ring_size = MVNETA_MAX_TXD; 
pp->rx_ring_size = MVNETA_MAX_RXD; -- cgit v1.2.3 From 1e4205d4e0b0579b906de8ec7c5920968bfb32a9 Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Wed, 20 May 2015 14:52:40 +0100 Subject: usb: gadget: f_fs: add extra check before unregister_gadget_item commit f14e9ad17f46051b02bffffac2036486097de19e upstream. ffs_closed can race with configfs_rmdir which will call config_item_release, so add an extra check to avoid calling the unregister_gadget_item with a null gadget item. Signed-off-by: Rui Miguel Silva Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 3507f880eb74..45b8c8b338df 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3435,6 +3435,7 @@ done: static void ffs_closed(struct ffs_data *ffs) { struct ffs_dev *ffs_obj; + struct f_fs_opts *opts; ENTER(); ffs_dev_lock(); @@ -3449,8 +3450,13 @@ static void ffs_closed(struct ffs_data *ffs) ffs_obj->ffs_closed_callback) ffs_obj->ffs_closed_callback(ffs); - if (!ffs_obj->opts || ffs_obj->opts->no_configfs - || !ffs_obj->opts->func_inst.group.cg_item.ci_parent) + if (ffs_obj->opts) + opts = ffs_obj->opts; + else + goto done; + + if (opts->no_configfs || !opts->func_inst.group.cg_item.ci_parent + || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount)) goto done; unregister_gadget_item(ffs_obj->opts-> -- cgit v1.2.3 From 0e566fe9dea0750713fa267b5a0b23f0b78b26ea Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Mon, 11 May 2015 20:03:24 +0300 Subject: crypto: talitos - avoid memleak in talitos_alg_alloc() commit 5fa7dadc898567ce14d6d6d427e7bd8ce6eb5d39 upstream.
Fixes: 1d11911a8c57 ("crypto: talitos - fix warning: 'alg' may be used uninitialized in this function") Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 857414afa29a..c52b4b32bcbd 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2561,6 +2561,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, break; default: dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type); + kfree(t_alg); return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From 44cb6ff1db11f2a147cccc943927a5ca20d1bb81 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Mon, 11 May 2015 20:04:49 +0300 Subject: Revert "crypto: talitos - convert to use be16_add_cpu()" commit 69d9cd8c592f1abce820dbce7181bbbf6812cfbd upstream. This reverts commit 7291a932c6e27d9768e374e9d648086636daf61c. The conversion to be16_add_cpu() is incorrect in case cryptlen is negative due to premature (i.e. before addition / subtraction) implicit conversion of cryptlen (int -> u16) leading to sign loss.
Cc: Wei Yongjun Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index c52b4b32bcbd..f062158d4dc9 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -925,7 +925,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, sg_count--; link_tbl_ptr--; } - be16_add_cpu(&link_tbl_ptr->len, cryptlen); + link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len) + + cryptlen); /* tag end of link table */ link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; -- cgit v1.2.3 From 72e09509a225e553f3736b3822a91f99f8c4777c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jun 2015 17:47:42 +0100 Subject: iommu/arm-smmu: Fix broken ATOS check commit d38f0ff9ab35414644995bae187d015c31aae19c upstream. Commit 83a60ed8f0b5 ("iommu/arm-smmu: fix ARM_SMMU_FEAT_TRANS_OPS condition") accidentally negated the ID0_ATOSNS predicate in the ATOS feature check, causing the driver to attempt ATOS requests on SMMUv2 hardware without the ATOS feature implemented. This patch restores the predicate to the correct value. 
Reported-by: Varun Sethi Signed-off-by: Will Deacon Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 66a803b9dd3a..65075ef75e2a 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1567,7 +1567,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) { + if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } -- cgit v1.2.3 From 396887ba3a77ee7f8b17ea85afa8501262e230b4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Jun 2015 10:48:34 +0200 Subject: iommu/amd: Handle large pages correctly in free_pagetable commit 0b3fff54bc01e8e6064d222a33e6fa7adabd94cd upstream. Make sure that we are skipping over large PTEs while walking the page-table tree. Fixes: 5c34c403b723 ("iommu/amd: Fix memory leak in free_pagetable") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e1c7e9e51045..ca9f4edbb940 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1869,9 +1869,15 @@ static void free_pt_##LVL (unsigned long __pt) \ pt = (u64 *)__pt; \ \ for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ if (!IOMMU_PTE_PRESENT(pt[i])) \ continue; \ \ + /* Large PTE? 
*/ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ FN(p); \ } \ -- cgit v1.2.3 From 4b81f9f8194ffa58b27d269233c9f778b758cb2b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 12 Jun 2015 11:45:02 +0200 Subject: mmc: sdhci: fix low memory corruption commit 62a7f368ffbc13d9aedfdd7aeae711b177db69ac upstream. When dma mapping (dma_map_sg) fails in sdhci_pre_dma_transfer, -EINVAL is returned. There are 3 callers of sdhci_pre_dma_transfer: * sdhci_pre_req and sdhci_adma_table_pre: handle negative return * sdhci_prepare_data: handles 0 (error) and "else" (good) only sdhci_prepare_data is therefore broken. When it receives -EINVAL from sdhci_pre_dma_transfer, it assumes 1 sg mapping was mapped. Later, this non-existent mapping with address 0 is kmap'ped and written to: Corrupted low memory at ffff880000001000 (1000 phys) = 22b7d67df2f6d1cf Corrupted low memory at ffff880000001008 (1008 phys) = 63848a5216b7dd95 Corrupted low memory at ffff880000001010 (1010 phys) = 330eb7ddef39e427 Corrupted low memory at ffff880000001018 (1018 phys) = 8017ac7295039bda Corrupted low memory at ffff880000001020 (1020 phys) = 8ce039eac119074f ... So teach sdhci_prepare_data to understand negative return values from sdhci_pre_dma_transfer and disable DMA in that case, as well as for zero. It was introduced in 348487cb28e66b032bae1b38424d81bf5b444408 (mmc: sdhci: use pipeline mmc requests to improve performance). The commit seems to be suspicious also by assigning host->sg_count both in sdhci_pre_dma_transfer and sdhci_adma_table_pre. 
Signed-off-by: Jiri Slaby Fixes: 348487cb28e6 Cc: Ulf Hansson Cc: Haibo Chen Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c80287a02735..9231cdfe2757 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -848,7 +848,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) int sg_cnt; sg_cnt = sdhci_pre_dma_transfer(host, data, NULL); - if (sg_cnt == 0) { + if (sg_cnt <= 0) { /* * This only happens when someone fed * us an invalid request. -- cgit v1.2.3 From cd430d3e58e79d93597c26ee684730302940ae8c Mon Sep 17 00:00:00 2001 From: Joe Konno Date: Tue, 12 May 2015 07:59:42 -0700 Subject: intel_pstate: set BYT MSR with wrmsrl_on_cpu() commit 0dd23f94251f49da99a6cbfb22418b2d757d77d6 upstream. Commit 007bea098b86 (intel_pstate: Add setting voltage value for baytrail P states.) introduced byt_set_pstate() with the assumption that it would always be run by the CPU whose MSR is to be written by it. It turns out, however, that is not always the case in practice, so modify byt_set_pstate() to enforce the MSR write done by it to always happen on the right CPU. Fixes: 007bea098b86 (intel_pstate: Add setting voltage value for baytrail P states.) Signed-off-by: Joe Konno Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6414661ac1c4..c45d274a75c8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -535,7 +535,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) val |= vid; - wrmsrl(MSR_IA32_PERF_CTL, val); + wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); } #define BYT_BCLK_FREQS 5 -- cgit v1.2.3 From 805f18e0bcbf894c4412e3069c7ac4c3e1feb9bf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 4 Jun 2015 15:57:25 -0400 Subject: selinux: fix setting of security labels on NFS commit 9fc2b4b436cff7d8403034676014f1be9d534942 upstream. Before calling into the filesystem, vfs_setxattr calls security_inode_setxattr, which ends up calling selinux_inode_setxattr in our case. That returns -EOPNOTSUPP whenever SBLABEL_MNT is not set. SBLABEL_MNT was supposed to be set by sb_finish_set_opts, which sets it only if selinux_is_sblabel_mnt returns true. The selinux_is_sblabel_mnt logic was broken by eadcabc697e9 "SELinux: do all flags twiddling in one place", which didn't take into the account the SECURITY_FS_USE_NATIVE behavior that had been introduced for nfs with eb9ae686507b "SELinux: Add new labeling type native labels". This caused setxattr's of security labels over NFSv4.2 to fail. Cc: Eric Paris Cc: David Quigley Reported-by: Richard Chan Signed-off-by: J. 
Bruce Fields Acked-by: Stephen Smalley [PM: added the stable dependency] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7dade28affba..212070e1de1a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -403,6 +403,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) return sbsec->behavior == SECURITY_FS_USE_XATTR || sbsec->behavior == SECURITY_FS_USE_TRANS || sbsec->behavior == SECURITY_FS_USE_TASK || + sbsec->behavior == SECURITY_FS_USE_NATIVE || /* Special handling. Genfs but also in-core setxattr handler */ !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || -- cgit v1.2.3 From c8bdf091472978ecfc884148b50be5f93761fdf9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Mar 2015 10:59:43 +0000 Subject: arm: KVM: force execution of HCPTR access on VM exit commit 85e84ba31039595995dae80b277378213602891b upstream. On VM entry, we disable access to the VFP registers in order to perform a lazy save/restore of these registers. On VM exit, we restore access, test if we did enable them before, and save/restore the guest/host registers if necessary. In this sequence, the FPEXC register is always accessed, irrespective of the trapping configuration. If the guest didn't touch the VFP registers, then the HCPTR access has now enabled such access, but we're missing a barrier to ensure architectural execution of the new HCPTR configuration. If the HCPTR access has been delayed/reordered, the subsequent access to FPEXC will cause a trap, which we aren't prepared to handle at all. The same condition exists when trapping to enable VFP for the guest. The fix is to introduce a barrier after enabling VFP access. In the vmexit case, it can be relaxed to only takes place if the guest hasn't accessed its view of the VFP registers, making the access to FPEXC safe. 
The set_hcptr macro is modified to deal with both vmenter/vmexit and vmtrap operations, and now takes an optional label that is branched to when the guest hasn't touched the VFP registers. Reported-by: Vikram Sethi Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/interrupts.S | 10 ++++------ arch/arm/kvm/interrupts_head.S | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 79caf79b304a..f7db3a5d80e3 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -170,13 +170,9 @@ __kvm_vcpu_return: @ Don't trap coprocessor accesses for host kernel set_hstr vmexit set_hdcr vmexit - set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore #ifdef CONFIG_VFPv3 - @ Save floating point registers we if let guest use them. - tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) - bne after_vfp_restore - @ Switch VFP/NEON hardware state to the host's add r7, vcpu, #VCPU_VFP_GUEST store_vfp_state r7 @@ -188,6 +184,8 @@ after_vfp_restore: @ Restore FPEXC_EN which we clobbered on entry pop {r2} VFPFMXR FPEXC, r2 +#else +after_vfp_restore: #endif @ Reset Hyp-role @@ -483,7 +481,7 @@ switch_to_guest_vfp: push {r3-r7} @ NEON/VFP used. Turn on VFP access. - set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11)) @ Switch VFP/NEON hardware state to the guest's add r7, r0, #VCPU_VFP_HOST diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 35e4a3a0c476..48efe2ee452c 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -591,8 +591,13 @@ ARM_BE8(rev r6, r6 ) .endm /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return - * (hardware reset value is 0). Keep previous value in r2. */ -.macro set_hcptr operation, mask + * (hardware reset value is 0). 
Keep previous value in r2. + * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if + * VFP wasn't already enabled (always executed on vmtrap). + * If a label is specified with vmexit, it is branched to if VFP wasn't + * enabled. + */ +.macro set_hcptr operation, mask, label = none mrc p15, 4, r2, c1, c1, 2 ldr r3, =\mask .if \operation == vmentry @@ -601,6 +606,17 @@ ARM_BE8(rev r6, r6 ) bic r3, r2, r3 @ Don't trap defined coproc-accesses .endif mcr p15, 4, r3, c1, c1, 2 + .if \operation != vmentry + .if \operation == vmexit + tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) + beq 1f + .endif + isb + .if \label != none + b \label + .endif +1: + .endif .endm /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return -- cgit v1.2.3 From 3f3587c4ff8c828aac436237aeca8694a26defd3 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 10 Jun 2015 15:19:24 +0100 Subject: ARM: kvm: psci: fix handling of unimplemented functions commit e2d997366dc5b6c9d14035867f73957f93e7578c upstream. According to the PSCI specification and the SMC/HVC calling convention, PSCI function_ids that are not implemented must return NOT_SUPPORTED as return value. Current KVM implementation takes an unhandled PSCI function_id as an error and injects an undefined instruction into the guest if PSCI implementation is called with a function_id that is not handled by the resident PSCI version (ie it is not implemented), which is not the behaviour expected by a guest when calling a PSCI function_id that is not implemented. This patch fixes this issue by returning NOT_SUPPORTED whenever the kvm PSCI call is executed for a function_id that is not implemented by the PSCI kvm layer. 
Cc: Christoffer Dall Acked-by: Sudeep Holla Signed-off-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/psci.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 02fa8eff6ae1..531e922486b2 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -230,10 +230,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_AFFINITY_INFO: val = kvm_psci_vcpu_affinity_info(vcpu); break; - case PSCI_0_2_FN_MIGRATE: - case PSCI_0_2_FN64_MIGRATE: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_MIGRATE_INFO_TYPE: /* * Trusted OS is MP hence does not require migration @@ -242,10 +238,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) */ val = PSCI_0_2_TOS_MP; break; - case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: - case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -271,7 +263,8 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; default: - return -EINVAL; + val = PSCI_RET_NOT_SUPPORTED; + break; } *vcpu_reg(vcpu, 0) = val; @@ -291,12 +284,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) case KVM_PSCI_FN_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; - case KVM_PSCI_FN_CPU_SUSPEND: - case KVM_PSCI_FN_MIGRATE: + default: val = PSCI_RET_NOT_SUPPORTED; break; - default: - return -EINVAL; } *vcpu_reg(vcpu, 0) = val; -- cgit v1.2.3 From 3544f27efa25890516158c8e13d21e878969d125 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jan 2015 13:58:57 +0300 Subject: ARM: tegra20: Store CPU "resettable" status in IRAM commit 4d48edb3c3e1234d6b3fcdfb9ac24d7c6de449cb upstream. Commit 7232398abc6a ("ARM: tegra: Convert PMC to a driver") changed tegra_resume() location storing from late to early and, as a result, broke suspend on Tegra20. 
PMC scratch register 41 is used by tegra LP1 resume code for retrieving stored physical memory address of common resume function and in the same time used by tegra20_cpu_shutdown() (shared by Tegra20 cpuidle driver and platform SMP code), which is storing CPU1 "resettable" status. It implies strict order of scratch register usage, otherwise resume function address is lost on Tegra20 after disabling non-boot CPU's on suspend. Fix it by storing "resettable" status in IRAM instead of PMC scratch register. Signed-off-by: Dmitry Osipenko Fixes: 7232398abc6a (ARM: tegra: Convert PMC to a driver) Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-tegra/cpuidle-tegra20.c | 5 ++--- arch/arm/mach-tegra/reset-handler.S | 10 +++++++--- arch/arm/mach-tegra/reset.h | 4 ++++ arch/arm/mach-tegra/sleep-tegra20.S | 37 ++++++++++++++++++++--------------- arch/arm/mach-tegra/sleep.h | 4 ++++ 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 88de2dce2e87..7469347b1749 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -34,6 +34,7 @@ #include "iomap.h" #include "irq.h" #include "pm.h" +#include "reset.h" #include "sleep.h" #ifdef CONFIG_PM_SLEEP @@ -70,15 +71,13 @@ static struct cpuidle_driver tegra_idle_driver = { #ifdef CONFIG_PM_SLEEP #ifdef CONFIG_SMP -static void __iomem *pmc = IO_ADDRESS(TEGRA_PMC_BASE); - static int tegra20_reset_sleeping_cpu_1(void) { int ret = 0; tegra_pen_lock(); - if (readl(pmc + PMC_SCRATCH41) == CPU_RESETTABLE) + if (readb(tegra20_cpu1_resettable_status) == CPU_RESETTABLE) tegra20_cpu_shutdown(1); else ret = -EINVAL; diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 71be4af5e975..e3070fdab80b 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -169,10 +169,10 @@ after_errata: cmp r6, #TEGRA20 bne 1f 
/* If not CPU0, don't let CPU0 reset CPU1 now that CPU1 is coming up. */ - mov32 r5, TEGRA_PMC_BASE - mov r0, #0 + mov32 r5, TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + mov r0, #CPU_NOT_RESETTABLE cmp r10, #0 - strne r0, [r5, #PMC_SCRATCH41] + strneb r0, [r5, #__tegra20_cpu1_resettable_status_offset] 1: #endif @@ -281,6 +281,10 @@ __tegra_cpu_reset_handler_data: .rept TEGRA_RESET_DATA_SIZE .long 0 .endr + .globl __tegra20_cpu1_resettable_status_offset + .equ __tegra20_cpu1_resettable_status_offset, \ + . - __tegra_cpu_reset_handler_start + .byte 0 .align L1_CACHE_SHIFT ENTRY(__tegra_cpu_reset_handler_end) diff --git a/arch/arm/mach-tegra/reset.h b/arch/arm/mach-tegra/reset.h index 76a93434c6ee..29c3dec0126a 100644 --- a/arch/arm/mach-tegra/reset.h +++ b/arch/arm/mach-tegra/reset.h @@ -35,6 +35,7 @@ extern unsigned long __tegra_cpu_reset_handler_data[TEGRA_RESET_DATA_SIZE]; void __tegra_cpu_reset_handler_start(void); void __tegra_cpu_reset_handler(void); +void __tegra20_cpu1_resettable_status_offset(void); void __tegra_cpu_reset_handler_end(void); void tegra_secondary_startup(void); @@ -47,6 +48,9 @@ void tegra_secondary_startup(void); (IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \ ((u32)&__tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_LP2] - \ (u32)__tegra_cpu_reset_handler_start))) +#define tegra20_cpu1_resettable_status \ + (IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \ + (u32)__tegra20_cpu1_resettable_status_offset)) #endif #define tegra_cpu_reset_handler_offset \ diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S index be4bc5f853f5..e6b684e14322 100644 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ b/arch/arm/mach-tegra/sleep-tegra20.S @@ -97,9 +97,10 @@ ENDPROC(tegra20_hotplug_shutdown) ENTRY(tegra20_cpu_shutdown) cmp r0, #0 reteq lr @ must not be called for CPU 0 - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, 
=__tegra20_cpu1_resettable_status_offset mov r12, #CPU_RESETTABLE - str r12, [r1] + strb r12, [r1, r2] cpu_to_halt_reg r1, r0 ldr r3, =TEGRA_FLOW_CTRL_VIRT @@ -182,38 +183,41 @@ ENDPROC(tegra_pen_unlock) /* * tegra20_cpu_clear_resettable(void) * - * Called to clear the "resettable soon" flag in PMC_SCRATCH41 when + * Called to clear the "resettable soon" flag in IRAM variable when * it is expected that the secondary CPU will be idle soon. */ ENTRY(tegra20_cpu_clear_resettable) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_NOT_RESETTABLE - str r12, [r1] + strb r12, [r1, r2] ret lr ENDPROC(tegra20_cpu_clear_resettable) /* * tegra20_cpu_set_resettable_soon(void) * - * Called to set the "resettable soon" flag in PMC_SCRATCH41 when + * Called to set the "resettable soon" flag in IRAM variable when * it is expected that the secondary CPU will be idle soon. */ ENTRY(tegra20_cpu_set_resettable_soon) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_RESETTABLE_SOON - str r12, [r1] + strb r12, [r1, r2] ret lr ENDPROC(tegra20_cpu_set_resettable_soon) /* * tegra20_cpu_is_resettable_soon(void) * - * Returns true if the "resettable soon" flag in PMC_SCRATCH41 has been + * Returns true if the "resettable soon" flag in IRAM variable has been * set because it is expected that the secondary CPU will be idle soon. 
*/ ENTRY(tegra20_cpu_is_resettable_soon) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 - ldr r12, [r1] + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset + ldrb r12, [r1, r2] cmp r12, #CPU_RESETTABLE_SOON moveq r0, #1 movne r0, #0 @@ -256,9 +260,10 @@ ENTRY(tegra20_sleep_cpu_secondary_finish) mov r0, #TEGRA_FLUSH_CACHE_LOUIS bl tegra_disable_clean_inv_dcache - mov32 r0, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT + ldr r4, =__tegra20_cpu1_resettable_status_offset mov r3, #CPU_RESETTABLE - str r3, [r0] + strb r3, [r0, r4] bl tegra_cpu_do_idle @@ -274,10 +279,10 @@ ENTRY(tegra20_sleep_cpu_secondary_finish) bl tegra_pen_lock - mov32 r3, TEGRA_PMC_VIRT - add r0, r3, #PMC_SCRATCH41 + mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT + ldr r4, =__tegra20_cpu1_resettable_status_offset mov r3, #CPU_NOT_RESETTABLE - str r3, [r0] + strb r3, [r0, r4] bl tegra_pen_unlock diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h index 92d46ec1361a..0d59360d891d 100644 --- a/arch/arm/mach-tegra/sleep.h +++ b/arch/arm/mach-tegra/sleep.h @@ -18,6 +18,7 @@ #define __MACH_TEGRA_SLEEP_H #include "iomap.h" +#include "irammap.h" #define TEGRA_ARM_PERIF_VIRT (TEGRA_ARM_PERIF_BASE - IO_CPU_PHYS \ + IO_CPU_VIRT) @@ -29,6 +30,9 @@ + IO_APB_VIRT) #define TEGRA_PMC_VIRT (TEGRA_PMC_BASE - IO_APB_PHYS + IO_APB_VIRT) +#define TEGRA_IRAM_RESET_BASE_VIRT (IO_IRAM_VIRT + \ + TEGRA_IRAM_RESET_HANDLER_OFFSET) + /* PMC_SCRATCH37-39 and 41 are used for tegra_pen_lock and idle */ #define PMC_SCRATCH37 0x130 #define PMC_SCRATCH38 0x134 -- cgit v1.2.3 From 301773b62355e8a6028a8023be2d04c7317915d9 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 16 Jun 2015 14:12:57 +0200 Subject: ARM: mvebu: fix suspend to RAM on big-endian configurations commit 2f5bc307be2480ba89e4c5d118f406f04a4a7299 upstream. 
The current Armada XP suspend to RAM implementation, as added in commit 27432825ae19f ("ARM: mvebu: Armada XP GP specific suspend/resume code") does not handle big-endian configurations properly: the small bit of assembly code putting the DRAM in self-refresh and toggling the GPIOs to turn off power forgets to convert the values to little-endian. This commit fixes that by making sure the two values we will write to the DRAM controller register and GPIO register are already in little-endian before entering the critical assembly code. Signed-off-by: Thomas Petazzoni Fixes: 27432825ae19f ("ARM: mvebu: Armada XP GP specific suspend/resume code") Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-mvebu/pm-board.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c index 6dfd4ab97b2a..301ab38d38ba 100644 --- a/arch/arm/mach-mvebu/pm-board.c +++ b/arch/arm/mach-mvebu/pm-board.c @@ -43,6 +43,9 @@ static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd) for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++) ackcmd |= BIT(pic_raw_gpios[i]); + srcmd = cpu_to_le32(srcmd); + ackcmd = cpu_to_le32(ackcmd); + /* * Wait a while, the PIC needs quite a bit of time between the * two GPIO commands. -- cgit v1.2.3 From b9d118e11a1b94c155a1cc985f1a2d3a9e03da2f Mon Sep 17 00:00:00 2001 From: preeti Date: Wed, 24 Jun 2015 01:48:01 -0500 Subject: tick/idle/powerpc: Do not register idle states with CPUIDLE_FLAG_TIMER_STOP set in periodic mode commit cc5a2f7b8f39e7db559778f7913a2410257b3e50 upstream. On some archs, the local clockevent device stops in deep cpuidle states. The broadcast framework is used to wakeup cpus in these idle states, in which either an external clockevent device is used to send wakeup ipis or the hrtimer broadcast framework kicks in in the absence of such a device. One cpu is nominated as the broadcast cpu and this cpu sends wakeup ipis to sleeping cpus at the appropriate time. 
This is the implementation in the oneshot mode of broadcast. In periodic mode of broadcast however, the presence of such cpuidle states results in the cpuidle driver calling tick_broadcast_enable() which shuts down the local clockevent devices of all the cpus and appoints the tick broadcast device as the clockevent device for each of them. This works on those archs where the tick broadcast device is a real clockevent device. But on archs which depend on the hrtimer mode of broadcast, the tick broadcast device happens to be a pseudo device. The consequence is that the local clockevent devices of all cpus are shutdown and the kernel hangs at boot time in periodic mode. Let us thus not register the cpuidle states which have CPUIDLE_FLAG_TIMER_STOP flag set, on archs which depend on the hrtimer mode of broadcast in periodic mode. This patch takes care of doing this on powerpc. The cpus would not have entered into such deep cpuidle states in periodic mode on powerpc anyway. So there is no loss here. Signed-off-by: Preeti U Murthy Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle-powernv.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 59372077ec7c..3442764a5293 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -60,6 +60,8 @@ static int nap_loop(struct cpuidle_device *dev, return index; } +/* Register for fastsleep only in oneshot mode of broadcast */ +#ifdef CONFIG_TICK_ONESHOT static int fastsleep_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -83,7 +85,7 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; } - +#endif /* * States for dedicated partition case.
*/ @@ -209,7 +211,14 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].flags = 0; powernv_states[nr_idle_states].target_residency = 100; powernv_states[nr_idle_states].enter = &nap_loop; - } else if (flags[i] & OPAL_PM_SLEEP_ENABLED || + } + + /* + * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come + * within this config dependency check. + */ +#ifdef CONFIG_TICK_ONESHOT + if (flags[i] & OPAL_PM_SLEEP_ENABLED || flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { /* Add FASTSLEEP state */ strcpy(powernv_states[nr_idle_states].name, "FastSleep"); @@ -218,7 +227,7 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].target_residency = 300000; powernv_states[nr_idle_states].enter = &fastsleep_loop; } - +#endif powernv_states[nr_idle_states].exit_latency = ((unsigned int)latency_ns[i]) / 1000; -- cgit v1.2.3 From f6707abd213039644950edb760d5c46503f065fa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:10:24 +1000 Subject: powerpc/perf: Fix book3s kernel to userspace backtraces commit 72e349f1124a114435e599479c9b8d14bfd1ebcd upstream. When we take a PMU exception or a software event we call perf_read_regs(). This overloads regs->result with a boolean that describes if we should use the sampled instruction address register (SIAR) or the regs. If the exception is in kernel, we start with the kernel regs and backtrace through the kernel stack. At this point we switch to the userspace regs and backtrace the user stack with perf_callchain_user(). Unfortunately these regs have not got the perf_read_regs() treatment, so regs->result could be anything. 
If it is non zero, perf_instruction_pointer() decides to use the SIAR, and we get issues like this: 0.11% qemu-system-ppc [kernel.kallsyms] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--52.35%-- 0 | | | |--46.39%-- __hrtimer_start_range_ns | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | | | | |--67.08%-- _raw_spin_lock_irqsave <--- hi mum | | | | | | | --100.00%-- 0x7e714 | | | 0x7e714 Notice the bogus _raw_spin_irqsave when we transition from kernel (system_call) to userspace (0x7e714). We inserted what was in the SIAR. Add a check in regs_use_siar() to check that the regs in question are from a PMU exception. With this fix the backtrace makes sense: 0.47% qemu-system-ppc [kernel.vmlinux] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--53.83%-- 0 | | | |--44.73%-- hrtimer_try_to_cancel | | kvmppc_start_thread | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | __ioctl | | 0x7e714 | | 0x7e714 Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 12b638425bb9..d90893b76e7c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -131,7 +131,16 @@ static void pmao_restore_workaround(bool ebb) { } static bool regs_use_siar(struct pt_regs *regs) { - return !!regs->result; + /* + * When we take a performance monitor exception the regs are setup + * using perf_read_regs() which overloads some fields, in particular + * regs->result to tell us whether to use SIAR. + * + * However if the regs are from another exception, eg. 
a syscall, then + * they have not been setup using perf_read_regs() and so regs->result + * is something random. + */ + return ((TRAP(regs) == 0xf00) && regs->result); } /* -- cgit v1.2.3 From 9dc6d43528e147f9d243df8da6ccf54c7ee244de Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 9 Jun 2015 17:31:38 -0500 Subject: x86/PCI: Use host bridge _CRS info on systems with >32 bit addressing commit 3d9fecf6bfb8b12bc2f9a4c7109895a2a2bb9436 upstream. We enable _CRS on all systems from 2008 and later. On older systems, we ignore _CRS and assume the whole physical address space (excluding RAM and other devices) is available for PCI devices, but on systems that support physical address spaces larger than 4GB, it's doubtful that the area above 4GB is really available for PCI. After d56dbf5bab8c ("PCI: Allocate 64-bit BARs above 4G when possible"), we try to use that space above 4GB *first*, so we're more likely to put a device there. On Juan's Toshiba Satellite Pro U200, BIOS left the graphics, sound, 1394, and card reader devices unassigned (but only after Windows had been booted). Only the sound device had a 64-bit BAR, so it was the only device placed above 4GB, and hence the only device that didn't work. Keep _CRS enabled even on pre-2008 systems if they support physical address space larger than 4GB. 
Fixes: d56dbf5bab8c ("PCI: Allocate 64-bit BARs above 4G when possible") Reported-and-tested-by: Juan Dayer Reported-and-tested-by: Alan Horsfield Link: https://bugzilla.kernel.org/show_bug.cgi?id=99221 Link: https://bugzilla.opensuse.org/show_bug.cgi?id=907092 Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/acpi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 14a63ed6fe09..546f18ad09f0 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -121,8 +121,10 @@ void __init pci_acpi_crs_quirks(void) { int year; - if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) - pci_use_crs = false; + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) { + if (iomem_resource.end <= 0xffffffff) + pci_use_crs = false; + } dmi_check_system(pci_crs_quirks); -- cgit v1.2.3 From b6f2faffa0a189c28a2f2242c0b23fb031ba3075 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 9 Jun 2015 18:54:07 -0500 Subject: x86/PCI: Use host bridge _CRS info on Foxconn K8M890-8237A commit 1dace0116d0b05c967d94644fc4dfe96be2ecd3d upstream. The Foxconn K8M890-8237A has two PCI host bridges, and we can't assign resources correctly without the information from _CRS that tells us which address ranges are claimed by which bridge. 
In the bugs mentioned below, we incorrectly assign a sound card address (this example is from 1033299): bus: 00 index 2 [mem 0x80000000-0xfcffffffff] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-7f]) pci_root PNP0A08:00: host bridge window [mem 0x80000000-0xbfefffff] (ignored) pci_root PNP0A08:00: host bridge window [mem 0xc0000000-0xdfffffff] (ignored) pci_root PNP0A08:00: host bridge window [mem 0xf0000000-0xfebfffff] (ignored) ACPI: PCI Root Bridge [PCI1] (domain 0000 [bus 80-ff]) pci_root PNP0A08:01: host bridge window [mem 0xbff00000-0xbfffffff] (ignored) pci 0000:80:01.0: [1106:3288] type 0 class 0x000403 pci 0000:80:01.0: reg 10: [mem 0xbfffc000-0xbfffffff 64bit] pci 0000:80:01.0: address space collision: [mem 0xbfffc000-0xbfffffff 64bit] conflicts with PCI Bus #00 [mem 0x80000000-0xfcffffffff] pci 0000:80:01.0: BAR 0: assigned [mem 0xfd00000000-0xfd00003fff 64bit] BUG: unable to handle kernel paging request at ffffc90000378000 IP: [] azx_create+0x37c/0x822 [snd_hda_intel] We assigned 0xfd_0000_0000, but that is not in any of the host bridge windows, and the sound card doesn't work. Turn on pci=use_crs automatically for this system. 
Link: https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 Link: https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/acpi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 546f18ad09f0..ff9911707160 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -81,6 +81,17 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), }, }, + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */ + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */ + { + .callback = set_use_crs, + .ident = "Foxconn K8M890-8237A", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"), + DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, /* Now for the blacklist.. */ -- cgit v1.2.3 From dff1316f4f15d9c3ff3ff9c11f2c92e0a2040a49 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 May 2015 08:35:43 +0200 Subject: KVM: mips: use id_to_memslot correctly commit 69a1220060c1523fd0515216eaa29e22f133b894 upstream. The argument to KVM_GET_DIRTY_LOG is a memslot id; it may not match the position in the memslots array, which is sorted by gfn. Reviewed-by: James Hogan Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/mips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index bb68e8d520e8..52f205ae1281 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -982,7 +982,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) /* If nothing is dirty, don't bother messing with page tables. 
*/ if (is_dirty) { - memslot = &kvm->memslots->memslots[log->slot]; + memslot = id_to_memslot(kvm->memslots, log->slot); ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); -- cgit v1.2.3 From 4f3d3bc20372afb29708307608946e2bb56a5a64 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 27 Apr 2015 15:07:16 +0100 Subject: MIPS: Fix KVM guest fixmap address commit 8e748c8d09a9314eedb5c6367d9acfaacddcdc88 upstream. KVM guest kernels for trap & emulate run in user mode, with a modified set of kernel memory segments. However the fixmap address is still in the normal KSeg3 region at 0xfffe0000 regardless, causing problems when cache alias handling makes use of them when handling copy on write. Therefore define FIXADDR_TOP as 0x7ffe0000 in the guest kernel mapped region when CONFIG_KVM_GUEST is defined. Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9887/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/mach-generic/spaces.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/include/asm/mach-generic/spaces.h b/arch/mips/include/asm/mach-generic/spaces.h index 9488fa5f8866..afc96ecb9004 100644 --- a/arch/mips/include/asm/mach-generic/spaces.h +++ b/arch/mips/include/asm/mach-generic/spaces.h @@ -94,7 +94,11 @@ #endif #ifndef FIXADDR_TOP +#ifdef CONFIG_KVM_GUEST +#define FIXADDR_TOP ((unsigned long)(long)(int)0x7ffe0000) +#else #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif +#endif #endif /* __ASM_MACH_GENERIC_SPACES_H */ -- cgit v1.2.3 From 14fe2f14d73609cfdb19b71467d085e5347f7353 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 30 Apr 2015 13:33:59 +0200 Subject: KVM: s390: fix external call injection without sigp interpretation commit b938eacea0b6881f2116a061e6da3ec840e75137 upstream. 
Commit ea5f49692575 ("KVM: s390: only one external call may be pending at a time") introduced a bug on machines that don't have SIGP interpretation facility installed. The injection of an external call will now always fail with -EBUSY (if none is already pending). This leads to the following symptoms: - An external call will be injected but with the wrong "src cpu id", as this id will not be remembered. - The target vcpu will not be woken up, therefore the guest will hang if it cannot deal with unexpected failures of the SIGP EXTERNAL CALL instruction. - If an external call is already pending, -EBUSY will not be reported. Reviewed-by: Christian Borntraeger Reviewed-by: Jens Freimann Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9de47265ef73..2836b25f63d3 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1061,7 +1061,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) if (sclp_has_sigpif()) return __inject_extcall_sigpif(vcpu, src_id); - if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) + if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; *extcall = irq->u.extcall; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); -- cgit v1.2.3 From 534c9f9886c25221d4f4dd396cd78b67214c1796 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 22 Jun 2015 13:20:12 +0200 Subject: KVM: s390: clear floating interrupt bitmap and parameters commit f2ae45edbca7ba5324eef01719ede0151dc5cead upstream. commit 6d3da24141 ("KVM: s390: deliver floating interrupts in order of priority") introduced a regression for the reset handling. We don't clear the bitmap of pending floating interrupts and interrupt parameters. This could result in stale interrupts even after a reset. 
Let's fix this by clearing the pending bitmap and the parameters for service and machine check interrupts. Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2836b25f63d3..b745a109bfc1 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1606,6 +1606,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) int i; spin_lock(&fi->lock); + fi->pending_irqs = 0; + memset(&fi->srv_signal, 0, sizeof(fi->srv_signal)); + memset(&fi->mchk, 0, sizeof(fi->mchk)); for (i = 0; i < FIRQ_LIST_COUNT; i++) clear_irq_list(&fi->lists[i]); for (i = 0; i < FIRQ_MAX_COUNT; i++) -- cgit v1.2.3 From 17de07163877d0f1fdd9185130ac4111f5f340f7 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 21 May 2015 15:39:31 +0200 Subject: s390/bpf: Fix backward jumps commit b035b60ded132592055c0f9bd1cc280259c7de4b upstream. Currently all backward jumps crash for JITed s390x eBPF programs with an illegal instruction program check and kernel panic. Because for negative values the opcode of the jump instruction is overridden by the negative branch offset, an illegal instruction is generated by the JIT: 000003ff802da378: c01100000002 lgfi %r1,2 000003ff802da37e: fffffff52065 unknown <-- illegal instruction 000003ff802da384: b904002e lgr %r2,%r14 So fix this and mask the offset in order not to damage the opcode. 
Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 55423d8be580..9afb9d602f84 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -227,7 +227,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) ({ \ /* Branch instruction needs 6 bytes */ \ int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\ - _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \ + _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) -- cgit v1.2.3 From 2c7a81c60df147a14735ca3c0f039cf4507b65fc Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 11 Jun 2015 19:59:04 +0200 Subject: s390/kdump: fix REGSET_VX_LOW vector register ELF notes commit 3c8e5105e759e7b2d88ea8a85b1285e535bc7500 upstream. The REGSET_VX_LOW ELF notes should contain the lower 64-bit halves of the first sixteen 128-bit vector registers. Unfortunately currently we copy the upper halves. Fix this and correctly copy the lower halves. 
Fixes: a62bc0739253 ("s390/kdump: add support for vector extension") Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/crash_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f73c8059022..49b74454d7ee 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -415,7 +415,7 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) ptr += len; /* Copy lower halves of SIMD registers 0-15 */ for (i = 0; i < 16; i++) { - memcpy(ptr, &vx_regs[i], 8); + memcpy(ptr, &vx_regs[i].u[2], 8); ptr += 8; } return ptr; -- cgit v1.2.3 From aae3d9fecc22c1c7565c94f0818704a1908543ce Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 29 Jun 2015 16:44:01 +0200 Subject: KVM: s390: virtio-ccw: don't overwrite config space values commit 431dae778aea4eed31bd12e5ee82edc571cd4d70 upstream. Eric noticed problems with vhost-scsi and virtio-ccw: vhost-scsi complained about overwriting values in the config space, which was triggered by a broken implementation of virtio-ccw's config get/set routines. It was probably sheer luck that we did not hit this before. When writing a value to the config space, the WRITE_CONF ccw will always write from the beginning of the config space up to and including the value to be set. If the config space up to the value has not yet been retrieved from the device, however, we'll end up overwriting values. Keep track of the known config space and update if needed to avoid this. Moreover, READ_CONF will only read the number of bytes it has been instructed to retrieve, so we must not copy more than that to the buffer, or we might overwrite trailing values. 
Reported-by: Eric Farman Signed-off-by: Cornelia Huck Reviewed-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- drivers/s390/kvm/virtio_ccw.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 6f1fa1773e76..f8d8fdb26b72 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -65,6 +65,7 @@ struct virtio_ccw_device { bool is_thinint; bool going_away; bool device_lost; + unsigned int config_ready; void *airq_info; }; @@ -833,8 +834,11 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, if (ret) goto out_free; - memcpy(vcdev->config, config_area, sizeof(vcdev->config)); - memcpy(buf, &vcdev->config[offset], len); + memcpy(vcdev->config, config_area, offset + len); + if (buf) + memcpy(buf, &vcdev->config[offset], len); + if (vcdev->config_ready < offset + len) + vcdev->config_ready = offset + len; out_free: kfree(config_area); @@ -857,6 +861,9 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, if (!config_area) goto out_free; + /* Make sure we don't overwrite fields. */ + if (vcdev->config_ready < offset) + virtio_ccw_get_config(vdev, 0, NULL, offset); memcpy(&vcdev->config[offset], buf, len); /* Write the config area to the host. */ memcpy(config_area, vcdev->config, sizeof(vcdev->config)); -- cgit v1.2.3 From da75b45af7778af80c7c1d9853868e9bb4b75065 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Jun 2015 14:43:35 +0100 Subject: KVM: arm/arm64: vgic: Avoid injecting reserved IRQ numbers commit 4839ddc27b7212ec58874f62c97da7400c8523be upstream. Commit fd1d0ddf2ae9 (KVM: arm/arm64: check IRQ number on userland injection) rightly limited the range of interrupts userspace can inject in a guest, but failed to consider the (unlikely) case where a guest is configured with 1024 interrupts. 
In this case, interrupts ranging from 1020 to 1023 are unusable, as they have a special meaning for the GIC CPU interface. Make sure that these numbers cannot be used as an IRQ. Also delete a redundant (and similarly buggy) check in kvm_set_irq. Reported-by: Peter Maydell Cc: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 78fb8201014f..950064a0942d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1561,7 +1561,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, goto out; } - if (irq_num >= kvm->arch.vgic.nr_irqs) + if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) return -EINVAL; vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); @@ -2161,10 +2161,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, BUG_ON(!vgic_initialized(kvm)); - if (spi > kvm->arch.vgic.nr_irqs) - return -EINVAL; return kvm_vgic_inject_irq(kvm, 0, spi, level); - } /* MSI not implemented yet */ -- cgit v1.2.3 From 58382447b9a9989da551a7b17e72756f6e238bb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 30 Jun 2015 22:19:17 +0200 Subject: KVM: x86: properly restore LVT0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit db1385624c686fe99fe2d1b61a36e1537b915d08 upstream. Legacy NMI watchdog didn't work after migration/resume, because vapics_in_nmi_mode was left at 0. 
Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4c7deb4f78a1..556397ee4809 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1808,6 +1808,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); apic_update_lvtt(apic); + apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; -- cgit v1.2.3 From 8ed8b759437fadfd18202be9bb65a7f80c3c6d63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 1 Jul 2015 15:31:49 +0200 Subject: KVM: x86: make vapics_in_nmi_mode atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 42720138b06301cc8a7ee8a495a6d021c4b6a9bc upstream. Writes were a bit racy, but hard to turn into a bug at the same time. (Particularly because modern Linux doesn't use this feature anymore.) Signed-off-by: Radim Krčmář [Actually the next patch makes it much, much easier to trigger the race so I'm including this one for stable@ as well. 
- Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/i8254.c | 2 +- arch/x86/kvm/lapic.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4a555beef19..41b06fca39f7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -591,7 +591,7 @@ struct kvm_arch { struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - int vapics_in_nmi_mode; + atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4dce6f8b6129..f90952f64e79 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -305,7 +305,7 @@ static void pit_do_work(struct kthread_work *work) * LVT0 to NMI delivery. Other PIC interrupts are just sent to * VCPU0, and only if its LVT0 is in EXTINT mode. */ - if (kvm->arch.vapics_in_nmi_mode > 0) + if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 556397ee4809..67d07e051436 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1250,10 +1250,10 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) if (!nmi_wd_enabled) { apic_debug("Receive NMI setting on APIC_LVT0 " "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; + atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) -- cgit v1.2.3 From 4c1215740834bef3dec9e1ef9c4f5bbe86f1f6cb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:52 +0200 Subject: fs: Fix S_NOSEC handling commit 
2426f3910069ed47c0cc58559a6d088af7920201 upstream. file_remove_suid() could mistakenly set S_NOSEC inode bit when root was modifying the file. As a result following writes to the file by ordinary user would avoid clearing suid or sgid bits. Fix the bug by checking actual mode bits before setting S_NOSEC. Signed-off-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index ea37cd17b53f..6e342cadef81 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1693,8 +1693,8 @@ int file_remove_suid(struct file *file) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; + if (!error) + inode_has_no_xattr(inode); return error; } -- cgit v1.2.3 From 60c92e3205a6eb78c7970bf370cc9c550bd0c132 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 10 Jun 2015 10:09:32 +1000 Subject: fs/ufs: revert "ufs: fix deadlocks introduced by sb mutex merge" commit 13b987ea275840d74d9df9a44326632fab1894da upstream. This reverts commit 9ef7db7f38d0 ("ufs: fix deadlocks introduced by sb mutex merge") That patch tried to solve commit 0244756edc4b98c ("ufs: sb mutex merge + mutex_destroy") which is itself partially reverted due to multiple deadlocks. 
Signed-off-by: Fabian Frederick Suggested-by: Jan Kara Cc: Ian Campbell Cc: Evgeniy Dushistov Cc: Alexey Khoroshilov Cc: Roger Pau Monne Cc: Ian Jackson Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ufs/inode.c | 5 ++++- fs/ufs/namei.c | 14 ++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index be7d42c7d938..2d93ab07da8a 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -902,6 +902,9 @@ void ufs_evict_inode(struct inode * inode) invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) + if (want_delete) { + lock_ufs(inode->i_sb); ufs_free_inode(inode); + unlock_ufs(inode->i_sb); + } } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index e491a93a7e9a..1f5223c9e1e2 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -128,12 +128,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; + lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_notlocked; + goto out; - lock_ufs(dir->i_sb); if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ inode->i_op = &ufs_symlink_inode_operations; @@ -184,9 +184,13 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; + lock_ufs(dir->i_sb); + inode_inc_link_count(dir); + inode = ufs_new_inode(dir, S_IFDIR|mode); + err = PTR_ERR(inode); if (IS_ERR(inode)) - return PTR_ERR(inode); + goto out_dir; inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; @@ -194,9 +198,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_inc_link_count(inode); - lock_ufs(dir->i_sb); - inode_inc_link_count(dir); - err = ufs_make_empty(inode, dir); if (err) goto out_fail; @@ -215,6 +216,7 @@ out_fail: inode_dec_link_count(inode); unlock_new_inode(inode); iput (inode); +out_dir: 
inode_dec_link_count(dir); unlock_ufs(dir->i_sb); goto out; -- cgit v1.2.3 From 0da5a72210cbe177ab1043be058403ca49eb7a2b Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 10 Jun 2015 10:09:32 +1000 Subject: fs/ufs: restore s_lock mutex commit cdd9eefdf905e92e7fc6cc393314efe68dc6ff66 upstream. Commit 0244756edc4b98c ("ufs: sb mutex merge + mutex_destroy") generated deadlocks in read/write mode on mkdir. This patch partially reverts it keeping fixes by Andrew Morton and mutex_destroy() [AV: fixed a missing bit in ufs_remount()] Signed-off-by: Fabian Frederick Reported-by: Ian Campbell Suggested-by: Jan Kara Cc: Ian Campbell Cc: Evgeniy Dushistov Cc: Alexey Khoroshilov Cc: Roger Pau Monne Cc: Ian Jackson Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/balloc.c | 34 +++++++++++++++++----------------- fs/ufs/ialloc.c | 16 ++++++++-------- fs/ufs/super.c | 10 ++++++++++ fs/ufs/ufs.h | 1 + 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 2c1036080d52..a7106eda5024 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -51,8 +51,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (ufs_fragnum(fragment) + count > uspi->s_fpg) ufs_error (sb, "ufs_free_fragments", "internal error"); - - lock_ufs(sb); + + mutex_lock(&UFS_SB(sb)->s_lock); cgno = ufs_dtog(uspi, fragment); bit = ufs_dtogd(uspi, fragment); @@ -115,13 +115,13 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - - unlock_ufs(sb); + + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return; } @@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) goto failed; } - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); 
do_more: overflow = 0; @@ -211,12 +211,12 @@ do_more: } ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed_unlock: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); failed: UFSD("EXIT (FAILED)\n"); return; @@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, usb1 = ubh_get_usb_first(uspi); *err = -ENOSPC; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); tmp = ufs_data_ptr_to_cpu(sb, p); if (count + ufs_fragnum(fragment) > uspi->s_fpb) { @@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, "fragment %llu, tmp %llu\n", (unsigned long long)fragment, (unsigned long long)tmp); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return INVBLOCK; } if (fragment < UFS_I(inode)->i_lastfrag) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } else { if (tmp) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } @@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, * There is not enough space for user on the device */ if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } @@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -439,7 +439,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, fragment + count); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -477,7 
+477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -485,7 +485,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return result; } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7caa01652888..fd0203ce1f7f 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode) ino = inode->i_ino; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } @@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode) bit = ufs_inotocgoff (ino); ucpi = ufs_load_cylinder (sb, cg); if (!ucpi) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } ucg = ubh_get_ucg(UCPI_UBH(ucpi)); @@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); } @@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sbi = UFS_SB(sb); uspi = sbi->s_uspi; - lock_ufs(sb); + mutex_lock(&sbi->s_lock); /* * Try to place the inode in its parent directory @@ -331,21 +331,21 @@ cg_found: sync_dirty_buffer(bh); brelse(bh); } - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); UFSD("allocating inode %lu\n", inode->i_ino); UFSD("EXIT\n"); return inode; fail_remove_inode: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); clear_nlink(inode); unlock_new_inode(inode); iput(inode); UFSD("EXIT 
(FAILED): err %d\n", err); return ERR_PTR(err); failed: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); make_bad_inode(inode); iput (inode); UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index b3bc3e7ae79d..afe9955654c8 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -694,6 +694,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) unsigned flags; lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); UFSD("ENTER\n"); @@ -711,6 +712,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_put_cstotal(sb); UFSD("EXIT\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; @@ -1277,6 +1279,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) sync_filesystem(sb); lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1290,6 +1293,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } @@ -1297,12 +1301,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { pr_err("ufstype can't be changed during remount\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } @@ -1326,6 +1332,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) */ #ifndef CONFIG_UFS_FS_WRITE pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; #else @@ -1335,11 +1342,13 @@ static int 
ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { pr_err("this ufstype is read-only supported\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { pr_err("failed during remounting\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EPERM; } @@ -1347,6 +1356,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 2a07396d5f9e..cf6368d42d4a 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -30,6 +30,7 @@ struct ufs_sb_info { int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ + struct mutex s_lock; }; struct ufs_inode_info { -- cgit v1.2.3 From 1eda16d166170124b56e4090075ce6997c3d43af Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 24 May 2015 09:25:00 -0500 Subject: vfs: Remove incorrect debugging WARN in prepend_path commit 93e3bce6287e1fb3e60d3324ed08555b5bbafa89 upstream. The warning message in prepend_path is unclear and outdated. It was added as a warning that the mechanism for generating names of pseudo files had been removed from prepend_path and d_dname should be used instead. Unfortunately the warning reads like a general warning, making it unclear what to do with it. Remove the warning. The transition it was added to warn about is long over, and I added code several years ago which in rare cases causes the warning to fire on legitimate code, and the warning is now firing and scaring people for no good reason. Reported-by: Ivan Delalande Reported-by: Omar Sandoval Fixes: f48cfddc6729e ("vfs: In d_path don't call d_dname on a mount point") Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 37b5afdaf698..50bb3c207621 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2927,17 +2927,6 @@ restart: vfsmnt = &mnt->mnt; continue; } - /* - * Filesystems needing to implement special "root names" - * should do so with ->d_dname() - */ - if (IS_ROOT(dentry) && - (dentry->d_name.len != 1 || - dentry->d_name.name[0] != '/')) { - WARN(1, "Root dentry has weird name <%.*s>\n", - (int) dentry->d_name.len, - dentry->d_name.name); - } if (!error) error = is_mounted(vfsmnt) ? 1 : 2; break; -- cgit v1.2.3 From c89d4319ae55186496c43b7a6e510aa1d09dd387 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2015 08:10:09 -0600 Subject: vfs: Ignore unlocked mounts in fs_fully_visible commit ceeb0e5d39fcdf4dca2c997bf225c7fc49200b37 upstream. Limit the mounts fs_fully_visible considers to locked mounts. Unlocked can always be unmounted so considering them adds hassle but no security benefit. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1b9e11167bae..1d4a97c573e0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3185,11 +3185,15 @@ bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; - /* This mount is not fully visible if there are any child mounts - * that cover anything except for empty directories. + /* This mount is not fully visible if there are any + * locked child mounts that cover anything except for + * empty directories. 
*/ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; + /* Only worry about locked mounts */ + if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) + continue; if (!S_ISDIR(inode->i_mode)) goto next; if (inode->i_nlink > 2) -- cgit v1.2.3 From 918ef5dc2e9761f4cf34e8059e74cc2f834234ca Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jun 2015 14:52:04 +0200 Subject: ufs: Fix warning from unlock_new_inode() commit 12ecbb4b1d765a5076920999298d9625439dbe58 upstream. Commit e4502c63f56aeca88 (ufs: deal with nfsd/iget races) introduced unlock_new_inode() call into ufs_add_nondir(). However that function gets called also from ufs_link() which hands it already initialized inode and thus unlock_new_inode() complains. The problem is harmless but annoying. Fix the problem by opencoding necessary stuff in ufs_link() Fixes: e4502c63f56aeca887ced37f24e0def1ef11cec8 Signed-off-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/namei.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 1f5223c9e1e2..2346b83fa12b 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -174,7 +174,12 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, inode_inc_link_count(inode); ihold(inode); - error = ufs_add_nondir(dentry, inode); + error = ufs_add_link(dentry, inode); + if (error) { + inode_dec_link_count(inode); + iput(inode); + } else + d_instantiate(dentry, inode); unlock_ufs(dir->i_sb); return error; } -- cgit v1.2.3 From b94332a90b6572713d0a4ca92563e9655a7dd33d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 Jun 2015 11:26:34 +0200 Subject: ufs: Fix possible deadlock when looking up directories commit 514d748f69c97a51a2645eb198ac5c6218f22ff9 upstream. Commit e4502c63f56aeca88 (ufs: deal with nfsd/iget races) made ufs create inodes with I_NEW flag set. However ufs_mkdir() never cleared this flag. 
Thus if someone ever tried to lookup the directory by inode number, he would deadlock waiting for I_NEW to be cleared. Luckily this mostly happens only if the filesystem is exported over NFS since otherwise we have the inode attached to dentry and don't look it up by inode number. In rare cases dentry can get freed without inode being freed and then we'd hit the deadlock even without NFS export. Fix the problem by clearing I_NEW before instantiating new directory inode. Fixes: e4502c63f56aeca887ced37f24e0def1ef11cec8 Reported-by: Fabian Frederick Signed-off-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 2346b83fa12b..60ee32249b72 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -212,6 +212,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) goto out_fail; unlock_ufs(dir->i_sb); + unlock_new_inode(inode); d_instantiate(dentry, inode); out: return err; -- cgit v1.2.3 From 4e1fc88c6112cb9b691807cc4fe0b6bfa66b1e60 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 17 Jun 2015 18:15:45 +0200 Subject: fs/ufs: restore s_lock mutex_init() commit e4f95517f18271b1da36cfc5d700e46844396d6e upstream. 
Add last missing line in commit "cdd9eefdf905" ("fs/ufs: restore s_lock mutex") Signed-off-by: Fabian Frederick Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index afe9955654c8..dc33f9416340 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -801,6 +801,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); mutex_init(&sbi->mutex); + mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* -- cgit v1.2.3 From 5cf9896dc5c72a6c68c36140568b755f697f7760 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Jul 2015 09:50:06 -0700 Subject: Linux 4.1.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1caf4ad3eb8a..cef84c061f02 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Series 4800 -- cgit v1.2.3 From c2f633b99857d27333de18f53d123a180672c52b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 15:54:49 -0500 Subject: fs: Add helper functions for permanently empty directories. commit fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d upstream. To ensure it is safe to mount proc and sysfs I need to check if filesystems that are mounted on top of them are mounted on truly empty directories. Given that some directories can gain entries over time, knowing that a directory is empty right now is insufficient. Therefore add supporting infrastructure for permanently empty directories that proc and sysfs can use when they create mount points for filesystems and fs_fully_visible can use to test for permanently empty directories to ensure that nothing will be gained by mounting a fresh copy of proc or sysfs. Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 2 files changed, 98 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index cb1fb4b9b637..02813592e121 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1093,3 +1093,99 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, return -EINVAL; } EXPORT_SYMBOL(simple_nosetlease); + + +/* + * Operations for a permanently empty directory. + */ +static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = d_inode(dentry); + generic_fillattr(inode, stat); + return 0; +} + +static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) +{ + return -EPERM; +} + +static int empty_dir_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + return -EOPNOTSUPP; +} + +static int empty_dir_removexattr(struct dentry *dentry, const char *name) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) +{ + return -EOPNOTSUPP; +} + +static const struct inode_operations empty_dir_inode_operations = { + .lookup = empty_dir_lookup, + .permission = generic_permission, + .setattr = empty_dir_setattr, + .getattr = empty_dir_getattr, + .setxattr = empty_dir_setxattr, + .getxattr = empty_dir_getxattr, + .removexattr = empty_dir_removexattr, + .listxattr = empty_dir_listxattr, +}; + +static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) +{ + /* An empty directory has two entries . and .. 
at offsets 0 and 1 */ + return generic_file_llseek_size(file, offset, whence, 2, 2); +} + +static int empty_dir_readdir(struct file *file, struct dir_context *ctx) +{ + dir_emit_dots(file, ctx); + return 0; +} + +static const struct file_operations empty_dir_operations = { + .llseek = empty_dir_llseek, + .read = generic_read_dir, + .iterate = empty_dir_readdir, + .fsync = noop_fsync, +}; + + +void make_empty_dir_inode(struct inode *inode) +{ + set_nlink(inode, 2); + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_rdev = 0; + inode->i_size = 2; + inode->i_blkbits = PAGE_SHIFT; + inode->i_blocks = 0; + + inode->i_op = &empty_dir_inode_operations; + inode->i_fop = &empty_dir_operations; +} + +bool is_empty_dir_inode(struct inode *inode) +{ + return (inode->i_fop == &empty_dir_operations) && + (inode->i_op == &empty_dir_inode_operations); +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..0d5ae7d5dc53 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2780,6 +2780,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; +extern void make_empty_dir_inode(struct inode *inode); +extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); -- cgit v1.2.3 From bdbdf7ee9d561da7b4b840435c963344061953d6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 22:09:14 -0500 Subject: sysctl: Allow creating permanently empty directories that serve as mountpoints. 
commit f9bd6733d3f11e24f3949becf277507d422ee1eb upstream. Add a magic sysctl table sysctl_mount_point that when used to create a directory forces that directory to be permanently empty. Update the code to use make_empty_dir_inode when accessing permanently empty directories. Update the code to not allow adding to permanently empty directories. Update /proc/sys/fs/binfmt_misc to be a permanently empty directory. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/sysctl.h | 3 +++ kernel/sysctl.c | 8 +------- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fea2561d773b..fdda62e6115e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -19,6 +19,28 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +/* Support for permanently empty directories */ + +struct ctl_table sysctl_mount_point[] = { + { } +}; + +static bool is_empty_dir(struct ctl_table_header *head) +{ + return head->ctl_table[0].child == sysctl_mount_point; +} + +static void set_empty_dir(struct ctl_dir *dir) +{ + dir->header.ctl_table[0].child = sysctl_mount_point; +} + +static void clear_empty_dir(struct ctl_dir *dir) + +{ + dir->header.ctl_table[0].child = NULL; +} + void proc_sys_poll_notify(struct ctl_table_poll *poll) { if (!poll) @@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) struct ctl_table *entry; int err; + /* Is this a permanently empty directory? */ + if (is_empty_dir(&dir->header)) + return -EROFS; + + /* Am I creating a permanently empty directory? 
*/ + if (header->ctl_table == sysctl_mount_point) { + if (!RB_EMPTY_ROOT(&dir->root)) + return -EINVAL; + set_empty_dir(dir); + } + dir->header.nreg++; header->parent = dir; err = insert_links(header); @@ -202,6 +235,8 @@ fail: erase_header(header); put_links(header); fail_links: + if (header->ctl_table == sysctl_mount_point) + clear_empty_dir(dir); header->parent = NULL; drop_sysctl_table(&dir->header); return err; @@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mode |= S_IFDIR; inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; + if (is_empty_dir(head)) + make_empty_dir_inode(inode); } out: return inode; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 795d5fea5697..fa7bc29925c9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); + +extern struct ctl_table sysctl_mount_point[]; + #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2082b1a88fb9..c3eee4c6d6c1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1531,12 +1531,6 @@ static struct ctl_table vm_table[] = { { } }; -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) -static struct ctl_table binfmt_misc_table[] = { - { } -}; -#endif - static struct ctl_table fs_table[] = { { .procname = "inode-nr", @@ -1690,7 +1684,7 @@ static struct ctl_table fs_table[] = { { .procname = "binfmt_misc", .mode = 0555, - .child = binfmt_misc_table, + .child = sysctl_mount_point, }, #endif { -- cgit v1.2.3 From a2020b02c1ec4fffddb77785773dff533428f814 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 11 May 2015 16:44:25 -0500 Subject: proc: Allow creating permanently empty directories that serve as mount points commit eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 upstream. Add a new function proc_create_mount_point that, when used, creates a directory that can not be added to. Add a new function is_empty_pde to test if a directory is a mount point. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Update /proc/openprom and /proc/fs/nfsd to be permanently empty directories. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/proc/generic.c | 23 +++++++++++++++++++++++ fs/proc/inode.c | 4 ++++ fs/proc/internal.h | 6 ++++++ fs/proc/root.c | 4 ++-- 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index df6327a2b865..e5dee5c3188e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, WARN(1, "create '/proc/%s' by hand\n", qstr.name); return NULL; } + if (is_empty_pde(*parent)) { + WARN(1, "attempt to add to permanently empty directory"); + return NULL; + } ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); if (!ent) @@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const char *name, } EXPORT_SYMBOL(proc_mkdir); +struct proc_dir_entry *proc_create_mount_point(const char *name) +{ + umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; + struct proc_dir_entry *ent, *parent = NULL; + + ent = __proc_create(&parent, name, mode, 2); + if (ent) { + ent->data = NULL; + ent->proc_fops = NULL; + ent->proc_iops = NULL; + if (proc_register(parent, ent) < 0) { + kfree(ent); + parent->nlink--; + ent = NULL; + } + } + return ent; +} + struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct 
file_operations *proc_fops, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8272aaba1bb0..e3eb5524639f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -423,6 +423,10 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; + if (is_empty_pde(de)) { + make_empty_dir_inode(inode); + return inode; + } if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c835b94c0cd3..aa2781095bd1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) } extern void pde_put(struct proc_dir_entry *); +static inline bool is_empty_pde(const struct proc_dir_entry *pde) +{ + return S_ISDIR(pde->mode) && !pde->proc_iops; +} +struct proc_dir_entry *proc_create_mount_point(const char *name); + /* * inode.c */ diff --git a/fs/proc/root.c b/fs/proc/root.c index b7fa4bfe896a..3d987dfdef83 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -182,10 +182,10 @@ void __init proc_root_init(void) #endif proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); - proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ + proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ - proc_mkdir("openprom", NULL); + proc_create_mount_point("openprom"); #endif proc_tty_init(); proc_mkdir("bus", NULL); -- cgit v1.2.3 From 80c298105bf80e33ae494eaeba51f00352b5373c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:09:29 -0500 Subject: kernfs: Add support for always empty directories. commit ea015218f2f7ace2dad9cedd21ed95bdba2886d7 upstream. 
Add a new function kernfs_create_empty_dir that can be used to create directory that can not be modified. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 38 +++++++++++++++++++++++++++++++++++++- fs/kernfs/inode.c | 2 ++ include/linux/kernfs.h | 3 +++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index fffca9517321..2d48d28e1640 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *kn) goto out_unlock; ret = -ENOENT; + if (parent->flags & KERNFS_EMPTY_DIR) + goto out_unlock; + if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) goto out_unlock; @@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, return ERR_PTR(rc); } +/** + * kernfs_create_empty_dir - create an always empty directory + * @parent: parent in which to create a new directory + * @name: name of the new directory + * + * Returns the created node on success, ERR_PTR() value on failure. 
+ */ +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name) +{ + struct kernfs_node *kn; + int rc; + + /* allocate */ + kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR); + if (!kn) + return ERR_PTR(-ENOMEM); + + kn->flags |= KERNFS_EMPTY_DIR; + kn->dir.root = parent->dir.root; + kn->ns = NULL; + kn->priv = NULL; + + /* link in */ + rc = kernfs_add_one(kn); + if (!rc) + return kn; + + kernfs_put(kn); + return ERR_PTR(rc); +} + static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&kernfs_mutex); error = -ENOENT; - if (!kernfs_active(kn) || !kernfs_active(new_parent)) + if (!kernfs_active(kn) || !kernfs_active(new_parent) || + (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; error = 0; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 2da8493a380b..756dd56aaf60 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -296,6 +296,8 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) case KERNFS_DIR: inode->i_op = &kernfs_dir_iops; inode->i_fop = &kernfs_dir_fops; + if (kn->flags & KERNFS_EMPTY_DIR) + make_empty_dir_inode(inode); break; case KERNFS_FILE: inode->i_size = kn->attr.size; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b..29d1896c3ba5 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -45,6 +45,7 @@ enum kernfs_node_flag { KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, + KERNFS_EMPTY_DIR = 0x1000, }; /* @flags for kernfs_create_root() */ @@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns); +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name); 
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, -- cgit v1.2.3 From 9924f6e89823a41bfd272ab759636276b9f9ee9c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:31:40 -0500 Subject: sysfs: Add support for permanently empty directories to serve as mount points. commit 87d2846fcf88113fae2341da1ca9a71f0d916f2c upstream. Add two functions sysfs_create_mount_point and sysfs_remove_mount_point that hang a permanently empty directory off of a kobject or remove a permanently empty directory hanging from a kobject. Export these new functions so modular filesystems can use them. Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 15 +++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 0b45ff42f374..94374e435025 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); } + +/** + * sysfs_create_mount_point - create an always empty directory + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to add + */ +int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *kn, *parent = parent_kobj->sd; + + kn = kernfs_create_empty_dir(parent, name); + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) + sysfs_warn_dup(parent, name); + return PTR_ERR(kn); + } + + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_create_mount_point); + +/** + * sysfs_remove_mount_point - remove an always empty directory. 
+ * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to remove + * + */ +void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *parent = parent_kobj->sd; + + kernfs_remove_by_name_ns(parent, name, NULL); +} +EXPORT_SYMBOL_GPL(sysfs_remove_mount_point); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99382c0df17e..9f65758311a4 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); +int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name); +void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, @@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, return 0; } +static inline int sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name) +{ + return 0; +} + +static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name) +{ +} + static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) -- cgit v1.2.3 From 28dd1f346b2f0fc2ab8285046ed0bd91e9b808d3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 17:35:41 -0500 Subject: sysfs: Create mountpoints with sysfs_create_mount_point commit f9bb48825a6b5d02f4cabcc78967c75db903dcdc upstream. This allows for better documentation in the code and it allows for a simpler and fully correct version of fs_fully_visible to be written. 
The mount points converted and their filesystems are: /sys/hypervisor/s390/ s390_hypfs /sys/kernel/config/ configfs /sys/kernel/debug/ debugfs /sys/firmware/efi/efivars/ efivarfs /sys/fs/fuse/connections/ fusectl /sys/fs/pstore/ pstore /sys/kernel/tracing/ tracefs /sys/fs/cgroup/ cgroup /sys/kernel/security/ securityfs /sys/fs/selinux/ selinuxfs /sys/fs/smackfs/ smackfs Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/s390/hypfs/inode.c | 12 ++++-------- drivers/firmware/efi/efi.c | 6 ++---- fs/configfs/mount.c | 10 ++++------ fs/debugfs/inode.c | 11 ++++------- fs/fuse/inode.c | 9 +++------ fs/pstore/inode.c | 12 ++++-------- fs/tracefs/inode.c | 6 ++---- kernel/cgroup.c | 10 ++++------ security/inode.c | 10 ++++------ security/selinux/selinuxfs.c | 11 +++++------ security/smack/smackfs.c | 8 ++++---- 11 files changed, 40 insertions(+), 65 deletions(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d3f896a35b98..2eeb0a0f506d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,8 +456,6 @@ static const struct super_operations hypfs_s_ops = { .show_options = hypfs_show_options, }; -static struct kobject *s390_kobj; - static int __init hypfs_init(void) { int rc; @@ -481,18 +479,16 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_sprp_exit; } - s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); - if (!s390_kobj) { - rc = -ENOMEM; + rc = sysfs_create_mount_point(hypervisor_kobj, "s390"); + if (rc) goto fail_hypfs_diag0c_exit; - } rc = register_filesystem(&hypfs_type); if (rc) goto fail_filesystem; return 0; fail_filesystem: - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); fail_hypfs_diag0c_exit: hypfs_diag0c_exit(); fail_hypfs_sprp_exit: @@ -510,7 +506,7 @@ fail_dbfs_exit: static void __exit hypfs_exit(void) { unregister_filesystem(&hypfs_type); - kobject_put(s390_kobj); + 
sysfs_remove_mount_point(hypervisor_kobj, "s390"); hypfs_diag0c_exit(); hypfs_sprp_exit(); hypfs_vm_exit(); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3061bb8629dc..e14363d12690 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -65,7 +65,6 @@ static int __init parse_efi_cmdline(char *str) early_param("efi", parse_efi_cmdline); static struct kobject *efi_kobj; -static struct kobject *efivars_kobj; /* * Let's not leave out systab information that snuck into @@ -212,10 +211,9 @@ static int __init efisubsys_init(void) goto err_remove_group; /* and the standard mountpoint for efivarfs */ - efivars_kobj = kobject_create_and_add("efivars", efi_kobj); - if (!efivars_kobj) { + error = sysfs_create_mount_point(efi_kobj, "efivars"); + if (error) { pr_err("efivars: Subsystem registration failed.\n"); - error = -ENOMEM; goto err_remove_group; } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 537356742091..a8f3b589a2df 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -129,8 +129,6 @@ void configfs_release_fs(void) } -static struct kobject *config_kobj; - static int __init configfs_init(void) { int err = -ENOMEM; @@ -141,8 +139,8 @@ static int __init configfs_init(void) if (!configfs_dir_cachep) goto out; - config_kobj = kobject_create_and_add("config", kernel_kobj); - if (!config_kobj) + err = sysfs_create_mount_point(kernel_kobj, "config"); + if (err) goto out2; err = register_filesystem(&configfs_fs_type); @@ -152,7 +150,7 @@ static int __init configfs_init(void) return 0; out3: pr_err("Unable to register filesystem!\n"); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); out2: kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; @@ -163,7 +161,7 @@ out: static void __exit configfs_exit(void) { unregister_filesystem(&configfs_fs_type); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); 
kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c1e7ffb0dab6..12756040ca20 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -716,20 +716,17 @@ bool debugfs_initialized(void) } EXPORT_SYMBOL_GPL(debugfs_initialized); - -static struct kobject *debug_kobj; - static int __init debugfs_init(void) { int retval; - debug_kobj = kobject_create_and_add("debug", kernel_kobj); - if (!debug_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "debug"); + if (retval) + return retval; retval = register_filesystem(&debug_fs_type); if (retval) - kobject_put(debug_kobj); + sysfs_remove_mount_point(kernel_kobj, "debug"); else debugfs_registered = true; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 082ac1c97f39..18dacf9ed8ff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1238,7 +1238,6 @@ static void fuse_fs_cleanup(void) } static struct kobject *fuse_kobj; -static struct kobject *connections_kobj; static int fuse_sysfs_init(void) { @@ -1250,11 +1249,9 @@ static int fuse_sysfs_init(void) goto out_err; } - connections_kobj = kobject_create_and_add("connections", fuse_kobj); - if (!connections_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fuse_kobj, "connections"); + if (err) goto out_fuse_unregister; - } return 0; @@ -1266,7 +1263,7 @@ static int fuse_sysfs_init(void) static void fuse_sysfs_cleanup(void) { - kobject_put(connections_kobj); + sysfs_remove_mount_point(fuse_kobj, "connections"); kobject_put(fuse_kobj); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index dc43b5f29305..3adcc4669fac 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -461,22 +461,18 @@ static struct file_system_type pstore_fs_type = { .kill_sb = pstore_kill_sb, }; -static struct kobject *pstore_kobj; - static int __init init_pstore_fs(void) { - int err = 0; + int err; /* Create a convenient mount point for people to access pstore */ - pstore_kobj = 
kobject_create_and_add("pstore", fs_kobj); - if (!pstore_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "pstore"); + if (err) goto out; - } err = register_filesystem(&pstore_fs_type); if (err < 0) - kobject_put(pstore_kobj); + sysfs_remove_mount_point(fs_kobj, "pstore"); out: return err; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d92bdf3b079a..a43df11a163f 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -631,14 +631,12 @@ bool tracefs_initialized(void) return tracefs_registered; } -static struct kobject *trace_kobj; - static int __init tracefs_init(void) { int retval; - trace_kobj = kobject_create_and_add("tracing", kernel_kobj); - if (!trace_kobj) + retval = sysfs_create_mount_point(kernel_kobj, "tracing"); + if (retval) return -EINVAL; retval = register_filesystem(&trace_fs_type); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 469dd547770c..e8a5491be756 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1924,8 +1924,6 @@ static struct file_system_type cgroup_fs_type = { .kill_sb = cgroup_kill_sb, }; -static struct kobject *cgroup_kobj; - /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -5044,13 +5042,13 @@ int __init cgroup_init(void) ss->bind(init_css_set.subsys[ssid]); } - cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); - if (!cgroup_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "cgroup"); + if (err) + return err; err = register_filesystem(&cgroup_fs_type); if (err < 0) { - kobject_put(cgroup_kobj); + sysfs_remove_mount_point(fs_kobj, "cgroup"); return err; } diff --git a/security/inode.c b/security/inode.c index 91503b79c5f8..0e37e4fba8fa 100644 --- a/security/inode.c +++ b/security/inode.c @@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *dentry) } EXPORT_SYMBOL_GPL(securityfs_remove); -static struct kobject *security_kobj; - static int __init securityfs_init(void) { int retval; - security_kobj = 
kobject_create_and_add("security", kernel_kobj); - if (!security_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "security"); + if (retval) + return retval; retval = register_filesystem(&fs_type); if (retval) - kobject_put(security_kobj); + sysfs_remove_mount_point(kernel_kobj, "security"); return retval; } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index d2787cca1fcb..3d2201413028 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_type = { }; struct vfsmount *selinuxfs_mount; -static struct kobject *selinuxfs_kobj; static int __init init_sel_fs(void) { @@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void) if (!selinux_enabled) return 0; - selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj); - if (!selinuxfs_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "selinux"); + if (err) + return err; err = register_filesystem(&sel_fs_type); if (err) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); return err; } @@ -1887,7 +1886,7 @@ __initcall(init_sel_fs); #ifdef CONFIG_SECURITY_SELINUX_DISABLE void exit_sel_fs(void) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); kern_unmount(selinuxfs_mount); unregister_filesystem(&sel_fs_type); } diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index d9682985349e..ac4cac7c661a 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2241,16 +2241,16 @@ static const struct file_operations smk_revoke_subj_ops = { .llseek = generic_file_llseek, }; -static struct kset *smackfs_kset; /** * smk_init_sysfs - initialize /sys/fs/smackfs * */ static int smk_init_sysfs(void) { - smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj); - if (!smackfs_kset) - return -ENOMEM; + int err; + err = sysfs_create_mount_point(fs_kobj, "smackfs"); + if (err) + return err; return 0; } -- 
cgit v1.2.3 From 8e7c56b6f14d1388d5aa2feb5b28dd6360199cef Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 20:51:09 -0500 Subject: mnt: Update fs_fully_visible to test for permanently empty directories commit 7236c85e1be51a9e25ba0f6e087a66ca89605a49 upstream. fs_fully_visible attempts to make fresh mounts of proc and sysfs give the mounter no more access to proc and sysfs than if they could have by creating a bind mount. One aspect of proc and sysfs that makes this particularly tricky is that there are other filesystems that typically mount on top of proc and sysfs. As those filesystems are mounted on empty directories in practice it is safe to ignore them. However testing to ensure filesystems are mounted on empty directories has not been something the in kernel data structures have supported so the current test for an empty directory which checks to see if nlink <= 2 is a bit lacking. proc and sysfs have recently been modified to use the new empty_dir infrastructure to create all of their dedicated mount points. Instead of testing for S_ISDIR(inode->i_mode) && i_nlink <= 2 to see if a directory is empty, test for is_empty_dir_inode(inode). That small change guarantees mounts found on proc and sysfs really are safe to ignore, because the directories are not only empty but nothing can ever be added to them. This guarantees there is nothing to worry about when mounting proc and sysfs. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1d4a97c573e0..c40b48cd7647 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3194,9 +3194,8 @@ bool fs_fully_visible(struct file_system_type *type) /* Only worry about locked mounts */ if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) continue; - if (!S_ISDIR(inode->i_mode)) - goto next; - if (inode->i_nlink > 2) + /* Is the directory permanetly empty? 
*/ + if (!is_empty_dir_inode(inode)) goto next; } visible = true; -- cgit v1.2.3 From b5eb51f2ee063044401492650e9e01bb35974870 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:22:29 -0500 Subject: mnt: Refactor the logic for mounting sysfs and proc in a user namespace commit 1b852bceb0d111e510d1a15826ecc4a19358d512 upstream. Fresh mounts of proc and sysfs are a very special case that works very much like a bind mount. Unfortunately the current structure can not preserve the MNT_LOCK... mount flags. Therefore refactor the logic into a form that can be modified to preserve those lock bits. Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount of the filesystem be fully visible in the current mount namespace, before the filesystem may be mounted. Move the logic for calling fs_fully_visible from proc and sysfs into fs/namespace.c where it has greater access to mount namespace state. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 8 +++++++- fs/proc/root.c | 5 +---- fs/sysfs/mount.c | 5 +---- include/linux/fs.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index c40b48cd7647..50a0d950404c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,6 +2332,8 @@ unlock: return err; } +static bool fs_fully_visible(struct file_system_type *fs_type); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -2363,6 +2365,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } + if (type->fs_flags & FS_USERNS_VISIBLE) { + if (!fs_fully_visible(type)) + return -EPERM; + } } mnt = vfs_kern_mount(type, flags, name, data); @@ -3164,7 +3170,7 @@ bool current_chrooted(void) return chrooted; } -bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type) { struct 
mnt_namespace *ns = current->nsproxy->mnt_ns; struct mount *mnt; diff --git a/fs/proc/root.c b/fs/proc/root.c index 3d987dfdef83..68feb0f70e63 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - /* Does the mounter have privilege over the pid namespace? */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -159,7 +156,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; void __init proc_root_init(void) diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8a49486bf30c..1c6ac6fcee9f 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, bool new_sb; if (!(flags & MS_KERNMOUNT)) { - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return ERR_PTR(-EPERM); } @@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_type = { .name = "sysfs", .mount = sysfs_mount, .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; int __init sysfs_init(void) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0d5ae7d5dc53..571aab91bfc0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1897,6 +1897,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -1984,7 +1985,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); -- cgit v1.2.3 From 51c2c47ef6349d49e49002054f8c0d11d3b5646e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:49:47 -0500 Subject: mnt: Modify fs_fully_visible to deal with locked ro nodev and atime commit 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 upstream. Ignore an existing mount if the locked readonly, nodev or atime attributes are less permissive than the desired attributes of the new mount. On success ensure the new mount locks all of the same readonly, nodev and atime attributes as the old mount. The nosuid and noexec attributes are not checked here as this change is destined for stable and enforcing those attributes causes a regression in lxc and libvirt-lxc where those applications will not start and there are no known executables on sysfs or proc and no known way to create executables without code modifications. Fixes: e51db73532955 ("userns: Better restrictions on when proc and sysfs can be mounted") Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 50a0d950404c..02c6875dd945 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,7 +2332,7 @@ unlock: return err; } -static bool fs_fully_visible(struct file_system_type *fs_type); +static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags); /* * create a new mount for userspace and request it to be added into the @@ -2366,7 +2366,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type)) + if (!fs_fully_visible(type, &mnt_flags)) return -EPERM; } } @@ -3170,9 +3170,10 @@ bool current_chrooted(void) return chrooted; } -static bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; + int new_flags = *new_mnt_flags; struct mount *mnt; bool visible = false; @@ -3191,6 +3192,19 @@ static bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; + /* Verify the mount flags are equal to or more permissive + * than the proposed new mount. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(new_flags & MNT_READONLY)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(new_flags & MNT_NODEV)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) + continue; + /* This mount is not fully visible if there are any * locked child mounts that cover anything except for * empty directories. 
@@ -3204,6 +3218,10 @@ static bool fs_fully_visible(struct file_system_type *type) if (!is_empty_dir_inode(inode)) goto next; } + /* Preserve the locked attributes */ + *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \ + MNT_LOCK_NODEV | \ + MNT_LOCK_ATIME); visible = true; goto found; next: ; -- cgit v1.2.3 From 03c29ef2e86669e356493153d80b5c4ed0fc2f84 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 28 May 2015 10:58:49 +0800 Subject: gpio: crystalcove: set IRQCHIP_SKIP_SET_WAKE for the irqchip commit 61e749d7e1627d375156553ea0ae83c4f6bb5a9b upstream. The CrystalCove GPIO irqchip doesn't have irq_set_wake callback defined so we should set IRQCHIP_SKIP_SET_WAKE for it or it would cause an irq desc's wake_depth unbalanced warning during system resume phase from the gpio_keys driver, which is the driver for the power button of the ASUS T100 laptop. Signed-off-by: Aaron Lu Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-crystalcove.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c index 91a7ffe83135..ab457fc00e75 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -255,6 +255,7 @@ static struct irq_chip crystalcove_irqchip = { .irq_set_type = crystalcove_irq_type, .irq_bus_lock = crystalcove_bus_lock, .irq_bus_sync_unlock = crystalcove_bus_sync_unlock, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static irqreturn_t crystalcove_gpio_irq_handler(int irq, void *data) -- cgit v1.2.3 From 9b553d64d949da26b63759e4ae9af863676bad37 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 May 2015 13:21:37 +0200 Subject: gpio: rcar: Check for irq_set_irq_wake() failures commit 501ef0f95a57e7c32138733c468394a52244c85b upstream. If an interrupt controller doesn't support wake-up configuration, irq_set_irq_wake() returns an error code. 
Then any subsequent call trying to deconfigure wake-up will cause an imbalance, and a warning will be printed: WARNING: CPU: 1 PID: 1341 at kernel/irq/manage.c:540 irq_set_irq_wake+0x9c/0xf8() Unbalanced IRQ 26 wake disable To fix this, refrain from any further parent interrupt controller (de)configuration if irq_set_irq_wake() failed. Alternative fixes would be: - calling "gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE)" from the platform code, - setting "gic_chip.flags = IRQCHIP_SKIP_SET_WAKE" in the GIC driver code, but these were withheld as the GIC hardware doesn't really support wake-up interrupts. Fixes: ab82fa7da4dce5c7 ("gpio: rcar: Prevent module clock disable when wake-up is enabled") Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-rcar.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index fd3977465948..1e14a6c74ed1 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -177,8 +177,17 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on) struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct gpio_rcar_priv *p = container_of(gc, struct gpio_rcar_priv, gpio_chip); - - irq_set_irq_wake(p->irq_parent, on); + int error; + + if (p->irq_parent) { + error = irq_set_irq_wake(p->irq_parent, on); + if (error) { + dev_dbg(&p->pdev->dev, + "irq %u doesn't support irq_set_wake\n", + p->irq_parent); + p->irq_parent = 0; + } + } if (!p->clk) return 0; -- cgit v1.2.3 From a6556a506ea1c737e8adafd59015786448ffbf3f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 May 2015 11:13:05 -0700 Subject: rcu: Correctly handle non-empty Tiny RCU callback list with none ready commit 6e91f8cb138625be96070b778d9ba71ce520ea7e upstream. 
If, at the time __rcu_process_callbacks() is invoked, there are callbacks in Tiny RCU's callback list, but none of them are ready to be invoked, the current list-management code will knit the non-ready callbacks out of the list. This can result in hangs and possibly worse. This commit therefore inserts a check for there being no callbacks that can be invoked immediately. This bug is unlikely to occur -- you have to get a new callback between the time rcu_sched_qs() or rcu_bh_qs() was called, but before we get to __rcu_process_callbacks(). It was detected by the addition of RCU-bh testing to rcutorture, which in turn was instigated by Iftekhar Ahmed's mutation testing. Although this bug was made much more likely by 915e8a4fe45e (rcu: Remove fastpath from __rcu_process_callbacks()), this did not cause the bug, but rather made it much more probable. That said, it takes more than 40 hours of rcutorture testing, on average, for this bug to appear, so this fix cannot be considered an emergency. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tiny.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 069742d61c68..ec3086879cb5 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -170,6 +170,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); + if (rcp->donetail == &rcp->rcucblist) { + /* No callbacks ready, so just leave. */ + local_irq_restore(flags); + return; + } RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; -- cgit v1.2.3 From 77d10175396e764488886923eeaec58c3ff1fb7b Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 13 May 2015 08:50:27 -0500 Subject: ipr: Increase default adapter init stage change timeout commit 45c44b5ff9caa743ed9c2bfd44307c536c9caf1e upstream. 
Increase the default init stage change timeout from 15 seconds to 30 seconds. This resolves issues we have seen with some adapters not transitioning to the first init stage within 15 seconds, which results in adapter initialization failures. Signed-off-by: Brian King Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 47412cf4eaac..73790a1d0969 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -272,7 +272,7 @@ #define IPR_RUNTIME_RESET 0x40000000 #define IPR_IPL_INIT_MIN_STAGE_TIME 5 -#define IPR_IPL_INIT_DEFAULT_STAGE_TIME 15 +#define IPR_IPL_INIT_DEFAULT_STAGE_TIME 30 #define IPR_IPL_INIT_STAGE_UNKNOWN 0x0 #define IPR_IPL_INIT_STAGE_TRANSOP 0xB0000000 #define IPR_IPL_INIT_STAGE_MASK 0xff000000 -- cgit v1.2.3 From 37100f76f93236e99a89c0f0c5531eff03920725 Mon Sep 17 00:00:00 2001 From: Ryan Underwood Date: Sun, 25 Jan 2015 16:07:09 -0800 Subject: Disable write buffering on Toshiba ToPIC95 commit 2fb22a8042fe96b4220843f79241c116d90922c4 upstream. Disable write buffering on the Toshiba ToPIC95 if it is enabled by somebody (it is not supposed to be a power-on default according to the datasheet). On the ToPIC95, practically no 32-bit Cardbus card will work under heavy load without locking up the whole system if this is left enabled. I tried about a dozen. It does not affect 16-bit cards. This is similar to the O2 bugs in early controller revisions it seems. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=55961 Signed-off-by: Ryan C. 
Underwood Signed-off-by: Dominik Brodowski Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/topic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pcmcia/topic.h b/drivers/pcmcia/topic.h index 615a45a8fe86..582688fe7505 100644 --- a/drivers/pcmcia/topic.h +++ b/drivers/pcmcia/topic.h @@ -104,6 +104,9 @@ #define TOPIC_EXCA_IF_CONTROL 0x3e /* 8 bit */ #define TOPIC_EXCA_IFC_33V_ENA 0x01 +#define TOPIC_PCI_CFG_PPBCN 0x3e /* 16-bit */ +#define TOPIC_PCI_CFG_PPBCN_WBEN 0x0400 + static void topic97_zoom_video(struct pcmcia_socket *sock, int onoff) { struct yenta_socket *socket = container_of(sock, struct yenta_socket, socket); @@ -138,6 +141,7 @@ static int topic97_override(struct yenta_socket *socket) static int topic95_override(struct yenta_socket *socket) { u8 fctrl; + u16 ppbcn; /* enable 3.3V support for 16bit cards */ fctrl = exca_readb(socket, TOPIC_EXCA_IF_CONTROL); @@ -146,6 +150,18 @@ static int topic95_override(struct yenta_socket *socket) /* tell yenta to use exca registers to power 16bit cards */ socket->flags |= YENTA_16BIT_POWER_EXCA | YENTA_16BIT_POWER_DF; + /* Disable write buffers to prevent lockups under load with numerous + Cardbus cards, observed on Tecra 500CDT and reported elsewhere on the + net. This is not a power-on default according to the datasheet + but some BIOSes seem to set it. */ + if (pci_read_config_word(socket->dev, TOPIC_PCI_CFG_PPBCN, &ppbcn) == 0 + && socket->dev->revision <= 7 + && (ppbcn & TOPIC_PCI_CFG_PPBCN_WBEN)) { + ppbcn &= ~TOPIC_PCI_CFG_PPBCN_WBEN; + pci_write_config_word(socket->dev, TOPIC_PCI_CFG_PPBCN, ppbcn); + dev_info(&socket->dev->dev, "Disabled ToPIC95 Cardbus write buffers.\n"); + } + return 0; } -- cgit v1.2.3 From 08e394684b82b0987295d96201071f52504c83f5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Jun 2015 11:56:22 +0200 Subject: ALSA: pcm: Fix pcm_class sysfs output commit 60b93030b44a8c2cd015cebe5624fd7552ec67ec upstream. 
The pcm_class sysfs of each PCM substream gives only "none" since the recent code change to embed the struct device. Fix the code to point directly to the embedded device object properly. Fixes: ef46c7af93f9 ('ALSA: pcm: Embed struct device') Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index b25bcf5b8644..dfed728d8c87 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -1027,7 +1027,8 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) static ssize_t show_pcm_class(struct device *dev, struct device_attribute *attr, char *buf) { - struct snd_pcm *pcm; + struct snd_pcm_str *pstr = container_of(dev, struct snd_pcm_str, dev); + struct snd_pcm *pcm = pstr->pcm; const char *str; static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = { [SNDRV_PCM_CLASS_GENERIC] = "generic", @@ -1036,8 +1037,7 @@ static ssize_t show_pcm_class(struct device *dev, [SNDRV_PCM_CLASS_DIGITIZER] = "digitizer", }; - if (! (pcm = dev_get_drvdata(dev)) || - pcm->dev_class > SNDRV_PCM_CLASS_LAST) + if (pcm->dev_class > SNDRV_PCM_CLASS_LAST) str = "none"; else str = strs[pcm->dev_class]; -- cgit v1.2.3 From 704ffc4cf721867e35cb922b12cae3210d6f7e67 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 24 Jun 2015 10:46:33 +0200 Subject: ALSA: hda - Fix Dock Headphone on Thinkpad X250 seen as a Line Out commit ec56af67a10a0d82b79027878a81fce08d002d50 upstream. Thinkpad X250, when attached to a dock, has two headphone outs but no line out. Make sure we don't try to turn this into one headphone and one line out (since that disables the headphone amp on the dock). 
Alsa-info at http://www.alsa-project.org/db/?f=36f8764e1d782397928feec715d0ef90dfddd4c1 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6d010452c1f5..9b0f1ed02cc9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4505,6 +4505,7 @@ enum { ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, + ALC292_FIXUP_TPT440_DOCK2, ALC283_FIXUP_BXBT2807_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, @@ -4953,6 +4954,12 @@ static const struct hda_fixup alc269_fixups[] = { .chain_id = ALC269_FIXUP_HEADSET_MODE }, [ALC292_FIXUP_TPT440_DOCK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + .chained = true, + .chain_id = ALC292_FIXUP_TPT440_DOCK2 + }, + [ALC292_FIXUP_TPT440_DOCK2] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { { 0x16, 0x21211010 }, /* dock headphone */ -- cgit v1.2.3 From 167bdde510b17d86767b71282c91036ab671340f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jun 2015 14:37:18 -0400 Subject: ALSA: hda - set proper caps for newer AMD hda audio in KB/KV commit 650474fb737c3e0ea0f6ab8e43c2cd161080ce5c upstream. Fixes audio problems on newer asics. 
Noticed by: Kelly Anderson Signed-off-by: Alex Deucher Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b6db25b23dd3..c403dd10d126 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2054,6 +2054,8 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, /* ATI HDMI */ + { PCI_DEVICE(0x1002, 0x1308), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0x793b), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x7919), @@ -2062,6 +2064,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x970f), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, + { PCI_DEVICE(0x1002, 0x9840), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaa00), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0xaa08), -- cgit v1.2.3 From c69c5674d87690f87e48a4267bc68e62fc605d9c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 25 Jun 2015 08:48:54 +0200 Subject: ALSA: hda - Disable widget power-save for VIA codecs commit 735c75cf4d434862e38c01dcfb2ce8d2fcb9035f upstream. The widget power-save that was enabled in 4.1 kernel seems resulting in the silent output on VIA codecs by some reason. Some widgets get wrong power states. As a quick fix, turn this flag off while keeping power_down_unused flag. This will bring back to the state of 4.0.x. 
Fixes: 688b12cc3ca8 ('ALSA: hda - Use the new power control for VIA codecs') Reported-and-tested-by: Harald Dunkel Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index bab6c04932aa..0baeecc2213c 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -238,7 +238,9 @@ static int via_pin_power_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = codec->power_save_node; + struct via_spec *spec = codec->spec; + + ucontrol->value.enumerated.item[0] = spec->gen.power_down_unused; return 0; } @@ -249,9 +251,9 @@ static int via_pin_power_ctl_put(struct snd_kcontrol *kcontrol, struct via_spec *spec = codec->spec; bool val = !!ucontrol->value.enumerated.item[0]; - if (val == codec->power_save_node) + if (val == spec->gen.power_down_unused) return 0; - codec->power_save_node = val; + /* codec->power_save_node = val; */ /* widget PM seems yet broken */ spec->gen.power_down_unused = val; analog_low_current_mode(codec); return 1; -- cgit v1.2.3 From d097fff2c1fbeef22c20f0d6a9a9ed236baabba7 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 26 Jun 2015 12:35:17 +0800 Subject: ALSA: hda - restore the MIC FIXUP for some Dell machines commit 831bfdf9520e389357cfeee42a6174a73ce7bdb7 upstream. Those FIXUPs were applied to the machines through pin quirks, but recently the PCI_QUIRK makes them can't apply to the machines. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=99851 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9b0f1ed02cc9..5e5e40a66e92 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4516,6 +4516,8 @@ enum { ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13_GPIO6, + ALC288_FIXUP_DELL_XPS_13, + ALC288_FIXUP_DISABLE_AAMIX, ALC292_FIXUP_DELL_E7X, ALC292_FIXUP_DISABLE_AAMIX, }; @@ -5046,9 +5048,23 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC288_FIXUP_DISABLE_AAMIX] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC288_FIXUP_DELL_XPS_13_GPIO6 + }, + [ALC288_FIXUP_DELL_XPS_13] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_dell_xps13, + .chained = true, + .chain_id = ALC288_FIXUP_DISABLE_AAMIX + }, [ALC292_FIXUP_DISABLE_AAMIX] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE }, [ALC292_FIXUP_DELL_E7X] = { .type = HDA_FIXUP_FUNC, @@ -5079,7 +5095,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X), + SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 
0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), -- cgit v1.2.3 From 4b461d112cdbca4183f4b2f0d67a081a650269ab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 27 Jun 2015 10:21:13 +0200 Subject: ALSA: hda - Add headset support to Acer Aspire V5 commit 7819717b11346b8a5420b223b46600e394049c66 upstream. Acer Aspire V5 with ALC282 codec needs the similar quirk like Dell laptops to support the headset mic. The headset mic pin is 0x19 and it's not exposed by BIOS, thus we need to fix the pincfg as well. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96201 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5e5e40a66e92..fc67aeddd80e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4478,6 +4478,7 @@ enum { ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, ALC269_FIXUP_HEADSET_MODE, ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, + ALC269_FIXUP_ASPIRE_HEADSET_MIC, ALC269_FIXUP_ASUS_X101_FUNC, ALC269_FIXUP_ASUS_X101_VERB, ALC269_FIXUP_ASUS_X101, @@ -4754,6 +4755,15 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_no_hp_mic, }, + [ALC269_FIXUP_ASPIRE_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* headset mic w/o jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE, + }, [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5079,6 +5089,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700), + SND_PCI_QUIRK(0x1025, 0x072d, "Acer Aspire V5-571G", 
ALC269_FIXUP_ASPIRE_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), -- cgit v1.2.3 From f367c3b94b1a20a27eb51a2646d7409a1da2d761 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Jun 2015 08:38:02 +0200 Subject: ALSA: hda - Fix the dock headphone output on Fujitsu Lifebook E780 commit 4df3fd1700abbb53bd874143dfd1f9ac9e7cbf4b upstream. Fujitsu Lifebook E780 sets the sequence number 0x0f to only one of the two headphones, thus the driver tries to assign another as the line-out, and this results in the inconsistent mapping between the created jack ctl and the actual I/O. Due to this, PulseAudio doesn't handle it properly and gets the silent output. The fix is to ignore the non-HP sequencer checks. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=99681 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc67aeddd80e..944a87b395b8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4458,6 +4458,7 @@ enum { ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_LIFEBOOK_HP_PIN, + ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -4627,6 +4628,10 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5191,6 +5196,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT), SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), -- cgit v1.2.3 From 13e888e7678c544cf4460eb997f6b9f660193563 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Jun 2015 10:56:53 +0200 Subject: ALSA: hda - Add a fixup for Dell E7450 commit 4275554dccdf0afac07b2b638ba7456095629558 upstream. Dell E7450 [1028:062e] needs the same quirk as other E7xx models. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100571 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 944a87b395b8..0e75998db39f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5109,6 +5109,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), + SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), -- cgit v1.2.3 From 9e6004867b1a9e4ededb8d56edb827de67073291 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jun 2015 01:33:36 +0200 Subject: ACPI / init: Switch over platform to the ACPI mode later commit b064a8fa77dfead647564c46ac8fc5b13bd1ab73 upstream. Commit 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before timekeeping_init()" moved the ACPI subsystem initialization, including the ACPI mode enabling, to an earlier point in the initialization sequence, to allow the timekeeping subsystem use ACPI early. Unfortunately, that resulted in boot regressions on some systems and the early ACPI initialization was moved toward its original position in the kernel initialization code by commit c4e1acbb35e4 "ACPI / init: Invoke early ACPI initialization later". However, that turns out to be insufficient, as boot is still broken on the Tyan S8812 mainboard. 
To fix that issue, split the ACPI early initialization code into two pieces so the majority of it still located in acpi_early_init() and the part switching over the platform into the ACPI mode goes into a new function, acpi_subsystem_init(), executed at the original early ACPI initialization spot. That fixes the Tyan S8812 boot problem, but still allows ACPI tables to be loaded earlier which is useful to the EFI code in efi_enter_virtual_mode(). Link: https://bugzilla.kernel.org/show_bug.cgi?id=97141 Fixes: 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before timekeeping_init()" Reported-and-tested-by: Marius Tolzmann Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani Reviewed-by: Hanjun Guo Reviewed-by: Lee, Chun-Yi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/bus.c | 56 ++++++++++++++++++++++++++++++++++++++-------------- include/linux/acpi.h | 2 ++ init/main.c | 1 + 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index c412fdb28d34..513e7230e3d0 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -470,6 +470,16 @@ static int __init acpi_bus_init_irq(void) return 0; } +/** + * acpi_early_init - Initialize ACPICA and populate the ACPI namespace. + * + * The ACPI tables are accessible after this, but the handling of events has not + * been initialized and the global lock is not available yet, so AML should not + * be executed at this point. + * + * Doing this before switching the EFI runtime services to virtual mode allows + * the EfiBootServices memory to be freed slightly earlier on boot. + */ void __init acpi_early_init(void) { acpi_status status; @@ -533,26 +543,42 @@ void __init acpi_early_init(void) acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi; } #endif + return; + + error0: + disable_acpi(); +} + +/** + * acpi_subsystem_init - Finalize the early initialization of ACPI. 
+ * + * Switch over the platform to the ACPI mode (if possible), initialize the + * handling of ACPI events, install the interrupt and global lock handlers. + * + * Doing this too early is generally unsafe, but at the same time it needs to be + * done before all things that really depend on ACPI. The right spot appears to + * be before finalizing the EFI initialization. + */ +void __init acpi_subsystem_init(void) +{ + acpi_status status; + + if (acpi_disabled) + return; status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "Unable to enable ACPI\n"); - goto error0; + disable_acpi(); + } else { + /* + * If the system is using ACPI then we can be reasonably + * confident that any regulators are managed by the firmware + * so tell the regulator core it has everything it needs to + * know. + */ + regulator_has_full_constraints(); } - - /* - * If the system is using ACPI then we can be reasonably - * confident that any regulators are managed by the firmware - * so tell the regulator core it has everything it needs to - * know. 
- */ - regulator_has_full_constraints(); - - return; - - error0: - disable_acpi(); - return; } static int __init acpi_bus_init(void) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..4550be3bb63b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -440,6 +440,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 extern void acpi_early_init(void); +extern void acpi_subsystem_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); @@ -494,6 +495,7 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) } static inline void acpi_early_init(void) { } +static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { diff --git a/init/main.c b/init/main.c index 2115055faeac..2a89545e0a5d 100644 --- a/init/main.c +++ b/init/main.c @@ -664,6 +664,7 @@ asmlinkage __visible void __init start_kernel(void) check_bugs(); + acpi_subsystem_init(); sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { -- cgit v1.2.3 From 5ab4a6010600c24c37f5178d6da981e18cbb090a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jun 2015 01:32:38 +0200 Subject: ACPI / PM: Add missing pm_generic_complete() invocation commit 3d56402d3fa8d10749eeb36293dd1992bd5ad0c3 upstream. Add missing invocation of pm_generic_complete() to acpi_subsys_complete() to allow ->complete callbacks provided by the drivers of devices using the ACPI PM domain to be executed during system resume. Fixes: f25c0ae2b4c4 (ACPI / PM: Avoid resuming devices in ACPI PM domain during system suspend) Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 735db11a9b00..8217e0bda60f 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -953,6 +953,7 @@ EXPORT_SYMBOL_GPL(acpi_subsys_prepare); */ void acpi_subsys_complete(struct device *dev) { + pm_generic_complete(dev); /* * If the device had been runtime-suspended before the system went into * the sleep state it is going out of and it has never been resumed till -- cgit v1.2.3 From c75c95bb4bb656d1d2914caf17d71317941100f2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Jun 2015 18:32:02 +0200 Subject: ACPI / PNP: Avoid conflicting resource reservations commit 0f1b414d190724617eb1cdd615592fa8cd9d0b50 upstream. Commit b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" overlooked the fact that the memory and/or I/O regions reserved by acpi_reserve_resources() may conflict with those reserved by the PNP "system" driver. If that conflict actually takes place, it causes the reservations made by the "system" driver to fail while before commit b9a5e5e18fbf all reservations made by it and by acpi_reserve_resources() would be successful. In turn, that allows the resources that haven't been reserved by the "system" driver to be used by others (e.g. PCI) which sometimes leads to functional problems (up to and including boot failures). To fix that issue, introduce a common resource reservation routine, acpi_reserve_region(), to be used by both acpi_reserve_resources() and the "system" driver, that will track all resources reserved by it and avoid making conflicting requests. Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/osl.c | 6 +- drivers/acpi/resource.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pnp/system.c | 35 ++++++++--- include/linux/acpi.h | 10 +++ 4 files changed, 197 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7ccba395c9dd..5226a8b921ae 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -175,11 +175,7 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - /* Resources are never freed */ - if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) - request_region(addr, length, desc); - else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - request_mem_region(addr, length, desc); + acpi_reserve_region(addr, length, gas->space_id, 0, desc); } static void __init acpi_reserve_resources(void) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8244f013f210..fcb7807ea8b7 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86 @@ -621,3 +622,162 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares, return (type & types) ? 0 : 1; } EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type); + +struct reserved_region { + struct list_head node; + u64 start; + u64 end; +}; + +static LIST_HEAD(reserved_io_regions); +static LIST_HEAD(reserved_mem_regions); + +static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, + char *desc) +{ + unsigned int length = end - start + 1; + struct resource *res; + + res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? 
+ request_region(start, length, desc) : + request_mem_region(start, length, desc); + if (!res) + return -EIO; + + res->flags &= ~flags; + return 0; +} + +static int add_region_before(u64 start, u64 end, u8 space_id, + unsigned long flags, char *desc, + struct list_head *head) +{ + struct reserved_region *reg; + int error; + + reg = kmalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) + return -ENOMEM; + + error = request_range(start, end, space_id, flags, desc); + if (error) + return error; + + reg->start = start; + reg->end = end; + list_add_tail(&reg->node, head); + return 0; +} + +/** + * acpi_reserve_region - Reserve an I/O or memory region as a system resource. + * @start: Starting address of the region. + * @length: Length of the region. + * @space_id: Identifier of address space to reserve the region from. + * @flags: Resource flags to clear for the region after requesting it. + * @desc: Region description (for messages). + * + * Reserve an I/O or memory region as a system resource to prevent others from + * using it. If the new region overlaps with one of the regions (in the given + * address space) already reserved by this routine, only the non-overlapping + * parts of it will be reserved. + * + * Returned is either 0 (success) or a negative error code indicating a resource + * reservation problem. It is the code of the first encountered error, but the + * routine doesn't abort until it has attempted to request all of the parts of + * the new region that don't overlap with other regions reserved previously. + * + * The resources requested by this routine are never released. 
+ */ +int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, + unsigned long flags, char *desc) +{ + struct list_head *regions; + struct reserved_region *reg; + u64 end = start + length - 1; + int ret = 0, error = 0; + + if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) + regions = &reserved_io_regions; + else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + regions = &reserved_mem_regions; + else + return -EINVAL; + + if (list_empty(regions)) + return add_region_before(start, end, space_id, flags, desc, regions); + + list_for_each_entry(reg, regions, node) + if (reg->start == end + 1) { + /* The new region can be prepended to this one. */ + ret = request_range(start, end, space_id, flags, desc); + if (!ret) + reg->start = start; + + return ret; + } else if (reg->start > end) { + /* No overlap. Add the new region here and get out. */ + return add_region_before(start, end, space_id, flags, + desc, &reg->node); + } else if (reg->end == start - 1) { + goto combine; + } else if (reg->end >= start) { + goto overlap; + } + + /* The new region goes after the last existing one. */ + return add_region_before(start, end, space_id, flags, desc, regions); + + overlap: + /* + * The new region overlaps an existing one. + * + * The head part of the new region immediately preceding the existing + * overlapping one can be combined with it right away. + */ + if (reg->start > start) { + error = request_range(start, reg->start - 1, space_id, flags, desc); + if (error) + ret = error; + else + reg->start = start; + } + + combine: + /* + * The new region is adjacent to an existing one. If it extends beyond + * that region all the way to the next one, it is possible to combine + * all three of them. 
+ */ + while (reg->end < end) { + struct reserved_region *next = NULL; + u64 a = reg->end + 1, b = end; + + if (!list_is_last(&reg->node, regions)) { + next = list_next_entry(reg, node); + if (next->start <= end) + b = next->start - 1; + } + error = request_range(a, b, space_id, flags, desc); + if (!error) { + if (next && next->start == b + 1) { + reg->end = next->end; + list_del(&next->node); + kfree(next); + } else { + reg->end = end; + break; + } + } else if (next) { + if (!ret) + ret = error; + + reg = next; + } else { + break; + } + } + + return ret ? ret : error; +} +EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 49c1720df59a..515f33882ab8 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,6 +7,7 @@ * Bjorn Helgaas */ +#include #include #include #include @@ -22,25 +23,41 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; +#ifdef CONFIG_ACPI +static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) +{ + u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; + return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); +} +#else +static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) +{ + struct resource *res; + + res = io ? 
request_region(start, length, desc) : + request_mem_region(start, length, desc); + if (res) { + res->flags &= ~IORESOURCE_BUSY; + return true; + } + return false; +} +#endif + static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - struct resource *res; + bool reserved; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - if (port) - res = request_region(start, end - start + 1, regionid); - else - res = request_mem_region(start, end - start + 1, regionid); - if (res) - res->flags &= ~IORESOURCE_BUSY; - else + reserved = __reserve_range(start, end - start + 1, !!port, regionid); + if (!reserved) kfree(regionid); /* @@ -49,7 +66,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - res ? "has been" : "could not be"); + reserved ? 
"has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4550be3bb63b..5da2d2e9d38e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -332,6 +332,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); +int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, + unsigned long flags, char *desc); + #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -527,6 +530,13 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } +static inline int acpi_reserve_region(u64 start, unsigned int length, + u8 space_id, unsigned long flags, + char *desc) +{ + return -ENXIO; +} + struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) -- cgit v1.2.3 From 8841d6439b1d6504d9c7498ebfdee1408e09460e Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Mon, 4 May 2015 11:13:03 +0200 Subject: iio: accel: kxcjk-1013: add the "KXCJ9000" ACPI id commit 61e2c70da9cfc79e8485eafa0f98b5919b04bbe1 upstream. This id has been seen in the DSDT of the Teclast X98 Air 3G tablet based on Intel Bay Trail. 
Signed-off-by: Antonio Ospite Cc: Bastien Nocera Reviewed-by: Daniel Baluta Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kxcjk-1013.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 51da3692d561..5b7a860df524 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1418,6 +1418,7 @@ static const struct dev_pm_ops kxcjk1013_pm_ops = { static const struct acpi_device_id kx_acpi_match[] = { {"KXCJ1013", KXCJK1013}, {"KXCJ1008", KXCJ91008}, + {"KXCJ9000", KXCJ91008}, {"KXTJ1009", KXTJ21009}, {"SMO8500", KXCJ91008}, { }, -- cgit v1.2.3 From f3ff4345ef597115869a227dbf738dde157f8521 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 May 2015 16:55:18 -0300 Subject: tools selftests: Fix 'clean' target with make 3.81 commit 60df4642a83546fa6ea8286f5094ce8c0906c3ec upstream. Make 3.81 doesn't have the 'undefine' command. Using undefine to clear LDFLAGS fails when make version 3.81 is used. Fix it to use override to clear LDFLAGS. Tested-by: Shuah Khan Cc: David Ahern Cc: Ingo Molnar Cc: Michael Ellerman Link: http://lkml.kernel.org/r/20150514151225.GH23588@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Shuah Khan Cc: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 95abddcd7839..f76830643086 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -27,7 +27,7 @@ TARGETS_HOTPLUG += memory-hotplug # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. 
ifeq (1,$(MAKELEVEL)) -undefine LDFLAGS +override LDFLAGS = override MAKEFLAGS = endif -- cgit v1.2.3 From eb1eecd100ce48d4f8368a0c475ecb937abd40ec Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Nov 2014 15:42:09 +0530 Subject: ARC: add smp barriers around atomics per Documentation/atomic_ops.txt commit 2576c28e3f623ed401db7e6197241865328620ef upstream. - arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers Since ARCv2 only provides load/load, store/store and all/all, we need the full barrier - LLOCK/SCOND based atomics, bitops, cmpxchg, which return modified values were lacking the explicit smp barriers. - Non LLOCK/SCOND varaints don't need the explicit barriers since that is implicity provided by the spin locks used to implement the critical section (the spin lock barriers in turn are also fixed in this commit as explained above Cc: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/atomic.h | 21 +++++++++++++++++++++ arch/arc/include/asm/bitops.h | 19 +++++++++++++++++++ arch/arc/include/asm/cmpxchg.h | 17 +++++++++++++++++ arch/arc/include/asm/spinlock.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 9917a45fc430..20b7dc17979e 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -43,6 +43,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned int temp; \ \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ @@ -52,6 +58,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ : "r"(&v->counter), "ir"(i) \ : "cc"); \ \ + smp_mb(); \ + \ return temp; \ } @@ -105,6 +113,9 @@ static inline int 
atomic_##op##_return(int i, atomic_t *v) \ unsigned long flags; \ unsigned long temp; \ \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ atomic_ops_lock(flags); \ temp = v->counter; \ temp c_op i; \ @@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and) #define __atomic_add_unless(v, a, u) \ ({ \ int c, old; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ c = old; \ + \ + smp_mb(); \ + \ c; \ }) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 4051e9525939..624a9d048ca9 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -117,6 +117,12 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND themselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bset %1, %0, %3 \n" @@ -126,6 +132,8 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -139,6 +147,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bclr %1, %0, %3 \n" @@ -148,6 +158,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -161,6 +173,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bxor %1, %0, %3 \n" @@ -170,6 +184,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), 
"ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -249,6 +265,9 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ bitops_lock(flags); old = *m; diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 03cd6894855d..c9b1f461a587 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -10,6 +10,8 @@ #define __ASM_ARC_CMPXCHG_H #include + +#include #include #ifdef CONFIG_ARC_HAS_LLSC @@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) { unsigned long prev; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND thmeselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%1] \n" " brne %0, %2, 2f \n" @@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) "r"(new) /* can't be "ir". 
scond can't take limm for "b" */ : "cc"); + smp_mb(); + return prev; } @@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) int prev; volatile unsigned long *p = ptr; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ atomic_ops_lock(flags); prev = *p; if (prev == expected) @@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, switch (size) { case 4: + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r"(val) : "r"(ptr) : "memory"); + smp_mb(); + return val; } return __xchg_bad_pointer(); diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index b6a8c2dfbe6e..e1651df6a93d 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + /* + * This smp_mb() is technically superfluous, we only need the one + * after the lock for providing the ACQUIRE semantics. 
+ * However doing the "right" thing was regressing hackbench + * so keeping this, pending further investigation + */ + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" " breq %0, %2, 1b \n" : "+&r" (tmp) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "memory"); + + /* + * ACQUIRE barrier to ensure load/store after taking the lock + * don't "bleed-up" out of the critical section (leak-in is allowed) + * http://www.spinics.net/lists/kernel/msg2010409.html + * + * ARCv2 only has load-load, store-store and all-all barrier + * thus need the full all-all barrier + */ + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + smp_mb(); + return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); } @@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + /* + * RELEASE barrier: given the instructions avail on ARCv2, full barrier + * is the only option + */ + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + /* + * superfluous, but keeping for now - see pairing version in + * arch_spin_lock above + */ smp_mb(); } -- cgit v1.2.3 From 3e43ff498fb1a9f8a58c7746c207b8ffd9a9a87d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Nov 2014 15:54:01 +0530 Subject: ARC: add compiler barrier to LLSC based cmpxchg commit d57f727264f1425a94689bafc7e99e502cb135b5 upstream. When auditing cmpxchg call sites, Chuck noted that gcc was optimizing away some of the desired LDs. 
| do { | new = old = *ipi_data_ptr; | new |= 1U << msg; | } while (cmpxchg(ipi_data_ptr, old, new) != old); was generating to below | 8015cef8: ld r2,[r4,0] <-- First LD | 8015cefc: bset r1,r2,r1 | | 8015cf00: llock r3,[r4] <-- atomic op | 8015cf04: brne r3,r2,8015cf10 | 8015cf08: scond r1,[r4] | 8015cf0c: bnz 8015cf00 | | 8015cf10: brne r3,r2,8015cf00 <-- Branch doesn't go to orig LD Although this was fixed by adding a ACCESS_ONCE in this call site, it seems safer (for now at least) to add compiler barrier to LLSC based cmpxchg Reported-by: Chuck Jordan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/cmpxchg.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index c9b1f461a587..44fd531f4d7b 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -33,10 +33,11 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) " scond %3, [%1] \n" " bnz 1b \n" "2: \n" - : "=&r"(prev) - : "r"(ptr), "ir"(expected), - "r"(new) /* can't be "ir". scond can't take limm for "b" */ - : "cc"); + : "=&r"(prev) /* Early clobber, to prevent reg reuse */ + : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ + "ir"(expected), + "r"(new) /* can't be "ir". scond can't take LIMM for "b" */ + : "cc", "memory"); /* so that gcc knows memory is being written here */ smp_mb(); -- cgit v1.2.3 From 16e860b30b2c3703d92a6cd701a6e602d897f481 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 14 Jun 2015 02:09:06 +0300 Subject: arc: fix use of uninitialized arc_pmu commit 7002f77541f877a5590615ceb3da32b114f14b62 upstream. static arc_pmu in the arch/arc/kernel/perf_event.c is not initialized as it's shadowed by a local variable of the same name in the arc_pmu_device_probe. 
Signed-off-by: Max Filippov Fixes: 03c94fcf954d "ARC: perf: make @arc_pmu static global" Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/perf_event.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index fd2ec50102f2..57b58f52d825 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -266,7 +266,6 @@ static int arc_pmu_add(struct perf_event *event, int flags) static int arc_pmu_device_probe(struct platform_device *pdev) { - struct arc_pmu *arc_pmu; struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; int i, j, ret; -- cgit v1.2.3 From 9f8e1f603600b300906e695d106a0e5cc39dcf82 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 19 May 2015 16:13:01 +0900 Subject: power_supply: Fix NULL pointer dereference during bq27x00_battery probe commit 8e59c7f23410d5ca6b350a178b861a3d68c49edf upstream. Power supply is often registered during probe of a driver. The power_supply_register() returns pointer to newly allocated structure as return value. However before returning the power_supply_register() calls back the get_property() method provided by the driver through uevent. In that time the driver probe is still in progress and driver did not assigned pointer to power supply to its local variables. This leads to NULL pointer dereference from get_property() function. Starting from bq27x00_battery_probe(): di->bat = power_supply_register() device_add() kobject_uevent() power_supply_uevent() power_supply_show_property() power_supply_get_property() bq27x00_battery_get_property() dereference of (di->bat) which is NULL here The first uevent of power supply (the one coming from device creation) should not call back to the driver. To prevent that from happening, increment the atomic use counter at the end of power_supply_register(). This means that power_supply_get_property() will return -ENODEV. 
IMPORTANT: The patch has impact on this first uevent sent from power supply because it will not contain properties from power supply. The uevent with properties will be sent later after indicating that power supply has changed. This also has a race now, but will be fixed in other patches. Reported-by: H. Nikolaus Schaller Signed-off-by: Krzysztof Kozlowski Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core") Tested-By: Dr. H. Nikolaus Schaller Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/power_supply_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 2ed4a4a6b3c5..15da277e0e8d 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -659,7 +659,6 @@ __power_supply_register(struct device *parent, dev->release = power_supply_dev_release; dev_set_drvdata(dev, psy); psy->desc = desc; - atomic_inc(&psy->use_cnt); if (cfg) { psy->drv_data = cfg->drv_data; psy->of_node = cfg->of_node; @@ -700,6 +699,16 @@ __power_supply_register(struct device *parent, if (rc) goto create_triggers_failed; + /* + * Update use_cnt after any uevents (most notably from device_add()). + * We are here still during driver's probe but + * the power_supply_uevent() calls back driver's get_property + * method so: + * 1. Driver did not assigned the returned struct power_supply, + * 2. Driver could not finish initialization (anything in its probe + * after calling power_supply_register()). + */ + atomic_inc(&psy->use_cnt); power_supply_changed(psy); return psy; -- cgit v1.2.3 From f6795f11a4dfb7fbbd4b34668271a553141c0aa7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 19 May 2015 16:13:02 +0900 Subject: power_supply: Fix possible NULL pointer dereference on early uevent commit 7f1a57fdd6cb6e7be2ed31878a34655df38e1861 upstream. 
Don't call the power_supply_changed() from power_supply_register() when the parent is still probing because it may lead to accessing the parent too early. In bq27x00_battery this caused a NULL pointer exception because uevent of power_supply_changed called back the get_property() method provided by the driver. The get_property() method accessed the pointer which should be returned by power_supply_register(). Starting from bq27x00_battery_probe(): di->bat = power_supply_register() power_supply_changed() kobject_uevent() power_supply_uevent() power_supply_show_property() power_supply_get_property() bq27x00_battery_get_property() dereference of di->bat which is NULL here The dereference of di->bat (value returned by power_supply_register()) is the currently visible problem. However calling back the methods provided by the driver before ending the probe may lead to accessing other driver-related data which is not yet initialized. The call to power_supply_changed() is postponed till probing ends - mutex of parent device is released. Reported-by: H. Nikolaus Schaller Signed-off-by: Krzysztof Kozlowski Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core") Tested-By: Dr. H.
Nikolaus Schaller Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/power_supply_core.c | 50 +++++++++++++++++++++++++++++++++++---- include/linux/power_supply.h | 1 + 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 15da277e0e8d..4bc0c7f459a5 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(power_supply_notifier); static struct device_type power_supply_dev_type; +#define POWER_SUPPLY_DEFERRED_REGISTER_TIME msecs_to_jiffies(10) + static bool __power_supply_is_supplied_by(struct power_supply *supplier, struct power_supply *supply) { @@ -121,6 +123,30 @@ void power_supply_changed(struct power_supply *psy) } EXPORT_SYMBOL_GPL(power_supply_changed); +/* + * Notify that power supply was registered after parent finished the probing. + * + * Often power supply is registered from driver's probe function. However + * calling power_supply_changed() directly from power_supply_register() + * would lead to execution of get_property() function provided by the driver + * too early - before the probe ends. + * + * Avoid that by waiting on parent's mutex. 
+ */ +static void power_supply_deferred_register_work(struct work_struct *work) +{ + struct power_supply *psy = container_of(work, struct power_supply, + deferred_register_work.work); + + if (psy->dev.parent) + mutex_lock(&psy->dev.parent->mutex); + + power_supply_changed(psy); + + if (psy->dev.parent) + mutex_unlock(&psy->dev.parent->mutex); +} + #ifdef CONFIG_OF #include @@ -645,6 +671,10 @@ __power_supply_register(struct device *parent, struct power_supply *psy; int rc; + if (!parent) + pr_warn("%s: Expected proper parent device for '%s'\n", + __func__, desc->name); + psy = kzalloc(sizeof(*psy), GFP_KERNEL); if (!psy) return ERR_PTR(-ENOMEM); @@ -671,6 +701,8 @@ __power_supply_register(struct device *parent, goto dev_set_name_failed; INIT_WORK(&psy->changed_work, power_supply_changed_work); + INIT_DELAYED_WORK(&psy->deferred_register_work, + power_supply_deferred_register_work); rc = power_supply_check_supplies(psy); if (rc) { @@ -709,7 +741,10 @@ __power_supply_register(struct device *parent, * after calling power_supply_register()). 
*/ atomic_inc(&psy->use_cnt); - power_supply_changed(psy); + + queue_delayed_work(system_power_efficient_wq, + &psy->deferred_register_work, + POWER_SUPPLY_DEFERRED_REGISTER_TIME); return psy; @@ -729,7 +764,8 @@ dev_set_name_failed: /** * power_supply_register() - Register new power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -750,7 +786,8 @@ EXPORT_SYMBOL_GPL(power_supply_register); /** * power_supply_register() - Register new non-waking-source power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -779,7 +816,8 @@ static void devm_power_supply_release(struct device *dev, void *res) /** * power_supply_register() - Register managed power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -814,7 +852,8 @@ EXPORT_SYMBOL_GPL(devm_power_supply_register); /** * power_supply_register() - Register managed non-waking-source power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole 
* lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -858,6 +897,7 @@ void power_supply_unregister(struct power_supply *psy) { WARN_ON(atomic_dec_return(&psy->use_cnt)); cancel_work_sync(&psy->changed_work); + cancel_delayed_work_sync(&psy->deferred_register_work); sysfs_remove_link(&psy->dev.kobj, "powers"); power_supply_remove_triggers(psy); psy_unregister_cooler(psy); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 75a1dd8dc56e..a80f1fd01ddb 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -237,6 +237,7 @@ struct power_supply { /* private */ struct device dev; struct work_struct changed_work; + struct delayed_work deferred_register_work; spinlock_t changed_lock; bool changed; atomic_t use_cnt; -- cgit v1.2.3 From 5acb6674291b15ea45d5bd65baff96a896f11ec6 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sat, 13 Jun 2015 08:51:17 +0300 Subject: mei: me: wait for power gating exit confirmation commit 3dc196eae1db548f05e53e5875ff87b8ff79f249 upstream. Fix the hbm power gating state machine so it will wait till it receives confirmation interrupt for the PG_ISOLATION_EXIT message. In process of the suspend flow the devices first have to exit from the power gating state (runtime pm resume). If we do not handle the confirmation interrupt after sending PG_ISOLATION_EXIT message, we may receive it already after the suspend flow has changed the device state and interrupt will be interpreted as a spurious event, consequently link reset will be invoked which will prevent the device from completing the suspend flow kernel: [6603] mei_reset:136: mei_me 0000:00:16.0: powering down: end of reset kernel: [476] mei_me_irq_thread_handler:643: mei_me 0000:00:16.0: function called after ISR to handle the interrupt processing. 
kernel: mei_me 0000:00:16.0: FW not ready: resetting Cc: Gabriele Mazzotta Link: https://bugzilla.kernel.org/show_bug.cgi?id=86241 Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=770397 Tested-by: Gabriele Mazzotta Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 2 +- drivers/misc/mei/hw-me.c | 59 ++++++++++++++++++++++++++++++++++++++++++---- drivers/misc/mei/hw-txe.c | 13 ++++++++++ drivers/misc/mei/mei_dev.h | 11 +++++++++ 4 files changed, 80 insertions(+), 5 deletions(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 1e99ef6a54a2..b2b9f4382d77 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -699,7 +699,7 @@ void mei_host_client_init(struct work_struct *work) bool mei_hbuf_acquire(struct mei_device *dev) { if (mei_pg_state(dev) == MEI_PG_ON || - dev->pg_event == MEI_PG_EVENT_WAIT) { + mei_pg_in_transition(dev)) { dev_dbg(dev->dev, "device is in pg\n"); return false; } diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 6fb75e62a764..43d7101ff993 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -663,17 +663,46 @@ int mei_me_pg_exit_sync(struct mei_device *dev) mutex_lock(&dev->device_lock); reply: - if (dev->pg_event == MEI_PG_EVENT_RECEIVED) - ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (ret) + return ret; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED) + ret = 0; else ret = -ETIME; +out: dev->pg_event = MEI_PG_EVENT_IDLE; hw->pg_state = MEI_PG_OFF; return ret; } +/** + * mei_me_pg_in_transition - is device now in pg transition + 
* + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_me_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event >= MEI_PG_EVENT_WAIT && + dev->pg_event <= MEI_PG_EVENT_INTR_WAIT; +} + /** * mei_me_pg_is_enabled - detect if PG is supported by HW * @@ -704,6 +733,24 @@ notsupported: return false; } +/** + * mei_me_pg_intr - perform pg processing in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_pg_intr(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT) + return; + + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + hw->pg_state = MEI_PG_OFF; + if (waitqueue_active(&dev->wait_pg)) + wake_up(&dev->wait_pg); +} + /** * mei_me_irq_quick_handler - The ISR of the MEI device * @@ -761,6 +808,8 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) goto end; } + mei_me_pg_intr(dev); + /* check if we need to start the dev */ if (!mei_host_is_ready(dev)) { if (mei_hw_is_ready(dev)) { @@ -797,9 +846,10 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* * During PG handshake only allowed write is the replay to the * PG exit message, so block calling write function - * if the pg state is not idle + * if the pg event is in PG handshake */ - if (dev->pg_event == MEI_PG_EVENT_IDLE) { + if (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_RECEIVED) { rets = mei_irq_write_handler(dev, &complete_list); dev->hbuf_is_ready = mei_hbuf_is_ready(dev); } @@ -824,6 +874,7 @@ static const struct mei_hw_ops mei_me_hw_ops = { .hw_config = mei_me_hw_config, .hw_start = mei_me_hw_start, + .pg_in_transition = mei_me_pg_in_transition, .pg_is_enabled = mei_me_pg_is_enabled, .intr_clear = mei_me_intr_clear, diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 7abafe7d120d..964136b35733 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -301,6 
+301,18 @@ int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req) return 0; } +/** + * mei_txe_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_txe_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event == MEI_PG_EVENT_WAIT; +} + /** * mei_txe_pg_is_enabled - detect if PG is supported by HW * @@ -1138,6 +1150,7 @@ static const struct mei_hw_ops mei_txe_hw_ops = { .hw_config = mei_txe_hw_config, .hw_start = mei_txe_hw_start, + .pg_in_transition = mei_txe_pg_in_transition, .pg_is_enabled = mei_txe_pg_is_enabled, .intr_clear = mei_txe_intr_clear, diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index f066ecd71939..f84c39ee28a8 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -271,6 +271,7 @@ struct mei_cl { * @fw_status : get fw status registers * @pg_state : power gating state of the device + * @pg_in_transition : is device now in pg transition * @pg_is_enabled : is power gating enabled * @intr_clear : clear pending interrupts @@ -300,6 +301,7 @@ struct mei_hw_ops { int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); enum mei_pg_state (*pg_state)(struct mei_device *dev); + bool (*pg_in_transition)(struct mei_device *dev); bool (*pg_is_enabled)(struct mei_device *dev); void (*intr_clear)(struct mei_device *dev); @@ -398,11 +400,15 @@ struct mei_cl_device { * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete * @MEI_PG_EVENT_RECEIVED: the driver received pg event + * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt + * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt */ enum mei_pg_event { MEI_PG_EVENT_IDLE, MEI_PG_EVENT_WAIT, MEI_PG_EVENT_RECEIVED, + MEI_PG_EVENT_INTR_WAIT, + MEI_PG_EVENT_INTR_RECEIVED, }; /** @@ -717,6 +723,11 @@ static 
inline enum mei_pg_state mei_pg_state(struct mei_device *dev) return dev->ops->pg_state(dev); } +static inline bool mei_pg_in_transition(struct mei_device *dev) +{ + return dev->ops->pg_in_transition(dev); +} + static inline bool mei_pg_is_enabled(struct mei_device *dev) { return dev->ops->pg_is_enabled(dev); -- cgit v1.2.3 From 6bc62fd9b8493bd6a07bb73a5d63ed28fea40b78 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 14 Apr 2015 10:27:26 +0300 Subject: mei: txe: reduce suspend/resume time commit fe292283c23329218e384bffc6cb4bfa3fd92277 upstream. HW has to be in known state before the initialisation sequence is started. The polling step for settling aliveness was set to 200ms while in practise this can be done in up to 30msecs. Signed-off-by: Tomas Winkler Signed-off-by: Barak Yoresh Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-txe.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 964136b35733..bae680c648ff 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -218,26 +219,25 @@ static u32 mei_txe_aliveness_get(struct mei_device *dev) * * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set * - * Return: > 0 if the expected value was received, -ETIME otherwise + * Return: 0 if the expected value was received, -ETIME otherwise */ static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected) { struct mei_txe_hw *hw = to_txe_hw(dev); - int t = 0; + ktime_t stop, start; + start = ktime_get(); + stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT)); do { hw->aliveness = mei_txe_aliveness_get(dev); if (hw->aliveness == expected) { dev->pg_event = MEI_PG_EVENT_IDLE; - dev_dbg(dev->dev, - "aliveness settled after %d msecs\n", t); - return t; + dev_dbg(dev->dev, "aliveness settled after %lld usecs\n", + ktime_to_us(ktime_sub(ktime_get(), 
start))); + return 0; } - mutex_unlock(&dev->device_lock); - msleep(MSEC_PER_SEC / 5); - mutex_lock(&dev->device_lock); - t += MSEC_PER_SEC / 5; - } while (t < SEC_ALIVENESS_WAIT_TIMEOUT); + usleep_range(20, 50); + } while (ktime_before(ktime_get(), stop)); dev->pg_event = MEI_PG_EVENT_IDLE; dev_err(dev->dev, "aliveness timed out\n"); -- cgit v1.2.3 From eeac30f17f468292f1141c6f5d95afc38a18260f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 12 Jun 2015 11:24:41 +0100 Subject: arm64: Do not attempt to use init_mm in reset_context() commit 565630d503ef24e44c252bed55571b3a0d68455f upstream. After secondary CPU boot or hotplug, the active_mm of the idle thread is &init_mm. The init_mm.pgd (swapper_pg_dir) is only meant for TTBR1_EL1 and must not be set in TTBR0_EL1. Since when active_mm == &init_mm the TTBR0_EL1 is already set to the reserved value, there is no need to perform any context reset. Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/context.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index baa758d37021..76c1e6cd36fc 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -92,6 +92,14 @@ static void reset_context(void *info) unsigned int cpu = smp_processor_id(); struct mm_struct *mm = current->active_mm; + /* + * current->active_mm could be init_mm for the idle thread immediately + * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to + * the reserved value, so no need to reset any context. + */ + if (mm == &init_mm) + return; + smp_rmb(); asid = cpu_last_asid + cpu; -- cgit v1.2.3 From f6b01e505aa5785eaeda028fc0f151599f90b557 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 15 Jun 2015 16:40:27 +0100 Subject: arm64: entry: fix context tracking for el0_sp_pc commit 46b0567c851cf85d6ba6f23eef385ec9111d09bc upstream. 
Commit 6c81fe7925cc4c42 ("arm64: enable context tracking") did not update el0_sp_pc to use ct_user_exit, but this appears to have been unintentional. In commit 6ab6463aeb5fbc75 ("arm64: adjust el0_sync so that a function can be called") we made x0 available, and in the return to userspace we call ct_user_enter in the kernel_exit macro. Due to this, we currently don't correctly inform RCU of the user->kernel transition, and may erroneously account for time spent in the kernel as if we were in an extended quiescent state when CONFIG_CONTEXT_TRACKING is enabled. As we do record the kernel->user transition, a userspace application making accesses from an unaligned stack pointer can demonstrate the imbalance, provoking the following warning: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 3660 at kernel/context_tracking.c:75 context_tracking_enter+0xd8/0xe4() Modules linked in: CPU: 2 PID: 3660 Comm: a.out Not tainted 4.1.0-rc7+ #8 Hardware name: ARM Juno development board (r0) (DT) Call trace: [] dump_backtrace+0x0/0x124 [] show_stack+0x10/0x1c [] dump_stack+0x84/0xc8 [] warn_slowpath_common+0x98/0xd0 [] warn_slowpath_null+0x14/0x20 [] context_tracking_enter+0xd4/0xe4 [] preempt_schedule_irq+0xd4/0x114 [] el1_preempt+0x4/0x28 [] exit_files+0x38/0x4c [] do_exit+0x430/0x978 [] do_group_exit+0x40/0xd4 [] get_signal+0x23c/0x4f4 [] do_signal+0x1ac/0x518 [] do_notify_resume+0x5c/0x68 ---[ end trace 963c192600337066 ]--- This patch adds the missing ct_user_exit to the el0_sp_pc entry path, correcting the context tracking for this case. 
Signed-off-by: Mark Rutland Acked-by: Will Deacon Fixes: 6c81fe7925cc ("arm64: enable context tracking") Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 959fe8733560..bddd04d031db 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -517,6 +517,7 @@ el0_sp_pc: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp -- cgit v1.2.3 From da8de4cde423fcd62233467ff9c067137beda0e8 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Tue, 16 Jun 2015 17:38:47 +0100 Subject: arm64: mm: Fix freeing of the wrong memmap entries with !SPARSEMEM_VMEMMAP commit b9bcc919931611498e856eae9bf66337330d04cc upstream. The memmap freeing code in free_unused_memmap() computes the end of each memblock by adding the memblock size onto the base. However, if SPARSEMEM is enabled then the value (start) used for the base may already have been rounded downwards to work out which memmap entries to free after the previous memblock. This may cause memmap entries that are in use to get freed. In general, you're not likely to hit this problem unless there are at least 2 memblocks and one of them is not aligned to a sparsemem section boundary. Note that carve-outs can increase the number of memblocks by splitting the regions listed in the device tree. This problem doesn't occur with SPARSEMEM_VMEMMAP, because the vmemmap code deals with freeing the unused regions of the memmap instead of requiring the arch code to do it. This patch gets the memblock base out of the memblock directly when computing the block end address to ensure the correct value is used. 
Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 597831bdddf3..ad87ce826cce 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -262,7 +262,7 @@ static void __init free_unused_memmap(void) * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. */ - prev_end = ALIGN(start + __phys_to_pfn(reg->size), + prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), MAX_ORDER_NR_PAGES); } -- cgit v1.2.3 From e3334dca73de24e5798759b14ed9e4f58e241fbd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Jun 2015 13:56:33 +0100 Subject: arm64: vdso: work-around broken ELF toolchains in Makefile commit 6f1a6ae87c0c60d7c462ef8fd071f291aa7a9abb upstream. When building the kernel with a bare-metal (ELF) toolchain, the -shared option may not be passed down to collect2, resulting in silent corruption of the vDSO image (in particular, the DYNAMIC section is omitted). The effect of this corruption is that the dynamic linker fails to find the vDSO symbols and libc is instead used for the syscalls that we intended to optimise (e.g. gettimeofday). Functionally, there is no issue as the sigreturn trampoline is still intact and located by the kernel. This patch fixes the problem by explicitly passing -shared to the linker when building the vDSO. 
Reported-by: Szabolcs Nagy Reported-by: James Greenlaigh Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/vdso/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index ff3bddea482d..f6fe17d88da5 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -15,6 +15,10 @@ ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared +# down to collect2, resulting in silent corruption of the vDSO image. +ccflags-y += -Wl,-shared + obj-y += vdso.o extra-y += vdso.lds vdso-offsets.h CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -- cgit v1.2.3 From 3baf726f001b69454f3eb18a589c508992622be9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 24 Jun 2015 16:58:26 -0700 Subject: mm: kmemleak: allow safe memory scanning during kmemleak disabling commit c5f3b1a51a591c18c8b33983908e7fdda6ae417e upstream. The kmemleak scanning thread can run for minutes. Callbacks like kmemleak_free() are allowed during this time, the race being taken care of by the object->lock spinlock. Such lock also prevents a memory block from being freed or unmapped while it is being scanned by blocking the kmemleak_free() -> ... -> __delete_object() function until the lock is released in scan_object(). When a kmemleak error occurs (e.g. it fails to allocate its metadata), kmemleak_enabled is cleared and __delete_object() is no longer called on freed objects. If kmemleak_scan is running at the same time, kmemleak_free() no longer waits for the object scanning to complete, allowing the corresponding memory block to be freed or unmapped (in the case of vfree()). This leads to kmemleak_scan potentially triggering a page fault.
This patch separates the kmemleak_free() enabling/disabling from the overall kmemleak_enabled nob so that we can defer the disabling of the object freeing tracking until the scanning thread completed. The kmemleak_free_part() is deliberately ignored by this patch since this is only called during boot before the scanning thread started. Signed-off-by: Catalin Marinas Reported-by: Vignesh Radhakrishnan Tested-by: Vignesh Radhakrishnan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/kmemleak.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index f0fe4f2c1fa7..41df5b8efd25 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -195,6 +195,8 @@ static struct kmem_cache *scan_area_cache; /* set if tracing memory operations is enabled */ static int kmemleak_enabled; +/* same as above but only for the kmemleak_free() callback */ +static int kmemleak_free_enabled; /* set in the late_initcall if there were no errors */ static int kmemleak_initialized; /* enables or disables early logging of the memory operations */ @@ -942,7 +944,7 @@ void __ref kmemleak_free(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); - if (kmemleak_enabled && ptr && !IS_ERR(ptr)) + if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) delete_object_full((unsigned long)ptr); else if (kmemleak_early_log) log_early(KMEMLEAK_FREE, ptr, 0, 0); @@ -982,7 +984,7 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr) pr_debug("%s(0x%p)\n", __func__, ptr); - if (kmemleak_enabled && ptr && !IS_ERR(ptr)) + if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) for_each_possible_cpu(cpu) delete_object_full((unsigned long)per_cpu_ptr(ptr, cpu)); @@ -1750,6 +1752,13 @@ static void kmemleak_do_cleanup(struct work_struct *work) mutex_lock(&scan_mutex); stop_scan_thread(); + /* + * Once the scan thread has stopped, it is safe to no longer track + * object freeing. 
Ordering of the scan thread stopping and the memory + * accesses below is guaranteed by the kthread_stop() function. + */ + kmemleak_free_enabled = 0; + if (!kmemleak_found_leaks) __kmemleak_do_cleanup(); else @@ -1776,6 +1785,8 @@ static void kmemleak_disable(void) /* check whether it is too early for a kernel thread */ if (kmemleak_initialized) schedule_work(&cleanup_work); + else + kmemleak_free_enabled = 0; pr_info("Kernel memory leak detector disabled\n"); } @@ -1840,8 +1851,10 @@ void __init kmemleak_init(void) if (kmemleak_error) { local_irq_restore(flags); return; - } else + } else { kmemleak_enabled = 1; + kmemleak_free_enabled = 1; + } local_irq_restore(flags); /* -- cgit v1.2.3 From 03445a4c2324f4adddd6b6c9b92879c1c754238a Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 24 Jun 2015 16:58:51 -0700 Subject: mm: kmemleak_alloc_percpu() should follow the gfp from per_alloc() commit 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 upstream. Beginning at commit d52d3997f843 ("ipv6: Create percpu rt6_info"), the following INFO splat is logged: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc7-next-20150612 #1 Not tainted ------------------------------- kernel/sched/core.c:7318 Illegal context switch in RCU-bh read-side critical section! 
other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 3 locks held by systemd/1: #0: (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv+0x1f/0x40 #1: (rcu_read_lock_bh){......}, at: [] ipv6_add_addr+0x62/0x540 #2: (addrconf_hash_lock){+...+.}, at: [] ipv6_add_addr+0x184/0x540 stack backtrace: CPU: 0 PID: 1 Comm: systemd Not tainted 4.1.0-rc7-next-20150612 #1 Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.20 04/17/2014 Call Trace: dump_stack+0x4c/0x6e lockdep_rcu_suspicious+0xe7/0x120 ___might_sleep+0x1d5/0x1f0 __might_sleep+0x4d/0x90 kmem_cache_alloc+0x47/0x250 create_object+0x39/0x2e0 kmemleak_alloc_percpu+0x61/0xe0 pcpu_alloc+0x370/0x630 Additional backtrace lines are truncated. In addition, the above splat is followed by several "BUG: sleeping function called from invalid context at mm/slub.c:1268" outputs. As suggested by Martin KaFai Lau, these are the clue to the fix. Routine kmemleak_alloc_percpu() always uses GFP_KERNEL for its allocations, whereas it should follow the gfp from its callers. 
Reviewed-by: Catalin Marinas Reviewed-by: Kamalesh Babulal Acked-by: Martin KaFai Lau Signed-off-by: Larry Finger Cc: Martin KaFai Lau Cc: Catalin Marinas Cc: Tejun Heo Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/kmemleak.h | 6 ++++-- mm/kmemleak.c | 9 +++++---- mm/percpu.c | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index e705467ddb47..d0a1f99e24e3 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,7 +28,8 @@ extern void kmemleak_init(void) __ref; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; -extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref; +extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) __ref; extern void kmemleak_free(const void *ptr) __ref; extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; @@ -71,7 +72,8 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, gfp_t gfp) { } -static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) +static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) { } static inline void kmemleak_free(const void *ptr) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 41df5b8efd25..3716cdb8ba42 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -909,12 +909,13 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc); * kmemleak_alloc_percpu - register a newly allocated __percpu object * @ptr: __percpu pointer to beginning of the object * @size: size of the object + * @gfp: flags used for kmemleak internal memory allocations * * This function is called from the kernel percpu allocator when a new object - * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL - * allocation. 
+ * (memory block) is allocated (alloc_percpu). */ -void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) +void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) { unsigned int cpu; @@ -927,7 +928,7 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) if (kmemleak_enabled && ptr && !IS_ERR(ptr)) for_each_possible_cpu(cpu) create_object((unsigned long)per_cpu_ptr(ptr, cpu), - size, 0, GFP_KERNEL); + size, 0, gfp); else if (kmemleak_early_log) log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0); } diff --git a/mm/percpu.c b/mm/percpu.c index dfd02484e8de..2dd74487a0af 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1030,7 +1030,7 @@ area_found: memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); - kmemleak_alloc_percpu(ptr, size); + kmemleak_alloc_percpu(ptr, size, gfp); return ptr; fail_unlock: -- cgit v1.2.3 From 1021c97205005db4f101e7fa55095ec13118ac03 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 24 Jun 2015 16:58:48 -0700 Subject: mm, thp: respect MPOL_PREFERRED policy with non-local node commit 0867a57c4f80a566dda1bac975b42fcd857cb489 upstream. Since commit 077fcf116c8c ("mm/thp: allocate transparent hugepages on local node"), we handle THP allocations on page fault in a special way - for non-interleave memory policies, the allocation is only attempted on the node local to the current CPU, if the policy's nodemask allows the node. This is motivated by the assumption that THP benefits cannot offset the cost of remote accesses, so it's better to fallback to base pages on the local node (which might still be available, while huge pages are not due to fragmentation) than to allocate huge pages on a remote node. The nodemask check prevents us from violating e.g. MPOL_BIND policies where the local node is not among the allowed nodes. 
However, the current implementation can still give surprising results for the MPOL_PREFERRED policy when the preferred node is different than the current CPU's local node. In such case we should honor the preferred node and not use the local node, which is what this patch does. If hugepage allocation on the preferred node fails, we fall back to base pages and don't try other nodes, with the same motivation as is done for the local node hugepage allocations. The patch also moves the MPOL_INTERLEAVE check around to simplify the hugepage specific test. The difference can be demonstrated using in-tree transhuge-stress test on the following 2-node machine where half memory on one node was occupied to show the difference. > numactl --hardware available: 2 nodes (0-1) node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 24 25 26 27 28 29 30 31 32 33 34 35 node 0 size: 7878 MB node 0 free: 3623 MB node 1 cpus: 12 13 14 15 16 17 18 19 20 21 22 23 36 37 38 39 40 41 42 43 44 45 46 47 node 1 size: 8045 MB node 1 free: 7818 MB node distances: node 0 1 0: 10 21 1: 21 10 Before the patch: > numactl -p0 -C0 ./transhuge-stress transhuge-stress: 2.197 s/loop, 0.276 ms/page, 7249.168 MiB/s 7962 succeed, 0 failed, 1786 different pages > numactl -p0 -C12 ./transhuge-stress transhuge-stress: 2.962 s/loop, 0.372 ms/page, 5376.172 MiB/s 7962 succeed, 0 failed, 3873 different pages Number of successful THP allocations corresponds to free memory on node 0 in the first case and node 1 in the second case, i.e. -p parameter is ignored and cpu binding "wins". 
After the patch: > numactl -p0 -C0 ./transhuge-stress transhuge-stress: 2.183 s/loop, 0.274 ms/page, 7295.516 MiB/s 7962 succeed, 0 failed, 1760 different pages > numactl -p0 -C12 ./transhuge-stress transhuge-stress: 2.878 s/loop, 0.361 ms/page, 5533.638 MiB/s 7962 succeed, 0 failed, 1750 different pages > numactl -p1 -C0 ./transhuge-stress transhuge-stress: 4.628 s/loop, 0.581 ms/page, 3440.893 MiB/s 7962 succeed, 0 failed, 3918 different pages The -p parameter is respected regardless of cpu binding. > numactl -C0 ./transhuge-stress transhuge-stress: 2.202 s/loop, 0.277 ms/page, 7230.003 MiB/s 7962 succeed, 0 failed, 1750 different pages > numactl -C12 ./transhuge-stress transhuge-stress: 3.020 s/loop, 0.379 ms/page, 5273.324 MiB/s 7962 succeed, 0 failed, 3916 different pages Without -p parameter, hugepage restriction to CPU-local node works as before. Fixes: 077fcf116c8c ("mm/thp: allocate transparent hugepages on local node") Signed-off-by: Vlastimil Babka Cc: Aneesh Kumar K.V Acked-by: David Rientjes Cc: Kirill A. 
Shutemov Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 747743237d9f..99d4c1d0b858 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1972,35 +1972,41 @@ retry_cpuset: pol = get_vma_policy(vma, addr); cpuset_mems_cookie = read_mems_allowed_begin(); - if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage && - pol->mode != MPOL_INTERLEAVE)) { + if (pol->mode == MPOL_INTERLEAVE) { + unsigned nid; + + nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); + mpol_cond_put(pol); + page = alloc_page_interleave(gfp, order, nid); + goto out; + } + + if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { + int hpage_node = node; + /* * For hugepage allocation and non-interleave policy which - * allows the current node, we only try to allocate from the - * current node and don't fall back to other nodes, as the - * cost of remote accesses would likely offset THP benefits. + * allows the current node (or other explicitly preferred + * node) we only try to allocate from the current/preferred + * node and don't fall back to other nodes, as the cost of + * remote accesses would likely offset THP benefits. * * If the policy is interleave, or does not allow the current * node in its nodemask, we allocate the standard way. 
*/ + if (pol->mode == MPOL_PREFERRED && + !(pol->flags & MPOL_F_LOCAL)) + hpage_node = pol->v.preferred_node; + nmask = policy_nodemask(gfp, pol); - if (!nmask || node_isset(node, *nmask)) { + if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); - page = alloc_pages_exact_node(node, + page = alloc_pages_exact_node(hpage_node, gfp | __GFP_THISNODE, order); goto out; } } - if (pol->mode == MPOL_INTERLEAVE) { - unsigned nid; - - nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); - mpol_cond_put(pol); - page = alloc_page_interleave(gfp, order, nid); - goto out; - } - nmask = policy_nodemask(gfp, pol); zl = policy_zonelist(gfp, pol, node); mpol_cond_put(pol); -- cgit v1.2.3 From 88822cdb258714ce2fdc9a6eed87e7dd7d205a6b Mon Sep 17 00:00:00 2001 From: Arun Chandran Date: Mon, 15 Jun 2015 15:59:02 +0530 Subject: regmap: Fix regmap_bulk_read in BE mode commit 15b8d2c41fe5839582029f65c5f7004db451cc2b upstream. In big endian mode regmap_bulk_read gives incorrect data for byte reads. This is because memcpy of a single byte from an address after full word read gives different results when endianness differs. ie. we get little-end in LE and big-end in BE. 
Signed-off-by: Arun Chandran Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 6273ff072f3e..9f7f78ede01b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2318,7 +2318,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, &ival); if (ret != 0) return ret; - memcpy(val + (i * val_bytes), &ival, val_bytes); + map->format.format_val(val + (i * val_bytes), ival, 0); } } -- cgit v1.2.3 From 6489f7a496378983980e8219dfaeaf66d581590b Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Tue, 16 Jun 2015 13:53:19 +0200 Subject: regmap: Fix possible shift overflow in regmap_field_init() commit 921cc29473a0d7c109105c1876ddb432f4a4be7d upstream. The way the mask is generated in regmap_field_init() is wrong. Indeed, a field initialized with msb = 31 and lsb = 0 provokes a shift overflow while calculating the mask field. On some 32 bits architectures, such as x86, the generated mask is 0, instead of the expected 0xffffffff. This patch uses GENMASK() to fix the problem, as this macro is already safe regarding shift overflow. 
Signed-off-by: Maxime Coquelin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 9f7f78ede01b..1c76dcb502cf 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -945,11 +945,10 @@ EXPORT_SYMBOL_GPL(devm_regmap_init); static void regmap_field_init(struct regmap_field *rm_field, struct regmap *regmap, struct reg_field reg_field) { - int field_bits = reg_field.msb - reg_field.lsb + 1; rm_field->regmap = regmap; rm_field->reg = reg_field.reg; rm_field->shift = reg_field.lsb; - rm_field->mask = ((BIT(field_bits) - 1) << reg_field.lsb); + rm_field->mask = GENMASK(reg_field.msb, reg_field.lsb); rm_field->id_size = reg_field.id_size; rm_field->id_offset = reg_field.id_offset; } -- cgit v1.2.3 From 1a9850fbeb65734827a7dd0088ea357b0f22a242 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 18 May 2015 10:01:03 -0700 Subject: regulator: max77686: fix gpio_enabled shift wrapping bug commit c53403a37cf083ce85da720f18918f73580d0064 upstream. The code should handle more than 32 bits here because "id" can be a value up to MAX77686_REGULATORS (currently 34). Convert the gpio_enabled type to DECLARE_BITMAP and use test_bit/set_bit. 
Fixes: 3307e9025d29 ("regulator: max77686: Add GPIO control") Reported-by: Dan Carpenter Signed-off-by: Joe Perches Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/max77686.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/max77686.c b/drivers/regulator/max77686.c index 15fb1416bfbd..c064e32fb3b9 100644 --- a/drivers/regulator/max77686.c +++ b/drivers/regulator/max77686.c @@ -88,7 +88,7 @@ enum max77686_ramp_rate { }; struct max77686_data { - u64 gpio_enabled:MAX77686_REGULATORS; + DECLARE_BITMAP(gpio_enabled, MAX77686_REGULATORS); /* Array indexed by regulator id */ unsigned int opmode[MAX77686_REGULATORS]; @@ -121,7 +121,7 @@ static unsigned int max77686_map_normal_mode(struct max77686_data *max77686, case MAX77686_BUCK8: case MAX77686_BUCK9: case MAX77686_LDO20 ... MAX77686_LDO22: - if (max77686->gpio_enabled & (1 << id)) + if (test_bit(id, max77686->gpio_enabled)) return MAX77686_GPIO_CONTROL; } @@ -277,7 +277,7 @@ static int max77686_of_parse_cb(struct device_node *np, } if (gpio_is_valid(config->ena_gpio)) { - max77686->gpio_enabled |= (1 << desc->id); + set_bit(desc->id, max77686->gpio_enabled); return regmap_update_bits(config->regmap, desc->enable_reg, desc->enable_mask, -- cgit v1.2.3 From c17210c30c65355713afb618da1e24b970fa69c8 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 9 Jun 2015 20:09:42 +0000 Subject: regulator: core: fix constraints output buffer commit a7068e3932eee8268c4ce4e080a338ee7b8a27bf upstream. The buffer for constraints debug isn't big enough to hold the output in all cases. So fix this issue by increasing the buffer. 
Signed-off-by: Stefan Wahren Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 443eaab933fc..8a28116b5805 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -779,7 +779,7 @@ static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state) static void print_constraints(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; - char buf[80] = ""; + char buf[160] = ""; int count = 0; int ret; -- cgit v1.2.3 From 9da8e034daa3670221442392ce9ea17474591c34 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Mon, 1 Jun 2015 17:48:37 +0200 Subject: livepatch: add module locking around kallsyms calls commit 9a1bd63cdae4b623494c4ebaf723a91c35ec49fb upstream. The list of loaded modules is walked through in module_kallsyms_on_each_symbol (called by kallsyms_on_each_symbol). The module_mutex lock should be acquired to prevent potential corruptions in the list. This was uncovered with new lockdep asserts in module code introduced by the commit 0be964be0d45 ("module: Sanitize RCU usage and locking") in recent next- trees. 
Signed-off-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- kernel/livepatch/core.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 284e2691e380..9ec555732f1a 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -179,7 +179,9 @@ static int klp_find_object_symbol(const char *objname, const char *name, .count = 0 }; + mutex_lock(&module_mutex); kallsyms_on_each_symbol(klp_find_callback, &args); + mutex_unlock(&module_mutex); if (args.count == 0) pr_err("symbol '%s' not found in symbol table\n", name); @@ -219,13 +221,19 @@ static int klp_verify_vmlinux_symbol(const char *name, unsigned long addr) .name = name, .addr = addr, }; + int ret; - if (kallsyms_on_each_symbol(klp_verify_callback, &args)) - return 0; + mutex_lock(&module_mutex); + ret = kallsyms_on_each_symbol(klp_verify_callback, &args); + mutex_unlock(&module_mutex); - pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n", - name, addr); - return -EINVAL; + if (!ret) { + pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n", + name, addr); + return -EINVAL; + } + + return 0; } static int klp_find_verify_func_addr(struct klp_object *obj, -- cgit v1.2.3 From 927973d93be4524ad25518e0e3412cd27d425e29 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sun, 10 May 2015 07:50:45 +0000 Subject: spi: fix race freeing dummy_tx/rx before it is unmapped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8e76ef88f607174082023f50b87fe12dcdbe5db5 upstream. Fix a race (with some kernel configurations) where a queued master->pump_messages runs and frees dummy_tx/rx before spi_unmap_msg is running (or is finished). 
This results in the following messages: BUG: Bad page state in process page:db7ba030 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x200(arch_1) page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set ... Reported-by: Noralf Trønnes Suggested-by: Noralf Trønnes Tested-by: Noralf Trønnes Signed-off-by: Martin Sperl Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 50910d85df5a..d35c1a13217c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -988,9 +988,6 @@ void spi_finalize_current_message(struct spi_master *master) spin_lock_irqsave(&master->queue_lock, flags); mesg = master->cur_msg; - master->cur_msg = NULL; - - queue_kthread_work(&master->kworker, &master->pump_messages); spin_unlock_irqrestore(&master->queue_lock, flags); spi_unmap_msg(master, mesg); @@ -1003,9 +1000,13 @@ void spi_finalize_current_message(struct spi_master *master) } } - trace_spi_message_done(mesg); - + spin_lock_irqsave(&master->queue_lock, flags); + master->cur_msg = NULL; master->cur_msg_prepared = false; + queue_kthread_work(&master->kworker, &master->pump_messages); + spin_unlock_irqrestore(&master->queue_lock, flags); + + trace_spi_message_done(mesg); mesg->state = NULL; if (mesg->complete) -- cgit v1.2.3 From 46afcceeebddd0f6e1d328c9c9c93059adffdf08 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 26 May 2015 11:44:42 +0200 Subject: spi: orion: Fix maximum baud rates for Armada 370/XP commit ce2f6ea1cbd41d78224f703af980a6ceeb0eb56a upstream. The commit df59fa7f4bca "spi: orion: support armada extended baud rates" was too optimistic for the maximum baud rate that the Armada SoCs can support. According to the hardware datasheet the maximum frequency supported by the Armada 370 SoC is tclk/4. But for the Armada XP, Armada 38x and Armada 39x SoCs the limitation is 50MHz and for the Armada 375 it is tclk/15. 
Currently the armada-370-spi compatible is only used by the Armada 370 and the Armada XP device tree. On Armada 370, tclk cannot be higher than 200MHz. In order to be able to handle both SoCs, we can take the minimum of 50MHz and tclk/4. A proper solution is adding a compatible string for each SoC, but it can't be done as a fix for compatibility reason (we can't modify device tree that have been already released) and it will be part of a separate patch. Fixes: df59fa7f4bca (spi: orion: support armada extended baud rates) Reported-by: Kostya Porotchkin Signed-off-by: Gregory CLEMENT Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-orion.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 861664776672..ff97cabdaa81 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -61,6 +61,12 @@ enum orion_spi_type { struct orion_spi_dev { enum orion_spi_type typ; + /* + * min_divisor and max_hz should be exclusive, the only we can + * have both is for managing the armada-370-spi case with old + * device tree + */ + unsigned long max_hz; unsigned int min_divisor; unsigned int max_divisor; u32 prescale_mask; @@ -387,8 +393,9 @@ static const struct orion_spi_dev orion_spi_dev_data = { static const struct orion_spi_dev armada_spi_dev_data = { .typ = ARMADA_SPI, - .min_divisor = 1, + .min_divisor = 4, .max_divisor = 1920, + .max_hz = 50000000, .prescale_mask = ARMADA_SPI_CLK_PRESCALE_MASK, }; @@ -454,7 +461,21 @@ static int orion_spi_probe(struct platform_device *pdev) goto out; tclk_hz = clk_get_rate(spi->clk); - master->max_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->min_divisor); + + /* + * With old device tree, armada-370-spi could be used with + * Armada XP, however for this SoC the maximum frequency is + * 50MHz instead of tclk/4. On Armada 370, tclk cannot be + * higher than 200MHz. 
So, in order to be able to handle both + * SoCs, we can take the minimum of 50MHz and tclk/4. + */ + if (of_device_is_compatible(pdev->dev.of_node, + "marvell,armada-370-spi")) + master->max_speed_hz = min(devdata->max_hz, + DIV_ROUND_UP(tclk_hz, devdata->min_divisor)); + else + master->max_speed_hz = + DIV_ROUND_UP(tclk_hz, devdata->min_divisor); master->min_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->max_divisor); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); -- cgit v1.2.3 From bcd201e2ae1c3e431a89a02a48c05112f19eb65e Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 11 May 2015 12:20:18 -0300 Subject: spi: pl022: Specify 'num-cs' property as required in devicetree binding commit ea6055c46eda1e19e02209814955e13f334bbe1b upstream. Since commit 39a6ac11df65 ("spi/pl022: Devicetree support w/o platform data") the 'num-cs' parameter cannot be passed through platform data when probing with devicetree. Instead, it's a required devicetree property. Fix the binding documentation so the property is properly specified. Fixes: 39a6ac11df65 ("spi/pl022: Devicetree support w/o platform data") Signed-off-by: Ezequiel Garcia Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/spi/spi_pl022.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt index 22ed6797216d..4d1673ca8cf8 100644 --- a/Documentation/devicetree/bindings/spi/spi_pl022.txt +++ b/Documentation/devicetree/bindings/spi/spi_pl022.txt @@ -4,9 +4,9 @@ Required properties: - compatible : "arm,pl022", "arm,primecell" - reg : Offset and length of the register set for the device - interrupts : Should contain SPI controller interrupt +- num-cs : total number of chipselects Optional properties: -- num-cs : total number of chipselects - cs-gpios : should specify GPIOs used for chipselects. 
The gpios will be referred to as reg = <index> in the SPI child nodes. If unspecified, a single SPI device without a chip select can be used. -- cgit v1.2.3 From 3705ac339307a3f6b5a84e48cb82be0167b4e633 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:22:19 +0200 Subject: scsi_transport_srp: Introduce srp_wait_for_queuecommand() commit be34c62ddf39d1931780b07a6f4241393e4ba2ee upstream. Introduce the helper function srp_wait_for_queuecommand(). Move the definition of scsi_request_fn_active(). Add a comment above srp_wait_for_queuecommand() that support for scsi-mq needs to be added. This patch does not change any functionality. A second call to srp_wait_for_queuecommand() will be introduced in the next patch. Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_transport_srp.c | 54 ++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index ae45bd99baed..e05cd7e2d6d3 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -396,6 +396,36 @@ static void srp_reconnect_work(struct work_struct *work) } } +/** + * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() + * @shost: SCSI host for which to count the number of scsi_request_fn() callers. + * + * To do: add support for scsi-mq in this function. + */ +static int scsi_request_fn_active(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + struct request_queue *q; + int request_fn_active = 0; + + shost_for_each_device(sdev, shost) { + q = sdev->request_queue; + + spin_lock_irq(q->queue_lock); + request_fn_active += q->request_fn_active; + spin_unlock_irq(q->queue_lock); + } + + return request_fn_active; +} + +/* Wait until ongoing shost->hostt->queuecommand() calls have finished. 
*/ +static void srp_wait_for_queuecommand(struct Scsi_Host *shost) +{ + while (scsi_request_fn_active(shost)) + msleep(20); +} + static void __rport_fail_io_fast(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); @@ -503,27 +533,6 @@ void srp_start_tl_fail_timers(struct srp_rport *rport) } EXPORT_SYMBOL(srp_start_tl_fail_timers); -/** - * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() - * @shost: SCSI host for which to count the number of scsi_request_fn() callers. - */ -static int scsi_request_fn_active(struct Scsi_Host *shost) -{ - struct scsi_device *sdev; - struct request_queue *q; - int request_fn_active = 0; - - shost_for_each_device(sdev, shost) { - q = sdev->request_queue; - - spin_lock_irq(q->queue_lock); - request_fn_active += q->request_fn_active; - spin_unlock_irq(q->queue_lock); - } - - return request_fn_active; -} - /** * srp_reconnect_rport() - reconnect to an SRP target port * @rport: SRP target port. @@ -559,8 +568,7 @@ int srp_reconnect_rport(struct srp_rport *rport) if (res) goto out; scsi_target_block(&shost->shost_gendev); - while (scsi_request_fn_active(shost)) - msleep(20); + srp_wait_for_queuecommand(shost); res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); -- cgit v1.2.3 From 3646ac368739a302008cc7bf33a985da2cfa1a17 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:22:44 +0200 Subject: scsi_transport_srp: Fix a race condition commit 535fb906225fb7436cb658144d0c0cea14a26f3e upstream. Avoid that srp_terminate_io() can get invoked while srp_queuecommand() is in progress. 
This patch avoids that an I/O timeout can trigger the following kernel warning: WARNING: at drivers/infiniband/ulp/srp/ib_srp.c:1447 srp_terminate_io+0xef/0x100 [ib_srp]() Call Trace: [] dump_stack+0x4e/0x68 [] warn_slowpath_common+0x81/0xa0 [] warn_slowpath_null+0x1a/0x20 [] srp_terminate_io+0xef/0x100 [ib_srp] [] __rport_fail_io_fast+0xba/0xc0 [scsi_transport_srp] [] rport_fast_io_fail_timedout+0xe0/0xf0 [scsi_transport_srp] [] process_one_work+0x1db/0x780 [] worker_thread+0x11b/0x450 [] kthread+0xe4/0x100 [] ret_from_fork+0x7c/0xb0 See also patch "scsi_transport_srp: Add transport layer error handling" (commit ID 29c17324803c). Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_transport_srp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index e05cd7e2d6d3..f115f67a6ba5 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -439,8 +439,10 @@ static void __rport_fail_io_fast(struct srp_rport *rport) /* Involve the LLD if possible to terminate all I/O on the rport. */ i = to_srp_internal(shost->transportt); - if (i->f->terminate_rport_io) + if (i->f->terminate_rport_io) { + srp_wait_for_queuecommand(shost); i->f->terminate_rport_io(rport); + } } /** -- cgit v1.2.3 From 5e72c7cc287d9415debd623e96ac275f20d37645 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:23:14 +0200 Subject: IB/srp: Remove an extraneous scsi_host_put() from an error path commit fb49c8bbaae70b14fea2b4590a90a21539f88526 upstream. Fix a scsi_get_host() / scsi_host_put() imbalance in the error path of srp_create_target(). See also patch "IB/srp: Avoid that I/O hangs due to a cable pull during LUN scanning" (commit ID 34aa654ecb8e). 
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 918814cd0f80..5ce6cfd86476 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3172,11 +3172,11 @@ static ssize_t srp_create_target(struct device *dev, ret = srp_parse_options(buf, target); if (ret) - goto err; + goto out; ret = scsi_init_shared_tag_map(target_host, target_host->can_queue); if (ret) - goto err; + goto out; target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; @@ -3187,7 +3187,7 @@ static ssize_t srp_create_target(struct device *dev, be64_to_cpu(target->ioc_guid), be64_to_cpu(target->initiator_ext)); ret = -EEXIST; - goto err; + goto out; } if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && @@ -3208,7 +3208,7 @@ static ssize_t srp_create_target(struct device *dev, spin_lock_init(&target->lock); ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) - goto err; + goto out; ret = -ENOMEM; target->ch_count = max_t(unsigned, num_online_nodes(), @@ -3219,7 +3219,7 @@ static ssize_t srp_create_target(struct device *dev, target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) - goto err; + goto out; node_idx = 0; for_each_online_node(node) { @@ -3315,9 +3315,6 @@ err_disconnect: } kfree(target->ch); - -err: - scsi_host_put(target_host); goto out; } -- cgit v1.2.3 From 86e4f5b10e79205f5eb254a3c60d111931f2a274 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:23:36 +0200 Subject: IB/srp: Fix a connection setup race commit 8de9fe3a1d4ac8c3e4953fa4b7d81f863f5196ad upstream. Avoid that receiving a DREQ while RDMA channels are being established causes target->qp_in_error to be reset. 
Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5ce6cfd86476..f2daabd1e91d 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -993,8 +993,6 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) WARN_ON_ONCE(!multich && target->connected); - target->qp_in_error = false; - ret = srp_lookup_path(ch); if (ret) return ret; @@ -1243,6 +1241,9 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (j = 0; j < target->queue_size; ++j) list_add(&ch->tx_ring[j]->list, &ch->free_tx); } + + target->qp_in_error = false; + for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; if (ret || !ch->target) { -- cgit v1.2.3 From c1ab680046ba170050e91ec49577699de1a24e1a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:23:57 +0200 Subject: IB/srp: Fix connection state tracking commit c014c8cd31b161e12deb81c0f7f477811bd1eddc upstream. Reception of a DREQ message only causes the state of a single channel to change. Hence move the 'connected' member variable from the target to the channel data structure. 
This patch avoids that following false positive warning can be reported by srp_destroy_qp(): WARNING: at drivers/infiniband/ulp/srp/ib_srp.c:617 srp_destroy_qp+0xa6/0x120 [ib_srp]() Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] srp_destroy_qp+0xa6/0x120 [ib_srp] [] srp_free_ch_ib+0x82/0x1e0 [ib_srp] [] srp_create_target+0x7ab/0x998 [ib_srp] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xef/0x170 [] vfs_write+0xc8/0x190 [] sys_write+0x51/0x90 Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 60 ++++++++++++++++++------------------- drivers/infiniband/ulp/srp/ib_srp.h | 2 +- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f2daabd1e91d..c418d1fde1ed 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -465,14 +465,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) */ static void srp_destroy_qp(struct srp_rdma_ch *ch) { - struct srp_target_port *target = ch->target; static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; struct ib_recv_wr *bad_wr; int ret; /* Destroying a QP and reusing ch->done is only safe if not connected */ - WARN_ON_ONCE(target->connected); + WARN_ON_ONCE(ch->connected); ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret); @@ -811,35 +810,19 @@ static bool srp_queue_remove_work(struct srp_target_port *target) return changed; } -static bool srp_change_conn_state(struct srp_target_port *target, - bool connected) -{ - bool changed = false; - - spin_lock_irq(&target->lock); - if (target->connected != connected) { - target->connected = connected; - changed = true; - } - spin_unlock_irq(&target->lock); - - 
return changed; -} - static void srp_disconnect_target(struct srp_target_port *target) { struct srp_rdma_ch *ch; int i; - if (srp_change_conn_state(target, false)) { - /* XXX should send SRP_I_LOGOUT request */ + /* XXX should send SRP_I_LOGOUT request */ - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { - shost_printk(KERN_DEBUG, target->scsi_host, - PFX "Sending CM DREQ failed\n"); - } + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + ch->connected = false; + if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); } } } @@ -986,12 +969,26 @@ static void srp_rport_delete(struct srp_rport *rport) srp_queue_remove_work(target); } +/** + * srp_connected_ch() - number of connected channels + * @target: SRP target port. + */ +static int srp_connected_ch(struct srp_target_port *target) +{ + int i, c = 0; + + for (i = 0; i < target->ch_count; i++) + c += target->ch[i].connected; + + return c; +} + static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) { struct srp_target_port *target = ch->target; int ret; - WARN_ON_ONCE(!multich && target->connected); + WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); ret = srp_lookup_path(ch); if (ret) @@ -1014,7 +1011,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) */ switch (ch->status) { case 0: - srp_change_conn_state(target, true); + ch->connected = true; return 0; case SRP_PORT_REDIRECT: @@ -1930,7 +1927,7 @@ static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, return; } - if (target->connected && !target->qp_in_error) { + if (ch->connected && !target->qp_in_error) { if (wr_id & LOCAL_INV_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX "LOCAL_INV failed with status %d\n", @@ -2368,7 +2365,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_DREQ_RECEIVED: 
shost_printk(KERN_WARNING, target->scsi_host, PFX "DREQ received - connection closed\n"); - srp_change_conn_state(target, false); + ch->connected = false; if (ib_send_cm_drep(cm_id, NULL, 0)) shost_printk(KERN_ERR, target->scsi_host, PFX "Sending CM DREP failed\n"); @@ -2424,7 +2421,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - if (!target->connected || target->qp_in_error) + if (!ch->connected || target->qp_in_error) return -1; init_completion(&ch->tsk_mgmt_done); @@ -2798,7 +2795,8 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); - if (!target->connected || target->qp_in_error) { + if (srp_connected_ch(target) < target->ch_count || + target->qp_in_error) { shost_printk(KERN_INFO, target->scsi_host, PFX "SCSI scan failed - removing SCSI host\n"); srp_queue_remove_work(target); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index a611556406ac..e690847a46dd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -170,6 +170,7 @@ struct srp_rdma_ch { struct completion tsk_mgmt_done; u8 tsk_mgmt_status; + bool connected; }; /** @@ -214,7 +215,6 @@ struct srp_target_port { __be16 pkey; u32 rq_tmo_jiffies; - bool connected; int zero_req_lim; -- cgit v1.2.3 From b14524edc36d494d75c760743cc9435ef4c12a2f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:24:17 +0200 Subject: IB/srp: Fix reconnection failure handling commit a44074f14ba1ea0747ea737026eb929b81993dc3 upstream. Although it is possible to let SRP I/O continue if a reconnect results in a reduction of the number of channels, the current code does not handle this scenario correctly. Instead of making the reconnect code more complex, consider this as a reconnection failure. 
Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index c418d1fde1ed..75c01b27bd0b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1243,11 +1243,8 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (ret || !ch->target) { - if (i > 1) - ret = 0; + if (ret || !ch->target) break; - } ret = srp_connect_ch(ch, multich); multich = true; } -- cgit v1.2.3 From 25d8f169eee423940158a0ab33ed5dd1dd995cf9 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 May 2015 17:02:58 +0800 Subject: genirq: devres: Fix testing return value of request_any_context_irq() commit 63781394c540dd9e666a6b21d70b64dd52bce76e upstream. request_any_context_irq() returns a negative value on failure. It returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED on success. So fix testing return value of request_any_context_irq(). Also fixup the return value of devm_request_any_context_irq() to make it consistent with request_any_context_irq(). 
Fixes: 0668d3065128 ("genirq: Add devm_request_any_context_irq()") Signed-off-by: Axel Lin Reviewed-by: Stephen Boyd Link: http://lkml.kernel.org/r/1431334978.17783.4.camel@ingics.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/devres.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index d5d0f7345c54..74d90a754268 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -104,7 +104,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, return -ENOMEM; rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id); - if (rc) { + if (rc < 0) { devres_free(dr); return rc; } @@ -113,7 +113,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, dr->dev_id = dev_id; devres_add(dev, dr); - return 0; + return rc; } EXPORT_SYMBOL(devm_request_any_context_irq); -- cgit v1.2.3 From ab12dcd70c11074e6ed28b0304f059a075e33db0 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 3 Apr 2015 12:51:05 +0800 Subject: video: mxsfb: Make sure axi clock is enabled when accessing registers commit 2fa3b4c4a78a5db3502ab9e32630ea660ff923d0 upstream. The LCDIF engines embedded in i.MX6sl and i.MX6sx SoCs need the axi clock as the engine's system clock. The clock should be enabled when accessing LCDIF registers, otherwise the kernel would hang. We should also keep the clock enabled while the engine is active and scanning out frames from memory. This patch makes sure the axi clock is enabled when accessing registers so that the kernel hang issue can be fixed. 
Reported-by: Peter Chen Tested-by: Peter Chen Signed-off-by: Liu Ying Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/mxsfb.c | 68 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/drivers/video/fbdev/mxsfb.c b/drivers/video/fbdev/mxsfb.c index f8ac4a452f26..0f64165b0147 100644 --- a/drivers/video/fbdev/mxsfb.c +++ b/drivers/video/fbdev/mxsfb.c @@ -316,6 +316,18 @@ static int mxsfb_check_var(struct fb_var_screeninfo *var, return 0; } +static inline void mxsfb_enable_axi_clk(struct mxsfb_info *host) +{ + if (host->clk_axi) + clk_prepare_enable(host->clk_axi); +} + +static inline void mxsfb_disable_axi_clk(struct mxsfb_info *host) +{ + if (host->clk_axi) + clk_disable_unprepare(host->clk_axi); +} + static void mxsfb_enable_controller(struct fb_info *fb_info) { struct mxsfb_info *host = to_imxfb_host(fb_info); @@ -333,14 +345,13 @@ static void mxsfb_enable_controller(struct fb_info *fb_info) } } - if (host->clk_axi) - clk_prepare_enable(host->clk_axi); - if (host->clk_disp_axi) clk_prepare_enable(host->clk_disp_axi); clk_prepare_enable(host->clk); clk_set_rate(host->clk, PICOS2KHZ(fb_info->var.pixclock) * 1000U); + mxsfb_enable_axi_clk(host); + /* if it was disabled, re-enable the mode again */ writel(CTRL_DOTCLK_MODE, host->base + LCDC_CTRL + REG_SET); @@ -380,11 +391,11 @@ static void mxsfb_disable_controller(struct fb_info *fb_info) reg = readl(host->base + LCDC_VDCTRL4); writel(reg & ~VDCTRL4_SYNC_SIGNALS_ON, host->base + LCDC_VDCTRL4); + mxsfb_disable_axi_clk(host); + clk_disable_unprepare(host->clk); if (host->clk_disp_axi) clk_disable_unprepare(host->clk_disp_axi); - if (host->clk_axi) - clk_disable_unprepare(host->clk_axi); host->enabled = 0; @@ -421,6 +432,8 @@ static int mxsfb_set_par(struct fb_info *fb_info) mxsfb_disable_controller(fb_info); } + mxsfb_enable_axi_clk(host); + /* clear the FIFOs */ writel(CTRL1_FIFO_CLEAR, host->base + LCDC_CTRL1 + REG_SET); @@ 
-438,6 +451,7 @@ static int mxsfb_set_par(struct fb_info *fb_info) ctrl |= CTRL_SET_WORD_LENGTH(3); switch (host->ld_intf_width) { case STMLCDIF_8BIT: + mxsfb_disable_axi_clk(host); dev_err(&host->pdev->dev, "Unsupported LCD bus width mapping\n"); return -EINVAL; @@ -451,6 +465,7 @@ static int mxsfb_set_par(struct fb_info *fb_info) writel(CTRL1_SET_BYTE_PACKAGING(0x7), host->base + LCDC_CTRL1); break; default: + mxsfb_disable_axi_clk(host); dev_err(&host->pdev->dev, "Unhandled color depth of %u\n", fb_info->var.bits_per_pixel); return -EINVAL; @@ -504,6 +519,8 @@ static int mxsfb_set_par(struct fb_info *fb_info) fb_info->fix.line_length * fb_info->var.yoffset, host->base + host->devdata->next_buf); + mxsfb_disable_axi_clk(host); + if (reenable) mxsfb_enable_controller(fb_info); @@ -582,10 +599,14 @@ static int mxsfb_pan_display(struct fb_var_screeninfo *var, offset = fb_info->fix.line_length * var->yoffset; + mxsfb_enable_axi_clk(host); + /* update on next VSYNC */ writel(fb_info->fix.smem_start + offset, host->base + host->devdata->next_buf); + mxsfb_disable_axi_clk(host); + return 0; } @@ -608,13 +629,17 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, unsigned line_count; unsigned period; unsigned long pa, fbsize; - int bits_per_pixel, ofs; + int bits_per_pixel, ofs, ret = 0; u32 transfer_count, vdctrl0, vdctrl2, vdctrl3, vdctrl4, ctrl; + mxsfb_enable_axi_clk(host); + /* Only restore the mode when the controller is running */ ctrl = readl(host->base + LCDC_CTRL); - if (!(ctrl & CTRL_RUN)) - return -EINVAL; + if (!(ctrl & CTRL_RUN)) { + ret = -EINVAL; + goto err; + } vdctrl0 = readl(host->base + LCDC_VDCTRL0); vdctrl2 = readl(host->base + LCDC_VDCTRL2); @@ -635,7 +660,8 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, break; case 1: default: - return -EINVAL; + ret = -EINVAL; + goto err; } fb_info->var.bits_per_pixel = bits_per_pixel; @@ -673,10 +699,14 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, pa = readl(host->base + 
host->devdata->cur_buf); fbsize = fb_info->fix.line_length * vmode->yres; - if (pa < fb_info->fix.smem_start) - return -EINVAL; - if (pa + fbsize > fb_info->fix.smem_start + fb_info->fix.smem_len) - return -EINVAL; + if (pa < fb_info->fix.smem_start) { + ret = -EINVAL; + goto err; + } + if (pa + fbsize > fb_info->fix.smem_start + fb_info->fix.smem_len) { + ret = -EINVAL; + goto err; + } ofs = pa - fb_info->fix.smem_start; if (ofs) { memmove(fb_info->screen_base, fb_info->screen_base + ofs, fbsize); @@ -689,7 +719,11 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, clk_prepare_enable(host->clk); host->enabled = 1; - return 0; +err: + if (ret) + mxsfb_disable_axi_clk(host); + + return ret; } static int mxsfb_init_fbinfo_dt(struct mxsfb_info *host, @@ -915,7 +949,9 @@ static int mxsfb_probe(struct platform_device *pdev) } if (!host->enabled) { + mxsfb_enable_axi_clk(host); writel(0, host->base + LCDC_CTRL); + mxsfb_disable_axi_clk(host); mxsfb_set_par(fb_info); mxsfb_enable_controller(fb_info); } @@ -954,11 +990,15 @@ static void mxsfb_shutdown(struct platform_device *pdev) struct fb_info *fb_info = platform_get_drvdata(pdev); struct mxsfb_info *host = to_imxfb_host(fb_info); + mxsfb_enable_axi_clk(host); + /* * Force stop the LCD controller as keeping it running during reboot * might interfere with the BootROM's boot mode pads sampling. */ writel(CTRL_RUN, host->base + LCDC_CTRL + REG_CLR); + + mxsfb_disable_axi_clk(host); } static struct platform_driver mxsfb_driver = { -- cgit v1.2.3 From 2c6f129c8fcf59946e62216792e162b9d9f0dc8e Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 24 Apr 2015 14:57:10 +0300 Subject: leds / PM: fix hibernation on arm when gpio-led used with CPU led trigger commit 084609bf727981c7a2e6e69aefe0052c9d793300 upstream. 
Setting a dev_pm_ops suspend/resume pair of callbacks but not a set of hibernation callbacks means those pm functions will not be called upon hibernation - that leads to a system crash on ARM during freezing if gpio-led is used in combination with CPU led trigger. It may happen after freeze_noirq stage (GPIO is suspended) and before syscore_suspend stage (CPU led trigger is suspended) - usually when disable_nonboot_cpus() is called. Log: PM: noirq freeze of devices complete after 1.425 msecs Disabling non-boot CPUs ... ^ system may crash or get stuck here with message (TI AM572x) WARNING: CPU: 0 PID: 3100 at drivers/bus/omap_l3_noc.c:148 l3_interrupt_handler+0x22c/0x370() 44000000.ocp:L3 Custom Error: MASTER MPU TARGET L4_PER1_P3 (Idle): Data Access in Supervisor mode during Functional access CPU1: shutdown ^ or here Fix this by using SIMPLE_DEV_PM_OPS, which appropriately assigns the suspend and hibernation callbacks and move led_suspend/led_resume under CONFIG_PM_SLEEP to avoid build warnings. Fixes: 73e1ab41a80d (leds: Convert led class driver from legacy pm ops to dev_pm_ops) Signed-off-by: Grygorii Strashko Acked-by: Jacek Anaszewski Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/leds/led-class.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 728681debdbe..7fb2a19ac649 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -187,6 +187,7 @@ void led_classdev_resume(struct led_classdev *led_cdev) } EXPORT_SYMBOL_GPL(led_classdev_resume); +#ifdef CONFIG_PM_SLEEP static int led_suspend(struct device *dev) { struct led_classdev *led_cdev = dev_get_drvdata(dev); @@ -206,11 +207,9 @@ static int led_resume(struct device *dev) return 0; } +#endif -static const struct dev_pm_ops leds_class_dev_pm_ops = { - .suspend = led_suspend, - .resume = led_resume, -}; +static SIMPLE_DEV_PM_OPS(leds_class_dev_pm_ops, led_suspend, led_resume); static int match_name(struct device *dev, const void *data) { -- cgit v1.2.3 From 857814ee65dbc942b18b2dc713124ffff043035e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 7 May 2015 17:55:16 -0700 Subject: mtd: fix: avoid race condition when accessing mtd->usecount commit 073db4a51ee43ccb827f54a4261c0583b028d5ab upstream. On a MIPS 32-core machine a BUG_ON was triggered because some accesses to mtd->usecount were done without taking mtd_table_mutex. kernel: Call Trace: kernel: [] __put_mtd_device+0x20/0x50 kernel: [] blktrans_release+0x8c/0xd8 kernel: [] __blkdev_put+0x1a8/0x200 kernel: [] blkdev_close+0x1c/0x30 kernel: [] __fput+0xac/0x250 kernel: [] task_work_run+0xd8/0x120 kernel: [] work_notifysig+0x10/0x18 kernel: kernel: Code: 2442ffff ac8202d8 000217fe <00020336> dc820128 10400003 00000000 0040f809 00000000 kernel: ---[ end trace 080fbb4579b47a73 ]--- Fixed by taking the mutex in blktrans_open and blktrans_release. Note that this locking is already suggested in include/linux/mtd/blktrans.h: struct mtd_blktrans_ops { ... 
/* Called with mtd_table_mutex held; no race with add/remove */ int (*open)(struct mtd_blktrans_dev *dev); void (*release)(struct mtd_blktrans_dev *dev); ... }; But we weren't following it. Originally reported by (and patched by) Zhang and Giuseppe, independently. Improved and rewritten. Reported-by: Zhang Xingcai Reported-by: Giuseppe Cantavenera Tested-by: Giuseppe Cantavenera Acked-by: Alexander Sverdlin Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/mtd_blkdevs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 2b0c52870999..df7c6c70757a 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -197,6 +197,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) return -ERESTARTSYS; /* FIXME: busy loop! -arnd*/ mutex_lock(&dev->lock); + mutex_lock(&mtd_table_mutex); if (dev->open) goto unlock; @@ -220,6 +221,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) unlock: dev->open++; + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); return ret; @@ -230,6 +232,7 @@ error_release: error_put: module_put(dev->tr->owner); kref_put(&dev->ref, blktrans_dev_release); + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); return ret; @@ -243,6 +246,7 @@ static void blktrans_release(struct gendisk *disk, fmode_t mode) return; mutex_lock(&dev->lock); + mutex_lock(&mtd_table_mutex); if (--dev->open) goto unlock; @@ -256,6 +260,7 @@ static void blktrans_release(struct gendisk *disk, fmode_t mode) __put_mtd_device(dev->mtd); } unlock: + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); } -- cgit v1.2.3 From 4aa339cddbcc05b7f8ff4f0960550929aa77213e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 28 May 2015 10:22:10 +0200 Subject: mtd: dc21285: use raw spinlock functions for nw_gpio_lock MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e5babdf928e5d0c432a8d4b99f20421ce14d1ab6 upstream. Since commit bd31b85960a7 (which is in 3.2-rc1) nw_gpio_lock is a raw spinlock that needs usage of the corresponding raw functions. This fixes: drivers/mtd/maps/dc21285.c: In function 'nw_en_write': drivers/mtd/maps/dc21285.c:41:340: warning: passing argument 1 of 'spinlock_check' from incompatible pointer type spin_lock_irqsave(&nw_gpio_lock, flags); In file included from include/linux/seqlock.h:35:0, from include/linux/time.h:5, from include/linux/stat.h:18, from include/linux/module.h:10, from drivers/mtd/maps/dc21285.c:8: include/linux/spinlock.h:299:102: note: expected 'struct spinlock_t *' but argument is of type 'struct raw_spinlock_t *' static inline raw_spinlock_t *spinlock_check(spinlock_t *lock) ^ drivers/mtd/maps/dc21285.c:43:25: warning: passing argument 1 of 'spin_unlock_irqrestore' from incompatible pointer type spin_unlock_irqrestore(&nw_gpio_lock, flags); ^ In file included from include/linux/seqlock.h:35:0, from include/linux/time.h:5, from include/linux/stat.h:18, from include/linux/module.h:10, from drivers/mtd/maps/dc21285.c:8: include/linux/spinlock.h:370:91: note: expected 'struct spinlock_t *' but argument is of type 'struct raw_spinlock_t *' static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) Fixes: bd31b85960a7 ("locking, ARM: Annotate low level hw locks as raw") Signed-off-by: Uwe Kleine-König Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/maps/dc21285.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/maps/dc21285.c b/drivers/mtd/maps/dc21285.c index f8a7dd14cee0..70a3db3ab856 100644 --- a/drivers/mtd/maps/dc21285.c +++ b/drivers/mtd/maps/dc21285.c @@ -38,9 +38,9 @@ static void nw_en_write(void) * we want to write a bit pattern XXX1 to Xilinx to enable * the write gate, which will be open for about the next 2ms. 
*/ - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); nw_cpld_modify(CPLD_FLASH_WR_ENABLE, CPLD_FLASH_WR_ENABLE); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); /* * let the ISA bus to catch on... -- cgit v1.2.3 From 7044198591216ee701f98eeefecabc9d0ad6c2ef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 13 Apr 2015 16:23:36 +0200 Subject: PCI: Propagate the "ignore hotplug" setting to parent commit 0824965140fff1bf640a987dc790d1594a8e0699 upstream. Refine the mechanism introduced by commit f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") to propagate the ignore_hotplug setting of the device to its parent bridge in case hotplug notifications related to the graphics adapter switching are given for the bridge rather than for the device itself (they need to be ignored in both cases). Link: https://bugzilla.kernel.org/show_bug.cgi?id=61891 Link: https://bugs.freedesktop.org/show_bug.cgi?id=88927 Fixes: b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device") Reported-and-tested-by: tiagdtd-lava Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 11 +++++++++++ include/linux/pci.h | 6 +----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index acc4b6ef78c4..c44393f26fd3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4324,6 +4324,17 @@ bool pci_device_is_present(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(pci_device_is_present); +void pci_ignore_hotplug(struct pci_dev *dev) +{ + struct pci_dev *bridge = dev->bus->self; + + dev->ignore_hotplug = 1; + /* Propagate the "ignore hotplug" setting to the parent bridge. 
*/ + if (bridge) + bridge->ignore_hotplug = 1; +} +EXPORT_SYMBOL_GPL(pci_ignore_hotplug); + #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; static DEFINE_SPINLOCK(resource_alignment_lock); diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..ef45ffe9ca88 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1006,6 +1006,7 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); +void pci_ignore_hotplug(struct pci_dev *dev); /* ROM control related routines */ int pci_enable_rom(struct pci_dev *pdev); @@ -1043,11 +1044,6 @@ bool pci_dev_run_wake(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); -static inline void pci_ignore_hotplug(struct pci_dev *dev) -{ - dev->ignore_hotplug = 1; -} - static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { -- cgit v1.2.3 From 30e8a1821385dbd85830b407103f28d989764911 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 May 2015 17:23:51 -0700 Subject: PCI: Add pci_bus_addr_t commit 3a9ad0b4fdcd57f775d3615004c8c64c021a9e7d upstream. David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") fails to boot on sparc/T5-8: pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000) The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA addresses, i.e., bus addresses returned via the DMA API (dma_map_single(), etc.), while the PCI core assumed dma_addr_t could hold *any* bus address, including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so dma_addr_t is a 32-bit type. 
However, BAR values can be 64 bits wide, so they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that tripped over this mismatch. Add pci_bus_addr_t, which is wide enough to hold any PCI bus address, including both raw BAR values and DMA addresses. This will be 64 bits on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then dma_addr_t only needs to be wide enough to hold addresses from the DMA API. [bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at least as wide as dma_addr_t, documentation] Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB") Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231 Reported-by: David Ahern Tested-by: David Ahern Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- Documentation/DMA-API-HOWTO.txt | 29 +++++++++++++++++------------ Documentation/DMA-API.txt | 30 +++++++++++++++--------------- drivers/pci/Kconfig | 4 ++++ drivers/pci/bus.c | 10 +++++----- drivers/pci/probe.c | 12 ++++++------ include/linux/pci.h | 12 +++++++++--- include/linux/types.h | 12 ++++++++++-- 7 files changed, 66 insertions(+), 43 deletions(-) diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 0f7afb2bb442..aef8cc5a677b 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -25,13 +25,18 @@ physical addresses. These are the addresses in /proc/iomem. The physical address is not directly useful to a driver; it must use ioremap() to map the space and produce a virtual address. -I/O devices use a third kind of address: a "bus address" or "DMA address". 
-If a device has registers at an MMIO address, or if it performs DMA to read -or write system memory, the addresses used by the device are bus addresses. -In some systems, bus addresses are identical to CPU physical addresses, but -in general they are not. IOMMUs and host bridges can produce arbitrary +I/O devices use a third kind of address: a "bus address". If a device has +registers at an MMIO address, or if it performs DMA to read or write system +memory, the addresses used by the device are bus addresses. In some +systems, bus addresses are identical to CPU physical addresses, but in +general they are not. IOMMUs and host bridges can produce arbitrary mappings between physical and bus addresses. +From a device's point of view, DMA uses the bus address space, but it may +be restricted to a subset of that space. For example, even if a system +supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU +so devices only need to use 32-bit DMA addresses. + Here's a picture and some examples: CPU CPU Bus @@ -72,11 +77,11 @@ can use virtual address X to access the buffer, but the device itself cannot because DMA doesn't go through the CPU virtual memory system. In some simple systems, the device can do DMA directly to physical address -Y. But in many others, there is IOMMU hardware that translates bus +Y. But in many others, there is IOMMU hardware that translates DMA addresses to physical addresses, e.g., it translates Z to Y. This is part of the reason for the DMA API: the driver can give a virtual address X to an interface like dma_map_single(), which sets up any required IOMMU -mapping and returns the bus address Z. The driver then tells the device to +mapping and returns the DMA address Z. The driver then tells the device to do DMA to Z, and the IOMMU maps it to the buffer at address Y in system RAM. @@ -98,7 +103,7 @@ First of all, you should make sure #include is in your driver, which provides the definition of dma_addr_t. 
This type -can hold any valid DMA or bus address for the platform and should be used +can hold any valid DMA address for the platform and should be used everywhere you hold a DMA address returned from the DMA mapping functions. What memory is DMA'able? @@ -316,7 +321,7 @@ There are two types of DMA mappings: Think of "consistent" as "synchronous" or "coherent". The current default is to return consistent memory in the low 32 - bits of the bus space. However, for future compatibility you should + bits of the DMA space. However, for future compatibility you should set the consistent mask even if this default is fine for your driver. @@ -403,7 +408,7 @@ dma_alloc_coherent() returns two values: the virtual address which you can use to access it from the CPU and dma_handle which you pass to the card. -The CPU virtual address and the DMA bus address are both +The CPU virtual address and the DMA address are both guaranteed to be aligned to the smallest PAGE_SIZE order which is greater than or equal to the requested size. This invariant exists (for example) to guarantee that if you allocate a chunk @@ -645,8 +650,8 @@ PLEASE NOTE: The 'nents' argument to the dma_unmap_sg call must be dma_map_sg call. Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}() -counterpart, because the bus address space is a shared resource and -you could render the machine unusable by consuming all bus addresses. +counterpart, because the DMA address space is a shared resource and +you could render the machine unusable by consuming all DMA addresses. If you need to use the same streaming DMA region multiple times and touch the data in between the DMA transfers, the buffer needs to be synced diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 52088408668a..7eba542eff7c 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -18,10 +18,10 @@ Part I - dma_ API To get the dma_ API, you must #include . 
This provides dma_addr_t and the interfaces described below. -A dma_addr_t can hold any valid DMA or bus address for the platform. It -can be given to a device to use as a DMA source or target. A CPU cannot -reference a dma_addr_t directly because there may be translation between -its physical address space and the bus address space. +A dma_addr_t can hold any valid DMA address for the platform. It can be +given to a device to use as a DMA source or target. A CPU cannot reference +a dma_addr_t directly because there may be translation between its physical +address space and the DMA address space. Part Ia - Using large DMA-coherent buffers ------------------------------------------ @@ -42,7 +42,7 @@ It returns a pointer to the allocated region (in the processor's virtual address space) or NULL if the allocation failed. It also returns a which may be cast to an unsigned integer the -same width as the bus and given to the device as the bus address base of +same width as the bus and given to the device as the DMA address base of the region. Note: consistent memory can be expensive on some platforms, and the @@ -193,7 +193,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) Maps a piece of processor virtual memory so it can be accessed by the -device and returns the bus address of the memory. +device and returns the DMA address of the memory. The direction for both APIs may be converted freely by casting. However the dma_ API uses a strongly typed enumerator for its @@ -212,20 +212,20 @@ contiguous piece of memory. For this reason, memory to be mapped by this API should be obtained from sources which guarantee it to be physically contiguous (like kmalloc). 
-Further, the bus address of the memory must be within the +Further, the DMA address of the memory must be within the dma_mask of the device (the dma_mask is a bit mask of the -addressable region for the device, i.e., if the bus address of -the memory ANDed with the dma_mask is still equal to the bus +addressable region for the device, i.e., if the DMA address of +the memory ANDed with the dma_mask is still equal to the DMA address, then the device can perform DMA to the memory). To ensure that the memory allocated by kmalloc is within the dma_mask, the driver may specify various platform-dependent flags to restrict -the bus address range of the allocation (e.g., on x86, GFP_DMA -guarantees to be within the first 16MB of available bus addresses, +the DMA address range of the allocation (e.g., on x86, GFP_DMA +guarantees to be within the first 16MB of available DMA addresses, as required by ISA devices). Note also that the above constraints on physical contiguity and dma_mask may not apply if the platform has an IOMMU (a device which -maps an I/O bus address to a physical memory address). However, to be +maps an I/O DMA address to a physical memory address). However, to be portable, device driver writers may *not* assume that such an IOMMU exists. @@ -296,7 +296,7 @@ reduce current DMA mapping usage or delay and try again later). dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) -Returns: the number of bus address segments mapped (this may be shorter +Returns: the number of DMA address segments mapped (this may be shorter than passed in if some elements of the scatter/gather list are physically or virtually adjacent and an IOMMU maps them with a single entry). @@ -340,7 +340,7 @@ must be the same as those and passed in to the scatter/gather mapping API. Note: must be the number you passed in, *not* the number of -bus address entries returned. +DMA address entries returned. 
void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -507,7 +507,7 @@ it's asked for coherent memory for this device. phys_addr is the CPU physical address to which the memory is currently assigned (this will be ioremapped so the CPU can access the region). -device_addr is the bus address the device needs to be programmed +device_addr is the DMA address the device needs to be programmed with to actually address this memory (this will be handed out as the dma_addr_t in dma_alloc_coherent()). diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7a8f1c5e65af..73de4efcbe6e 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -1,6 +1,10 @@ # # PCI configuration # +config PCI_BUS_ADDR_T_64BIT + def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT) + depends on PCI + config PCI_MSI bool "Message Signaled Interrupts (MSI and MSI-X)" depends on PCI diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 90fa3a78fb7c..6fbd3f2b5992 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -92,11 +92,11 @@ void pci_bus_remove_resources(struct pci_bus *bus) } static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL}; -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT static struct pci_bus_region pci_64_bit = {0, - (dma_addr_t) 0xffffffffffffffffULL}; -static struct pci_bus_region pci_high = {(dma_addr_t) 0x100000000ULL, - (dma_addr_t) 0xffffffffffffffffULL}; + (pci_bus_addr_t) 0xffffffffffffffffULL}; +static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL, + (pci_bus_addr_t) 0xffffffffffffffffULL}; #endif /* @@ -200,7 +200,7 @@ int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t), void *alignf_data) { -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT int rc; if (res->flags & IORESOURCE_MEM_64) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6675a7a1b9fc..c91185721345 100644 --- a/drivers/pci/probe.c +++ 
b/drivers/pci/probe.c @@ -254,8 +254,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } if (res->flags & IORESOURCE_MEM_64) { - if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && - sz64 > 0x100000000ULL) { + if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8) + && sz64 > 0x100000000ULL) { res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; res->start = 0; res->end = 0; @@ -264,7 +264,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, goto out; } - if ((sizeof(dma_addr_t) < 8) && l) { + if ((sizeof(pci_bus_addr_t) < 8) && l) { /* Above 32-bit boundary; try to reallocate */ res->flags |= IORESOURCE_UNSET; res->start = 0; @@ -399,7 +399,7 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) struct pci_dev *dev = child->self; u16 mem_base_lo, mem_limit_lo; u64 base64, limit64; - dma_addr_t base, limit; + pci_bus_addr_t base, limit; struct pci_bus_region region; struct resource *res; @@ -426,8 +426,8 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) } } - base = (dma_addr_t) base64; - limit = (dma_addr_t) limit64; + base = (pci_bus_addr_t) base64; + limit = (pci_bus_addr_t) limit64; if (base != base64) { dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n", diff --git a/include/linux/pci.h b/include/linux/pci.h index ef45ffe9ca88..3ef3a52068df 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -577,9 +577,15 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val); +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT +typedef u64 pci_bus_addr_t; +#else +typedef u32 pci_bus_addr_t; +#endif + struct pci_bus_region { - dma_addr_t start; - dma_addr_t end; + pci_bus_addr_t start; + pci_bus_addr_t end; }; struct pci_dynids { @@ -1124,7 +1130,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, int pci_remap_iospace(const 
struct resource *res, phys_addr_t phys_addr); -static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar) +static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) { struct pci_bus_region region; diff --git a/include/linux/types.h b/include/linux/types.h index 59698be03490..8715287c3b1f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -139,12 +139,20 @@ typedef unsigned long blkcnt_t; */ #define pgoff_t unsigned long -/* A dma_addr_t can hold any valid DMA or bus address for the platform */ +/* + * A dma_addr_t can hold any valid DMA address, i.e., any address returned + * by the DMA API. + * + * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32 + * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits, + * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses, + * so they don't care about the size of the actual bus addresses. + */ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT typedef u64 dma_addr_t; #else typedef u32 dma_addr_t; -#endif /* dma_addr_t */ +#endif typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; -- cgit v1.2.3 From 5224e2a708f617fb69ea7cb56613838af40af188 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 8 Jun 2015 17:10:50 -0600 Subject: PCI: pciehp: Wait for hotplug command completion where necessary commit a5dd4b4b0570b3bf880d563969b245dfbd170c1e upstream. The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. 
If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pciehp_hpc.c | 52 +++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 0ebf754fc177..6d6868811e56 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -176,20 +176,17 @@ static void pcie_wait_cmd(struct controller *ctrl) jiffies_to_msecs(jiffies - ctrl->cmd_started)); } -/** - * pcie_write_cmd - Issue controller command - * @ctrl: controller to which the command is issued - * @cmd: command value written to slot control register - * @mask: bitmask of slot control register to be modified - */ -static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) +static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd, + u16 mask, bool wait) { struct pci_dev *pdev = ctrl_dev(ctrl); 
u16 slot_ctrl; mutex_lock(&ctrl->ctrl_lock); - /* Wait for any previous command that might still be in progress */ + /* + * Always wait for any previous command that might still be in progress + */ pcie_wait_cmd(ctrl); pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl); @@ -201,9 +198,33 @@ static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) ctrl->cmd_started = jiffies; ctrl->slot_ctrl = slot_ctrl; + /* + * Optionally wait for the hardware to be ready for a new command, + * indicating completion of the above issued command. + */ + if (wait) + pcie_wait_cmd(ctrl); + mutex_unlock(&ctrl->ctrl_lock); } +/** + * pcie_write_cmd - Issue controller command + * @ctrl: controller to which the command is issued + * @cmd: command value written to slot control register + * @mask: bitmask of slot control register to be modified + */ +static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) +{ + pcie_do_write_cmd(ctrl, cmd, mask, true); +} + +/* Same as above without waiting for the hardware to latch */ +static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask) +{ + pcie_do_write_cmd(ctrl, cmd, mask, false); +} + bool pciehp_check_link_active(struct controller *ctrl) { struct pci_dev *pdev = ctrl_dev(ctrl); @@ -422,7 +443,7 @@ void pciehp_set_attention_status(struct slot *slot, u8 value) default: return; } - pcie_write_cmd(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC); + pcie_write_cmd_nowait(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); } @@ -434,7 +455,8 @@ void pciehp_green_led_on(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_ON); @@ -447,7 
+469,8 @@ void pciehp_green_led_off(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_OFF); @@ -460,7 +483,8 @@ void pciehp_green_led_blink(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_BLINK); @@ -613,7 +637,7 @@ void pcie_enable_notification(struct controller *ctrl) PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_DLLSCE); - pcie_write_cmd(ctrl, cmd, mask); + pcie_write_cmd_nowait(ctrl, cmd, mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd); } @@ -664,7 +688,7 @@ int pciehp_reset_slot(struct slot *slot, int probe) pci_reset_bridge_secondary_bus(ctrl->pcie->port); pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); - pcie_write_cmd(ctrl, ctrl_mask, ctrl_mask); + pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); if (pciehp_poll_mode) -- cgit v1.2.3 From d389ad7c0e6c02efaa3fe7992efb2452022e4b63 Mon Sep 17 00:00:00 2001 From: Zhichang Yuan Date: Fri, 24 Apr 2015 17:05:09 +0800 Subject: of/pci: Fix pci_address_to_pio() conversion of CPU address to I/O port commit 5dbb4c6167229c8d4f528e8ec26699a7305000a3 upstream. 41f8bba7f555 ("of/pci: Add pci_register_io_range() and pci_pio_to_address()") added support for systems with several I/O ranges described by OF bindings. 
It modified pci_address_to_pio() to look up the io_range for a given CPU physical address, but the conversion was wrong. Fix the conversion of address to I/O port. [bhelgaas: changelog] Fixes: 41f8bba7f555 ("of/pci: Add pci_register_io_range() and pci_pio_to_address()") Signed-off-by: Zhichang Yuan Signed-off-by: Bjorn Helgaas Acked-by: Liviu Dudau Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 78a7dcbec7d8..6906a3f61bd8 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -765,7 +765,7 @@ unsigned long __weak pci_address_to_pio(phys_addr_t address) spin_lock(&io_range_lock); list_for_each_entry(res, &io_range_list, list) { if (address >= res->start && address < res->start + res->size) { - addr = res->start - address + offset; + addr = address - res->start + offset; break; } offset += res->size; -- cgit v1.2.3 From 7890602ea4fa5793f5ecad24bcca0049fa7ca06d Mon Sep 17 00:00:00 2001 From: Frodo Lai Date: Tue, 16 Jun 2015 15:03:53 -0700 Subject: Input: pixcir_i2c_ts - fix receive error commit 469d7d22cea146e40efe8c330e5164b4d8f13934 upstream. The i2c_master_recv() uses readsize to receive data from i2c but compares to size of rdbuf which is always 27. This would cause a problem when the max_fingers is not 5. Change the comparison value to readsize instead. 
Fixes: 36874c7e219 ("Input: pixcir_i2c_ts - support up to 5 fingers and hardware tracking IDs") Signed-off-by: Frodo Lai Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/pixcir_i2c_ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 2c2107147319..8f3e243a62bf 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -78,7 +78,7 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata, } ret = i2c_master_recv(tsdata->client, rdbuf, readsize); - if (ret != sizeof(rdbuf)) { + if (ret != readsize) { dev_err(&tsdata->client->dev, "%s: i2c_master_recv failed(), ret=%d\n", __func__, ret); -- cgit v1.2.3 From c8bde72f9af412de57f0ceae218d648640118b0b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jul 2015 10:10:33 -0700 Subject: Linux 4.1.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cef84c061f02..e3cdec4898be 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Series 4800 -- cgit v1.2.3