From 89e357d83c06b6fac581c3ca7f0ee3ae7e67109e Mon Sep 17 00:00:00 2001
From: Yuejie Shi <syjcnss@gmail.com>
Date: Fri, 31 Mar 2017 15:10:20 +0800
Subject: af_key: Add lock to key dump

A dump may come in the middle of another dump, modifying its dump
structure members. This race condition will result in NULL pointer
dereference in kernel. So add a lock to prevent that race.

Fixes: 83321d6b9872 ("[AF_KEY]: Dump SA/SP entries non-atomically")
Signed-off-by: Yuejie Shi <syjcnss@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 46 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index c6252ed42c1d..1b0ea80133f1 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -63,6 +63,7 @@ struct pfkey_sock {
 		} u;
 		struct sk_buff	*skb;
 	} dump;
+	struct mutex dump_lock;
 };
 
 static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
@@ -139,6 +140,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
 	struct sock *sk;
+	struct pfkey_sock *pfk;
 	int err;
 
 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -153,6 +155,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
 	if (sk == NULL)
 		goto out;
 
+	pfk = pfkey_sk(sk);
+	mutex_init(&pfk->dump_lock);
+
 	sock->ops = &pfkey_ops;
 	sock_init_data(sock, sk);
 
@@ -281,13 +286,23 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
 	struct sadb_msg *hdr;
 	int rc;
 
+	mutex_lock(&pfk->dump_lock);
+	if (!pfk->dump.dump) {
+		rc = 0;
+		goto out;
+	}
+
 	rc = pfk->dump.dump(pfk);
-	if (rc == -ENOBUFS)
-		return 0;
+	if (rc == -ENOBUFS) {
+		rc = 0;
+		goto out;
+	}
 
 	if (pfk->dump.skb) {
-		if (!pfkey_can_dump(&pfk->sk))
-			return 0;
+		if (!pfkey_can_dump(&pfk->sk)) {
+			rc = 0;
+			goto out;
+		}
 
 		hdr = (struct sadb_msg *) pfk->dump.skb->data;
 		hdr->sadb_msg_seq = 0;
@@ -298,6 +313,9 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
 	}
 
 	pfkey_terminate_dump(pfk);
+
+out:
+	mutex_unlock(&pfk->dump_lock);
 	return rc;
 }
 
@@ -1793,19 +1811,26 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
 	struct xfrm_address_filter *filter = NULL;
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 
-	if (pfk->dump.dump != NULL)
+	mutex_lock(&pfk->dump_lock);
+	if (pfk->dump.dump != NULL) {
+		mutex_unlock(&pfk->dump_lock);
 		return -EBUSY;
+	}
 
 	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
-	if (proto == 0)
+	if (proto == 0) {
+		mutex_unlock(&pfk->dump_lock);
 		return -EINVAL;
+	}
 
 	if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
 		struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
 
 		filter = kmalloc(sizeof(*filter), GFP_KERNEL);
-		if (filter == NULL)
+		if (filter == NULL) {
+			mutex_unlock(&pfk->dump_lock);
 			return -ENOMEM;
+		}
 
 		memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
 		       sizeof(xfrm_address_t));
@@ -1821,6 +1846,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
 	pfk->dump.dump = pfkey_dump_sa;
 	pfk->dump.done = pfkey_dump_sa_done;
 	xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
+	mutex_unlock(&pfk->dump_lock);
 
 	return pfkey_do_dump(pfk);
 }
@@ -2679,14 +2705,18 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb
 {
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 
-	if (pfk->dump.dump != NULL)
+	mutex_lock(&pfk->dump_lock);
+	if (pfk->dump.dump != NULL) {
+		mutex_unlock(&pfk->dump_lock);
 		return -EBUSY;
+	}
 
 	pfk->dump.msg_version = hdr->sadb_msg_version;
 	pfk->dump.msg_portid = hdr->sadb_msg_pid;
 	pfk->dump.dump = pfkey_dump_sp;
 	pfk->dump.done = pfkey_dump_sp_done;
 	xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN);
+	mutex_unlock(&pfk->dump_lock);
 
 	return pfkey_do_dump(pfk);
 }
-- 
cgit v1.2.3


From daffad2123fc3efcc2c70bd6e905960a31886556 Mon Sep 17 00:00:00 2001
From: Gabriel Fernandez <gabriel.fernandez@st.com>
Date: Thu, 16 Mar 2017 09:16:40 +0100
Subject: clk: stm32f4: fix: exclude values 0 and 1 for PLLQ

0000: PLLQ = 0, wrong configuration
0001: PLLQ = 1, wrong configuration
...
0010: PLLQ = 2
0011: PLLQ = 3
0100: PLLQ = 4
...
1111: PLLQ = 1

Use divider table to exclude 0 and 1 values.

Fixes: 83135ad3c517 ("clk: stm32f4: Add PLL_I2S & PLL_SAI for STM32F429/469 boards")

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-stm32f4.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index ab609a76706f..cf9449b3dbd9 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -429,6 +429,13 @@ static const struct clk_div_table pll_divp_table[] = {
 	{ 0, 2 }, { 1, 4 }, { 2, 6 }, { 3, 8 }, { 0 }
 };
 
+static const struct clk_div_table pll_divq_table[] = {
+	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 },
+	{ 8, 8 }, { 9, 9 }, { 10, 10 }, { 11, 11 }, { 12, 12 }, { 13, 13 },
+	{ 14, 14 }, { 15, 15 },
+	{ 0 }
+};
+
 static const struct clk_div_table pll_divr_table[] = {
 	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, { 0 }
 };
@@ -496,9 +503,9 @@ struct stm32f4_div_data {
 
 #define MAX_PLL_DIV 3
 static const struct stm32f4_div_data  div_data[MAX_PLL_DIV] = {
-	{ 16, 2, 0,			pll_divp_table	},
-	{ 24, 4, CLK_DIVIDER_ONE_BASED, NULL		},
-	{ 28, 3, 0,			pll_divr_table	},
+	{ 16, 2, 0, pll_divp_table },
+	{ 24, 4, 0, pll_divq_table },
+	{ 28, 3, 0, pll_divr_table },
 };
 
 struct stm32f4_pll_data {
-- 
cgit v1.2.3


From a8f60d1fadf7b8b54449fcc9d6b15248917478ba Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Sun, 9 Apr 2017 22:09:38 +0200
Subject: s390/mm: fix CMMA vs KSM vs others

On heavy paging with KSM I see guest data corruption. Turns out that
KSM will add pages to its tree, where the mapping return true for
pte_unused (or might become as such later).  KSM will unmap such pages
and reinstantiate with different attributes (e.g. write protected or
special, e.g. in replace_page or write_protect_page)). This uncovered
a bug in our pagetable handling: We must remove the unused flag as
soon as an entry becomes present again.

Cc: stable@vger.kernel.org
Signed-of-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 93e37b12e882..ecec682bb516 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1051,6 +1051,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 {
 	if (!MACHINE_HAS_NX)
 		pte_val(entry) &= ~_PAGE_NOEXEC;
+	if (pte_present(entry))
+		pte_val(entry) &= ~_PAGE_UNUSED;
 	if (mm_has_pgste(mm))
 		ptep_set_pte_at(mm, addr, ptep, entry);
 	else
-- 
cgit v1.2.3


From b9c1153f7a9cb2d53b845615a0edd510f7fe8341 Mon Sep 17 00:00:00 2001
From: Dongdong Liu <liudongdong3@huawei.com>
Date: Thu, 23 Mar 2017 21:18:17 +0800
Subject: PCI: hisi: Fix DT binding (hisi-pcie-almost-ecam)

The "hisilicon,pcie-almost-ecam" binding goes against the usual DT
conventions, and is non-sensical in that it describes the IP based on
what it isn't.  Fix the DT binding with "hisilicon,hip06-pcie-ecam"
and "hisilicon,hip07-pcie-ecam".

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 Documentation/devicetree/bindings/pci/hisilicon-pcie.txt | 10 ++++++++--
 drivers/pci/dwc/pcie-hisi.c                              |  6 +++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
index b7fa3b97986d..a339dbb15493 100644
--- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
@@ -44,13 +44,19 @@ Hip05 Example (note that Hip06 is the same except compatible):
 	};
 
 HiSilicon Hip06/Hip07 PCIe host bridge DT (almost-ECAM) description.
+
+Some BIOSes place the host controller in a mode where it is ECAM
+compliant for all devices other than the root complex. In such cases,
+the host controller should be described as below.
+
 The properties and their meanings are identical to those described in
 host-generic-pci.txt except as listed below.
 
 Properties of the host controller node that differ from
 host-generic-pci.txt:
 
-- compatible     : Must be "hisilicon,pcie-almost-ecam"
+- compatible     : Must be "hisilicon,hip06-pcie-ecam", or
+		   "hisilicon,hip07-pcie-ecam"
 
 - reg            : Two entries: First the ECAM configuration space for any
 		   other bus underneath the root bus. Second, the base
@@ -59,7 +65,7 @@ host-generic-pci.txt:
 
 Example:
 	pcie0: pcie@a0090000 {
-		compatible = "hisilicon,pcie-almost-ecam";
+		compatible = "hisilicon,hip06-pcie-ecam";
 		reg = <0 0xb0000000 0 0x2000000>,  /*  ECAM configuration space */
 		      <0 0xa0090000 0 0x10000>; /* host bridge registers */
 		bus-range = <0  31>;
diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c
index fd66a3199db7..cf9d6a9d9fd4 100644
--- a/drivers/pci/dwc/pcie-hisi.c
+++ b/drivers/pci/dwc/pcie-hisi.c
@@ -380,9 +380,13 @@ struct pci_ecam_ops hisi_pcie_platform_ops = {
 
 static const struct of_device_id hisi_pcie_almost_ecam_of_match[] = {
 	{
-		.compatible = "hisilicon,pcie-almost-ecam",
+		.compatible =  "hisilicon,hip06-pcie-ecam",
 		.data	    = (void *) &hisi_pcie_platform_ops,
 	},
+	{
+		.compatible =  "hisilicon,hip07-pcie-ecam",
+		.data       = (void *) &hisi_pcie_platform_ops,
+	},
 	{},
 };
 
-- 
cgit v1.2.3


From aa01338c018469274848a973bcbd287ef341937c Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Mon, 27 Mar 2017 11:57:53 +0200
Subject: clk: sunxi-ng: fix build error without CONFIG_RESET_CONTROLLER

With CONFIG_RESET_CONTROLLER=n we get the following link error in the
sunxi-ng clk driver:

drivers/built-in.o: In function `sunxi_ccu_probe':
mux-core.c:(.text+0x12fe68): undefined reference to 'reset_controller_register'
mux-core.c:(.text+0x12fe68): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol 'reset_controller_register'

Fix this by adding the appropriate select statement.

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index 72109d2cf41b..e8eca1fc31e9 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -1,6 +1,7 @@
 config SUNXI_CCU
 	bool "Clock support for Allwinner SoCs"
 	depends on ARCH_SUNXI || COMPILE_TEST
+	select RESET_CONTROLLER
 	default ARCH_SUNXI
 
 if SUNXI_CCU
-- 
cgit v1.2.3


From e87741ac6c578b240409a8114b9350a58b0f9e93 Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Mon, 10 Apr 2017 14:15:44 +0200
Subject: clk: sunxi-ng: fix build failure in ccu-sun9i-a80 driver

The ccu-sun9i-a80 driver uses the ccu_mult_ops struct, but unlike the other
users it doesen't select the corresponding Kconfig symbol under which the
struct is compiled in.

This results in the following link error with CONFIG_SUN9I_A80_CCU=y and
CONFIG_SUNXI_CCU_MULT=n:

drivers/built-in.o:(.data+0x2d638): undefined reference to 'ccu_mult_ops'

Fix this by explicitly selecting CONFIG_SUNXI_CCU_MULT like the other
users of the struct.

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index e8eca1fc31e9..1c2357301017 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -136,6 +136,7 @@ config SUN8I_V3S_CCU
 config SUN9I_A80_CCU
 	bool "Support for the Allwinner A80 CCU"
 	select SUNXI_CCU_DIV
+	select SUNXI_CCU_MULT
 	select SUNXI_CCU_GATE
 	select SUNXI_CCU_NKMP
 	select SUNXI_CCU_NM
-- 
cgit v1.2.3


From 02ae2bc6febd90cf3de61b6a1bdf491966ed410f Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Thu, 13 Apr 2017 10:13:52 +0800
Subject: clk: sunxi-ng: Add clk notifier to gate then ungate PLL clocks

In common PLL designs, changes to the dividers take effect almost
immediately, while changes to the multipliers (implemented as
dividers in the feedback loop) take a few cycles to work into
the feedback loop for the PLL to stablize.

Sometimes when the PLL clock rate is changed, the decrease in the
divider is too much for the decrease in the multiplier to catch up.
The PLL clock rate will spike, and in some cases, might lock up
completely. This is especially the case if the divider changed is
the pre-divider, which affects the reference frequency.

This patch introduces a clk notifier callback that will gate and
then ungate a clk after a rate change, effectively resetting it,
so it continues to work, despite any possible lockups. Care must
be taken to reparent any consumers to other temporary clocks during
the rate change, and that this notifier callback must be the first
to be registered.

This is intended to fix occasional lockups with cpufreq on newer
Allwinner SoCs, such as the A33 and the H3. Previously it was
thought that reparenting the cpu clock away from the PLL while
it stabilized was enough, as this worked quite well on the A31.

On the A33, hangs have been observed after cpufreq was recently
introduced. With the H3, a more thorough test [1] showed that
reparenting alone isn't enough. The system still locks up unless
the dividers are limited to 1.

A hunch was if the PLL was stuck in some unknown state, perhaps
gating then ungating it would bring it back to normal. Tests
done by Icenowy Zheng using Ondrej's test firmware shows this
to be a valid solution.

[1] http://www.spinics.net/lists/arm-kernel/msg552501.html

Reported-by: Ondrej Jirman <megous@megous.com>
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Tested-by: Icenowy Zheng <icenowy@aosc.io>
Tested-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_common.c | 49 +++++++++++++++++++++++++++++++++++++++
 drivers/clk/sunxi-ng/ccu_common.h | 12 ++++++++++
 2 files changed, 61 insertions(+)

diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index 8a47bafd7890..9d8724715a43 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -14,11 +14,13 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/iopoll.h>
 #include <linux/slab.h>
 
 #include "ccu_common.h"
+#include "ccu_gate.h"
 #include "ccu_reset.h"
 
 static DEFINE_SPINLOCK(ccu_lock);
@@ -39,6 +41,53 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock)
 	WARN_ON(readl_relaxed_poll_timeout(addr, reg, reg & lock, 100, 70000));
 }
 
+/*
+ * This clock notifier is called when the frequency of a PLL clock is
+ * changed. In common PLL designs, changes to the dividers take effect
+ * almost immediately, while changes to the multipliers (implemented
+ * as dividers in the feedback loop) take a few cycles to work into
+ * the feedback loop for the PLL to stablize.
+ *
+ * Sometimes when the PLL clock rate is changed, the decrease in the
+ * divider is too much for the decrease in the multiplier to catch up.
+ * The PLL clock rate will spike, and in some cases, might lock up
+ * completely.
+ *
+ * This notifier callback will gate and then ungate the clock,
+ * effectively resetting it, so it proceeds to work. Care must be
+ * taken to reparent consumers to other temporary clocks during the
+ * rate change, and that this notifier callback must be the first
+ * to be registered.
+ */
+static int ccu_pll_notifier_cb(struct notifier_block *nb,
+			       unsigned long event, void *data)
+{
+	struct ccu_pll_nb *pll = to_ccu_pll_nb(nb);
+	int ret = 0;
+
+	if (event != POST_RATE_CHANGE)
+		goto out;
+
+	ccu_gate_helper_disable(pll->common, pll->enable);
+
+	ret = ccu_gate_helper_enable(pll->common, pll->enable);
+	if (ret)
+		goto out;
+
+	ccu_helper_wait_for_lock(pll->common, pll->lock);
+
+out:
+	return notifier_from_errno(ret);
+}
+
+int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb)
+{
+	pll_nb->clk_nb.notifier_call = ccu_pll_notifier_cb;
+
+	return clk_notifier_register(pll_nb->common->hw.clk,
+				     &pll_nb->clk_nb);
+}
+
 int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
 		    const struct sunxi_ccu_desc *desc)
 {
diff --git a/drivers/clk/sunxi-ng/ccu_common.h b/drivers/clk/sunxi-ng/ccu_common.h
index 73d81dc58fc5..d6fdd7a789aa 100644
--- a/drivers/clk/sunxi-ng/ccu_common.h
+++ b/drivers/clk/sunxi-ng/ccu_common.h
@@ -83,6 +83,18 @@ struct sunxi_ccu_desc {
 
 void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock);
 
+struct ccu_pll_nb {
+	struct notifier_block	clk_nb;
+	struct ccu_common	*common;
+
+	u32	enable;
+	u32	lock;
+};
+
+#define to_ccu_pll_nb(_nb) container_of(_nb, struct ccu_pll_nb, clk_nb)
+
+int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb);
+
 int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
 		    const struct sunxi_ccu_desc *desc);
 
-- 
cgit v1.2.3


From 372fa10172a2f9e1bfbc70778449628e82b72341 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Thu, 13 Apr 2017 10:13:53 +0800
Subject: clk: sunxi-ng: a33: gate then ungate PLL CPU clk after rate change

This patch utilizes the new PLL clk notifier to gate then ungate the
PLL CPU clock after rate changes. This should mitigate the system
hangs observed after the introduction of cpufreq for the A33.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Tested-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun8i-a33.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
index a7b3c08ed0e2..2c69b631967a 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
@@ -752,6 +752,13 @@ static const struct sunxi_ccu_desc sun8i_a33_ccu_desc = {
 	.num_resets	= ARRAY_SIZE(sun8i_a33_ccu_resets),
 };
 
+static struct ccu_pll_nb sun8i_a33_pll_cpu_nb = {
+	.common	= &pll_cpux_clk.common,
+	/* copy from pll_cpux_clk */
+	.enable	= BIT(31),
+	.lock	= BIT(28),
+};
+
 static struct ccu_mux_nb sun8i_a33_cpu_nb = {
 	.common		= &cpux_clk.common,
 	.cm		= &cpux_clk.mux,
@@ -783,6 +790,10 @@ static void __init sun8i_a33_ccu_setup(struct device_node *node)
 
 	sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc);
 
+	/* Gate then ungate PLL CPU after any rate changes */
+	ccu_pll_notifier_register(&sun8i_a33_pll_cpu_nb);
+
+	/* Reparent CPU during PLL CPU rate changes */
 	ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
 				  &sun8i_a33_cpu_nb);
 }
-- 
cgit v1.2.3


From 9e478066eae41211c92a8f63cc69aafc391bd6ab Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 13 Apr 2017 14:23:49 +0200
Subject: mac80211: fix MU-MIMO follow-MAC mode

There are two bugs in the follow-MAC code:
 * it treats the radiotap header as the 802.11 header
   (therefore it can't possibly work)
 * it doesn't verify that the skb data it accesses is actually
   present in the header, which is mitigated by the first point

Fix this by moving all of this out into a separate function.
This function copies the data it needs using skb_copy_bits()
to make sure it can be accessed if it's paged, and offsets
that by the possibly present vendor radiotap header.

This also makes all those conditions more readable.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 65 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e48724a6725e..4b12c70c85f0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -208,6 +208,51 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
 	return len;
 }
 
+static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
+					 struct sk_buff *skb,
+					 int rtap_vendor_space)
+{
+	struct {
+		struct ieee80211_hdr_3addr hdr;
+		u8 category;
+		u8 action_code;
+	} __packed action;
+
+	if (!sdata)
+		return;
+
+	BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1);
+
+	if (skb->len < rtap_vendor_space + sizeof(action) +
+		       VHT_MUMIMO_GROUPS_DATA_LEN)
+		return;
+
+	if (!is_valid_ether_addr(sdata->u.mntr.mu_follow_addr))
+		return;
+
+	skb_copy_bits(skb, rtap_vendor_space, &action, sizeof(action));
+
+	if (!ieee80211_is_action(action.hdr.frame_control))
+		return;
+
+	if (action.category != WLAN_CATEGORY_VHT)
+		return;
+
+	if (action.action_code != WLAN_VHT_ACTION_GROUPID_MGMT)
+		return;
+
+	if (!ether_addr_equal(action.hdr.addr1, sdata->u.mntr.mu_follow_addr))
+		return;
+
+	skb = skb_copy(skb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
+	skb_queue_tail(&sdata->skb_queue, skb);
+	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+}
+
 /*
  * ieee80211_add_rx_radiotap_header - add radiotap header
  *
@@ -515,7 +560,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	struct net_device *prev_dev = NULL;
 	int present_fcs_len = 0;
 	unsigned int rtap_vendor_space = 0;
-	struct ieee80211_mgmt *mgmt;
 	struct ieee80211_sub_if_data *monitor_sdata =
 		rcu_dereference(local->monitor_sdata);
 
@@ -553,6 +597,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		return remove_monitor_info(local, origskb, rtap_vendor_space);
 	}
 
+	ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space);
+
 	/* room for the radiotap header based on driver features */
 	rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, origskb);
 	needed_headroom = rt_hdrlen - rtap_vendor_space;
@@ -618,23 +664,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		ieee80211_rx_stats(sdata->dev, skb->len);
 	}
 
-	mgmt = (void *)skb->data;
-	if (monitor_sdata &&
-	    skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN &&
-	    ieee80211_is_action(mgmt->frame_control) &&
-	    mgmt->u.action.category == WLAN_CATEGORY_VHT &&
-	    mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT &&
-	    is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) &&
-	    ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) {
-		struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC);
-
-		if (mu_skb) {
-			mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
-			skb_queue_tail(&monitor_sdata->skb_queue, mu_skb);
-			ieee80211_queue_work(&local->hw, &monitor_sdata->work);
-		}
-	}
-
 	if (prev_dev) {
 		skb->dev = prev_dev;
 		netif_receive_skb(skb);
-- 
cgit v1.2.3


From 05b7278d510a6a1c23624caee5b0a9667a72e745 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@umich.edu>
Date: Thu, 23 Mar 2017 14:36:20 -0400
Subject: nfsd: fix oops on unsupported operation

I'm hitting the BUG in nfsd4_max_reply() at fs/nfsd/nfs4proc.c:2495 when
client sends an operation the server doesn't support.

in nfsd4_max_reply() it checks for NULL rsize_bop but a non-supported
operation wouldn't have that set.

Cc: Kinglong Mee <kinglongmee@gmail.com>
Fixes: 2282cd2c05e2 "NFSD: Get response size before operation..."
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index cbeeda1e94a2..d86031b6ad79 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2489,7 +2489,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
 
 int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-	if (op->opnum == OP_ILLEGAL)
+	if (op->opnum == OP_ILLEGAL || op->status == nfserr_notsupp)
 		return op_encode_hdr_size * sizeof(__be32);
 
 	BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
-- 
cgit v1.2.3


From 62a6cfddcc0a5313e7da3e8311ba16226fe0ac10 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Sun, 16 Apr 2017 20:37:24 +0100
Subject: cifs: Do not send echoes before Negotiate is complete

commit 4fcd1813e640 ("Fix reconnect to not defer smb3 session reconnect
long after socket reconnect") added support for Negotiate requests to
be initiated by echo calls.

To avoid delays in calling echo after a reconnect, I added the patch
introduced by the commit b8c600120fc8 ("Call echo service immediately
after socket reconnect").

This has however caused a regression with cifs shares which do not have
support for echo calls to trigger Negotiate requests. On connections
which need to call Negotiation, the echo calls trigger an error which
triggers a reconnect which in turn triggers another echo call. This
results in a loop which is only broken when an operation is performed on
the cifs share. For an idle share, it can DOS a server.

The patch uses the smb_operation can_echo() for cifs so that it is
called only if connection has been already been setup.

kernel bz: 194531

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Tested-by: Jonathan Liu <net147@gmail.com>
Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb1ops.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index cc93ba4da9b5..27bc360c7ffd 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1015,6 +1015,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile)
 	return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
 }
 
+static bool
+cifs_can_echo(struct TCP_Server_Info *server)
+{
+	if (server->tcpStatus == CifsGood)
+		return true;
+
+	return false;
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -1049,6 +1058,7 @@ struct smb_version_operations smb1_operations = {
 	.get_dfs_refer = CIFSGetDFSRefer,
 	.qfs_tcon = cifs_qfs_tcon,
 	.is_path_accessible = cifs_is_path_accessible,
+	.can_echo = cifs_can_echo,
 	.query_path_info = cifs_query_path_info,
 	.query_file_info = cifs_query_file_info,
 	.get_srv_inum = cifs_get_srv_inum,
-- 
cgit v1.2.3


From 096f41d3a8fcbb8dde7f71379b1ca85fe213eded Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 13 Apr 2017 18:35:59 +0800
Subject: af_key: Fix sadb_x_ipsecrequest parsing

The parsing of sadb_x_ipsecrequest is broken in a number of ways.
First of all we're not verifying sadb_x_ipsecrequest_len.  This
is needed when the structure carries addresses at the end.  Worse
we don't even look at the length when we parse those optional
addresses.

The migration code had similar parsing code that's better but
it also has some deficiencies.  The length is overcounted first
of all as it includes the header itself.  It also fails to check
the length before dereferencing the sa_family field.

This patch fixes those problems in parse_sockaddr_pair and then
uses it in parse_ipsecrequest.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1b0ea80133f1..be8cecc65002 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -66,6 +66,10 @@ struct pfkey_sock {
 	struct mutex dump_lock;
 };
 
+static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
+			       xfrm_address_t *saddr, xfrm_address_t *daddr,
+			       u16 *family);
+
 static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
 {
 	return (struct pfkey_sock *)sk;
@@ -1939,19 +1943,14 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
 
 	/* addresses present only in tunnel mode */
 	if (t->mode == XFRM_MODE_TUNNEL) {
-		u8 *sa = (u8 *) (rq + 1);
-		int family, socklen;
+		int err;
 
-		family = pfkey_sockaddr_extract((struct sockaddr *)sa,
-						&t->saddr);
-		if (!family)
-			return -EINVAL;
-
-		socklen = pfkey_sockaddr_len(family);
-		if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen),
-					   &t->id.daddr) != family)
-			return -EINVAL;
-		t->encap_family = family;
+		err = parse_sockaddr_pair(
+			(struct sockaddr *)(rq + 1),
+			rq->sadb_x_ipsecrequest_len - sizeof(*rq),
+			&t->saddr, &t->id.daddr, &t->encap_family);
+		if (err)
+			return err;
 	} else
 		t->encap_family = xp->family;
 
@@ -1971,7 +1970,11 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
 	if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy))
 		return -EINVAL;
 
-	while (len >= sizeof(struct sadb_x_ipsecrequest)) {
+	while (len >= sizeof(*rq)) {
+		if (len < rq->sadb_x_ipsecrequest_len ||
+		    rq->sadb_x_ipsecrequest_len < sizeof(*rq))
+			return -EINVAL;
+
 		if ((err = parse_ipsecrequest(xp, rq)) < 0)
 			return err;
 		len -= rq->sadb_x_ipsecrequest_len;
@@ -2434,7 +2437,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_NET_KEY_MIGRATE
 static int pfkey_sockaddr_pair_size(sa_family_t family)
 {
 	return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2);
@@ -2446,7 +2448,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
 {
 	int af, socklen;
 
-	if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
+	if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
 		return -EINVAL;
 
 	af = pfkey_sockaddr_extract(sa, saddr);
@@ -2462,6 +2464,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
 	return 0;
 }
 
+#ifdef CONFIG_NET_KEY_MIGRATE
 static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 				    struct xfrm_migrate *m)
 {
@@ -2469,13 +2472,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 	struct sadb_x_ipsecrequest *rq2;
 	int mode;
 
-	if (len <= sizeof(struct sadb_x_ipsecrequest) ||
-	    len < rq1->sadb_x_ipsecrequest_len)
+	if (len < sizeof(*rq1) ||
+	    len < rq1->sadb_x_ipsecrequest_len ||
+	    rq1->sadb_x_ipsecrequest_len < sizeof(*rq1))
 		return -EINVAL;
 
 	/* old endoints */
 	err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1),
-				  rq1->sadb_x_ipsecrequest_len,
+				  rq1->sadb_x_ipsecrequest_len - sizeof(*rq1),
 				  &m->old_saddr, &m->old_daddr,
 				  &m->old_family);
 	if (err)
@@ -2484,13 +2488,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 	rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len);
 	len -= rq1->sadb_x_ipsecrequest_len;
 
-	if (len <= sizeof(struct sadb_x_ipsecrequest) ||
-	    len < rq2->sadb_x_ipsecrequest_len)
+	if (len <= sizeof(*rq2) ||
+	    len < rq2->sadb_x_ipsecrequest_len ||
+	    rq2->sadb_x_ipsecrequest_len < sizeof(*rq2))
 		return -EINVAL;
 
 	/* new endpoints */
 	err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1),
-				  rq2->sadb_x_ipsecrequest_len,
+				  rq2->sadb_x_ipsecrequest_len - sizeof(*rq2),
 				  &m->new_saddr, &m->new_daddr,
 				  &m->new_family);
 	if (err)
-- 
cgit v1.2.3


From 9e1ba4f27f018742a1aa95d11e35106feba08ec1 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 11 Apr 2017 10:38:13 +0530
Subject: powerpc/kprobe: Fix oops when kprobed on 'stdu' instruction

If we set a kprobe on a 'stdu' instruction on powerpc64, we see a kernel
OOPS:

  Bad kernel stack pointer cd93c840 at c000000000009868
  Oops: Bad kernel stack pointer, sig: 6 [#1]
  ...
  GPR00: c000001fcd93cb30 00000000cd93c840 c0000000015c5e00 00000000cd93c840
  ...
  NIP [c000000000009868] resume_kernel+0x2c/0x58
  LR [c000000000006208] program_check_common+0x108/0x180

On a 64-bit system when the user probes on a 'stdu' instruction, the kernel does
not emulate actual store in emulate_step() because it may corrupt the exception
frame. So the kernel does the actual store operation in exception return code
i.e. resume_kernel().

resume_kernel() loads the saved stack pointer from memory using lwz, which only
loads the low 32-bits of the address, causing the kernel crash.

Fix this by loading the 64-bit value instead.

Fixes: be96f63375a1 ("powerpc: Split out instruction analysis part of emulate_step()")
Cc: stable@vger.kernel.org # v3.18+
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
[mpe: Change log massage, add stable tag]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/entry_64.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6432d4bf08c8..767ef6d68c9e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -689,7 +689,7 @@ resume_kernel:
 
 	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
 
-	lwz	r3,GPR1(r1)
+	ld	r3,GPR1(r1)
 	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
 	mr	r4,r1			/* src:  current exception frame */
 	mr	r1,r3			/* Reroute the trampoline frame to r1 */
@@ -703,8 +703,8 @@ resume_kernel:
 	addi	r6,r6,8
 	bdnz	2b
 
-	/* Do real store operation to complete stwu */
-	lwz	r5,GPR1(r1)
+	/* Do real store operation to complete stdu */
+	ld	r5,GPR1(r1)
 	std	r8,0(r5)
 
 	/* Clear _TIF_EMULATE_STACK_STORE flag */
-- 
cgit v1.2.3


From be5c5e843c4afa1c8397cb740b6032bd4142f32d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 18 Apr 2017 14:08:15 +1000
Subject: powerpc/64: Fix HMI exception on LE with CONFIG_RELOCATABLE=y

Prior to commit 2337d207288f ("powerpc/64: CONFIG_RELOCATABLE support for hmi
interrupts"), the branch from hmi_exception_early() to hmi_exception_realmode()
was just a bl hmi_exception_realmode, which the linker would turn into a bl to
the local entry point of hmi_exception_realmode. This was broken when
CONFIG_RELOCATABLE=y because hmi_exception_realmode() is not in the low part of
the kernel text that is copied down to 0x0.

But in fixing that, we added a new bug on little endian kernels. Because the
branch is now a bctrl when CONFIG_RELOCATABLE=y, we branch to the global entry
point of hmi_exception_realmode(). The global entry point must be called with
r12 containing the address of hmi_exception_realmode(), because it uses that
value to calculate the TOC value (r2).

This may manifest as a checkstop, because we take a junk value from r12 which
came from HSRR1, add a small constant to it and then use that as the TOC
pointer. The HSRR1 value will have 0x9 as the top nibble, which puts it above
RAM and somewhere in MMIO space.

Fix it by changing the BRANCH_LINK_TO_FAR() macro to always use r12 to load the
label we're branching to. This means r12 will be setup correctly on LE, fixing
this bug, and r12 is also volatile across function calls on BE so it's a good
choice anyway.

Fixes: 2337d207288f ("powerpc/64: CONFIG_RELOCATABLE support for hmi interrupts")
Reported-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/exception-64s.h | 8 ++++----
 arch/powerpc/kernel/exceptions-64s.S     | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 14752eee3d0c..ed3beadd2cc5 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -236,9 +236,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	mtctr	reg;							\
 	bctr
 
-#define BRANCH_LINK_TO_FAR(reg, label)					\
-	__LOAD_FAR_HANDLER(reg, label);					\
-	mtctr	reg;							\
+#define BRANCH_LINK_TO_FAR(label)					\
+	__LOAD_FAR_HANDLER(r12, label);					\
+	mtctr	r12;							\
 	bctrl
 
 /*
@@ -265,7 +265,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define BRANCH_TO_COMMON(reg, label)					\
 	b	label
 
-#define BRANCH_LINK_TO_FAR(reg, label)					\
+#define BRANCH_LINK_TO_FAR(label)					\
 	bl	label
 
 #define BRANCH_TO_KVM(reg, label)					\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 857bf7c5b946..6353019966e6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -982,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 	EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
 	EXCEPTION_PROLOG_COMMON_3(0xe60)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode)
+	BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
 	/* Windup the stack. */
 	/* Move original HSRR0 and HSRR1 into the respective regs */
 	ld	r9,_MSR(r1)
-- 
cgit v1.2.3


From ee8f844e3c5a73b999edf733df1c529d6503ec2f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 18 Apr 2017 15:31:07 +0100
Subject: KEYS: Disallow keyrings beginning with '.' to be joined as session
 keyrings

This fixes CVE-2016-9604.

Keyrings whose name begin with a '.' are special internal keyrings and so
userspace isn't allowed to create keyrings by this name to prevent
shadowing.  However, the patch that added the guard didn't fix
KEYCTL_JOIN_SESSION_KEYRING.  Not only can that create dot-named keyrings,
it can also subscribe to them as a session keyring if they grant SEARCH
permission to the user.

This, for example, allows a root process to set .builtin_trusted_keys as
its session keyring, at which point it has full access because now the
possessor permissions are added.  This permits root to add extra public
keys, thereby bypassing module verification.

This also affects kexec and IMA.

This can be tested by (as root):

	keyctl session .builtin_trusted_keys
	keyctl add user a a @s
	keyctl list @s

which on my test box gives me:

	2 keys in keyring:
	180010936: ---lswrv     0     0 asymmetric: Build time autogenerated kernel key: ae3d4a31b82daa8e1a75b49dc2bba949fd992a05
	801382539: --alswrv     0     0 user: a


Fix this by rejecting names beginning with a '.' in the keyctl.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
cc: linux-ima-devel@lists.sourceforge.net
cc: stable@vger.kernel.org
---
 security/keys/keyctl.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 52c34532c785..ab082a2e8fdd 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -273,7 +273,8 @@ error:
  * Create and join an anonymous session keyring or join a named session
  * keyring, creating it if necessary.  A named session keyring must have Search
  * permission for it to be joined.  Session keyrings without this permit will
- * be skipped over.
+ * be skipped over.  It is not permitted for userspace to create or join
+ * keyrings whose name begin with a dot.
  *
  * If successful, the ID of the joined session keyring will be returned.
  */
@@ -290,12 +291,16 @@ long keyctl_join_session_keyring(const char __user *_name)
 			ret = PTR_ERR(name);
 			goto error;
 		}
+
+		ret = -EPERM;
+		if (name[0] == '.')
+			goto error_name;
 	}
 
 	/* join the session */
 	ret = join_session_keyring(name);
+error_name:
 	kfree(name);
-
 error:
 	return ret;
 }
-- 
cgit v1.2.3


From c1644fe041ebaf6519f6809146a77c3ead9193af Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 18 Apr 2017 15:31:08 +0100
Subject: KEYS: Change the name of the dead type to ".dead" to prevent user
 access

This fixes CVE-2017-6951.

Userspace should not be able to do things with the "dead" key type as it
doesn't have some of the helper functions set upon it that the kernel
needs.  Attempting to use it may cause the kernel to crash.

Fix this by changing the name of the type to ".dead" so that it's rejected
up front on userspace syscalls by key_get_type_from_user().

Though this doesn't seem to affect recent kernels, it does affect older
ones, certainly those prior to:

	commit c06cfb08b88dfbe13be44a69ae2fdc3a7c902d81
	Author: David Howells <dhowells@redhat.com>
	Date:   Tue Sep 16 17:36:06 2014 +0100
	KEYS: Remove key_type::match in favour of overriding default by match_preparse

which went in before 3.18-rc1.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: stable@vger.kernel.org
---
 security/keys/gc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/keys/gc.c b/security/keys/gc.c
index addf060399e0..9cb4fe4478a1 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -46,7 +46,7 @@ static unsigned long key_gc_flags;
  * immediately unlinked.
  */
 struct key_type key_type_dead = {
-	.name = "dead",
+	.name = ".dead",
 };
 
 /*
-- 
cgit v1.2.3


From c9f838d104fed6f2f61d68164712e3204bf5271b Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Apr 2017 15:31:09 +0100
Subject: KEYS: fix keyctl_set_reqkey_keyring() to not leak thread keyrings

This fixes CVE-2017-7472.

Running the following program as an unprivileged user exhausts kernel
memory by leaking thread keyrings:

	#include <keyutils.h>

	int main()
	{
		for (;;)
			keyctl_set_reqkey_keyring(KEY_REQKEY_DEFL_THREAD_KEYRING);
	}

Fix it by only creating a new thread keyring if there wasn't one before.
To make things more consistent, make install_thread_keyring_to_cred()
and install_process_keyring_to_cred() both return 0 if the corresponding
keyring is already present.

Fixes: d84f4f992cbd ("CRED: Inaugurate COW credentials")
Cc: stable@vger.kernel.org # 2.6.29+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 security/keys/keyctl.c       | 11 ++++-------
 security/keys/process_keys.c | 44 +++++++++++++++++++++++++++-----------------
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index ab082a2e8fdd..4ad3212adebe 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1258,8 +1258,8 @@ error:
  * Read or set the default keyring in which request_key() will cache keys and
  * return the old setting.
  *
- * If a process keyring is specified then this will be created if it doesn't
- * yet exist.  The old setting will be returned if successful.
+ * If a thread or process keyring is specified then it will be created if it
+ * doesn't yet exist.  The old setting will be returned if successful.
  */
 long keyctl_set_reqkey_keyring(int reqkey_defl)
 {
@@ -1284,11 +1284,8 @@ long keyctl_set_reqkey_keyring(int reqkey_defl)
 
 	case KEY_REQKEY_DEFL_PROCESS_KEYRING:
 		ret = install_process_keyring_to_cred(new);
-		if (ret < 0) {
-			if (ret != -EEXIST)
-				goto error;
-			ret = 0;
-		}
+		if (ret < 0)
+			goto error;
 		goto set;
 
 	case KEY_REQKEY_DEFL_DEFAULT:
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index b6fdd22205b1..9139b18fc863 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -128,13 +128,18 @@ error:
 }
 
 /*
- * Install a fresh thread keyring directly to new credentials.  This keyring is
- * allowed to overrun the quota.
+ * Install a thread keyring to the given credentials struct if it didn't have
+ * one already.  This is allowed to overrun the quota.
+ *
+ * Return: 0 if a thread keyring is now present; -errno on failure.
  */
 int install_thread_keyring_to_cred(struct cred *new)
 {
 	struct key *keyring;
 
+	if (new->thread_keyring)
+		return 0;
+
 	keyring = keyring_alloc("_tid", new->uid, new->gid, new,
 				KEY_POS_ALL | KEY_USR_VIEW,
 				KEY_ALLOC_QUOTA_OVERRUN,
@@ -147,7 +152,9 @@ int install_thread_keyring_to_cred(struct cred *new)
 }
 
 /*
- * Install a fresh thread keyring, discarding the old one.
+ * Install a thread keyring to the current task if it didn't have one already.
+ *
+ * Return: 0 if a thread keyring is now present; -errno on failure.
  */
 static int install_thread_keyring(void)
 {
@@ -158,8 +165,6 @@ static int install_thread_keyring(void)
 	if (!new)
 		return -ENOMEM;
 
-	BUG_ON(new->thread_keyring);
-
 	ret = install_thread_keyring_to_cred(new);
 	if (ret < 0) {
 		abort_creds(new);
@@ -170,17 +175,17 @@ static int install_thread_keyring(void)
 }
 
 /*
- * Install a process keyring directly to a credentials struct.
+ * Install a process keyring to the given credentials struct if it didn't have
+ * one already.  This is allowed to overrun the quota.
  *
- * Returns -EEXIST if there was already a process keyring, 0 if one installed,
- * and other value on any other error
+ * Return: 0 if a process keyring is now present; -errno on failure.
  */
 int install_process_keyring_to_cred(struct cred *new)
 {
 	struct key *keyring;
 
 	if (new->process_keyring)
-		return -EEXIST;
+		return 0;
 
 	keyring = keyring_alloc("_pid", new->uid, new->gid, new,
 				KEY_POS_ALL | KEY_USR_VIEW,
@@ -194,11 +199,9 @@ int install_process_keyring_to_cred(struct cred *new)
 }
 
 /*
- * Make sure a process keyring is installed for the current process.  The
- * existing process keyring is not replaced.
+ * Install a process keyring to the current task if it didn't have one already.
  *
- * Returns 0 if there is a process keyring by the end of this function, some
- * error otherwise.
+ * Return: 0 if a process keyring is now present; -errno on failure.
  */
 static int install_process_keyring(void)
 {
@@ -212,14 +215,18 @@ static int install_process_keyring(void)
 	ret = install_process_keyring_to_cred(new);
 	if (ret < 0) {
 		abort_creds(new);
-		return ret != -EEXIST ? ret : 0;
+		return ret;
 	}
 
 	return commit_creds(new);
 }
 
 /*
- * Install a session keyring directly to a credentials struct.
+ * Install the given keyring as the session keyring of the given credentials
+ * struct, replacing the existing one if any.  If the given keyring is NULL,
+ * then install a new anonymous session keyring.
+ *
+ * Return: 0 on success; -errno on failure.
  */
 int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 {
@@ -254,8 +261,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 }
 
 /*
- * Install a session keyring, discarding the old one.  If a keyring is not
- * supplied, an empty one is invented.
+ * Install the given keyring as the session keyring of the current task,
+ * replacing the existing one if any.  If the given keyring is NULL, then
+ * install a new anonymous session keyring.
+ *
+ * Return: 0 on success; -errno on failure.
  */
 static int install_session_keyring(struct key *keyring)
 {
-- 
cgit v1.2.3


From 5ef1ecf060f28ecef313b5723f1fd39bf5a35f56 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 29 Mar 2017 20:54:37 +0200
Subject: mmc: sdio: fix alignment issue in struct sdio_func

Certain 64-bit systems (e.g. Amlogic Meson GX) require buffers to be
used for DMA to be 8-byte-aligned. struct sdio_func has an embedded
small DMA buffer not meeting this requirement.
When testing switching to descriptor chain mode in meson-gx driver
SDIO is broken therefore. Fix this by allocating the small DMA buffer
separately as kmalloc ensures that the returned memory area is
properly aligned for every basic data type.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Tested-by: Helmut Klein <hgkr.klein@gmail.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/sdio_bus.c   | 12 +++++++++++-
 include/linux/mmc/sdio_func.h |  2 +-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index e992a7f8a16f..2b32b88949ba 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -267,7 +267,7 @@ static void sdio_release_func(struct device *dev)
 	sdio_free_func_cis(func);
 
 	kfree(func->info);
-
+	kfree(func->tmpbuf);
 	kfree(func);
 }
 
@@ -282,6 +282,16 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card)
 	if (!func)
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * allocate buffer separately to make sure it's properly aligned for
+	 * DMA usage (incl. 64 bit DMA)
+	 */
+	func->tmpbuf = kmalloc(4, GFP_KERNEL);
+	if (!func->tmpbuf) {
+		kfree(func);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	func->card = card;
 
 	device_initialize(&func->dev);
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index aab032a6ae61..97ca105347a6 100644
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -53,7 +53,7 @@ struct sdio_func {
 	unsigned int		state;		/* function state */
 #define SDIO_STATE_PRESENT	(1<<0)		/* present in sysfs */
 
-	u8			tmpbuf[4];	/* DMA:able scratch buffer */
+	u8			*tmpbuf;	/* DMA:able scratch buffer */
 
 	unsigned		num_info;	/* number of info strings */
 	const char		**info;		/* info strings */
-- 
cgit v1.2.3


From 704de489e0e3640a2ee2d0daf173e9f7375582ba Mon Sep 17 00:00:00 2001
From: Thorsten Leemhuis <linux@leemhuis.info>
Date: Tue, 18 Apr 2017 11:14:28 -0700
Subject: Input: elantech - add Fujitsu Lifebook E547 to force crc_enabled

Temporary got a Lifebook E547 into my hands and noticed the touchpad
only works after running:

	echo "1" > /sys/devices/platform/i8042/serio2/crc_enabled

Add it to the list of machines that need this workaround.

Cc: stable@vger.kernel.org
Signed-off-by: Thorsten Leemhuis <linux@leemhuis.info>
Reviewed-by: Ulrik De Bie <ulrik.debie-os@e2big.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elantech.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index efc8ec342351..e73d968023f7 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1118,6 +1118,7 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse,
  * Asus UX32VD             0x361f02        00, 15, 0e      clickpad
  * Avatar AVIU-145A2       0x361f00        ?               clickpad
  * Fujitsu LIFEBOOK E544   0x470f00        d0, 12, 09      2 hw buttons
+ * Fujitsu LIFEBOOK E547   0x470f00        50, 12, 09      2 hw buttons
  * Fujitsu LIFEBOOK E554   0x570f01        40, 14, 0c      2 hw buttons
  * Fujitsu T725            0x470f01        05, 12, 09      2 hw buttons
  * Fujitsu H730            0x570f00        c0, 14, 0c      3 hw buttons (**)
@@ -1523,6 +1524,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E544"),
 		},
 	},
+	{
+		/* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E547"),
+		},
+	},
 	{
 		/* Fujitsu LIFEBOOK E554  does not work with crc_enabled == 0 */
 		.matches = {
-- 
cgit v1.2.3


From a6db2c86033bc41329770e90c20d4f1fec3824e4 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 11 Apr 2017 15:55:43 -0700
Subject: mmc: dw_mmc: Don't allow Runtime PM for SDIO cards

According to the SDIO standard interrupts are normally signalled in a
very complicated way.  They require the card clock to be running and
require the controller to be paying close attention to the signals
coming from the card.  This simply can't happen with the clock stopped
or with the controller in a low power mode.

To that end, we'll disable runtime_pm when we detect that an SDIO card
was inserted.  This is much like with what we do with the special
"SDMMC_CLKEN_LOW_PWR" bit that dw_mmc supports.

NOTE: we specifically do this Runtime PM disabling at card init time
rather than in the enable_sdio_irq() callback.  This is _different_
than how SDHCI does it.  Why do we do it differently?

- Unlike SDHCI, dw_mmc uses the standard sdio_irq code in Linux (AKA
  dw_mmc doesn't set MMC_CAP2_SDIO_IRQ_NOTHREAD).
- Because we use the standard sdio_irq code:
  - We see a constant stream of enable_sdio_irq(0) and
    enable_sdio_irq(1) calls.  This is because the standard code
    disables interrupts while processing and re-enables them after.
  - While interrupts are disabled, there's technically a period where
    we could get runtime disabled while processing interrupts.
  - If we are runtime disabled while processing interrupts, we'll
    reset the controller at resume time (see dw_mci_runtime_resume),
    which seems like a terrible idea because we could possibly have
    another interrupt pending.

To fix the above isues we'd want to put something in the standard
sdio_irq code that makes sure to call pm_runtime get/put when
interrupts are being actively being processed.  That's possible to do,
but it seems like a more complicated mechanism when we really just
want the runtime pm disabled always for SDIO cards given that all the
other bits needed to get Runtime PM vs. SDIO just aren't there.

NOTE: at some point in time someone might come up with a fancy way to
do SDIO interrupts and still allow (some) amount of runtime PM.
Technically we could turn off the card clock if we used an alternate
way of signaling SDIO interrupts (and out of band interrupt is one way
to do this).  We probably wouldn't actually want to fully runtime
suspend in this case though--at least not with the current
dw_mci_runtime_resume() which basically fully resets the controller at
resume time.

Fixes: e9ed8835e990 ("mmc: dw_mmc: add runtime PM callback")
Cc: <stable@vger.kernel.org>
Reported-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index a9ac0b457313..8718432751c5 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -22,6 +22,7 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
@@ -1621,10 +1622,16 @@ static void dw_mci_init_card(struct mmc_host *mmc, struct mmc_card *card)
 
 		if (card->type == MMC_TYPE_SDIO ||
 		    card->type == MMC_TYPE_SD_COMBO) {
-			set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			if (!test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) {
+				pm_runtime_get_noresume(mmc->parent);
+				set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			}
 			clk_en_a = clk_en_a_old & ~clken_low_pwr;
 		} else {
-			clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			if (test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) {
+				pm_runtime_put_noidle(mmc->parent);
+				clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			}
 			clk_en_a = clk_en_a_old | clken_low_pwr;
 		}
 
-- 
cgit v1.2.3


From 286f3f478796fb4f9e003e9f7d649f3c33f08d2f Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Thu, 13 Apr 2017 08:39:49 -0700
Subject: HID: wacom: Treat HID_DG_TOOLSERIALNUMBER as unsigned

Because HID_DG_TOOLSERIALNUMBER doesn't first cast the value recieved from HID
to an unsigned type, sign-extension rules can cause the value of
wacom_wac->serial[0] to inadvertently wind up with all 32 of its highest bits
set if the highest bit of "value" was set.

This can cause problems for Tablet PC devices which use AES sensors and the
xf86-input-wacom userspace driver. It is not uncommon for AES sensors to send a
serial number of '0' while the pen is entering or leaving proximity. The
xf86-input-wacom driver ignores events with a serial number of '0' since it
cannot match them up to an in-use tool.  To ensure the xf86-input-wacom driver
does not ignore the final out-of-proximity event, the kernel does not send
MSC_SERIAL events when the value of wacom_wac->serial[0] is '0'. If the highest
bit of HID_DG_TOOLSERIALNUMBER is set by an in-prox pen which later leaves
proximity and sends a '0' for HID_DG_TOOLSERIALNUMBER, then only the lowest 32
bits of wacom_wac->serial[0] are actually cleared, causing the kernel to send
an MSC_SERIAL event. Since the 'input_event' function takes an 'int' as
argument, only those lowest (now-cleared) 32 bits of wacom_wac->serial[0] are
sent to userspace, causing xf86-input-wacom to ignore the event. If the event
was the final out-of-prox event, then xf86-input-wacom may remain in a state
where it believes the pen is in proximity and refuses to allow other devices
under its control (e.g. the touchscreen) to move the cursor.

It should be noted that EMR devices and devices which use both the
HID_DG_TOOLSERIALNUMBER and WACOM_HID_WD_SERIALHI usages (in that order) would
be immune to this issue. It appears only AES devices are affected.

Fixes: f85c9dc678a ("HID: wacom: generic: Support tool ID and additional tool types")
Cc: stable@vger.kernel.org
Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 94250c293be2..603f7fcd8df6 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2006,7 +2006,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
 		return;
 	case HID_DG_TOOLSERIALNUMBER:
 		wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL);
-		wacom_wac->serial[0] |= value;
+		wacom_wac->serial[0] |= (__u32)value;
 		return;
 	case WACOM_HID_WD_SENSE:
 		wacom_wac->hid_data.sense_state = value;
-- 
cgit v1.2.3


From df62db5be2e5f070ecd1a5ece5945b590ee112e0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 19 Apr 2017 12:07:08 -0400
Subject: tracing: Allocate the snapshot buffer before enabling probe

Currently the snapshot trigger enables the probe and then allocates the
snapshot. If the probe triggers before the allocation, it could cause the
snapshot to fail and turn tracing off. It's best to allocate the snapshot
buffer first, and then enable the trigger. If something goes wrong in the
enabling of the trigger, the snapshot buffer is still allocated, but it can
also be freed by the user by writting zero into the snapshot buffer file.

Also add a check of the return status of alloc_snapshot().

Cc: stable@vger.kernel.org
Fixes: 77fd5c15e3 ("tracing: Add snapshot trigger to function probes")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d484452ae648..0ad75e9698f6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6733,11 +6733,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
 		return ret;
 
  out_reg:
-	ret = register_ftrace_function_probe(glob, ops, count);
+	ret = alloc_snapshot(&global_trace);
+	if (ret < 0)
+		goto out;
 
-	if (ret >= 0)
-		alloc_snapshot(&global_trace);
+	ret = register_ftrace_function_probe(glob, ops, count);
 
+ out:
 	return ret < 0 ? ret : 0;
 }
 
-- 
cgit v1.2.3


From 892c7788c72480e8282b8aafe0783a244cc9f47f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 4 Apr 2017 12:54:35 +0200
Subject: backlight: pwm_bl: Fix GPIO out for unimplemented .get_direction()

Commit 7613c922315e308a ("backlight: pwm_bl: Move the checks for initial
power state to a separate function") not just moved some code, but made
slight changes in semantics.

If a gpiochip doesn't implement the optional .get_direction() callback,
gpiod_get_direction always returns -EINVAL, which is never equal to
GPIOF_DIR_IN, leading to the GPIO not being configured for output.

To avoid this, invert the test and check for not GPIOF_DIR_OUT instead,
like the original code did.

This restores the display on r8a7740/armadillo.

Fixes: 7613c922315e308a ("backlight: pwm_bl: Move the checks for initial power state to a separate function")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 drivers/video/backlight/pwm_bl.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index d7efcb632f7d..002f1ce22bd0 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -297,14 +297,15 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	}
 
 	/*
-	 * If the GPIO is configured as input, change the direction to output
-	 * and set the GPIO as active.
+	 * If the GPIO is not known to be already configured as output, that
+	 * is, if gpiod_get_direction returns either GPIOF_DIR_IN or -EINVAL,
+	 * change the direction to output and set the GPIO as active.
 	 * Do not force the GPIO to active when it was already output as it
 	 * could cause backlight flickering or we would enable the backlight too
 	 * early. Leave the decision of the initial backlight state for later.
 	 */
 	if (pb->enable_gpio &&
-	    gpiod_get_direction(pb->enable_gpio) == GPIOF_DIR_IN)
+	    gpiod_get_direction(pb->enable_gpio) != GPIOF_DIR_OUT)
 		gpiod_direction_output(pb->enable_gpio, 1);
 
 	pb->power_supply = devm_regulator_get(&pdev->dev, "power");
-- 
cgit v1.2.3


From 3a5088c8c11ac9d56f7e90acaaae9e0e98c1ff94 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sat, 15 Apr 2017 20:38:22 +0800
Subject: block: respect BLK_MQ_F_NO_SCHED

If one driver claims that it doesn't support io scheduler via
BLK_MQ_F_NO_SCHED, we should not allow to change and show the
availabe io schedulers.

This patch adds check to enhance this behaviour.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/elevator.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index dbeecf7be719..4d9084a14c10 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1098,12 +1098,20 @@ int elevator_change(struct request_queue *q, const char *name)
 }
 EXPORT_SYMBOL(elevator_change);
 
+static inline bool elv_support_iosched(struct request_queue *q)
+{
+	if (q->mq_ops && q->tag_set && (q->tag_set->flags &
+				BLK_MQ_F_NO_SCHED))
+		return false;
+	return true;
+}
+
 ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 			  size_t count)
 {
 	int ret;
 
-	if (!(q->mq_ops || q->request_fn))
+	if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q))
 		return count;
 
 	ret = __elevator_change(q, name);
@@ -1135,7 +1143,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
 			continue;
 		}
-		if (__e->uses_mq && q->mq_ops)
+		if (__e->uses_mq && q->mq_ops && elv_support_iosched(q))
 			len += sprintf(name+len, "%s ", __e->elevator_name);
 		else if (!__e->uses_mq && !q->mq_ops)
 			len += sprintf(name+len, "%s ", __e->elevator_name);
-- 
cgit v1.2.3


From 4981d04dd8f1ab19e2cce008da556d7f099b6e68 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sat, 15 Apr 2017 20:38:23 +0800
Subject: mtip32xx: pass BLK_MQ_F_NO_SCHED

The recent introduced MQ IO scheduler breaks mtip32xx in the
following way.

mtip32xx use the 'request_index' passed to .init_request() as
hardware tag index for initializing hardware queue, and it
actually require that rq->tag is always same with 'request_index'
passed to .init_request(). Current blk-mq IO scheduler can't
guarantee this point, so this patch passes BLK_MQ_F_NO_SCHED
and at least make mtip32xx working.

This patch fixes the following strange hardware failure. The
issue can be triggered easily when doing I/O with mq-deadline
enabled.

[  186.972578] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 32993
[  186.972578] {1}[Hardware Error]: event severity: fatal
[  186.972579] {1}[Hardware Error]:  Error 0, type: fatal
[  186.972580] {1}[Hardware Error]:   section_type: PCIe error
[  186.972580] {1}[Hardware Error]:   port_type: 0, PCIe end point
[  186.972581] {1}[Hardware Error]:   version: 1.0
[  186.972581] {1}[Hardware Error]:   command: 0x0407, status: 0x0010
[  186.972582] {1}[Hardware Error]:   device_id: 0000:07:00.0
[  186.972582] {1}[Hardware Error]:   slot: 4
[  186.972583] {1}[Hardware Error]:   secondary_bus: 0x00
[  186.972583] {1}[Hardware Error]:   vendor_id: 0x1344, device_id: 0x5150
[  186.972584] {1}[Hardware Error]:   class_code: 008001
[  186.972585] Kernel panic - not syncing: Fatal hardware error!

Reported-by: Jozef Mikovic <jmikovic@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index f96ab717534c..1d1dc11aa5fa 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3969,7 +3969,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 	dd->tags.reserved_tags = 1;
 	dd->tags.cmd_size = sizeof(struct mtip_cmd);
 	dd->tags.numa_node = dd->numa_node;
-	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
+	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED;
 	dd->tags.driver_data = dd;
 	dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS;
 
-- 
cgit v1.2.3


From fe8c470ab87d90e4b5115902dd94eced7e3305c3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Apr 2017 19:47:04 +0200
Subject: ACPI / power: Avoid maybe-uninitialized warning

gcc -O2 cannot always prove that the loop in acpi_power_get_inferred_state()
is enterered at least once, so it assumes that cur_state might not get
initialized:

drivers/acpi/power.c: In function 'acpi_power_get_inferred_state':
drivers/acpi/power.c:222:9: error: 'cur_state' may be used uninitialized in this function [-Werror=maybe-uninitialized]

This sets the variable to zero at the start of the loop, to ensure that
there is well-defined behavior even for an empty list. This gets rid of
the warning.

The warning first showed up when the -Os flag got removed in a bug fix
patch in linux-4.11-rc5.

I would suggest merging this addon patch on top of that bug fix to avoid
introducing a new warning in the stable kernels.

Fixes: 61b79e16c68d (ACPI: Fix incompatibility with mcount-based function graph tracing)
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/power.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index fcd4ce6f78d5..1c2b846c5776 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -200,6 +200,7 @@ static int acpi_power_get_list_state(struct list_head *list, int *state)
 		return -EINVAL;
 
 	/* The state of the list is 'on' IFF all resources are 'on'. */
+	cur_state = 0;
 	list_for_each_entry(entry, list, node) {
 		struct acpi_power_resource *resource = entry->resource;
 		acpi_handle handle = resource->device.handle;
-- 
cgit v1.2.3


From 80d136e138f59de35aafb0440e8ab2f51c40ccd5 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 19 Apr 2017 09:52:46 +0200
Subject: mm: make mm_percpu_wq non freezable

Geert has reported a freeze during PM resume and some additional
debugging has shown that the device_resume worker cannot make a forward
progress because it waits for an event which is stuck waiting in
drain_all_pages:

  INFO: task kworker/u4:0:5 blocked for more than 120 seconds.
        Not tainted 4.11.0-rc7-koelsch-00029-g005882e53d62f25d-dirty #3476
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  kworker/u4:0    D    0     5      2 0x00000000
  Workqueue: events_unbound async_run_entry_fn
    __schedule
    schedule
    schedule_timeout
    wait_for_common
    dpm_wait_for_superior
    device_resume
    async_resume
    async_run_entry_fn
    process_one_work
    worker_thread
    kthread
  [...]
  bash            D    0  1703   1694 0x00000000
    __schedule
    schedule
    schedule_timeout
    wait_for_common
    flush_work
    drain_all_pages
    start_isolate_page_range
    alloc_contig_range
    cma_alloc
    __alloc_from_contiguous
    cma_allocator_alloc
    __dma_alloc
    arm_dma_alloc
    sh_eth_ring_init
    sh_eth_open
    sh_eth_resume
    dpm_run_callback
    device_resume
    dpm_resume
    dpm_resume_end
    suspend_devices_and_enter
    pm_suspend
    state_store
    kernfs_fop_write
    __vfs_write
    vfs_write
    SyS_write
  [...]
  Showing busy workqueues and worker pools:
  [...]
  workqueue mm_percpu_wq: flags=0xc
    pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=0/0
      delayed: drain_local_pages_wq, vmstat_update
    pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=0/0
      delayed: drain_local_pages_wq BAR(1703), vmstat_update

Tetsuo has properly noted that mm_percpu_wq is created as WQ_FREEZABLE
so it is frozen this early during resume so we are effectively
deadlocked.  Fix this by dropping WQ_FREEZABLE when creating
mm_percpu_wq.  We really want to have it operational all the time.

Fixes: ce612879ddc7 ("mm: move pcp and lru-pcp draining into single wq")
Reported-and-tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Debugged-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmstat.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 809025ed97ea..5a4f5c5a31e8 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1768,8 +1768,7 @@ void __init init_mm_internals(void)
 {
 	int ret __maybe_unused;
 
-	mm_percpu_wq = alloc_workqueue("mm_percpu_wq",
-				       WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
 
 #ifdef CONFIG_SMP
 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
-- 
cgit v1.2.3


From 073c516ff73557a8f7315066856c04b50383ac34 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 19 Apr 2017 15:11:00 -0700
Subject: nsfs: mark dentry with DCACHE_RCUACCESS

Andrey reported a use-after-free in __ns_get_path():

  spin_lock include/linux/spinlock.h:299 [inline]
  lockref_get_not_dead+0x19/0x80 lib/lockref.c:179
  __ns_get_path+0x197/0x860 fs/nsfs.c:66
  open_related_ns+0xda/0x200 fs/nsfs.c:143
  sock_ioctl+0x39d/0x440 net/socket.c:1001
  vfs_ioctl fs/ioctl.c:45 [inline]
  do_vfs_ioctl+0x1bf/0x1780 fs/ioctl.c:685
  SYSC_ioctl fs/ioctl.c:700 [inline]
  SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691

We are under rcu read lock protection at that point:

        rcu_read_lock();
        d = atomic_long_read(&ns->stashed);
        if (!d)
                goto slow;
        dentry = (struct dentry *)d;
        if (!lockref_get_not_dead(&dentry->d_lockref))
                goto slow;
        rcu_read_unlock();

but don't use a proper RCU API on the free path, therefore a parallel
__d_free() could free it at the same time.  We need to mark the stashed
dentry with DCACHE_RCUACCESS so that __d_free() will be called after all
readers leave RCU.

Fixes: e149ed2b805f ("take the targets of /proc/*/ns/* symlinks to separate fs")
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nsfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 1656843e87d2..323f492e0822 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -91,6 +91,7 @@ slow:
 		return ERR_PTR(-ENOMEM);
 	}
 	d_instantiate(dentry, inode);
+	dentry->d_flags |= DCACHE_RCUACCESS;
 	dentry->d_fsdata = (void *)ns->ops;
 	d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
 	if (d) {
-- 
cgit v1.2.3


From 78f7a45dac2a2d2002f98a3a95f7979867868d73 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 19 Apr 2017 14:29:46 -0400
Subject: ring-buffer: Have ring_buffer_iter_empty() return true when empty

I noticed that reading the snapshot file when it is empty no longer gives a
status. It suppose to show the status of the snapshot buffer as well as how
to allocate and use it. For example:

 ># cat snapshot
 # tracer: nop
 #
 #
 # * Snapshot is allocated *
 #
 # Snapshot commands:
 # echo 0 > snapshot : Clears and frees snapshot buffer
 # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.
 #                      Takes a snapshot of the main buffer.
 # echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)
 #                      (Doesn't have to be '2' works with any number that
 #                       is not a '0' or '1')

But instead it just showed an empty buffer:

 ># cat snapshot
 # tracer: nop
 #
 # entries-in-buffer/entries-written: 0/0   #P:4
 #
 #                              _-----=> irqs-off
 #                             / _----=> need-resched
 #                            | / _---=> hardirq/softirq
 #                            || / _--=> preempt-depth
 #                            ||| /     delay
 #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
 #              | |       |   ||||       |         |

What happened was that it was using the ring_buffer_iter_empty() function to
see if it was empty, and if it was, it showed the status. But that function
was returning false when it was empty. The reason was that the iter header
page was on the reader page, and the reader page was empty, but so was the
buffer itself. The check only tested to see if the iter was on the commit
page, but the commit page was no longer pointing to the reader page, but as
all pages were empty, the buffer is also.

Cc: stable@vger.kernel.org
Fixes: 651e22f2701b ("ring-buffer: Always reset iterator to reader page")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 54e7a90db848..ca47a4fa2986 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3405,11 +3405,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
 int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *reader;
+	struct buffer_page *head_page;
+	struct buffer_page *commit_page;
+	unsigned commit;
 
 	cpu_buffer = iter->cpu_buffer;
 
-	return iter->head_page == cpu_buffer->commit_page &&
-		iter->head == rb_commit_index(cpu_buffer);
+	/* Remember, trace recording is off when iterator is in use */
+	reader = cpu_buffer->reader_page;
+	head_page = cpu_buffer->head_page;
+	commit_page = cpu_buffer->commit_page;
+	commit = rb_page_commit(commit_page);
+
+	return ((iter->head_page == commit_page && iter->head == commit) ||
+		(iter->head_page == reader && commit_page == head_page &&
+		 head_page->read == commit &&
+		 iter->head == rb_page_commit(cpu_buffer->reader_page)));
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
 
-- 
cgit v1.2.3


From 6f107fab8f18228936cd3df88a3bb6050865c2c8 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Wed, 19 Apr 2017 14:47:24 -0700
Subject: HID: wacom: Override incorrect logical maximum contact identifier

It apears that devices designed around Wacom's G11 chipset (e.g. Lenovo
ThinkPad Yoga 260, Lenovo ThinkPad X1 Yoga, Dell XPS 12 9250, Dell Venue
8 Pro 5855, etc.) suffer from a common issue in their HID descriptors.
The logical maximum is not updated for the "Contact Identifier" usage,
leaving it as just "1" despite these devices being capable of tracking
far more touches.

Commit 60a221869803 began ignoring usages with out-of-range values,
causing problems for devices based on this chipset. Touches after
the first will have an out-of-range Contact Identifier, and ignoring
that usage will cause the kernel to incorrectly slot each finger's
events (along with all the knock-on userspace effects that entails).

This commit checks for these buggy descriptors and updates the maximum
where required. Prior chipsets have used "255" as the maximum (and the
G11, at least, doesn't seem to actually use IDs outside the range of
1..CONTACTMAX) so continue using this value.

Cc: stable@vger.kernel.org
Fixes: 60a221869803 ("HID: wacom: generic: add support for touchring")
Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 603f7fcd8df6..c68ac65db7ff 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2176,6 +2176,16 @@ static void wacom_wac_finger_usage_mapping(struct hid_device *hdev,
 		wacom_wac->hid_data.cc_index = field->index;
 		wacom_wac->hid_data.cc_value_index = usage->usage_index;
 		break;
+	case HID_DG_CONTACTID:
+		if ((field->logical_maximum - field->logical_minimum) < touch_max) {
+			/*
+			 * The HID descriptor for G11 sensors leaves logical
+			 * maximum set to '1' despite it being a multitouch
+			 * device. Override to a sensible number.
+			 */
+			field->logical_maximum = 255;
+		}
+		break;
 	}
 }
 
-- 
cgit v1.2.3


From 9f327845358d3dd0d8a5a7a5436b0aa5c432e757 Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen@nxp.com>
Date: Wed, 19 Apr 2017 10:53:51 +0800
Subject: mmc: sdhci-esdhc-imx: increase the pad I/O drive strength for DDR50
 card

Currently for DDR50 card, it need tuning in default. We meet tuning fail
issue for DDR50 card and some data CRC error when DDR50 sd card works.

This is because the default pad I/O drive strength can't make sure DDR50
card work stable. So increase the pad I/O drive strength for DDR50 card,
and use pins_100mhz.

This fixes DDR50 card support for IMX since DDR50 tuning was enabled from
commit 9faac7b95ea4 ("mmc: sdhci: enable tuning for DDR50")

Tested-and-reported-by: Tim Harvey <tharvey@gateworks.com>
Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Cc: stable@vger.kernel.org # v4.4+
Acked-by: Dong Aisheng <aisheng.dong@nxp.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 7123ef96ed18..445fc47dc3e7 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -830,6 +830,7 @@ static int esdhc_change_pinstate(struct sdhci_host *host,
 
 	switch (uhs) {
 	case MMC_TIMING_UHS_SDR50:
+	case MMC_TIMING_UHS_DDR50:
 		pinctrl = imx_data->pins_100mhz;
 		break;
 	case MMC_TIMING_UHS_SDR104:
-- 
cgit v1.2.3


From 3018e947d7fd536d57e2b550c33e456d921fff8c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 20 Apr 2017 21:32:16 +0200
Subject: mac80211: reject ToDS broadcast data frames

AP/AP_VLAN modes don't accept any real 802.11 multicast data
frames, but since they do need to accept broadcast management
frames the same is currently permitted for data frames. This
opens a security problem because such frames would be decrypted
with the GTK, and could even contain unicast L3 frames.

Since the spec says that ToDS frames must always have the BSSID
as the RA (addr1), reject any other data frames.

The problem was originally reported in "Predicting, Decrypting,
and Abusing WPA2/802.11 Group Keys" at usenix
https://www.usenix.org/conference/usenixsecurity16/technical-sessions/presentation/vanhoef
and brought to my attention by Jouni.

Cc: stable@vger.kernel.org
Reported-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
--
Dave, I didn't want to send you a new pull request for a single
commit yet again - can you apply this one patch as is?
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4b12c70c85f0..4d7543d1a62c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3639,6 +3639,27 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
 			    !ether_addr_equal(bssid, hdr->addr1))
 				return false;
 		}
+
+		/*
+		 * 802.11-2016 Table 9-26 says that for data frames, A1 must be
+		 * the BSSID - we've checked that already but may have accepted
+		 * the wildcard (ff:ff:ff:ff:ff:ff).
+		 *
+		 * It also says:
+		 *	The BSSID of the Data frame is determined as follows:
+		 *	a) If the STA is contained within an AP or is associated
+		 *	   with an AP, the BSSID is the address currently in use
+		 *	   by the STA contained in the AP.
+		 *
+		 * So we should not accept data frames with an address that's
+		 * multicast.
+		 *
+		 * Accepting it also opens a security problem because stations
+		 * could encrypt it with the GTK and inject traffic that way.
+		 */
+		if (ieee80211_is_data(hdr->frame_control) && multicast)
+			return false;
+
 		return true;
 	case NL80211_IFTYPE_WDS:
 		if (bssid || !ieee80211_is_data(hdr->frame_control))
-- 
cgit v1.2.3


From c1f8d0f98c3bc12393821c1bf00d8eaa0bd58bd8 Mon Sep 17 00:00:00 2001
From: Mike Maloney <maloney@google.com>
Date: Tue, 18 Apr 2017 11:14:16 -0400
Subject: selftests/net: Fixes psock_fanout CBPF test case

'psock_fanout' has been failing since commit 4d7b9dc1f36a9 ("tools:
psock_lib: harden socket filter used by psock tests").  That commit
changed the CBPF filter to examine the full ethernet frame, and was
tested on 'psock_tpacket' which uses SOCK_RAW.  But 'psock_fanout' was
also using this same CBPF in two places, for filtering and fanout, on a
SOCK_DGRAM socket.

Change 'psock_fanout' to use SOCK_RAW so that the CBPF program used with
SO_ATTACH_FILTER can examine the entire frame.  Create a new CBPF
program for use with PACKET_FANOUT_DATA which ignores the header, as it
cannot see the ethernet header.

Tested: Ran tools/testing/selftests/net/psock_{fanout,tpacket} 10 times,
and they all passed.

Fixes: 4d7b9dc1f36a9 ("tools: psock_lib: harden socket filter used by psock tests")
Signed-off-by: 'Mike Maloney <maloneykernel@gmail.com>'
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_fanout.c | 22 ++++++++++++++++++++--
 tools/testing/selftests/net/psock_lib.h    | 13 +++----------
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 412459369686..e62bb354820c 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -75,7 +75,7 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
 {
 	int fd, val;
 
-	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
+	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
 	if (fd < 0) {
 		perror("socket packet");
 		exit(1);
@@ -95,6 +95,24 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
 	return fd;
 }
 
+static void sock_fanout_set_cbpf(int fd)
+{
+	struct sock_filter bpf_filter[] = {
+		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),	      /* ldb [80] */
+		BPF_STMT(BPF_RET+BPF_A, 0),		      /* ret A */
+	};
+	struct sock_fprog bpf_prog;
+
+	bpf_prog.filter = bpf_filter;
+	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
+		       sizeof(bpf_prog))) {
+		perror("fanout data cbpf");
+		exit(1);
+	}
+}
+
 static void sock_fanout_set_ebpf(int fd)
 {
 	const int len_off = __builtin_offsetof(struct __sk_buff, len);
@@ -270,7 +288,7 @@ static int test_datapath(uint16_t typeflags, int port_off,
 		exit(1);
 	}
 	if (type == PACKET_FANOUT_CBPF)
-		sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
+		sock_fanout_set_cbpf(fds[0]);
 	else if (type == PACKET_FANOUT_EBPF)
 		sock_fanout_set_ebpf(fds[0]);
 
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index a77da88bf946..7d990d6c861b 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -38,7 +38,7 @@
 # define __maybe_unused		__attribute__ ((__unused__))
 #endif
 
-static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
+static __maybe_unused void pair_udp_setfilter(int fd)
 {
 	/* the filter below checks for all of the following conditions that
 	 * are based on the contents of create_payload()
@@ -76,23 +76,16 @@ static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
 	};
 	struct sock_fprog bpf_prog;
 
-	if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA)
-		bpf_filter[5].code = 0x16;   /* RET A			      */
-
 	bpf_prog.filter = bpf_filter;
 	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
-	if (setsockopt(fd, lvl, optnum, &bpf_prog,
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
 		       sizeof(bpf_prog))) {
 		perror("setsockopt SO_ATTACH_FILTER");
 		exit(1);
 	}
 }
 
-static __maybe_unused void pair_udp_setfilter(int fd)
-{
-	sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER);
-}
-
 static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
 {
 	struct sockaddr_in saddr, daddr;
-- 
cgit v1.2.3


From 2f3bb64247b5b083d05ccecad9c2e139bbfdc294 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Tue, 18 Apr 2017 17:59:49 +0200
Subject: ipv6: sr: fix out-of-bounds access in SRH validation

This patch fixes an out-of-bounds access in seg6_validate_srh() when the
trailing data is less than sizeof(struct sr6_tlv).

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index a855eb325b03..5f44ffed2576 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -53,6 +53,9 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
 		struct sr6_tlv *tlv;
 		unsigned int tlv_len;
 
+		if (trailing < sizeof(*tlv))
+			return false;
+
 		tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset);
 		tlv_len = sizeof(*tlv) + tlv->len;
 
-- 
cgit v1.2.3


From 9d386cd9a755c8293e8916264d4d053878a7c9c7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 18 Apr 2017 22:14:26 +0300
Subject: dp83640: don't recieve time stamps twice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch is prompted by a static checker warning about a potential
use after free.  The concern is that netif_rx_ni() can free "skb" and we
call it twice.

When I look at the commit that added this, it looks like some stray
lines were added accidentally.  It doesn't make sense to me that we
would recieve the same data two times.  I asked the author but never
recieved a response.

I can't test this code, but I'm pretty sure my patch is correct.

Fixes: 4b063258ab93 ("dp83640: Delay scheduled work.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Stefan Sørensen <stefan.sorensen@spectralink.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83640.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index e2460a57e4b1..ed0d10f54f26 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1438,8 +1438,6 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,
 		skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;
 		skb_queue_tail(&dp83640->rx_queue, skb);
 		schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT);
-	} else {
-		netif_rx_ni(skb);
 	}
 
 	return true;
-- 
cgit v1.2.3


From 74d209b8350a19d4f70252aa7a4e3d0b865676dd Mon Sep 17 00:00:00 2001
From: Sekhar Nori <nsekhar@ti.com>
Date: Wed, 19 Apr 2017 14:08:24 +0530
Subject: MAINTAINERS: update entry for TI's CPSW driver

Mugunthan V N, who was reviewing TI's CPSW driver patches is
not working for TI anymore and wont be reviewing patches for
that driver.

Drop Mugunthan as the maintiainer for this driver.

Grygorii continues to be a reviewer. Dave Miller applies the
patches directly and adding a maintainer is actually
misleading since get_maintainer.pl script stops suggesting
that Dave Miller be copied.

Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 676c139bc883..63f8abc02948 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12464,7 +12464,6 @@ F:	drivers/clk/ti/
 F:	include/linux/clk/ti.h
 
 TI ETHERNET SWITCH DRIVER (CPSW)
-M:	Mugunthan V N <mugunthanvnm@ti.com>
 R:	Grygorii Strashko <grygorii.strashko@ti.com>
 L:	linux-omap@vger.kernel.org
 L:	netdev@vger.kernel.org
-- 
cgit v1.2.3


From e9156cd26a495a18706e796f02a81fee41ec14f4 Mon Sep 17 00:00:00 2001
From: James Hughes <james.hughes@raspberrypi.org>
Date: Wed, 19 Apr 2017 11:13:40 +0100
Subject: smsc95xx: Use skb_cow_head to deal with cloned skbs

The driver was failing to check that the SKB wasn't cloned
before adding checksum data.
Replace existing handling to extend/copy the header buffer
with skb_cow_head.

Signed-off-by: James Hughes <james.hughes@raspberrypi.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Woojung Huh <Woojung.Huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc95xx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 831aa33d078a..5f19fb0f025d 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -2001,13 +2001,13 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
 	/* We do not advertise SG, so skbs should be already linearized */
 	BUG_ON(skb_shinfo(skb)->nr_frags);
 
-	if (skb_headroom(skb) < overhead) {
-		struct sk_buff *skb2 = skb_copy_expand(skb,
-			overhead, 0, flags);
+	/* Make writable and expand header space by overhead if required */
+	if (skb_cow_head(skb, overhead)) {
+		/* Must deallocate here as returning NULL to indicate error
+		 * means the skb won't be deallocated in the caller.
+		 */
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	if (csum) {
-- 
cgit v1.2.3


From 66367dab30c7040e638e5496a47184cfc8ba39a4 Mon Sep 17 00:00:00 2001
From: "sudarsana.kalluru@cavium.com" <sudarsana.kalluru@cavium.com>
Date: Wed, 19 Apr 2017 03:19:52 -0700
Subject: qed: Fix possible error in populating max_tc field.

Some adapters may not publish the max_tc value. Populate the default
value for max_tc field in case the mfw didn't provide one.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 5bd36a4a8fcd..c24436c911f3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -583,6 +583,13 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn,
 		   p_params->ets_cbs,
 		   p_ets->pri_tc_tbl[0], p_params->max_ets_tc);
 
+	if (p_params->ets_enabled && !p_params->max_ets_tc) {
+		p_params->max_ets_tc = QED_MAX_PFC_PRIORITIES;
+		DP_VERBOSE(p_hwfn, QED_MSG_DCB,
+			   "ETS params: max_ets_tc is forced to %d\n",
+		p_params->max_ets_tc);
+	}
+
 	/* 8 bit tsa and bw data corresponding to each of the 8 TC's are
 	 * encoded in a type u32 array of size 2.
 	 */
-- 
cgit v1.2.3


From 6cf75f1cebb048cfc1424b4b8ac9bbc08d5f9f66 Mon Sep 17 00:00:00 2001
From: "sudarsana.kalluru@cavium.com" <sudarsana.kalluru@cavium.com>
Date: Wed, 19 Apr 2017 03:19:53 -0700
Subject: qed: Fix sending an invalid PFC error mask to MFW.

PFC error-mask value is not supported by MFW, but this bit could be
set in the pfc bit-map of the operational parameters if remote device
supports it. These operational parameters are used as basis for
populating the dcbx config parameters. User provided configs will be
applied on top of these parameters and then send them to MFW when
requested. Driver need to clear the error-mask bit before sending the
config parameters to MFW.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index c24436c911f3..ff058a380003 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -1008,6 +1008,8 @@ qed_dcbx_set_pfc_data(struct qed_hwfn *p_hwfn,
 	u8 pfc_map = 0;
 	int i;
 
+	*pfc &= ~DCBX_PFC_ERROR_MASK;
+
 	if (p_params->pfc.willing)
 		*pfc |= DCBX_PFC_WILLING_MASK;
 	else
-- 
cgit v1.2.3


From 62289ba27558553871fd047baadaaeda886c6a63 Mon Sep 17 00:00:00 2001
From: "sudarsana.kalluru@cavium.com" <sudarsana.kalluru@cavium.com>
Date: Wed, 19 Apr 2017 03:19:54 -0700
Subject: qed: Fix possible system hang in the dcbnl-getdcbx() path.

qed_dcbnl_get_dcbx() API uses kmalloc in GFT_KERNEL mode. The API gets
invoked in the interrupt context by qed_dcbnl_getdcbx callback. Need
to invoke this kmalloc in atomic mode.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index ff058a380003..8f0783a62953 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -1264,7 +1264,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn,
 {
 	struct qed_dcbx_get *dcbx_info;
 
-	dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
+	dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_ATOMIC);
 	if (!dcbx_info)
 		return NULL;
 
-- 
cgit v1.2.3


From c0c5dbe711c7d642415359ef5d216896a15a434b Mon Sep 17 00:00:00 2001
From: "sudarsana.kalluru@cavium.com" <sudarsana.kalluru@cavium.com>
Date: Wed, 19 Apr 2017 03:19:55 -0700
Subject: qed: Fix issue in populating the PFC config paramters.

Change ieee_setpfc() callback implementation to populate traffic class
count with the user provided value.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 8f0783a62953..a6e2bbe629bd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -2082,6 +2082,8 @@ static int qed_dcbnl_ieee_setpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
 	for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++)
 		dcbx_set.config.params.pfc.prio[i] = !!(pfc->pfc_en & BIT(i));
 
+	dcbx_set.config.params.pfc.max_tc = pfc->pfc_cap;
+
 	ptt = qed_ptt_acquire(hwfn);
 	if (!ptt)
 		return -EINVAL;
-- 
cgit v1.2.3


From e0535ce58b92d7baf0b33284a6c4f8f0338f943e Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Thu, 20 Apr 2017 14:08:26 +0200
Subject: net sched actions: allocate act cookie early

Policing filters do not use the TCA_ACT_* enum and the tb[]
nlattr array in tcf_action_init_1() doesn't get filled for
them so we should not try to look for a TCA_ACT_COOKIE
attribute in the then uninitialized array.
The error handling in cookie allocation then calls
tcf_hash_release() leading to invalid memory access later
on.
Additionally, if cookie allocation fails after an already
existing non-policing filter has successfully been changed,
tcf_action_release() should not be called, also we would
have to roll back the changes in the error handling, so
instead we now allocate the cookie early and assign it on
success at the end.

CVE-2017-7979
Fixes: 1045ba77a596 ("net sched actions: Add support for user cookies")
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 55 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index b70aa57319ea..e05b924618a0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -529,20 +529,20 @@ errout:
 	return err;
 }
 
-static int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb)
+static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 {
-	a->act_cookie = kzalloc(sizeof(*a->act_cookie), GFP_KERNEL);
-	if (!a->act_cookie)
-		return -ENOMEM;
+	struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return NULL;
 
-	a->act_cookie->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL);
-	if (!a->act_cookie->data) {
-		kfree(a->act_cookie);
-		return -ENOMEM;
+	c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL);
+	if (!c->data) {
+		kfree(c);
+		return NULL;
 	}
-	a->act_cookie->len = nla_len(tb[TCA_ACT_COOKIE]);
+	c->len = nla_len(tb[TCA_ACT_COOKIE]);
 
-	return 0;
+	return c;
 }
 
 struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
@@ -551,6 +551,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
+	struct tc_cookie *cookie = NULL;
 	char act_name[IFNAMSIZ];
 	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct nlattr *kind;
@@ -566,6 +567,18 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
 			goto err_out;
 		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
 			goto err_out;
+		if (tb[TCA_ACT_COOKIE]) {
+			int cklen = nla_len(tb[TCA_ACT_COOKIE]);
+
+			if (cklen > TC_COOKIE_MAX_SIZE)
+				goto err_out;
+
+			cookie = nla_memdup_cookie(tb);
+			if (!cookie) {
+				err = -ENOMEM;
+				goto err_out;
+			}
+		}
 	} else {
 		err = -EINVAL;
 		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
@@ -604,20 +617,12 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
 	if (err < 0)
 		goto err_mod;
 
-	if (tb[TCA_ACT_COOKIE]) {
-		int cklen = nla_len(tb[TCA_ACT_COOKIE]);
-
-		if (cklen > TC_COOKIE_MAX_SIZE) {
-			err = -EINVAL;
-			tcf_hash_release(a, bind);
-			goto err_mod;
-		}
-
-		if (nla_memdup_cookie(a, tb) < 0) {
-			err = -ENOMEM;
-			tcf_hash_release(a, bind);
-			goto err_mod;
+	if (name == NULL && tb[TCA_ACT_COOKIE]) {
+		if (a->act_cookie) {
+			kfree(a->act_cookie->data);
+			kfree(a->act_cookie);
 		}
+		a->act_cookie = cookie;
 	}
 
 	/* module count goes up only when brand new policy is created
@@ -632,6 +637,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
 err_mod:
 	module_put(a_o->owner);
 err_out:
+	if (cookie) {
+		kfree(cookie->data);
+		kfree(cookie);
+	}
 	return ERR_PTR(err);
 }
 
-- 
cgit v1.2.3


From ff5350a86b20de23991e474e006e2ff2732b218e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 20 Apr 2017 13:37:55 -0700
Subject: nvme: Adjust the Samsung APST quirk

I got a couple more reports: the Samsung APST issues appears to
affect multiple 950-series devices in Dell XPS 15 9550 and Precision
5510 laptops.  Change the quirk: rather than blacklisting the
firmware on the first problematic SSD that was reported, disable
APST on all 144d:a802 devices if they're installed in the two
affected Dell models.  While we're at it, disable only the deepest
sleep state instead of all of them -- the reporters say that this is
sufficient to fix the problem.

(I have a device that appears to be entirely identical to one of the
affected devices, but I have a different Dell laptop, so it's not
the case that all Samsung devices with firmware BXW75D0Q are broken
under all circumstances.)

Samsung engineers have an affected system, and hopefully they'll
give us a better workaround some time soon.  In the mean time, this
should minimize regressions.

See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184

Cc: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/core.c | 18 ++++++++----------
 drivers/nvme/host/nvme.h |  5 +++++
 drivers/nvme/host/pci.c  | 26 +++++++++++++++++++++++++-
 3 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9583a5f58a1d..00b2818dac31 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1315,6 +1315,14 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 			if (target)
 				table->entries[state] = target;
 
+			/*
+			 * Don't allow transitions to the deepest state
+			 * if it's quirked off.
+			 */
+			if (state == ctrl->npss &&
+			    (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS))
+				continue;
+
 			/*
 			 * Is this state a useful non-operational state for
 			 * higher-power states to autonomously transition to?
@@ -1387,16 +1395,6 @@ struct nvme_core_quirk_entry {
 };
 
 static const struct nvme_core_quirk_entry core_quirks[] = {
-	/*
-	 * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
-	 * the controller to go out to lunch.  It dies when the watchdog
-	 * timer reads CSTS and gets 0xffffffff.
-	 */
-	{
-		.vid = 0x144d,
-		.fr = "BXW75D0Q",
-		.quirks = NVME_QUIRK_NO_APST,
-	},
 };
 
 /* match is null-terminated but idstr is space-padded. */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2aa20e3e5675..ab2d6ec7eb5c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -83,6 +83,11 @@ enum nvme_quirks {
 	 * APST should not be used.
 	 */
 	NVME_QUIRK_NO_APST			= (1 << 4),
+
+	/*
+	 * The deepest sleep state should not be used.
+	 */
+	NVME_QUIRK_NO_DEEPEST_PS		= (1 << 5),
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 26a5fd05fe88..5d309535abbd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -19,6 +19,7 @@
 #include <linux/blk-mq-pci.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
+#include <linux/dmi.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/genhd.h>
@@ -1943,10 +1944,31 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	return -ENODEV;
 }
 
+static unsigned long check_dell_samsung_bug(struct pci_dev *pdev)
+{
+	if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
+		/*
+		 * Several Samsung devices seem to drop off the PCIe bus
+		 * randomly when APST is on and uses the deepest sleep state.
+		 * This has been observed on a Samsung "SM951 NVMe SAMSUNG
+		 * 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD
+		 * 950 PRO 256GB", but it seems to be restricted to two Dell
+		 * laptops.
+		 */
+		if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") &&
+		    (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
+		     dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
+			return NVME_QUIRK_NO_DEEPEST_PS;
+	}
+
+	return 0;
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
 	struct nvme_dev *dev;
+	unsigned long quirks = id->driver_data;
 
 	node = dev_to_node(&pdev->dev);
 	if (node == NUMA_NO_NODE)
@@ -1978,8 +2000,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto put_pci;
 
+	quirks |= check_dell_samsung_bug(pdev);
+
 	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
-			id->driver_data);
+			quirks);
 	if (result)
 		goto release_pools;
 
-- 
cgit v1.2.3


From be56945c4edd5a3da15f8254b68d1ddb1588d0c4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 20 Apr 2017 13:37:56 -0700
Subject: nvme: Quirk APST off on "THNSF5256GPUK TOSHIBA"

There's a report that it malfunctions with APST on.

See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184

Cc: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/core.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 00b2818dac31..eeb409c287b8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1395,6 +1395,15 @@ struct nvme_core_quirk_entry {
 };
 
 static const struct nvme_core_quirk_entry core_quirks[] = {
+	{
+		/*
+		 * This Toshiba device seems to die using any APST states.  See:
+		 * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11
+		 */
+		.vid = 0x1179,
+		.mn = "THNSF5256GPUK TOSHIBA",
+		.quirks = NVME_QUIRK_NO_APST,
+	}
 };
 
 /* match is null-terminated but idstr is space-padded. */
-- 
cgit v1.2.3


From 3a07bb1d7676ea24bc1585ba2a80604852c005e7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 20 Apr 2017 14:53:28 -0600
Subject: blk-mq: fix potential oops with polling and blk-mq scheduler

If we have a scheduler attached, blk_mq_tag_to_rq() on the
scheduled tags will return NULL if a request is no longer
in flight. This is different than using the normal tags,
where it will always return the fixed request. Check for
this condition for polling, in case we happen to enter
polling for a completed request.

The request address remains valid, so this check and return
should be perfectly safe.

Fixes: bd166ef183c2 ("blk-mq-sched: add framework for MQ capable IO schedulers")
Tested-by: Stephen Bates <sbates@raithlin.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 572966f49596..c7836a1ded97 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2928,8 +2928,17 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
 	if (!blk_qc_t_is_internal(cookie))
 		rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
-	else
+	else {
 		rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
+		/*
+		 * With scheduling, if the request has completed, we'll
+		 * get a NULL return here, as we clear the sched tag when
+		 * that happens. The request still remains valid, like always,
+		 * so we should be safe with just the NULL check.
+		 */
+		if (!rq)
+			return false;
+	}
 
 	return __blk_mq_poll(hctx, rq);
 }
-- 
cgit v1.2.3


From d34b0733b452ca3ef1dee36436dab08b8aa6a85c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Thu, 20 Apr 2017 14:37:43 -0700
Subject: Revert "mm, page_alloc: only use per-cpu allocator for irq-safe
 requests"

This reverts commit 374ad05ab64.

While the patch worked great for userspace allocations, the fact that
softirq loses the per-cpu allocator caused problems.  It needs to be
redone taking into account that a separate list is needed for hard/soft
IRQs or alternatively find a cheap way of detecting reentry due to an
interrupt.  Both are possible but sufficiently tricky that it shouldn't
be rushed.

Jesper had one method for allowing softirqs but reported that the cost
was high enough that it performed similarly to a plain revert.  His
figures for netperf TCP_STREAM were as follows

  Baseline v4.10.0  : 60316 Mbit/s
  Current 4.11.0-rc6: 47491 Mbit/s
  Jesper's patch    : 60662 Mbit/s
  This patch        : 60106 Mbit/s

As this is a regression, I wish to revert to noirq allocator for now and
go back to the drawing board.

Link: http://lkml.kernel.org/r/20170415145350.ixy7vtrzdzve57mh@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Tariq Toukan <ttoukan.linux@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f3d603cef2c0..07efbc3a8656 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1090,10 +1090,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
 	int migratetype = 0;
 	int batch_free = 0;
-	unsigned long nr_scanned, flags;
+	unsigned long nr_scanned;
 	bool isolated_pageblocks;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 	nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
 	if (nr_scanned)
@@ -1142,7 +1142,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			trace_mm_page_pcpu_drain(page, 0, mt);
 		} while (--count && --batch_free && !list_empty(list));
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1150,9 +1150,8 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype)
 {
-	unsigned long nr_scanned, flags;
-	spin_lock_irqsave(&zone->lock, flags);
-	__count_vm_events(PGFREE, 1 << order);
+	unsigned long nr_scanned;
+	spin_lock(&zone->lock);
 	nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
 	if (nr_scanned)
 		__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
@@ -1162,7 +1161,7 @@ static void free_one_page(struct zone *zone,
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype);
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1240,6 +1239,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long flags;
 	int migratetype;
 	unsigned long pfn = page_to_pfn(page);
 
@@ -1247,7 +1247,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
+	local_irq_save(flags);
+	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype);
+	local_irq_restore(flags);
 }
 
 static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2219,9 +2222,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			int migratetype, bool cold)
 {
 	int i, alloced = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		struct page *page = __rmqueue(zone, order, migratetype);
 		if (unlikely(page == NULL))
@@ -2257,7 +2259,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	 * pages added to the pcp list.
 	 */
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return alloced;
 }
 
@@ -2485,20 +2487,17 @@ void free_hot_cold_page(struct page *page, bool cold)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
+	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
 	int migratetype;
 
-	if (in_interrupt()) {
-		__free_pages_ok(page, 0);
-		return;
-	}
-
 	if (!free_pcp_prepare(page))
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
 	set_pcppage_migratetype(page, migratetype);
-	preempt_disable();
+	local_irq_save(flags);
+	__count_vm_event(PGFREE);
 
 	/*
 	 * We only track unmovable, reclaimable and movable on pcp lists.
@@ -2515,7 +2514,6 @@ void free_hot_cold_page(struct page *page, bool cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
-	__count_vm_event(PGFREE);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (!cold)
 		list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2529,7 +2527,7 @@ void free_hot_cold_page(struct page *page, bool cold)
 	}
 
 out:
-	preempt_enable();
+	local_irq_restore(flags);
 }
 
 /*
@@ -2654,8 +2652,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 {
 	struct page *page;
 
-	VM_BUG_ON(in_interrupt());
-
 	do {
 		if (list_empty(list)) {
 			pcp->count += rmqueue_bulk(zone, 0,
@@ -2686,8 +2682,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	struct list_head *list;
 	bool cold = ((gfp_flags & __GFP_COLD) != 0);
 	struct page *page;
+	unsigned long flags;
 
-	preempt_disable();
+	local_irq_save(flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone,  migratetype, cold, pcp, list);
@@ -2695,7 +2692,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 		zone_statistics(preferred_zone, zone);
 	}
-	preempt_enable();
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -2711,7 +2708,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 	unsigned long flags;
 	struct page *page;
 
-	if (likely(order == 0) && !in_interrupt()) {
+	if (likely(order == 0)) {
 		page = rmqueue_pcplist(preferred_zone, zone, order,
 				gfp_flags, migratetype);
 		goto out;
-- 
cgit v1.2.3


From fc280fe871449ead4bdbd1665fa52c7c01c64765 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Thu, 20 Apr 2017 14:37:46 -0700
Subject: mm: prevent NR_ISOLATE_* stats from going negative

Commit 6afcf8ef0ca0 ("mm, compaction: fix NR_ISOLATED_* stats for pfn
based migration") moved the dec_node_page_state() call (along with the
page_is_file_cache() call) to after putback_lru_page().

But page_is_file_cache() can change after putback_lru_page() is called,
so it should be called before putback_lru_page(), as it was before that
patch, to prevent NR_ISOLATE_* stats from going negative.

Without this fix, non-CONFIG_SMP kernels end up hanging in the
while(too_many_isolated()) { congestion_wait() } loop in
shrink_active_list() due to the negative stats.

 Mem-Info:
  active_anon:32567 inactive_anon:121 isolated_anon:1
  active_file:6066 inactive_file:6639 isolated_file:4294967295
                                                    ^^^^^^^^^^
  unevictable:0 dirty:115 writeback:0 unstable:0
  slab_reclaimable:2086 slab_unreclaimable:3167
  mapped:3398 shmem:18366 pagetables:1145 bounce:0
  free:1798 free_pcp:13 free_cma:0

Fixes: 6afcf8ef0ca0 ("mm, compaction: fix NR_ISOLATED_* stats for pfn based migration")
Link: http://lkml.kernel.org/r/1492683865-27549-1-git-send-email-rabin.vincent@axis.com
Signed-off-by: Rabin Vincent <rabinv@axis.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Ming Ling <ming.ling@spreadtrum.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index ed97c2c14fa8..738f1d5f8350 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -184,9 +184,9 @@ void putback_movable_pages(struct list_head *l)
 			unlock_page(page);
 			put_page(page);
 		} else {
-			putback_lru_page(page);
 			dec_node_page_state(page, NR_ISOLATED_ANON +
 					page_is_file_cache(page));
+			putback_lru_page(page);
 		}
 	}
 }
-- 
cgit v1.2.3


From b0522e13b2e6266ac00dc579b79fbfee22753293 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 21 Apr 2017 10:44:47 -0400
Subject: MAINTAINERS: Add "B:" field for networking.

We want people to report bugs to the netdev list.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 63f8abc02948..08950e5f5a05 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8761,6 +8761,7 @@ W:	http://www.linuxfoundation.org/en/Net
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+B:	mailto:netdev@vger.kernel.org
 S:	Maintained
 F:	net/
 F:	include/net/
-- 
cgit v1.2.3


From 95b9b88d2da5e43e025400afcb492643933bf858 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Wed, 19 Apr 2017 16:10:19 +0200
Subject: ipv6: sr: fix double free of skb after handling invalid SRH

The icmpv6_param_prob() function already does a kfree_skb(),
this patch removes the duplicate one.

Fixes: 1ababeba4a21f3dba3da3523c670b207fb2feb62 ("ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header)")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 275cac628a95..25192a3b0cd7 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -388,7 +388,6 @@ looped_back:
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((&hdr->segments_left) -
 				   skb_network_header(skb)));
-		kfree_skb(skb);
 		return -1;
 	}
 
-- 
cgit v1.2.3


From b7c6d2675899cfff0180412c63fc9cbd5bacdb4d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 19 Apr 2017 09:59:21 -0700
Subject: smsc75xx: use skb_cow_head() to deal with cloned skbs

We need to ensure there is enough headroom to push extra header,
but we also need to check if we are allowed to change headers.

skb_cow_head() is the proper helper to deal with this.

Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Hughes <james.hughes@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc75xx.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 0b17b40d7a4f..190de9a90f73 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2203,13 +2203,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
 {
 	u32 tx_cmd_a, tx_cmd_b;
 
-	if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
-		struct sk_buff *skb2 =
-			skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
+	if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS;
-- 
cgit v1.2.3


From a9e840a2081ed28c2b7caa6a9a0041c950b3c37d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 19 Apr 2017 09:59:22 -0700
Subject: cx82310_eth: use skb_cow_head() to deal with cloned skbs

We need to ensure there is enough headroom to push extra header,
but we also need to check if we are allowed to change headers.

skb_cow_head() is the proper helper to deal with this.

Fixes: cc28a20e77b2 ("introduce cx82310_eth: Conexant CX82310-based ADSL router USB ethernet driver")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Hughes <james.hughes@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cx82310_eth.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index e221bfcee76b..947bea81d924 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 {
 	int len = skb->len;
 
-	if (skb_headroom(skb) < 2) {
-		struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
+	if (skb_cow_head(skb, 2)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 	skb_push(skb, 2);
 
-- 
cgit v1.2.3


From d532c1082f68176363ed766d09bf187616e282fe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 19 Apr 2017 09:59:23 -0700
Subject: sr9700: use skb_cow_head() to deal with cloned skbs

We need to ensure there is enough headroom to push extra header,
but we also need to check if we are allowed to change headers.

skb_cow_head() is the proper helper to deal with this.

Fixes: c9b37458e956 ("USB2NET : SR9700 : One chip USB 1.1 USB2NET SR9700Device Driver Support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Hughes <james.hughes@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/sr9700.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index 4a1e9c489f1f..aadfe1d1c37e 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 
 	len = skb->len;
 
-	if (skb_headroom(skb) < SR_TX_OVERHEAD) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags);
+	if (skb_cow_head(skb, SR_TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	__skb_push(skb, SR_TX_OVERHEAD);
-- 
cgit v1.2.3


From d4ca73591916b760478d2b04334d5dcadc028e9c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 19 Apr 2017 09:59:24 -0700
Subject: lan78xx: use skb_cow_head() to deal with cloned skbs

We need to ensure there is enough headroom to push extra header,
but we also need to check if we are allowed to change headers.

skb_cow_head() is the proper helper to deal with this.

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Hughes <james.hughes@raspberrypi.org>
Cc: Woojung Huh <woojung.huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 9889a70ff4f6..636f48f19d1e 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2607,14 +2607,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
 {
 	u32 tx_cmd_a, tx_cmd_b;
 
-	if (skb_headroom(skb) < TX_OVERHEAD) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
+	if (skb_cow_head(skb, TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	if (lan78xx_linearize(skb) < 0)
-- 
cgit v1.2.3


From 6bc6895bdd6744e0136eaa4a11fbdb20a7db4e40 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 19 Apr 2017 09:59:25 -0700
Subject: ch9200: use skb_cow_head() to deal with cloned skbs

We need to ensure there is enough headroom to push extra header,
but we also need to check if we are allowed to change headers.

skb_cow_head() is the proper helper to deal with this.

Fixes: 4a476bd6d1d9 ("usbnet: New driver for QinHeng CH9200 devices")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Hughes <james.hughes@raspberrypi.org>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ch9200.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index 8a40202c0a17..c4f1c363e24b 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -254,14 +254,9 @@ static struct sk_buff *ch9200_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	tx_overhead = 0x40;
 
 	len = skb->len;
-	if (skb_headroom(skb) < tx_overhead) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_copy_expand(skb, tx_overhead, 0, flags);
+	if (skb_cow_head(skb, tx_overhead)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	__skb_push(skb, tx_overhead);
-- 
cgit v1.2.3


From 39fba7835aacda65284a86e611774cbba71dac20 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 19 Apr 2017 09:59:26 -0700
Subject: kaweth: use skb_cow_head() to deal with cloned skbs

We can use skb_cow_head() to properly deal with clones,
especially the ones coming from TCP stack that allow their head being
modified. This avoids a copy.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Hughes <james.hughes@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/kaweth.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 876f02f4945e..2a2c3edb6bad 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -803,18 +803,12 @@ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb,
 	}
 
 	/* We now decide whether we can put our special header into the sk_buff */
-	if (skb_cloned(skb) || skb_headroom(skb) < 2) {
-		/* no such luck - we make our own */
-		struct sk_buff *copied_skb;
-		copied_skb = skb_copy_expand(skb, 2, 0, GFP_ATOMIC);
-		dev_kfree_skb_irq(skb);
-		skb = copied_skb;
-		if (!copied_skb) {
-			kaweth->stats.tx_errors++;
-			netif_start_queue(net);
-			spin_unlock_irq(&kaweth->device_lock);
-			return NETDEV_TX_OK;
-		}
+	if (skb_cow_head(skb, 2)) {
+		kaweth->stats.tx_errors++;
+		netif_start_queue(net);
+		spin_unlock_irq(&kaweth->device_lock);
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
 	}
 
 	private_header = (__le16 *)__skb_push(skb, 2);
-- 
cgit v1.2.3


From 43170c4e0ba709c79130c3fe5a41e66279950cd0 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Wed, 19 Apr 2017 21:26:07 +0300
Subject: gso: Validate assumption of frag_list segementation

Commit 07b26c9454a2 ("gso: Support partial splitting at the frag_list
pointer") assumes that all SKBs in a frag_list (except maybe the last
one) contain the same amount of GSO payload.

This assumption is not always correct, resulting in the following
warning message in the log:
    skb_segment: too many frags

For example, mlx5 driver in Striding RQ mode creates some RX SKBs with
one frag, and some with 2 frags.
After GRO, the frag_list SKBs end up having different amounts of payload.
If this frag_list SKB is then forwarded, the aforementioned assumption
is violated.

Validate the assumption, and fall back to software GSO if it not true.

Change-Id: Ia03983f4a47b6534dd987d7a2aad96d54d46d212
Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer")
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 35c1e2460206..f86bf69cfb8d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3082,22 +3082,32 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	if (sg && csum && (mss != GSO_BY_FRAGS))  {
 		if (!(features & NETIF_F_GSO_PARTIAL)) {
 			struct sk_buff *iter;
+			unsigned int frag_len;
 
 			if (!list_skb ||
 			    !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
 				goto normal;
 
-			/* Split the buffer at the frag_list pointer.
-			 * This is based on the assumption that all
-			 * buffers in the chain excluding the last
-			 * containing the same amount of data.
+			/* If we get here then all the required
+			 * GSO features except frag_list are supported.
+			 * Try to split the SKB to multiple GSO SKBs
+			 * with no frag_list.
+			 * Currently we can do that only when the buffers don't
+			 * have a linear part and all the buffers except
+			 * the last are of the same length.
 			 */
+			frag_len = list_skb->len;
 			skb_walk_frags(head_skb, iter) {
+				if (frag_len != iter->len && iter->next)
+					goto normal;
 				if (skb_headlen(iter))
 					goto normal;
 
 				len -= iter->len;
 			}
+
+			if (len != frag_len)
+				goto normal;
 		}
 
 		/* GSO partial only requires that we trim off any excess that
-- 
cgit v1.2.3


From 557c44be917c322860665be3d28376afa84aa936 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Wed, 19 Apr 2017 14:19:43 -0700
Subject: net: ipv6: RTF_PCPU should not be settable from userspace

Andrey reported a fault in the IPv6 route code:

kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff880069809600 task.stack: ffff880062dc8000
RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975
RSP: 0018:ffff880062dced30 EFLAGS: 00010206
RAX: dffffc0000000000 RBX: ffff8800670561c0 RCX: 0000000000000006
RDX: 0000000000000003 RSI: ffff880062dcfb28 RDI: 0000000000000018
RBP: ffff880062dced68 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: ffff880062dcfb28 R14: dffffc0000000000 R15: 0000000000000000
FS:  00007feebe37e7c0(0000) GS:ffff88006cb00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000205a0fe4 CR3: 000000006b5c9000 CR4: 00000000000006e0
Call Trace:
 ip6_pol_route+0x1512/0x1f20 net/ipv6/route.c:1128
 ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1212
...

Andrey's syzkaller program passes rtmsg.rtmsg_flags with the RTF_PCPU bit
set. Flags passed to the kernel are blindly copied to the allocated
rt6_info by ip6_route_info_create making a newly inserted route appear
as though it is a per-cpu route. ip6_rt_cache_alloc sees the flag set
and expects rt->dst.from to be set - which it is not since it is not
really a per-cpu copy. The subsequent call to __ip6_dst_alloc then
generates the fault.

Fix by checking for the flag and failing with EINVAL.

Fixes: d52d3997f843f ("ipv6: Create percpu rt6_info")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ipv6_route.h | 2 +-
 net/ipv6/route.c                | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index 85bbb1799df3..d496c02e14bc 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,7 +35,7 @@
 #define RTF_PREF(pref)	((pref) << 27)
 #define RTF_PREF_MASK	0x18000000
 
-#define RTF_PCPU	0x40000000
+#define RTF_PCPU	0x40000000	/* read-only: can not be set by user */
 #define RTF_LOCAL	0x80000000
 
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9db1418993f2..fb174b590fd3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1854,6 +1854,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 	int addr_type;
 	int err = -EINVAL;
 
+	/* RTF_PCPU is an internal flag; can not be set by userspace */
+	if (cfg->fc_flags & RTF_PCPU)
+		goto out;
+
 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 		goto out;
 #ifndef CONFIG_IPV6_SUBTREES
-- 
cgit v1.2.3


From 89087c456fb5cb5e534edf1c30568a8baae4c906 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Thu, 20 Apr 2017 15:20:16 -0400
Subject: bpf: Fix values type used in test_maps

Maps of per-cpu type have their value element size adjusted to 8 if it
is specified smaller during various map operations.

This makes test_maps as a 32-bit binary fail, in fact the kernel
writes past the end of the value's array on the user's stack.

To be quite honest, I think the kernel should reject creation of a
per-cpu map that doesn't have a value size of at least 8 if that's
what the kernel is going to silently adjust to later.

If the user passed something smaller, it is a sizeof() calcualtion
based upon the type they will actually use (just like in this testcase
code) in later calls to the map operations.

Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY")
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_maps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index a0aa2009b0e0..20f1871874df 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -282,7 +282,7 @@ static void test_arraymap_percpu(int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	int key, next_key, fd, i;
-	long values[nr_cpus];
+	long long values[nr_cpus];
 
 	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
 			    sizeof(values[0]), 2, 0);
@@ -340,7 +340,7 @@ static void test_arraymap_percpu_many_keys(void)
 	 * allocator more than anything else
 	 */
 	unsigned int nr_keys = 2000;
-	long values[nr_cpus];
+	long long values[nr_cpus];
 	int key, fd, i;
 
 	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
-- 
cgit v1.2.3


From 6f60f438108c66988c944894e5f673ab5d04fbc1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 20 Apr 2017 13:21:30 +0300
Subject: net: qrtr: potential use after free in qrtr_sendmsg()

If skb_pad() fails then it frees the skb so we should check for errors.

Fixes: bdabad3e363d ("net: Add Qualcomm IPC router")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index ae5ac175b2be..9da7368b0140 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -658,7 +658,9 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	if (plen != len) {
-		skb_pad(skb, plen - len);
+		rc = skb_pad(skb, plen - len);
+		if (rc)
+			goto out_node;
 		skb_put(skb, plen - len);
 	}
 
-- 
cgit v1.2.3


From cdb90499187c5084453665f1b0b2de72b069bbdf Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 20 Apr 2017 17:27:58 +0200
Subject: bpf, doc: update bpf maintainers entry

Add various related files that have been missing under
BPF entry covering essential parts of its infrastructure
and also add myself as co-maintainer.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 08950e5f5a05..38d3e4ed7208 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2585,12 +2585,26 @@ F:	include/uapi/linux/if_bonding.h
 
 BPF (Safe dynamic programs and tools)
 M:	Alexei Starovoitov <ast@kernel.org>
+M:	Daniel Borkmann <daniel@iogearbox.net>
 L:	netdev@vger.kernel.org
 L:	linux-kernel@vger.kernel.org
 S:	Supported
+F:	arch/x86/net/bpf_jit*
+F:	Documentation/networking/filter.txt
+F:	include/linux/bpf*
+F:	include/linux/filter.h
+F:	include/uapi/linux/bpf*
+F:	include/uapi/linux/filter.h
 F:	kernel/bpf/
-F:	tools/testing/selftests/bpf/
+F:	kernel/trace/bpf_trace.c
 F:	lib/test_bpf.c
+F:	net/bpf/
+F:	net/core/filter.c
+F:	net/sched/act_bpf.c
+F:	net/sched/cls_bpf.c
+F:	samples/bpf/
+F:	tools/net/bpf*
+F:	tools/testing/selftests/bpf/
 
 BROADCOM B44 10/100 ETHERNET DRIVER
 M:	Michael Chan <michael.chan@broadcom.com>
-- 
cgit v1.2.3


From 723b929ca0f79c0796f160c2eeda4597ee98d2b8 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 21 Apr 2017 20:42:16 +0300
Subject: ip6mr: fix notification device destruction

Andrey Konovalov reported a BUG caused by the ip6mr code which is caused
because we call unregister_netdevice_many for a device that is already
being destroyed. In IPv4's ipmr that has been resolved by two commits
long time ago by introducing the "notify" parameter to the delete
function and avoiding the unregister when called from a notifier, so
let's do the same for ip6mr.

The trace from Andrey:
------------[ cut here ]------------
kernel BUG at net/core/dev.c:6813!
invalid opcode: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 1 PID: 1165 Comm: kworker/u4:3 Not tainted 4.11.0-rc7+ #251
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Workqueue: netns cleanup_net
task: ffff880069208000 task.stack: ffff8800692d8000
RIP: 0010:rollback_registered_many+0x348/0xeb0 net/core/dev.c:6813
RSP: 0018:ffff8800692de7f0 EFLAGS: 00010297
RAX: ffff880069208000 RBX: 0000000000000002 RCX: 0000000000000001
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88006af90569
RBP: ffff8800692de9f0 R08: ffff8800692dec60 R09: 0000000000000000
R10: 0000000000000006 R11: 0000000000000000 R12: ffff88006af90070
R13: ffff8800692debf0 R14: dffffc0000000000 R15: ffff88006af90000
FS:  0000000000000000(0000) GS:ffff88006cb00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fe7e897d870 CR3: 00000000657e7000 CR4: 00000000000006e0
Call Trace:
 unregister_netdevice_many.part.105+0x87/0x440 net/core/dev.c:7881
 unregister_netdevice_many+0xc8/0x120 net/core/dev.c:7880
 ip6mr_device_event+0x362/0x3f0 net/ipv6/ip6mr.c:1346
 notifier_call_chain+0x145/0x2f0 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394
 raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1647
 call_netdevice_notifiers net/core/dev.c:1663
 rollback_registered_many+0x919/0xeb0 net/core/dev.c:6841
 unregister_netdevice_many.part.105+0x87/0x440 net/core/dev.c:7881
 unregister_netdevice_many net/core/dev.c:7880
 default_device_exit_batch+0x4fa/0x640 net/core/dev.c:8333
 ops_exit_list.isra.4+0x100/0x150 net/core/net_namespace.c:144
 cleanup_net+0x5a8/0xb40 net/core/net_namespace.c:463
 process_one_work+0xc04/0x1c10 kernel/workqueue.c:2097
 worker_thread+0x223/0x19c0 kernel/workqueue.c:2231
 kthread+0x35e/0x430 kernel/kthread.c:231
 ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
Code: 3c 32 00 0f 85 70 0b 00 00 48 b8 00 02 00 00 00 00 ad de 49 89
47 78 e9 93 fe ff ff 49 8d 57 70 49 8d 5f 78 eb 9e e8 88 7a 14 fe <0f>
0b 48 8b 9d 28 fe ff ff e8 7a 7a 14 fe 48 b8 00 00 00 00 00
RIP: rollback_registered_many+0x348/0xeb0 RSP: ffff8800692de7f0
---[ end trace e0b29c57e9b3292c ]---

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6ba6c900ebcf..bf34d0950752 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -774,7 +774,8 @@ failure:
  *	Delete a VIF entry
  */
 
-static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
+static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+		       struct list_head *head)
 {
 	struct mif_device *v;
 	struct net_device *dev;
@@ -820,7 +821,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 					     dev->ifindex, &in6_dev->cnf);
 	}
 
-	if (v->flags & MIFF_REGISTER)
+	if ((v->flags & MIFF_REGISTER) && !notify)
 		unregister_netdevice_queue(dev, head);
 
 	dev_put(dev);
@@ -1331,7 +1332,6 @@ static int ip6mr_device_event(struct notifier_block *this,
 	struct mr6_table *mrt;
 	struct mif_device *v;
 	int ct;
-	LIST_HEAD(list);
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
@@ -1340,10 +1340,9 @@ static int ip6mr_device_event(struct notifier_block *this,
 		v = &mrt->vif6_table[0];
 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
 			if (v->dev == dev)
-				mif6_delete(mrt, ct, &list);
+				mif6_delete(mrt, ct, 1, NULL);
 		}
 	}
-	unregister_netdevice_many(&list);
 
 	return NOTIFY_DONE;
 }
@@ -1552,7 +1551,7 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 	for (i = 0; i < mrt->maxvif; i++) {
 		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
 			continue;
-		mif6_delete(mrt, i, &list);
+		mif6_delete(mrt, i, 0, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1707,7 +1706,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
 			return -EFAULT;
 		rtnl_lock();
-		ret = mif6_delete(mrt, mifi, NULL);
+		ret = mif6_delete(mrt, mifi, 0, NULL);
 		rtnl_unlock();
 		return ret;
 
-- 
cgit v1.2.3


From c70b17b775edb21280e9de7531acf6db3b365274 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tushar.n.dave@oracle.com>
Date: Thu, 20 Apr 2017 15:57:31 -0700
Subject: netpoll: Check for skb->queue_mapping

Reducing real_num_tx_queues needs to be in sync with skb queue_mapping
otherwise skbs with queue_mapping greater than real_num_tx_queues
can be sent to the underlying driver and can result in kernel panic.

One such event is running netconsole and enabling VF on the same
device. Or running netconsole and changing number of tx queues via
ethtool on same device.

e.g.
Unable to handle kernel NULL pointer dereference
tsk->{mm,active_mm}->context = 0000000000001525
tsk->{mm,active_mm}->pgd = fff800130ff9a000
              \|/ ____ \|/
              "@'/ .. \`@"
              /_| \__/ |_\
                 \__U_/
kworker/48:1(475): Oops [#1]
CPU: 48 PID: 475 Comm: kworker/48:1 Tainted: G           OE
4.11.0-rc3-davem-net+ #7
Workqueue: events queue_process
task: fff80013113299c0 task.stack: fff800131132c000
TSTATE: 0000004480e01600 TPC: 00000000103f9e3c TNPC: 00000000103f9e40 Y:
00000000    Tainted: G           OE
TPC: <ixgbe_xmit_frame_ring+0x7c/0x6c0 [ixgbe]>
g0: 0000000000000000 g1: 0000000000003fff g2: 0000000000000000 g3:
0000000000000001
g4: fff80013113299c0 g5: fff8001fa6808000 g6: fff800131132c000 g7:
00000000000000c0
o0: fff8001fa760c460 o1: fff8001311329a50 o2: fff8001fa7607504 o3:
0000000000000003
o4: fff8001f96e63a40 o5: fff8001311d77ec0 sp: fff800131132f0e1 ret_pc:
000000000049ed94
RPC: <set_next_entity+0x34/0xb80>
l0: 0000000000000000 l1: 0000000000000800 l2: 0000000000000000 l3:
0000000000000000
l4: 000b2aa30e34b10d l5: 0000000000000000 l6: 0000000000000000 l7:
fff8001fa7605028
i0: fff80013111a8a00 i1: fff80013155a0780 i2: 0000000000000000 i3:
0000000000000000
i4: 0000000000000000 i5: 0000000000100000 i6: fff800131132f1a1 i7:
00000000103fa4b0
I7: <ixgbe_xmit_frame+0x30/0xa0 [ixgbe]>
Call Trace:
 [00000000103fa4b0] ixgbe_xmit_frame+0x30/0xa0 [ixgbe]
 [0000000000998c74] netpoll_start_xmit+0xf4/0x200
 [0000000000998e10] queue_process+0x90/0x160
 [0000000000485fa8] process_one_work+0x188/0x480
 [0000000000486410] worker_thread+0x170/0x4c0
 [000000000048c6b8] kthread+0xd8/0x120
 [0000000000406064] ret_from_fork+0x1c/0x2c
 [0000000000000000]           (null)
Disabling lock debugging due to kernel taint
Caller[00000000103fa4b0]: ixgbe_xmit_frame+0x30/0xa0 [ixgbe]
Caller[0000000000998c74]: netpoll_start_xmit+0xf4/0x200
Caller[0000000000998e10]: queue_process+0x90/0x160
Caller[0000000000485fa8]: process_one_work+0x188/0x480
Caller[0000000000486410]: worker_thread+0x170/0x4c0
Caller[000000000048c6b8]: kthread+0xd8/0x120
Caller[0000000000406064]: ret_from_fork+0x1c/0x2c
Caller[0000000000000000]:           (null)

Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9424673009c1..29be2466970c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -105,15 +105,21 @@ static void queue_process(struct work_struct *work)
 	while ((skb = skb_dequeue(&npinfo->txq))) {
 		struct net_device *dev = skb->dev;
 		struct netdev_queue *txq;
+		unsigned int q_index;
 
 		if (!netif_device_present(dev) || !netif_running(dev)) {
 			kfree_skb(skb);
 			continue;
 		}
 
-		txq = skb_get_tx_queue(dev, skb);
-
 		local_irq_save(flags);
+		/* check if skb->queue_mapping is still valid */
+		q_index = skb_get_queue_mapping(skb);
+		if (unlikely(q_index >= dev->real_num_tx_queues)) {
+			q_index = q_index % dev->real_num_tx_queues;
+			skb_set_queue_mapping(skb, q_index);
+		}
+		txq = netdev_get_tx_queue(dev, q_index);
 		HARD_TX_LOCK(dev, txq, smp_processor_id());
 		if (netif_xmit_frozen_or_stopped(txq) ||
 		    netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
-- 
cgit v1.2.3