From 78fbe21d83e39e74f4b479a2978375ab7aaf3e37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 12 Jul 2017 19:25:47 -0700 Subject: disable new gcc-7.1.1 warnings for now commit bd664f6b3e376a8ef4990f87d08271cc2d01ba9a upstream. I made the mistake of upgrading my desktop to the new Fedora 26 that comes with gcc-7.1.1. There's nothing wrong per se that I've noticed, but I now have 1500 lines of warnings, mostly from the new format-truncation warning triggering all over the tree. We use 'snprintf()' and friends in a lot of places, and often know that the numbers are fairly small (ie a controller index or similar), but gcc doesn't know that, and sees an 'int', and thinks that it could be some huge number. And then complains when our buffers are not able to fit the name for the ten millionth controller. These warnings aren't necessarily bad per se, and we probably want to look through them subsystem by subsystem, but at least during the merge window they just mean that I can't even see if somebody is introducing any *real* problems when I pull. So warnings disabled for now. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index a872ece51ee5..0a2c352ba887 100644 --- a/Makefile +++ b/Makefile @@ -629,6 +629,9 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) +KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) +KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) -- cgit v1.2.3 From 70ec6b3635d95983e1fbcfb1c765cd7d2abd0ac8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 08:46:44 -0300 Subject: ir-core: fix gcc-7 warning on bool arithmetic commit bd7e31bbade02bc1e92aa00d5cf2cee2da66838a upstream. gcc-7 suggests that an expression using a bitwise not and a bitmask on a 'bool' variable is better written using boolean logic: drivers/media/rc/imon.c: In function 'imon_incoming_scancode': drivers/media/rc/imon.c:1725:22: error: '~' on a boolean expression [-Werror=bool-operation] ictx->pad_mouse = ~(ictx->pad_mouse) & 0x1; ^ drivers/media/rc/imon.c:1725:22: note: did you mean to use logical not? I agree. Fixes: 21677cfc562a ("V4L/DVB: ir-core: add imon driver") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/imon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index 86cc70fe2534..2d4b83635018 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -1629,7 +1629,7 @@ static void imon_incoming_packet(struct imon_context *ictx, if (kc == KEY_KEYBOARD && !ictx->release_code) { ictx->last_keycode = kc; if (!nomouse) { - ictx->pad_mouse = ~(ictx->pad_mouse) & 0x1; + ictx->pad_mouse = !ictx->pad_mouse; dev_dbg(dev, "toggling to %s mode\n", ictx->pad_mouse ? "mouse" : "keyboard"); spin_unlock_irqrestore(&ictx->kc_lock, flags); -- cgit v1.2.3 From 63d32e8af0dd6343cde33993eabc9c7562d92fe9 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 6 Jan 2017 15:33:14 -0500 Subject: dm mpath: cleanup -Wbool-operation warning in choose_pgpath() commit d19a55ccad15a486ffe03030570744e5d5bd9f8e upstream. Reported-by: David Binderman Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-mpath.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ac8235bda61b..0d437c98ab08 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -431,7 +431,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) unsigned long flags; struct priority_group *pg; struct pgpath *pgpath; - bool bypassed = true; + unsigned bypassed = 1; if (!atomic_read(&m->nr_valid_paths)) { clear_bit(MPATHF_QUEUE_IO, &m->flags); @@ -470,7 +470,7 @@ check_current_pg: */ do { list_for_each_entry(pg, &m->priority_groups, list) { - if (pg->bypassed == bypassed) + if (pg->bypassed == !!bypassed) continue; pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) { -- cgit v1.2.3 From 60813b6a2ab40c8ad7f5e44819fe6e44fa4b459a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 May 2017 10:40:00 -0300 Subject: s5p-jpeg: don't return a random width/height commit a16e37726c444cbda91e73ed5f742e717bfe866f upstream. Gcc 7.1 complains about: drivers/media/platform/s5p-jpeg/jpeg-core.c: In function 's5p_jpeg_parse_hdr.isra.9': drivers/media/platform/s5p-jpeg/jpeg-core.c:1207:12: warning: 'width' may be used uninitialized in this function [-Wmaybe-uninitialized] result->w = width; ~~~~~~~~~~^~~~~~~ drivers/media/platform/s5p-jpeg/jpeg-core.c:1208:12: warning: 'height' may be used uninitialized in this function [-Wmaybe-uninitialized] result->h = height; ~~~~~~~~~~^~~~~~~~ Indeed the code would allow it to return a random value (although it shouldn't happen, in practice). So, explicitly set both to zero, just in case. Acked-by: Andrzej Pietrasiewicz Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-jpeg/jpeg-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 52dc7941db65..1da2c94e1dca 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -1099,10 +1099,10 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, struct s5p_jpeg_ctx *ctx) { int c, components = 0, notfound, n_dht = 0, n_dqt = 0; - unsigned int height, width, word, subsampling = 0, sos = 0, sof = 0, - sof_len = 0; - unsigned int dht[S5P_JPEG_MAX_MARKER], dht_len[S5P_JPEG_MAX_MARKER], - dqt[S5P_JPEG_MAX_MARKER], dqt_len[S5P_JPEG_MAX_MARKER]; + unsigned int height = 0, width = 0, word, subsampling = 0; + unsigned int sos = 0, sof = 0, sof_len = 0; + unsigned int dht[S5P_JPEG_MAX_MARKER], dht_len[S5P_JPEG_MAX_MARKER]; + unsigned int dqt[S5P_JPEG_MAX_MARKER], dqt_len[S5P_JPEG_MAX_MARKER]; long length; struct s5p_jpeg_buffer jpeg_buffer; -- cgit v1.2.3 From 76572609e45895826f4bf49c376271dc62f125dc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 6 Jun 2017 17:59:03 +0200 Subject: thermal: max77620: fix device-node reference imbalance commit c592fafbdbb6b1279b76a54722d1465ca77e5bde upstream. The thermal child device reuses the parent MFD-device device-tree node when registering a thermal zone, but did not take a reference to the node. This leads to a reference imbalance, and potential use-after-free, when the node reference is dropped by the platform-bus device destructor (once for the child and later again for the parent). Fix this by dropping any reference already held to a device-tree node and getting a reference to the parent's node which will be balanced on reprobe or on platform-device release, whichever comes first. Note that simply clearing the of_node pointer on probe errors and on driver unbind would not allow the use of device-managed resources as specifically thermal_zone_of_sensor_unregister() claims that a valid device-tree node pointer is needed during deregistration (even if it currently does not seem to use it). Fixes: ec4664b3fd6d ("thermal: max77620: Add thermal driver for reporting junction temp") Cc: Laxman Dewangan Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/max77620_thermal.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/max77620_thermal.c b/drivers/thermal/max77620_thermal.c index 83905ff46e40..7e989277a890 100644 --- a/drivers/thermal/max77620_thermal.c +++ b/drivers/thermal/max77620_thermal.c @@ -104,8 +104,6 @@ static int max77620_thermal_probe(struct platform_device *pdev) return -EINVAL; } - pdev->dev.of_node = pdev->dev.parent->of_node; - mtherm->dev = &pdev->dev; mtherm->rmap = dev_get_regmap(pdev->dev.parent, NULL); if (!mtherm->rmap) { @@ -113,6 +111,14 @@ static int max77620_thermal_probe(struct platform_device *pdev) return -ENODEV; } + /* + * Drop any current reference to a device-tree node and get a + * reference to the parent's node which will be balanced on reprobe or + * on platform-device release. + */ + of_node_put(pdev->dev.of_node); + pdev->dev.of_node = of_node_get(pdev->dev.parent->of_node); + mtherm->tz_device = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mtherm, &max77620_thermal_ops); if (IS_ERR(mtherm->tz_device)) { -- cgit v1.2.3 From 7cd7b56037ae4cbdfe5e7fa9e41b67ba15199d7f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:08 +0530 Subject: thermal: cpu_cooling: Avoid accessing potentially freed structures commit 289d72afddf83440117c35d864bf0c6309c1d011 upstream. After the lock is dropped, it is possible that the cpufreq_dev gets freed before we call get_level() and that can cause kernel to crash. Drop the lock after we are done using the structure. Fixes: 02373d7c69b4 ("thermal: cpu_cooling: fix lockdep problems in cpu_cooling") Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/cpu_cooling.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 9ce0e9eef923..f49d2989d000 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -191,8 +191,10 @@ unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) mutex_lock(&cooling_list_lock); list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { + unsigned long level = get_level(cpufreq_dev, freq); + mutex_unlock(&cooling_list_lock); - return get_level(cpufreq_dev, freq); + return level; } } mutex_unlock(&cooling_list_lock); -- cgit v1.2.3 From b729a1aea14d482e8a29186bd4a930282961a43c Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 27 Jun 2017 17:31:49 +0300 Subject: ath9k: fix tx99 use after free commit cf8ce1ea61b75712a154c93e40f2a5af2e4dd997 upstream. One scenario that could lead to UAF is two threads writing simultaneously to the "tx99" debug file. One of them would set the "start" value to true and follow to ath9k_tx99_init(). Inside the function it would set the sc->tx99_state to true after allocating sc->tx99skb. Then, the other thread would execute write_file_tx99() and call ath9k_tx99_deinit(). sc->tx99_state would be freed. After that, the first thread would continue inside ath9k_tx99_init() and call r = ath9k_tx99_send(sc, sc->tx99_skb, &txctl); that would make use of the freed sc->tx99_skb memory. Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/tx99.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index 16aca9e28b77..1fa7f844b5da 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -189,22 +189,27 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, if (strtobool(buf, &start)) return -EINVAL; + mutex_lock(&sc->mutex); + if (start == sc->tx99_state) { if (!start) - return count; + goto out; ath_dbg(common, XMIT, "Resetting TX99\n"); ath9k_tx99_deinit(sc); } if (!start) { ath9k_tx99_deinit(sc); - return count; + goto out; } r = ath9k_tx99_init(sc); - if (r) + if (r) { + mutex_unlock(&sc->mutex); return r; - + } +out: + mutex_unlock(&sc->mutex); return count; } -- cgit v1.2.3 From 99f96831083bb23dff471ff1b4e16cc8370ecaec Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 27 Jun 2017 17:31:51 +0300 Subject: ath9k: fix tx99 bus error commit bde717ab473668377fc65872398a102d40cb2d58 upstream. The hard coded register 0x9864 and 0x9924 are invalid for ar9300 chips. Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index ae3043559b6d..fe5102ca5010 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -1821,8 +1821,6 @@ static void ar9003_hw_spectral_scan_wait(struct ath_hw *ah) static void ar9003_hw_tx99_start(struct ath_hw *ah, u32 qnum) { REG_SET_BIT(ah, AR_PHY_TEST, PHY_AGC_CLR); - REG_SET_BIT(ah, 0x9864, 0x7f000); - REG_SET_BIT(ah, 0x9924, 0x7f00fe); REG_CLR_BIT(ah, AR_DIAG_SW, AR_DIAG_RX_DIS); REG_WRITE(ah, AR_CR, AR_CR_RXD); REG_WRITE(ah, AR_DLCL_IFS(qnum), 0); -- cgit v1.2.3 From 48879676a8c3f481e1ae51ccde6d6e7515753476 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 27 Jun 2017 17:31:53 +0300 Subject: ath9k: fix an invalid pointer dereference in ath9k_rng_stop() commit 07246c115801c27652700e3679bb58661ef7ed65 upstream. The bug was triggered when do suspend/resuming continuously on Dell XPS L322X/0PJHXN version 9333 (2013) with kernel 4.12.0-041200rc4-generic. But can't reproduce on DELL E5440 + AR9300 PCIE chips. The warning is caused by accessing invalid pointer sc->rng_task. sc->rng_task is not be cleared after kthread_stop(sc->rng_task) be called in ath9k_rng_stop(). Because the kthread is stopped before ath9k_rng_kthread() be scheduled. So set sc->rng_task to null after kthread_stop(sc->rng_task) to resolve this issue. WARNING: CPU: 0 PID: 984 at linux/kernel/kthread.c:71 kthread_stop+0xf1/0x100 CPU: 0 PID: 984 Comm: NetworkManager Not tainted 4.12.0-041200rc4-generic #201706042031 Hardware name: Dell Inc. Dell System XPS L322X/0PJHXN, BIOS A09 05/15/2013 task: ffff950170fdda00 task.stack: ffffa22c01538000 RIP: 0010:kthread_stop+0xf1/0x100 RSP: 0018:ffffa22c0153b5b0 EFLAGS: 00010246 RAX: ffffffffa6257800 RBX: ffff950171b79560 RCX: 0000000000000000 RDX: 0000000080000000 RSI: 000000007fffffff RDI: ffff9500ac9a9680 RBP: ffffa22c0153b5c8 R08: 0000000000000000 R09: 0000000000000000 R10: ffffa22c0153b648 R11: ffff9501768004b8 R12: ffff9500ac9a9680 R13: ffff950171b79f70 R14: ffff950171b78780 R15: ffff9501749dc018 FS: 00007f0d6bfd5540(0000) GS:ffff95017f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc190161a08 CR3: 0000000232906000 CR4: 00000000001406f0 Call Trace: ath9k_rng_stop+0x1a/0x20 [ath9k] ath9k_stop+0x3b/0x1d0 [ath9k] drv_stop+0x33/0xf0 [mac80211] ieee80211_stop_device+0x43/0x50 [mac80211] ieee80211_do_stop+0x4f2/0x810 [mac80211] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196043 Reported-by: Giulio Genovese Tested-by: Giulio Genovese Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/rng.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/rng.c b/drivers/net/wireless/ath/ath9k/rng.c index d38e50f96db7..e0374ebe7bdc 100644 --- a/drivers/net/wireless/ath/ath9k/rng.c +++ b/drivers/net/wireless/ath/ath9k/rng.c @@ -120,6 +120,8 @@ void ath9k_rng_start(struct ath_softc *sc) void ath9k_rng_stop(struct ath_softc *sc) { - if (sc->rng_task) + if (sc->rng_task) { kthread_stop(sc->rng_task); + sc->rng_task = NULL; + } } -- cgit v1.2.3 From f73de3f9915c997dd091cadcdbafbdb9835f2cf5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 Mar 2017 12:15:35 +0200 Subject: NFC: fix broken device allocation commit 20777bc57c346b6994f465e0d8261a7fbf213a09 upstream. Commit 7eda8b8e9677 ("NFC: Use IDR library to assing NFC devices IDs") moved device-id allocation and struct-device initialisation from nfc_allocate_device() to nfc_register_device(). This broke just about every nfc-device-registration error path, which continue to call nfc_free_device() that tries to put the device reference of the now uninitialised (but zeroed) struct device: kobject: '(null)' (ce316420): is not initialized, yet kobject_put() is being called. The late struct-device initialisation also meant that various work queues whose names are derived from the nfc device name were also misnamed: 421 root 0 SW< [(null)_nci_cmd_] 422 root 0 SW< [(null)_nci_rx_w] 423 root 0 SW< [(null)_nci_tx_w] Move the id-allocation and struct-device initialisation back to nfc_allocate_device() and fix up the single call site which did not use nfc_free_device() in its error path. Fixes: 7eda8b8e9677 ("NFC: Use IDR library to assing NFC devices IDs") Cc: Samuel Ortiz Signed-off-by: Johan Hovold Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/core.c | 31 ++++++++++++++++++------------- net/nfc/nci/core.c | 3 +-- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index 122bb81da918..5cf33df888c3 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -982,6 +982,8 @@ static void nfc_release(struct device *d) kfree(se); } + ida_simple_remove(&nfc_index_ida, dev->idx); + kfree(dev); } @@ -1056,6 +1058,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; + int rc; if (!ops->start_poll || !ops->stop_poll || !ops->activate_target || !ops->deactivate_target || !ops->im_transceive) @@ -1068,6 +1071,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, if (!dev) return NULL; + rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); + if (rc < 0) + goto err_free_dev; + dev->idx = rc; + + dev->dev.class = &nfc_class; + dev_set_name(&dev->dev, "nfc%d", dev->idx); + device_initialize(&dev->dev); + dev->ops = ops; dev->supported_protocols = supported_protocols; dev->tx_headroom = tx_headroom; @@ -1090,6 +1102,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, } return dev; + +err_free_dev: + kfree(dev); + + return ERR_PTR(rc); } EXPORT_SYMBOL(nfc_allocate_device); @@ -1104,14 +1121,6 @@ int nfc_register_device(struct nfc_dev *dev) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); - if (dev->idx < 0) - return dev->idx; - - dev->dev.class = &nfc_class; - dev_set_name(&dev->dev, "nfc%d", dev->idx); - device_initialize(&dev->dev); - mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; rc = device_add(&dev->dev); @@ -1149,12 +1158,10 @@ EXPORT_SYMBOL(nfc_register_device); */ void nfc_unregister_device(struct nfc_dev *dev) { - int rc, id; + int rc; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - id = dev->idx; - if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); @@ -1179,8 +1186,6 @@ void nfc_unregister_device(struct nfc_dev *dev) nfc_devlist_generation++; device_del(&dev->dev); mutex_unlock(&nfc_devlist_mutex); - - ida_simple_remove(&nfc_index_ida, id); } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 61fff422424f..85a3d9ed4c29 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1173,8 +1173,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, return ndev; free_nfc: - kfree(ndev->nfc_dev); - + nfc_free_device(ndev->nfc_dev); free_nci: kfree(ndev); return NULL; -- cgit v1.2.3 From 7de6ff402dd02cbfc6b963068efd6efecc0e4978 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 Mar 2017 12:15:36 +0200 Subject: NFC: nfcmrvl_uart: add missing tty-device sanity check commit 15e0c59f1535926a939d1df66d6edcf997d7c1b9 upstream. Make sure to check the tty-device pointer before trying to access the parent device to avoid dereferencing a NULL-pointer when the tty is one end of a Unix98 pty. Fixes: e097dc624f78 ("NFC: nfcmrvl: add UART driver") Cc: Vincent Cuissard Signed-off-by: Johan Hovold Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/nfcmrvl/uart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index 83a99e38e7bd..6c0c301611c4 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -109,6 +109,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) struct nfcmrvl_private *priv; struct nfcmrvl_platform_data *pdata = NULL; struct nfcmrvl_platform_data config; + struct device *dev = nu->tty->dev; /* * Platform data cannot be used here since usually it is already used @@ -116,9 +117,8 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) * and check if DT entries were added. */ - if (nu->tty->dev->parent && nu->tty->dev->parent->of_node) - if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node, - &config) == 0) + if (dev && dev->parent && dev->parent->of_node) + if (nfcmrvl_uart_parse_dt(dev->parent->of_node, &config) == 0) pdata = &config; if (!pdata) { @@ -131,7 +131,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) } priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_UART, nu, &uart_ops, - nu->tty->dev, pdata); + dev, pdata); if (IS_ERR(priv)) return PTR_ERR(priv); -- cgit v1.2.3 From 4b4f74d6c4f801aee5add0cab1d68904e6141380 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 Mar 2017 12:15:37 +0200 Subject: NFC: nfcmrvl: do not use device-managed resources commit 0cbe40112f42cf5e008f9127f6cd5952ba3946c7 upstream. This specifically fixes resource leaks in the registration error paths. Device-managed resources is a bad fit for this driver as devices can be registered from the n_nci line discipline. Firstly, a tty may not even have a corresponding device (should it be part of a Unix98 pty) something which would lead to a NULL-pointer dereference when registering resources. Secondly, if the tty has a class device, its lifetime exceeds that of the line discipline, which means that resources would leak every time the line discipline is closed (or if registration fails). Currently, the devres interface was only being used to request a reset gpio despite the fact that it was already explicitly freed in nfcmrvl_nci_unregister_dev() (along with the private data), something which also prevented the resource leak at close. Note that the driver treats gpio number 0 as invalid despite it being perfectly valid. This will be addressed in a follow-up patch. Fixes: b2fe288eac72 ("NFC: nfcmrvl: free reset gpio") Fixes: 4a2b947f56b3 ("NFC: nfcmrvl: add chip reset management") Cc: Vincent Cuissard Signed-off-by: Johan Hovold Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/nfcmrvl/main.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 51c8240a1672..3e3fc9588f10 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -124,12 +124,13 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, memcpy(&priv->config, pdata, sizeof(*pdata)); if (priv->config.reset_n_io) { - rc = devm_gpio_request_one(dev, - priv->config.reset_n_io, - GPIOF_OUT_INIT_LOW, - "nfcmrvl_reset_n"); - if (rc < 0) + rc = gpio_request_one(priv->config.reset_n_io, + GPIOF_OUT_INIT_LOW, + "nfcmrvl_reset_n"); + if (rc < 0) { + priv->config.reset_n_io = 0; nfc_err(dev, "failed to request reset_n io\n"); + } } if (phy == NFCMRVL_PHY_SPI) { @@ -154,7 +155,7 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, if (!priv->ndev) { nfc_err(dev, "nci_allocate_device failed\n"); rc = -ENOMEM; - goto error; + goto error_free_gpio; } nci_set_drvdata(priv->ndev, priv); @@ -179,7 +180,9 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, error_free_dev: nci_free_device(priv->ndev); -error: +error_free_gpio: + if (priv->config.reset_n_io) + gpio_free(priv->config.reset_n_io); kfree(priv); return ERR_PTR(rc); } @@ -195,7 +198,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) nfcmrvl_fw_dnld_deinit(priv); if (priv->config.reset_n_io) - devm_gpio_free(priv->dev, priv->config.reset_n_io); + gpio_free(priv->config.reset_n_io); nci_unregister_device(ndev); nci_free_device(ndev); -- cgit v1.2.3 From 10f03f0f7ef0ee456369b2940e092e0225c754a6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 Mar 2017 12:15:38 +0200 Subject: NFC: nfcmrvl: use nfc-device for firmware download commit e5834ac22948169bbd7c45996d8d4905edd20f5e upstream. Use the nfc- rather than phy-device in firmware-management code that needs a valid struct device. This specifically fixes a NULL-pointer dereference in nfcmrvl_fw_dnld_init() during registration when the underlying tty is one end of a Unix98 pty. Note that the driver still uses the phy device for any debugging, which is fine for now. Fixes: 3194c6870158 ("NFC: nfcmrvl: add firmware download support") Cc: Vincent Cuissard Signed-off-by: Johan Hovold Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/nfcmrvl/fw_dnld.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c index f8dcdf4b24f6..af62c4c854f3 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.c +++ b/drivers/nfc/nfcmrvl/fw_dnld.c @@ -459,7 +459,7 @@ int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv) INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work); snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq", - dev_name(priv->dev)); + dev_name(&priv->ndev->nfc_dev->dev)); priv->fw_dnld.rx_wq = create_singlethread_workqueue(name); if (!priv->fw_dnld.rx_wq) return -ENOMEM; @@ -496,6 +496,7 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld; + int res; if (!priv->support_fw_dnld) return -ENOTSUPP; @@ -511,7 +512,9 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) */ /* Retrieve FW binary */ - if (request_firmware(&fw_dnld->fw, firmware_name, priv->dev) < 0) { + res = request_firmware(&fw_dnld->fw, firmware_name, + &ndev->nfc_dev->dev); + if (res < 0) { nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name); return -ENOENT; } -- cgit v1.2.3 From 06be8e2767da8d2ffeaf490e05cda274935f7f89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 Mar 2017 12:15:39 +0200 Subject: NFC: nfcmrvl: fix firmware-management initialisation commit 45dd39b974f6632222dd5cdcbea7358a077ab0b0 upstream. The nci-device was never deregistered in the event that fw-initialisation failed. Fix this by moving the firmware initialisation before device registration since the firmware work queue should be available before registering. Note that this depends on a recent fix that moved device-name initialisation back to to nci_allocate_device() as the firmware-workqueue name is now derived from the nfc-device name. Fixes: 3194c6870158 ("NFC: nfcmrvl: add firmware download support") Cc: Vincent Cuissard Signed-off-by: Johan Hovold Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/nfcmrvl/main.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 3e3fc9588f10..a446590a71ca 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -158,26 +158,28 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, goto error_free_gpio; } + rc = nfcmrvl_fw_dnld_init(priv); + if (rc) { + nfc_err(dev, "failed to initialize FW download %d\n", rc); + goto error_free_dev; + } + nci_set_drvdata(priv->ndev, priv); rc = nci_register_device(priv->ndev); if (rc) { nfc_err(dev, "nci_register_device failed %d\n", rc); - goto error_free_dev; + goto error_fw_dnld_deinit; } /* Ensure that controller is powered off */ nfcmrvl_chip_halt(priv); - rc = nfcmrvl_fw_dnld_init(priv); - if (rc) { - nfc_err(dev, "failed to initialize FW download %d\n", rc); - goto error_free_dev; - } - nfc_info(dev, "registered with nci successfully\n"); return priv; +error_fw_dnld_deinit: + nfcmrvl_fw_dnld_deinit(priv); error_free_dev: nci_free_device(priv->ndev); error_free_gpio: -- cgit v1.2.3 From d1ac8a98f312da20784723b53c62ad67766d5cb7 Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 24 May 2017 12:42:26 +0200 Subject: nfc: Ensure presence of required attributes in the activate_target handler commit a0323b979f81ad2deb2c8836eab506534891876a upstream. Check that the NFC_ATTR_TARGET_INDEX and NFC_ATTR_PROTOCOLS attributes (in addition to NFC_ATTR_DEVICE_INDEX) are provided by the netlink client prior to accessing them. This prevents potential unhandled NULL pointer dereference exceptions which can be triggered by malicious user-mode programs, if they omit one or both of these attributes. Signed-off-by: Mateusz Jurczyk Acked-by: Kees Cook Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ea023b35f1c2..102c681c48b5 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -910,7 +910,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) u32 device_idx, target_idx, protocol; int rc; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_TARGET_INDEX] || + !info->attrs[NFC_ATTR_PROTOCOLS]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); -- cgit v1.2.3 From 4a142251a315e4df6f12a7c99a2a6dfe4ac273ec Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 24 May 2017 12:26:20 +0200 Subject: nfc: Fix the sockaddr length sanitization in llcp_sock_connect commit 608c4adfcabab220142ee335a2a003ccd1c0b25b upstream. Fix the sockaddr length verification in the connect() handler of NFC/LLCP sockets, to compare against the size of the actual structure expected on input (sockaddr_nfc_llcp) instead of its shorter version (sockaddr_nfc). Both structures are defined in include/uapi/linux/nfc.h. The fields specific to the _llcp extended struct are as follows: 276 __u8 dsap; /* Destination SAP, if known */ 277 __u8 ssap; /* Source SAP to be bound to */ 278 char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; 279 size_t service_name_len; If the caller doesn't provide a sufficiently long sockaddr buffer, these fields remain uninitialized (and they currently originate from the stack frame of the top-level sys_connect handler). They are then copied by llcp_sock_connect() into internal storage (nfc_llcp_sock structure), and could be subsequently read back through the user-mode getsockname() function (handled by llcp_sock_getname()). This would result in the disclosure of up to ~70 uninitialized bytes from the kernel stack to user-mode clients capable of creating AFC_NFC sockets. Signed-off-by: Mateusz Jurczyk Acked-by: Kees Cook Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/llcp_sock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b9edf5fae6ae..19c2ca1a1548 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -661,8 +661,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); - if (!addr || len < sizeof(struct sockaddr_nfc) || - addr->sa_family != AF_NFC) + if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC) return -EINVAL; if (addr->service_name_len == 0 && addr->dsap == 0) -- cgit v1.2.3 From 244a7db0e9d584af63e1cfe4911d3200d1b180ee Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Tue, 13 Jun 2017 18:44:28 +0200 Subject: NFC: Add sockaddr length checks before accessing sa_family in bind handlers commit f6a5885fc4d68e7f25ffb42b9d8d80aebb3bacbb upstream. Verify that the caller-provided sockaddr structure is large enough to contain the sa_family field, before accessing it in bind() handlers of the AF_NFC socket. Since the syscall doesn't enforce a minimum size of the corresponding memory region, very short sockaddrs (zero or one byte long) result in operating on uninitialized memory while referencing .sa_family. Signed-off-by: Mateusz Jurczyk Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/llcp_sock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 19c2ca1a1548..e31dea17473d 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -76,7 +76,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); @@ -150,7 +151,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); -- cgit v1.2.3 From d3503ef1a6147be68f6208af01a9f81b11cc4877 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:02 +0300 Subject: perf intel-pt: Move decoder error setting into one condition commit 22c06892332d8916115525145b78e606e9cc6492 upstream. Move decoder error setting into one condition. Cc'ed to stable because later fixes depend on it. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 04387ab31316..9612870a927e 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2128,15 +2128,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } } while (err == -ENOLINK); - decoder->state.err = err ? intel_pt_ext_err(err) : 0; + if (err) { + decoder->state.err = intel_pt_ext_err(err); + decoder->state.from_ip = decoder->ip; + } else { + decoder->state.err = 0; + } + decoder->state.timestamp = decoder->timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; - if (err) - decoder->state.from_ip = decoder->ip; - return &decoder->state; } -- cgit v1.2.3 From 3e6837f1f414ea98f574abc31f04a1a5acb68203 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:03 +0300 Subject: perf intel-pt: Improve sample timestamp commit 3f04d98e972b59706bd43d6cc75efac91f8fba50 upstream. The decoder uses its current timestamp in samples. Usually that is a timestamp that has already passed, but in some cases it is a timestamp for a branch that the decoder is walking towards, and consequently hasn't reached. Improve that situation by using the pkt_state to determine when to use the current or previous timestamp. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 34 ++++++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9612870a927e..5539b06b09d3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -64,6 +64,25 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, }; +static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) +{ + switch (pkt_state) { + case INTEL_PT_STATE_NO_PSB: + case INTEL_PT_STATE_NO_IP: + case INTEL_PT_STATE_ERR_RESYNC: + case INTEL_PT_STATE_IN_SYNC: + case INTEL_PT_STATE_TNT: + return true; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + case INTEL_PT_STATE_FUP: + case INTEL_PT_STATE_FUP_NO_TIP: + return false; + default: + return true; + }; +} + #ifdef INTEL_PT_STRICT #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB @@ -99,6 +118,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; @@ -139,6 +159,7 @@ struct intel_pt_decoder { unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t timestamp_insn_cnt; + uint64_t sample_insn_cnt; uint64_t stuck_ip; int no_progress; int stuck_ip_prd; @@ -898,6 +919,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; + decoder->sample_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; if (err) { @@ -2067,7 +2089,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) { - uint64_t est = decoder->timestamp_insn_cnt << 1; + uint64_t est = decoder->sample_insn_cnt << 1; if (!decoder->cbr || !decoder->max_non_turbo_ratio) goto out; @@ -2075,7 +2097,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) est *= decoder->max_non_turbo_ratio; est /= decoder->cbr; out: - return decoder->timestamp + est; + return decoder->sample_timestamp + est; } const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) @@ -2131,11 +2153,17 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err) { decoder->state.err = intel_pt_ext_err(err); decoder->state.from_ip = decoder->ip; + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; } else { decoder->state.err = 0; + if (intel_pt_sample_time(decoder->pkt_state)) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } } - decoder->state.timestamp = decoder->timestamp; + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; -- cgit v1.2.3 From b338b87f2277e9c1a71749ec6985afe64be9efe8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:04 +0300 Subject: perf intel-pt: Fix missing stack clear commit 12b7080609097753fd8198cc1daf589be3ec1cca upstream. The return compression stack must be cleared whenever there is a PSB. Fix one case where that was not happening. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5539b06b09d3..378347b8fb0e 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1930,6 +1930,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psb(decoder); if (err) return err; -- cgit v1.2.3 From dc0401375f7a7a27ad56e1b0fc19dc470cf0f8d9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:05 +0300 Subject: perf intel-pt: Ensure IP is zero when state is INTEL_PT_STATE_NO_IP commit ad7167a8cd174ba7d8c0d0ed8d8410521206d104 upstream. A value of zero is used to indicate that there is no IP. Ensure the value is zero when the state is INTEL_PT_STATE_NO_IP. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 378347b8fb0e..f080ed86f1eb 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2115,6 +2115,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) break; case INTEL_PT_STATE_NO_IP: decoder->last_ip = 0; + decoder->ip = 0; /* Fall through */ case INTEL_PT_STATE_ERR_RESYNC: err = intel_pt_sync_ip(decoder); -- cgit v1.2.3 From daa637832dd2c64751369a39d8ad69e993a42a42 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:06 +0300 Subject: perf intel-pt: Fix last_ip usage commit ee14ac0ef6827cd6f9a572cc83dd0191ea17812c upstream. Intel PT uses IP compression based on the last IP. For decoding purposes, 'last IP' is considered to be reset to zero whenever there is a synchronization packet (PSB). The decoder wasn't doing that, and was treating the zero value to mean that there was no last IP, whereas compression can be done against the zero value. Fix by setting last_ip to zero when a PSB is received and keep track of have_last_ip. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f080ed86f1eb..958db8d06c97 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -111,6 +111,7 @@ struct intel_pt_decoder { bool have_tma; bool have_cyc; bool fixup_last_mtc; + bool have_last_ip; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -419,6 +420,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); + decoder->have_last_ip = true; } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -1670,6 +1672,8 @@ next: break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psbend(decoder); if (err == -EAGAIN) @@ -1750,7 +1754,7 @@ next: static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) { - return decoder->last_ip || decoder->packet.count == 0 || + return decoder->have_last_ip || decoder->packet.count == 0 || decoder->packet.count == 3 || decoder->packet.count == 6; } @@ -1880,7 +1884,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) if (decoder->ip) return 0; } - if (decoder->packet.count) + if (decoder->packet.count && decoder->have_last_ip) intel_pt_set_last_ip(decoder); break; @@ -1930,6 +1934,8 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psb(decoder); if (err) @@ -2064,6 +2070,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; + decoder->have_last_ip = false; decoder->last_ip = 0; decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); @@ -2072,6 +2079,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (err) return err; + decoder->have_last_ip = true; decoder->pkt_state = INTEL_PT_STATE_NO_IP; err = intel_pt_walk_psb(decoder); @@ -2114,6 +2122,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_sync(decoder); break; case INTEL_PT_STATE_NO_IP: + decoder->have_last_ip = false; decoder->last_ip = 0; decoder->ip = 0; /* Fall through */ -- cgit v1.2.3 From 5a16bd39c5012940955afa708e98ffbb32be31ce Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:07 +0300 Subject: perf intel-pt: Ensure never to set 'last_ip' when packet 'count' is zero commit f952eaceb089b691eba7c4e13686e742a8f26bf5 upstream. Intel PT uses IP compression based on the last IP. For decoding purposes, 'last IP' is not updated when a branch target has been suppressed, which is indicated by IPBytes == 0. IPBytes is stored in the packet 'count', so ensure never to set 'last_ip' when packet 'count' is zero. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 958db8d06c97..81c03c5c7106 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1468,7 +1468,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - intel_pt_set_last_ip(decoder); + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); break; case INTEL_PT_MODE_TSX: @@ -1754,8 +1755,9 @@ next: static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) { - return decoder->have_last_ip || decoder->packet.count == 0 || - decoder->packet.count == 3 || decoder->packet.count == 6; + return decoder->packet.count && + (decoder->have_last_ip || decoder->packet.count == 3 || + decoder->packet.count == 6); } /* Walk PSB+ packets to get in sync. */ -- cgit v1.2.3 From 94c38cd0aff23b38a7e78f82141081e986f560b5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:08 +0300 Subject: perf intel-pt: Use FUP always when scanning for an IP commit 622b7a47b843c78626f40c1d1aeef8483383fba2 upstream. The decoder will try to use branch packets to find an IP to start decoding or to recover from errors. Currently the FUP packet is used only in the case of an overflow, however there is no reason for that to be a special case. So just use FUP always when scanning for an IP. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 81c03c5c7106..499e5af83233 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1880,14 +1880,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_FUP: - if (decoder->overflow) { - if (intel_pt_have_ip(decoder)) - intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - } - if (decoder->packet.count && decoder->have_last_ip) - intel_pt_set_last_ip(decoder); + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; break; case INTEL_PT_MTC: -- cgit v1.2.3 From fe5cdbcdafaae75418307485b5d0ff5c66e250c4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 26 May 2017 11:17:09 +0300 Subject: perf intel-pt: Clear FUP flag on error commit 6a558f12dbe85437acbdec5e149ea07b5554eced upstream. Sometimes a FUP packet is associated with a TSX transaction and a flag is set to indicate that. Ensure that flag is cleared on any error condition because at that point the decoder can no longer assume it is correct. Signed-off-by: Adrian Hunter Cc: Andi Kleen Link: http://lkml.kernel.org/r/1495786658-18063-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 499e5af83233..7e27207d0f45 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1960,6 +1960,8 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) { int err; + decoder->set_fup_tx_flags = false; + intel_pt_log("Scanning for full IP\n"); err = intel_pt_walk_to_ip(decoder); if (err) -- cgit v1.2.3 From 3da27a9d982798ece7f09dd3b259be8fe0bc3a11 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 10 Jun 2017 04:59:11 +0200 Subject: Bluetooth: use constant time memory comparison for secret values commit 329d82309824ff1082dc4a91a5bbed8c3bec1580 upstream. This file is filled with complex cryptography. Thus, the comparisons of MACs and secret keys and curve points and so forth should not add timing attacks, which could either result in a direct forgery, or, given the complexity, some other type of attack. Signed-off-by: Jason A. Donenfeld Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/smp.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 43faf2aea2ab..658c900752c6 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -506,7 +507,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], if (err) return false; - return !memcmp(bdaddr->b, hash, 3); + return !crypto_memneq(bdaddr->b, hash, 3); } int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) @@ -559,7 +560,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } smp->debug_key = false; @@ -973,7 +974,7 @@ static u8 smp_random(struct smp_chan *smp) if (ret) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) { + if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) { BT_ERR("Pairing failed (confirmation values mismatch)"); return SMP_CONFIRM_FAILED; } @@ -1473,7 +1474,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) smp->rrnd, r, cfm)) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; smp->passkey_round++; @@ -1857,7 +1858,7 @@ static u8 sc_send_public_key(struct smp_chan *smp) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } } @@ -2122,7 +2123,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; } else { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), @@ -2603,7 +2604,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(cfm.confirm_val, smp->pcnf, 16)) + if (crypto_memneq(cfm.confirm_val, smp->pcnf, 16)) return SMP_CONFIRM_FAILED; } @@ -2636,7 +2637,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) else hcon->pending_sec_level = BT_SECURITY_FIPS; - if (!memcmp(debug_pk, smp->remote_pk, 64)) + if (!crypto_memneq(debug_pk, smp->remote_pk, 64)) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); if (smp->method == DSP_PASSKEY) { @@ -2735,7 +2736,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(check->e, e, 16)) + if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; if (!hcon->out) { @@ -3446,7 +3447,7 @@ static int __init test_ah(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 3)) + if (crypto_memneq(res, exp, 3)) return -EINVAL; return 0; @@ -3476,7 +3477,7 @@ static int __init test_c1(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3501,7 +3502,7 @@ static int __init test_s1(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3533,7 +3534,7 @@ static int __init test_f4(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3567,10 +3568,10 @@ static int __init test_f5(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(mackey, exp_mackey, 16)) + if (crypto_memneq(mackey, exp_mackey, 16)) return -EINVAL; - if (memcmp(ltk, exp_ltk, 16)) + if (crypto_memneq(ltk, exp_ltk, 16)) return -EINVAL; return 0; @@ -3603,7 +3604,7 @@ static int __init test_f6(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3657,7 +3658,7 @@ static int __init test_h6(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; -- cgit v1.2.3 From 754f4e05e9994e734bf620bfd77a9d04ac55e7b4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 13:52:09 +0200 Subject: wlcore: fix 64K page support commit 4a4274bf2dbbd1c7a45be0c89a1687c9d2eef4a0 upstream. In the stable linux-3.16 branch, I ran into a warning in the wlcore driver: drivers/net/wireless/ti/wlcore/spi.c: In function 'wl12xx_spi_raw_write': drivers/net/wireless/ti/wlcore/spi.c:315:1: error: the frame size of 12848 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] Newer kernels no longer show the warning, but the bug is still there, as the allocation is based on the CPU page size rather than the actual capabilities of the hardware. This replaces the PAGE_SIZE macro with the SZ_4K macro, i.e. 4096 bytes per buffer. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wlcore/spi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index f949ad2bd898..fa3547e06424 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -70,10 +70,10 @@ #define WSPI_MAX_CHUNK_SIZE 4092 /* - * wl18xx driver aggregation buffer size is (13 * PAGE_SIZE) compared to - * (4 * PAGE_SIZE) for wl12xx, so use the larger buffer needed for wl18xx + * wl18xx driver aggregation buffer size is (13 * 4K) compared to + * (4 * 4K) for wl12xx, so use the larger buffer needed for wl18xx */ -#define SPI_AGGR_BUFFER_SIZE (13 * PAGE_SIZE) +#define SPI_AGGR_BUFFER_SIZE (13 * SZ_4K) /* Maximum number of SPI write chunks */ #define WSPI_MAX_NUM_OF_CHUNKS \ -- cgit v1.2.3 From 157302f97aaa6f79854622a9aab7749fe8661d20 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Jun 2017 15:31:07 +0200 Subject: btrfs: Don't clear SGID when inheriting ACLs commit b7f8a09f8097db776b8d160862540e4fc1f51296 upstream. When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit set, DIR1 is expected to have SGID bit set (and owning group equal to the owning group of 'DIR0'). However when 'DIR0' also has some default ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on 'DIR1' to get cleared if user is not member of the owning group. Fix the problem by moving posix_acl_update_mode() out of __btrfs_set_acl() into btrfs_set_acl(). That way the function will not be called when inheriting ACLs which is what we want as it prevents SGID bit clearing and the mode has been properly set by posix_acl_create() anyway. Fixes: 073931017b49d9458aa351605b43a7e34598caef CC: linux-btrfs@vger.kernel.org CC: David Sterba Signed-off-by: Jan Kara Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/acl.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 247b8dfaf6e5..8d8370ddb6b2 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -78,12 +78,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - ret = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (ret) - return ret; - } - ret = 0; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) @@ -119,6 +113,13 @@ out: int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + int ret; + + if (type == ACL_TYPE_ACCESS && acl) { + ret = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (ret) + return ret; + } return __btrfs_set_acl(NULL, inode, acl, type); } -- cgit v1.2.3 From 9a81c136f486803b211c0e8d68a1fa9b0d8e8444 Mon Sep 17 00:00:00 2001 From: Matwey V Kornilov Date: Thu, 24 Nov 2016 13:32:48 +0300 Subject: igb: Explicitly select page 0 at initialization commit 440aeca4b9858248d8f16d724d9fa87a4f65fa33 upstream. The functions igb_read_phy_reg_gs40g/igb_write_phy_reg_gs40g (which were removed in 2a3cdea) explicitly selected the required page at every phy_reg access. Currently, igb_get_phy_id_82575 relays on the fact that page 0 is already selected. The assumption is not fulfilled for my Lex 3I380CW motherboard with integrated dual i211 based gigabit ethernet. This leads to igb initialization failure and network interfaces are not working: igb: Intel(R) Gigabit Ethernet Network Driver - version 5.4.0-k igb: Copyright (c) 2007-2014 Intel Corporation. igb: probe of 0000:01:00.0 failed with error -2 igb: probe of 0000:02:00.0 failed with error -2 In order to fix it, we explicitly select page 0 before first access to phy registers. See also: https://bugzilla.suse.com/show_bug.cgi?id=1009911 See also: http://www.lex.com.tw/products/pdf/3I380A&3I380CW.pdf Fixes: 2a3cdea ("igb: Remove GS40G specific defines/functions") Signed-off-by: Matwey V Kornilov Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/e1000_82575.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index a61447fd778e..1264a3616acf 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -246,6 +246,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) E1000_STATUS_FUNC_SHIFT; /* Set phy->phy_addr and phy->id. */ + igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 0); ret_val = igb_get_phy_id_82575(hw); if (ret_val) return ret_val; -- cgit v1.2.3 From 4d94276155f8bbd63b4ff1b3dc3ae9e14c9964e1 Mon Sep 17 00:00:00 2001 From: Satish Babu Patakokila Date: Fri, 16 Jun 2017 17:33:40 -0700 Subject: ASoC: compress: Derive substream from stream based on direction commit 01b8cedfd0422326caae308641dcadaa85e0ca72 upstream. Currently compress driver hardcodes direction as playback to get substream from the stream. This results in getting the incorrect substream for compressed capture usecase. To fix this, remove the hardcoding and derive substream based on the stream direction. Signed-off-by: Satish Babu Patakokila Signed-off-by: Banajit Goswami Acked-By: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-compress.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index bf7b52fce597..ff093edccea9 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -68,7 +68,8 @@ out: static int soc_compr_open_fe(struct snd_compr_stream *cstream) { struct snd_soc_pcm_runtime *fe = cstream->private_data; - struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_pcm_substream *fe_substream = + fe->pcm->streams[cstream->direction].substream; struct snd_soc_platform *platform = fe->platform; struct snd_soc_dpcm *dpcm; struct snd_soc_dapm_widget_list *list; @@ -414,7 +415,8 @@ static int soc_compr_set_params_fe(struct snd_compr_stream *cstream, struct snd_compr_params *params) { struct snd_soc_pcm_runtime *fe = cstream->private_data; - struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_pcm_substream *fe_substream = + fe->pcm->streams[cstream->direction].substream; struct snd_soc_platform *platform = fe->platform; int ret = 0, stream; -- cgit v1.2.3 From b87a32175a22fae2b15750477c1397970deafd68 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Jun 2017 16:56:18 +0200 Subject: PM / Domains: Fix unsafe iteration over modified list of device links commit c6e83cac3eda5f7dd32ee1453df2f7abb5c6cd46 upstream. pm_genpd_remove_subdomain() iterates over domain's master_links list and removes matching element thus it has to use safe version of list iteration. Fixes: f721889ff65a ("PM / Domains: Support for generic I/O PM domains (v8)") Signed-off-by: Krzysztof Kozlowski Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e023066e4215..7704ae7f1565 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1244,7 +1244,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *subdomain) { - struct gpd_link *link; + struct gpd_link *l, *link; int ret = -EINVAL; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) @@ -1260,7 +1260,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->master_links, master_node) { + list_for_each_entry_safe(link, l, &genpd->master_links, master_node) { if (link->slave != subdomain) continue; -- cgit v1.2.3 From 178aa241fc070e5b34c5bbe3f9ae0a6b0b335840 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Jun 2017 16:56:19 +0200 Subject: PM / Domains: Fix unsafe iteration over modified list of domain providers commit b556b15dc04e9b9b98790f04c21acf5e24f994b2 upstream. of_genpd_del_provider() iterates over list of domain provides and removes matching element thus it has to use safe version of list iteration. Fixes: aa42240ab254 (PM / Domains: Add generic OF-based PM domain look-up) Signed-off-by: Krzysztof Kozlowski Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 7704ae7f1565..45546543e6a5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1607,12 +1607,12 @@ EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell); */ void of_genpd_del_provider(struct device_node *np) { - struct of_genpd_provider *cp; + struct of_genpd_provider *cp, *tmp; struct generic_pm_domain *gpd; mutex_lock(&gpd_list_lock); mutex_lock(&of_genpd_mutex); - list_for_each_entry(cp, &of_genpd_providers, link) { + list_for_each_entry_safe(cp, tmp, &of_genpd_providers, link) { if (cp->node == np) { /* * For each PM domain associated with the -- cgit v1.2.3 From d2bb0af4463d0fd094385d8e15f8f18c9adaafa4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Jun 2017 16:56:20 +0200 Subject: PM / Domains: Fix unsafe iteration over modified list of domains commit a7e2d1bce4c1db471f1cbc0c4666a3112bbf0994 upstream. of_genpd_remove_last() iterates over list of domains and removes matching element thus it has to use safe version of list iteration. Fixes: 17926551c98a (PM / Domains: Add support for removing nested PM domains by provider) Signed-off-by: Krzysztof Kozlowski Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 45546543e6a5..951c2140d22e 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1752,14 +1752,14 @@ EXPORT_SYMBOL_GPL(of_genpd_add_subdomain); */ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { - struct generic_pm_domain *gpd, *genpd = ERR_PTR(-ENOENT); + struct generic_pm_domain *gpd, *tmp, *genpd = ERR_PTR(-ENOENT); int ret; if (IS_ERR_OR_NULL(np)) return ERR_PTR(-EINVAL); mutex_lock(&gpd_list_lock); - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + list_for_each_entry_safe(gpd, tmp, &gpd_list, gpd_list_node) { if (gpd->provider == &np->fwnode) { ret = genpd_remove(gpd); genpd = ret ? ERR_PTR(ret) : gpd; -- cgit v1.2.3 From 542c097f1cb61f57ad42ffd8431371d69e76aa03 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 27 Jun 2017 11:53:27 +0200 Subject: scsi: ses: do not add a device to an enclosure if enclosure_add_links() fails. commit 62e62ffd95539b9220894a7900a619e0f3ef4756 upstream. The enclosure_add_device() function should fail if it can't create the relevant sysfs links. Signed-off-by: Maurizio Lombardi Tested-by: Douglas Miller Acked-by: James Bottomley Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/misc/enclosure.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 65fed7146e9b..cc91f7b3d90c 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -375,6 +375,7 @@ int enclosure_add_device(struct enclosure_device *edev, int component, struct device *dev) { struct enclosure_component *cdev; + int err; if (!edev || component >= edev->components) return -EINVAL; @@ -384,12 +385,17 @@ int enclosure_add_device(struct enclosure_device *edev, int component, if (cdev->dev == dev) return -EEXIST; - if (cdev->dev) + if (cdev->dev) { enclosure_remove_links(cdev); - - put_device(cdev->dev); + put_device(cdev->dev); + } cdev->dev = get_device(dev); - return enclosure_add_links(cdev); + err = enclosure_add_links(cdev); + if (err) { + put_device(cdev->dev); + cdev->dev = NULL; + } + return err; } EXPORT_SYMBOL_GPL(enclosure_add_device); -- cgit v1.2.3 From fc866b296a463661f90ea734c40b1049c4834eaa Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Tue, 27 Jun 2017 14:55:58 -0400 Subject: scsi: Add STARGET_CREATED_REMOVE state to scsi_target_state commit f9279c968c257ee39b0d7bd2571a4d231a67bcc1 upstream. The addition of the STARGET_REMOVE state had the side effect of introducing a race condition that can cause a crash. scsi_target_reap_ref_release() checks the starget->state to see if it still in STARGET_CREATED, and if so, skips calling transport_remove_device() and device_del(), because the starget->state is only set to STARGET_RUNNING after scsi_target_add() has called device_add() and transport_add_device(). However, if an rport loss occurs while a target is being scanned, it can happen that scsi_remove_target() will be called while the starget is still in the STARGET_CREATED state. In this case, the starget->state will be set to STARGET_REMOVE, and as a result, scsi_target_reap_ref_release() will take the wrong path. The end result is a panic: [ 1255.356653] Oops: 0000 [#1] SMP [ 1255.360154] Modules linked in: x86_pkg_temp_thermal kvm_intel kvm irqbypass crc32c_intel ghash_clmulni_i [ 1255.393234] CPU: 5 PID: 149 Comm: kworker/u96:4 Tainted: G W 4.11.0+ #8 [ 1255.401879] Hardware name: Dell Inc. PowerEdge R320/08VT7V, BIOS 2.0.22 11/19/2013 [ 1255.410327] Workqueue: scsi_wq_6 fc_scsi_scan_rport [scsi_transport_fc] [ 1255.417720] task: ffff88060ca8c8c0 task.stack: ffffc900048a8000 [ 1255.424331] RIP: 0010:kernfs_find_ns+0x13/0xc0 [ 1255.429287] RSP: 0018:ffffc900048abbf0 EFLAGS: 00010246 [ 1255.435123] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 1255.443083] RDX: 0000000000000000 RSI: ffffffff8188d659 RDI: 0000000000000000 [ 1255.451043] RBP: ffffc900048abc10 R08: 0000000000000000 R09: 0000012433fe0025 [ 1255.459005] R10: 0000000025e5a4b5 R11: 0000000025e5a4b5 R12: ffffffff8188d659 [ 1255.466972] R13: 0000000000000000 R14: ffff8805f55e5088 R15: 0000000000000000 [ 1255.474931] FS: 0000000000000000(0000) GS:ffff880616b40000(0000) knlGS:0000000000000000 [ 1255.483959] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1255.490370] CR2: 0000000000000068 CR3: 0000000001c09000 CR4: 00000000000406e0 [ 1255.498332] Call Trace: [ 1255.501058] kernfs_find_and_get_ns+0x31/0x60 [ 1255.505916] sysfs_unmerge_group+0x1d/0x60 [ 1255.510498] dpm_sysfs_remove+0x22/0x60 [ 1255.514783] device_del+0xf4/0x2e0 [ 1255.518577] ? device_remove_file+0x19/0x20 [ 1255.523241] attribute_container_class_device_del+0x1a/0x20 [ 1255.529457] transport_remove_classdev+0x4e/0x60 [ 1255.534607] ? transport_add_class_device+0x40/0x40 [ 1255.540046] attribute_container_device_trigger+0xb0/0xc0 [ 1255.546069] transport_remove_device+0x15/0x20 [ 1255.551025] scsi_target_reap_ref_release+0x25/0x40 [ 1255.556467] scsi_target_reap+0x2e/0x40 [ 1255.560744] __scsi_scan_target+0xaa/0x5b0 [ 1255.565312] scsi_scan_target+0xec/0x100 [ 1255.569689] fc_scsi_scan_rport+0xb1/0xc0 [scsi_transport_fc] [ 1255.576099] process_one_work+0x14b/0x390 [ 1255.580569] worker_thread+0x4b/0x390 [ 1255.584651] kthread+0x109/0x140 [ 1255.588251] ? rescuer_thread+0x330/0x330 [ 1255.592730] ? kthread_park+0x60/0x60 [ 1255.596815] ret_from_fork+0x29/0x40 [ 1255.600801] Code: 24 08 48 83 42 40 01 5b 41 5c 5d c3 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 [ 1255.621876] RIP: kernfs_find_ns+0x13/0xc0 RSP: ffffc900048abbf0 [ 1255.628479] CR2: 0000000000000068 [ 1255.632756] ---[ end trace 34a69ba0477d036f ]--- Fix this by adding another scsi_target state STARGET_CREATED_REMOVE to distinguish this case. Fixes: f05795d3d771 ("scsi: Add intermediate STARGET_REMOVE state to scsi_target_state") Reported-by: David Jeffery Signed-off-by: Ewan D. Milne Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_scan.c | 5 +++-- drivers/scsi/scsi_sysfs.c | 8 ++++++-- include/scsi/scsi_device.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 6f7128f49c30..27a6d3c6cb7c 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -384,11 +384,12 @@ static void scsi_target_reap_ref_release(struct kref *kref) = container_of(kref, struct scsi_target, reap_ref); /* - * if we get here and the target is still in the CREATED state that + * if we get here and the target is still in a CREATED state that * means it was allocated but never made visible (because a scan * turned up no LUNs), so don't call device_del() on it. */ - if (starget->state != STARGET_CREATED) { + if ((starget->state != STARGET_CREATED) && + (starget->state != STARGET_CREATED_REMOVE)) { transport_remove_device(&starget->dev); device_del(&starget->dev); } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 82dfe07b1d47..3a6f557ec128 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1370,11 +1370,15 @@ restart: spin_lock_irqsave(shost->host_lock, flags); list_for_each_entry(starget, &shost->__targets, siblings) { if (starget->state == STARGET_DEL || - starget->state == STARGET_REMOVE) + starget->state == STARGET_REMOVE || + starget->state == STARGET_CREATED_REMOVE) continue; if (starget->dev.parent == dev || &starget->dev == dev) { kref_get(&starget->reap_ref); - starget->state = STARGET_REMOVE; + if (starget->state == STARGET_CREATED) + starget->state = STARGET_CREATED_REMOVE; + else + starget->state = STARGET_REMOVE; spin_unlock_irqrestore(shost->host_lock, flags); __scsi_remove_target(starget); scsi_target_reap(starget); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b9ec4939b80c..f2b9a2ffe9e6 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -248,6 +248,7 @@ enum scsi_target_state { STARGET_CREATED = 1, STARGET_RUNNING, STARGET_REMOVE, + STARGET_CREATED_REMOVE, STARGET_DEL, }; -- cgit v1.2.3 From 732e3c76cf97451e8b58f666c1f54cd8b3f9d26f Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 7 Jul 2017 14:45:49 -0700 Subject: iscsi-target: Add login_keys_workaround attribute for non RFC initiators commit 138d351eefb727ab9e41a3dc5f112ceb4f6e59f2 upstream. This patch re-introduces part of a long standing login workaround that was recently dropped by: commit 1c99de981f30b3e7868b8d20ce5479fa1c0fea46 Author: Nicholas Bellinger Date: Sun Apr 2 13:36:44 2017 -0700 iscsi-target: Drop work-around for legacy GlobalSAN initiator Namely, the workaround for FirstBurstLength ended up being required by Mellanox Flexboot PXE boot ROMs as reported by Robert. So this patch re-adds the work-around for FirstBurstLength within iscsi_check_proposer_for_optional_reply(), and makes the key optional to respond when the initiator does not propose, nor respond to it. Also as requested by Arun, this patch introduces a new TPG attribute named 'login_keys_workaround' that controls the use of both the FirstBurstLength workaround, as well as the two other existing workarounds for gPXE iSCSI boot client. By default, the workaround is enabled with login_keys_workaround=1, since Mellanox FlexBoot requires it, and Arun has verified the Qlogic MSFT initiator already proposes FirstBurstLength, so it's uneffected by this re-adding this part of the original work-around. Reported-by: Robert LeBlanc Cc: Robert LeBlanc Reviewed-by: Arun Easi Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_configfs.c | 2 ++ drivers/target/iscsi/iscsi_target_nego.c | 6 ++-- drivers/target/iscsi/iscsi_target_parameters.c | 41 ++++++++++++++++++-------- drivers/target/iscsi/iscsi_target_parameters.h | 2 +- drivers/target/iscsi/iscsi_target_tpg.c | 19 ++++++++++++ drivers/target/iscsi/iscsi_target_tpg.h | 1 + include/target/iscsi/iscsi_target_core.h | 9 ++++++ 7 files changed, 64 insertions(+), 16 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 7e70fe849f0d..9cbbc9cf63fb 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -802,6 +802,7 @@ DEF_TPG_ATTRIB(default_erl); DEF_TPG_ATTRIB(t10_pi); DEF_TPG_ATTRIB(fabric_prot_type); DEF_TPG_ATTRIB(tpg_enabled_sendtargets); +DEF_TPG_ATTRIB(login_keys_workaround); static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_attr_authentication, @@ -817,6 +818,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_attr_t10_pi, &iscsi_tpg_attrib_attr_fabric_prot_type, &iscsi_tpg_attrib_attr_tpg_enabled_sendtargets, + &iscsi_tpg_attrib_attr_login_keys_workaround, NULL, }; diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 89d34bd6d87f..6693d7c69f97 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -819,7 +819,8 @@ static int iscsi_target_handle_csg_zero( SENDER_TARGET, login->rsp_buf, &login->rsp_length, - conn->param_list); + conn->param_list, + conn->tpg->tpg_attrib.login_keys_workaround); if (ret < 0) return -1; @@ -889,7 +890,8 @@ static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_log SENDER_TARGET, login->rsp_buf, &login->rsp_length, - conn->param_list); + conn->param_list, + conn->tpg->tpg_attrib.login_keys_workaround); if (ret < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, ISCSI_LOGIN_STATUS_INIT_ERR); diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 4a073339ae2e..0151776fb48e 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -765,7 +765,8 @@ static int iscsi_check_for_auth_key(char *key) return 0; } -static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) +static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param, + bool keys_workaround) { if (IS_TYPE_BOOL_AND(param)) { if (!strcmp(param->value, NO)) @@ -773,19 +774,31 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) } else if (IS_TYPE_BOOL_OR(param)) { if (!strcmp(param->value, YES)) SET_PSTATE_REPLY_OPTIONAL(param); - /* - * Required for gPXE iSCSI boot client - */ - if (!strcmp(param->name, IMMEDIATEDATA)) - SET_PSTATE_REPLY_OPTIONAL(param); + + if (keys_workaround) { + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, IMMEDIATEDATA)) + SET_PSTATE_REPLY_OPTIONAL(param); + } } else if (IS_TYPE_NUMBER(param)) { if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) SET_PSTATE_REPLY_OPTIONAL(param); - /* - * Required for gPXE iSCSI boot client - */ - if (!strcmp(param->name, MAXCONNECTIONS)) - SET_PSTATE_REPLY_OPTIONAL(param); + + if (keys_workaround) { + /* + * Required for Mellanox Flexboot PXE boot ROM + */ + if (!strcmp(param->name, FIRSTBURSTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, MAXCONNECTIONS)) + SET_PSTATE_REPLY_OPTIONAL(param); + } } else if (IS_PHASE_DECLARATIVE(param)) SET_PSTATE_REPLY_OPTIONAL(param); } @@ -1422,7 +1435,8 @@ int iscsi_encode_text_output( u8 sender, char *textbuf, u32 *length, - struct iscsi_param_list *param_list) + struct iscsi_param_list *param_list, + bool keys_workaround) { char *output_buf = NULL; struct iscsi_extra_response *er; @@ -1458,7 +1472,8 @@ int iscsi_encode_text_output( *length += 1; output_buf = textbuf + *length; SET_PSTATE_PROPOSER(param); - iscsi_check_proposer_for_optional_reply(param); + iscsi_check_proposer_for_optional_reply(param, + keys_workaround); pr_debug("Sending key: %s=%s\n", param->name, param->value); } diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h index a0751e3f0813..17a58c2913f2 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.h +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -40,7 +40,7 @@ extern int iscsi_extract_key_value(char *, char **, char **); extern int iscsi_update_param_value(struct iscsi_param *, char *); extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_conn *); extern int iscsi_encode_text_output(u8, u8, char *, u32 *, - struct iscsi_param_list *); + struct iscsi_param_list *, bool); extern int iscsi_check_negotiated_keys(struct iscsi_param_list *); extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *, struct iscsi_param_list *); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 205a509b0dfb..63e1dcc5914d 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -227,6 +227,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) a->t10_pi = TA_DEFAULT_T10_PI; a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE; a->tpg_enabled_sendtargets = TA_DEFAULT_TPG_ENABLED_SENDTARGETS; + a->login_keys_workaround = TA_DEFAULT_LOGIN_KEYS_WORKAROUND; } int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) @@ -899,3 +900,21 @@ int iscsit_ta_tpg_enabled_sendtargets( return 0; } + +int iscsit_ta_login_keys_workaround( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->login_keys_workaround = flag; + pr_debug("iSCSI_TPG[%hu] - TPG enabled bit for login keys workaround: %s ", + tpg->tpgt, (a->login_keys_workaround) ? "ON" : "OFF"); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 2da211920c18..901a712180f0 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -39,5 +39,6 @@ extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32); extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32); extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32); extern int iscsit_ta_tpg_enabled_sendtargets(struct iscsi_portal_group *, u32); +extern int iscsit_ta_login_keys_workaround(struct iscsi_portal_group *, u32); #endif /* ISCSI_TARGET_TPG_H */ diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 4ac24f5a3308..33b2e75bf2eb 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -64,6 +64,14 @@ #define TA_DEFAULT_FABRIC_PROT_TYPE 0 /* TPG status needs to be enabled to return sendtargets discovery endpoint info */ #define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1 +/* + * Used to control the sending of keys with optional to respond state bit, + * as a workaround for non RFC compliant initiators,that do not propose, + * nor respond to specific keys required for login to complete. + * + * See iscsi_check_proposer_for_optional_reply() for more details. + */ +#define TA_DEFAULT_LOGIN_KEYS_WORKAROUND 1 #define ISCSI_IOV_DATA_BUFFER 5 @@ -766,6 +774,7 @@ struct iscsi_tpg_attrib { u8 t10_pi; u32 fabric_prot_type; u32 tpg_enabled_sendtargets; + u32 login_keys_workaround; struct iscsi_portal_group *tpg; }; -- cgit v1.2.3 From 71b1caea95a8ab5b2e5f868a68a454283ed51109 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 May 2017 16:48:36 -0700 Subject: xen/scsiback: Fix a TMR related use-after-free commit 9f4ab18ac51dc87345a9cbd2527e6acf7a0a9335 upstream. scsiback_release_cmd() must not dereference se_cmd->se_tmr_req because that memory is freed by target_free_cmd_mem() before scsiback_release_cmd() is called. Fix this use-after-free by inlining struct scsiback_tmr into struct vscsibk_pend. Signed-off-by: Bart Van Assche Reviewed-by: Juergen Gross Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: David Disseldorp Cc: xen-devel@lists.xenproject.org Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-scsiback.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index d6950e0802b7..980f32817305 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -134,9 +134,7 @@ struct vscsibk_pend { struct page *pages[VSCSI_MAX_GRANTS]; struct se_cmd se_cmd; -}; -struct scsiback_tmr { atomic_t tmr_complete; wait_queue_head_t tmr_wait; }; @@ -599,26 +597,20 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req, struct scsiback_tpg *tpg = pending_req->v2p->tpg; struct scsiback_nexus *nexus = tpg->tpg_nexus; struct se_cmd *se_cmd = &pending_req->se_cmd; - struct scsiback_tmr *tmr; u64 unpacked_lun = pending_req->v2p->lun; int rc, err = FAILED; - tmr = kzalloc(sizeof(struct scsiback_tmr), GFP_KERNEL); - if (!tmr) { - target_put_sess_cmd(se_cmd); - goto err; - } - - init_waitqueue_head(&tmr->tmr_wait); + init_waitqueue_head(&pending_req->tmr_wait); rc = target_submit_tmr(&pending_req->se_cmd, nexus->tvn_se_sess, &pending_req->sense_buffer[0], - unpacked_lun, tmr, act, GFP_KERNEL, + unpacked_lun, NULL, act, GFP_KERNEL, tag, TARGET_SCF_ACK_KREF); if (rc) goto err; - wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete)); + wait_event(pending_req->tmr_wait, + atomic_read(&pending_req->tmr_complete)); err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; @@ -626,9 +618,8 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req, scsiback_do_resp_with_sense(NULL, err, 0, pending_req); transport_generic_free_cmd(&pending_req->se_cmd, 1); return; + err: - if (tmr) - kfree(tmr); scsiback_do_resp_with_sense(NULL, err, 0, pending_req); } @@ -1389,12 +1380,6 @@ static int scsiback_check_stop_free(struct se_cmd *se_cmd) static void scsiback_release_cmd(struct se_cmd *se_cmd) { struct se_session *se_sess = se_cmd->se_sess; - struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - - if (se_tmr && se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) { - struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr; - kfree(tmr); - } percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } @@ -1455,11 +1440,11 @@ static int scsiback_queue_status(struct se_cmd *se_cmd) static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd) { - struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr; + struct vscsibk_pend *pending_req = container_of(se_cmd, + struct vscsibk_pend, se_cmd); - atomic_set(&tmr->tmr_complete, 1); - wake_up(&tmr->tmr_wait); + atomic_set(&pending_req->tmr_complete, 1); + wake_up(&pending_req->tmr_wait); } static void scsiback_aborted_task(struct se_cmd *se_cmd) -- cgit v1.2.3 From d638c8584462609237b4d12563f871f2323ac819 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Thu, 29 Jun 2017 03:04:07 +1000 Subject: powerpc/pseries: Fix passing of pp0 in updatepp() and updateboltedpp() commit e71ff982ae4c17d176e9f0132157d54973788377 upstream. Once upon a time there were only two PP (page protection) bits. In ISA 2.03 an additional PP bit was added, but because of the layout of the HPTE it could not be made contiguous with the existing PP bits. The result is that we now have three PP bits, named pp0, pp1, pp2, where pp0 occupies bit 63 of dword 1 of the HPTE and pp1 and pp2 occupy bits 1 and 0 respectively. Until recently Linux hasn't used pp0, however with the addition of _PAGE_KERNEL_RO we started using it. The problem arises in the LPAR code, where we need to translate the PP bits into the argument for the H_PROTECT hypercall. Currently the code only passes bits 0-2 of newpp, which covers pp1, pp2 and N (no execute), meaning pp0 is not passed to the hypervisor at all. We can't simply pass it through in bit 63, as that would collide with a different field in the flags argument, as defined in PAPR. Instead we have to shift it down to bit 8 (IBM bit 55). Fixes: e58e87adc8bf ("powerpc/mm: Update _PAGE_KERNEL_RO") Signed-off-by: Balbir Singh [mpe: Simplify the test, rework change log] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/lpar.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2c98f6c1c9c..a7bb872a0dc4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -279,7 +279,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, int ssize, unsigned long inv_flags) { unsigned long lpar_rc; - unsigned long flags = (newpp & 7) | H_AVPN; + unsigned long flags; unsigned long want_v; want_v = hpte_encode_avpn(vpn, psize, ssize); @@ -287,6 +287,11 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", want_v, slot, flags, psize); + flags = (newpp & 7) | H_AVPN; + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) + /* Move pp0 into bit 8 (IBM 55) */ + flags |= (newpp & HPTE_R_PP0) >> 55; + lpar_rc = plpar_pte_protect(flags, slot, want_v); if (lpar_rc == H_NOT_FOUND) { @@ -358,6 +363,10 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, BUG_ON(slot == -1); flags = newpp & 7; + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) + /* Move pp0 into bit 8 (IBM 55) */ + flags |= (newpp & HPTE_R_PP0) >> 55; + lpar_rc = plpar_pte_protect(flags, slot, 0); BUG_ON(lpar_rc != H_SUCCESS); -- cgit v1.2.3 From 99fc5a2254ef6409947e21b7b829cd59eb8234c5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 11 Jul 2017 22:10:54 +1000 Subject: powerpc/64: Fix atomic64_inc_not_zero() to return an int commit 01e6a61aceb82e13bec29502a8eb70d9574f97ad upstream. Although it's not documented anywhere, there is an expectation that atomic64_inc_not_zero() returns a result which fits in an int. This is the behaviour implemented on all arches except powerpc. This has caused at least one bug in practice, in the percpu-refcount code, where the long result from our atomic64_inc_not_zero() was truncated to an int leading to lost references and stuck systems. That was worked around in that code in commit 966d2b04e070 ("percpu-refcount: fix reference leak during percpu-atomic transition"). To the best of my grepping abilities there are no other callers in-tree which truncate the value, but we should fix it anyway. Because the breakage is subtle and potentially very harmful I'm also tagging it for stable. Code generation is largely unaffected because in most cases the callers are just using the result for a test anyway. In particular the case of fget() that was mentioned in commit a6cf7ed5119f ("powerpc/atomic: Implement atomic*_inc_not_zero") generates exactly the same code. Fixes: a6cf7ed5119f ("powerpc/atomic: Implement atomic*_inc_not_zero") Noticed-by: Linus Torvalds Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 2b90335194a7..a2cc8010cd72 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -560,7 +560,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) * Atomically increments @v by 1, so long as @v is non-zero. * Returns non-zero if @v was non-zero, and zero otherwise. */ -static __inline__ long atomic64_inc_not_zero(atomic64_t *v) +static __inline__ int atomic64_inc_not_zero(atomic64_t *v) { long t1, t2; @@ -579,7 +579,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v) : "r" (&v->counter) : "cc", "xer", "memory"); - return t1; + return t1 != 0; } #endif /* __powerpc64__ */ -- cgit v1.2.3 From 53a28216131762700d10eb4755562b10c8d2685b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 15 Jun 2017 09:46:38 +1000 Subject: powerpc: Fix emulation of mcrf in emulate_step() commit 87c4b83e0fe234a1f0eed131ab6fa232036860d5 upstream. The mcrf emulation code was using the CR field number directly as the shift value, without taking into account that CR fields are numbered from 0-7 starting at the high bits. That meant it was looking at the CR fields in the reverse order. Fixes: cf87c3f6b647 ("powerpc: Emulate icbi, mcrf and conditional-trap instructions") Signed-off-by: Anton Blanchard Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/sstep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 6ca3b902f7b9..b6e4eca6e4a4 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -687,8 +687,10 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 19: switch ((instr >> 1) & 0x3ff) { case 0: /* mcrf */ - rd = (instr >> 21) & 0x1c; - ra = (instr >> 16) & 0x1c; + rd = 7 - ((instr >> 23) & 0x7); + ra = 7 - ((instr >> 18) & 0x7); + rd *= 4; + ra *= 4; val = (regs->ccr >> ra) & 0xf; regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); goto instr_done; -- cgit v1.2.3 From 5e35ee249e26e36cae641bee8d212fba0e1bb6e6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 15 Jun 2017 09:46:39 +1000 Subject: powerpc: Fix emulation of mfocrf in emulate_step() commit 64e756c55aa46fc18fd53e8f3598b73b528d8637 upstream. From POWER4 onwards, mfocrf() only places the specified CR field into the destination GPR, and the rest of it is set to 0. The PowerPC AS from version 3.0 now requires this behaviour. The emulation code currently puts the entire CR into the destination GPR. Fix it. Fixes: 6888199f7fe5 ("[POWERPC] Emulate more instructions in software") Signed-off-by: Anton Blanchard Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/sstep.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index b6e4eca6e4a4..776c1a1f9bc2 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -970,6 +970,19 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, #endif case 19: /* mfcr */ + if ((instr >> 20) & 1) { + imm = 0xf0000000UL; + for (sh = 0; sh < 8; ++sh) { + if (instr & (0x80000 >> sh)) { + regs->gpr[rd] = regs->ccr & imm; + break; + } + imm >>= 4; + } + + goto instr_done; + } + regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; -- cgit v1.2.3 From 88481a2c40e49912d0671cc7e9a5bd3534662b49 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 6 Jul 2017 18:46:43 +1000 Subject: powerpc/asm: Mark cr0 as clobbered in mftb() commit 2400fd822f467cb4c886c879d8ad99feac9cf319 upstream. The workaround for the CELL timebase bug does not correctly mark cr0 as being clobbered. This means GCC doesn't know that the asm block changes cr0 and might leave the result of an unrelated comparison in cr0 across the block, which we then trash, leading to basically random behaviour. Fixes: 859deea949c3 ("[POWERPC] Cell timebase bug workaround") Signed-off-by: Oliver O'Halloran [mpe: Tweak change log and flag for stable] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e7d9eca53af3..ceb168cd3b81 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1283,7 +1283,7 @@ static inline void msr_check_and_clear(unsigned long bits) " .llong 0\n" \ ".previous" \ : "=r" (rval) \ - : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \ + : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ rval;}) #else #define mftb() ({unsigned long rval; \ -- cgit v1.2.3 From 3b7babc6be0aa56297b5d1dea0090339a2d5659d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 8 Jul 2017 07:45:32 -0500 Subject: powerpc/mm/radix: Properly clear process table entry commit c6bb0b8d426a8cf865ca9c8a532cc3a2927cfceb upstream. On radix, the process table entry we want to clear when destroying a context is entry 0, not entry 1. This has no *immediate* consequence on Power9, but it can cause other bugs to become worse. Fixes: 7e381c0ff618 ("powerpc/mm/radix: Add mmu context handling callback for radix") Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/mmu_context_book3s64.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 73bf6e14c3aa..a006f822b154 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -167,9 +167,15 @@ void destroy_context(struct mm_struct *mm) mm->context.cop_lockp = NULL; #endif /* CONFIG_PPC_ICSWX */ - if (radix_enabled()) - process_tb[mm->context.id].prtb1 = 0; - else + if (radix_enabled()) { + /* + * Radix doesn't have a valid bit in the process table + * entries. However we know that at least P9 implementation + * will avoid caching an entry with an invalid RTS field, + * and 0 is invalid. So this will do. + */ + process_tb[mm->context.id].prtb0 = 0; + } else subpage_prot_free(mm); destroy_pagetable_page(mm); __destroy_context(mm->context.id); -- cgit v1.2.3 From 3c17d418afb017792aa961e6eebb2ecc1ca5c775 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 13 Apr 2017 18:35:59 +0800 Subject: af_key: Fix sadb_x_ipsecrequest parsing commit 096f41d3a8fcbb8dde7f71379b1ca85fe213eded upstream. The parsing of sadb_x_ipsecrequest is broken in a number of ways. First of all we're not verifying sadb_x_ipsecrequest_len. This is needed when the structure carries addresses at the end. Worse we don't even look at the length when we parse those optional addresses. The migration code had similar parsing code that's better but it also has some deficiencies. The length is overcounted first of all as it includes the header itself. It also fails to check the length before dereferencing the sa_family field. This patch fixes those problems in parse_sockaddr_pair and then uses it in parse_ipsecrequest. Reported-by: Andrey Konovalov Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index e67c28e614b9..d8d95b6415e4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -65,6 +65,10 @@ struct pfkey_sock { } dump; }; +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, + xfrm_address_t *saddr, xfrm_address_t *daddr, + u16 *family); + static inline struct pfkey_sock *pfkey_sk(struct sock *sk) { return (struct pfkey_sock *)sk; @@ -1922,19 +1926,14 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { - u8 *sa = (u8 *) (rq + 1); - int family, socklen; + int err; - family = pfkey_sockaddr_extract((struct sockaddr *)sa, - &t->saddr); - if (!family) - return -EINVAL; - - socklen = pfkey_sockaddr_len(family); - if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen), - &t->id.daddr) != family) - return -EINVAL; - t->encap_family = family; + err = parse_sockaddr_pair( + (struct sockaddr *)(rq + 1), + rq->sadb_x_ipsecrequest_len - sizeof(*rq), + &t->saddr, &t->id.daddr, &t->encap_family); + if (err) + return err; } else t->encap_family = xp->family; @@ -1954,7 +1953,11 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) return -EINVAL; - while (len >= sizeof(struct sadb_x_ipsecrequest)) { + while (len >= sizeof(*rq)) { + if (len < rq->sadb_x_ipsecrequest_len || + rq->sadb_x_ipsecrequest_len < sizeof(*rq)) + return -EINVAL; + if ((err = parse_ipsecrequest(xp, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; @@ -2417,7 +2420,6 @@ out: return err; } -#ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_sockaddr_pair_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); @@ -2429,7 +2431,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, { int af, socklen; - if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) + if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; af = pfkey_sockaddr_extract(sa, saddr); @@ -2445,6 +2447,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, return 0; } +#ifdef CONFIG_NET_KEY_MIGRATE static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct xfrm_migrate *m) { @@ -2452,13 +2455,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct sadb_x_ipsecrequest *rq2; int mode; - if (len <= sizeof(struct sadb_x_ipsecrequest) || - len < rq1->sadb_x_ipsecrequest_len) + if (len < sizeof(*rq1) || + len < rq1->sadb_x_ipsecrequest_len || + rq1->sadb_x_ipsecrequest_len < sizeof(*rq1)) return -EINVAL; /* old endoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), - rq1->sadb_x_ipsecrequest_len, + rq1->sadb_x_ipsecrequest_len - sizeof(*rq1), &m->old_saddr, &m->old_daddr, &m->old_family); if (err) @@ -2467,13 +2471,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); len -= rq1->sadb_x_ipsecrequest_len; - if (len <= sizeof(struct sadb_x_ipsecrequest) || - len < rq2->sadb_x_ipsecrequest_len) + if (len <= sizeof(*rq2) || + len < rq2->sadb_x_ipsecrequest_len || + rq2->sadb_x_ipsecrequest_len < sizeof(*rq2)) return -EINVAL; /* new endpoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), - rq2->sadb_x_ipsecrequest_len, + rq2->sadb_x_ipsecrequest_len - sizeof(*rq2), &m->new_saddr, &m->new_daddr, &m->new_family); if (err) -- cgit v1.2.3 From 13b2f9f9b8aaa2085a2fcf946b8280f4e3d0f4f8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 19 Aug 2016 16:30:25 +0800 Subject: PCI: Work around poweroff & suspend-to-RAM issue on Macbook Pro 11 commit 13cfc732160f7bc7e596128ce34cda361c556966 upstream. Neither soft poweroff (transition to ACPI power state S5) nor suspend-to-RAM (transition to state S3) works on the Macbook Pro 11,4 and 11,5. The problem is related to the [mem 0x7fa00000-0x7fbfffff] space. When we use that space, e.g., by assigning it to the 00:1c.0 Root Port, the ACPI Power Management 1 Control Register (PM1_CNT) at [io 0x1804] doesn't work anymore. Linux does a soft poweroff (transition to S5) by writing to PM1_CNT. The theory about why this doesn't work is: - The write to PM1_CNT causes an SMI - The BIOS SMI handler depends on something in [mem 0x7fa00000-0x7fbfffff] - When Linux assigns [mem 0x7fa00000-0x7fbfffff] to the 00:1c.0 Port, it covers up whatever the SMI handler uses, so the SMI handler no longer works correctly Reserve the [mem 0x7fa00000-0x7fbfffff] space so we don't assign it to anything. This is voodoo programming, since we don't know what the real conflict is, but we've failed to find the root cause. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=103211 Tested-by: thejoe@gmail.com Signed-off-by: Bjorn Helgaas Cc: Rafael J. Wysocki Cc: Lukas Wunner Cc: Chen Yu Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/fixup.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 6d52b94f4bb9..20fa7c84109d 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -571,3 +571,35 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar); + +/* + * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff] + * + * Using the [mem 0x7fa00000-0x7fbfffff] region, e.g., by assigning it to + * the 00:1c.0 Root Port, causes a conflict with [io 0x1804], which is used + * for soft poweroff and suspend-to-RAM. + * + * As far as we know, this is related to the address space, not to the Root + * Port itself. Attaching the quirk to the Root Port is a convenience, but + * it could probably also be a standalone DMI quirk. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=103211 + */ +static void quirk_apple_mbp_poweroff(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + + if ((!dmi_match(DMI_PRODUCT_NAME, "MacBookPro11,4") && + !dmi_match(DMI_PRODUCT_NAME, "MacBookPro11,5")) || + pdev->bus->number != 0 || pdev->devfn != PCI_DEVFN(0x1c, 0)) + return; + + res = request_mem_region(0x7fa00000, 0x200000, + "MacBook Pro poweroff workaround"); + if (res) + dev_info(dev, "claimed %s %pR\n", res->name, res); + else + dev_info(dev, "can't work around MacBook Pro poweroff issue\n"); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8c10, quirk_apple_mbp_poweroff); -- cgit v1.2.3 From f257f4bf6f0766725fd8bcb243ad9271aee7a106 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Mon, 3 Jul 2017 17:21:02 +0800 Subject: PCI: rockchip: Use normal register bank for config accessors commit dc8cca5ef25ac4cb0dfc37467521a759767ff361 upstream. Rockchip's RC has two banks of registers for the root port: a normal bank that is strictly compatible with the PCIe spec, and a privileged bank that can be used to change RO bits of root port registers. When probing the RC driver, we use the privileged bank to do some basic setup work as some RO bits are hw-inited to wrong value. But we didn't change to the normal bank after probing the driver. This leads to a serious problem when the PME code tries to clear the PME status by writing PCI_EXP_RTSTA_PME to the register of PCI_EXP_RTSTA. Per PCIe 3.0 spec, section 7.8.14, the PME status bit is RW1C. So the PME code is doing the right thing to clear the PME status but we find the RC doesn't clear it but actually setting it to one. So finally the system trap in pcie_pme_work_fn() as PCI_EXP_RTSTA_PME is true now forever. This issue can be reproduced by booting kernel with pci=nomsi. Use the normal register bank for the PCI config accessors. The privileged bank is used only internally by this driver. Fixes: e77f847d ("PCI: rockchip: Add Rockchip PCIe controller support") Signed-off-by: Shawn Lin Signed-off-by: Bjorn Helgaas Cc: Jeffy Chen Cc: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-rockchip.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c index 3452983d3569..03ebfd574735 100644 --- a/drivers/pci/host/pcie-rockchip.c +++ b/drivers/pci/host/pcie-rockchip.c @@ -131,6 +131,7 @@ PCIE_CORE_INT_CT | PCIE_CORE_INT_UTC | \ PCIE_CORE_INT_MMVC) +#define PCIE_RC_CONFIG_NORMAL_BASE 0x800000 #define PCIE_RC_CONFIG_BASE 0xa00000 #define PCIE_RC_CONFIG_VENDOR (PCIE_RC_CONFIG_BASE + 0x00) #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08) @@ -267,7 +268,9 @@ static int rockchip_pcie_valid_device(struct rockchip_pcie *rockchip, static int rockchip_pcie_rd_own_conf(struct rockchip_pcie *rockchip, int where, int size, u32 *val) { - void __iomem *addr = rockchip->apb_base + PCIE_RC_CONFIG_BASE + where; + void __iomem *addr; + + addr = rockchip->apb_base + PCIE_RC_CONFIG_NORMAL_BASE + where; if (!IS_ALIGNED((uintptr_t)addr, size)) { *val = 0; @@ -291,11 +294,13 @@ static int rockchip_pcie_wr_own_conf(struct rockchip_pcie *rockchip, int where, int size, u32 val) { u32 mask, tmp, offset; + void __iomem *addr; offset = where & ~0x3; + addr = rockchip->apb_base + PCIE_RC_CONFIG_NORMAL_BASE + offset; if (size == 4) { - writel(val, rockchip->apb_base + PCIE_RC_CONFIG_BASE + offset); + writel(val, addr); return PCIBIOS_SUCCESSFUL; } @@ -306,9 +311,9 @@ static int rockchip_pcie_wr_own_conf(struct rockchip_pcie *rockchip, * corrupt RW1C bits in adjacent registers. But the hardware * doesn't support smaller writes. */ - tmp = readl(rockchip->apb_base + PCIE_RC_CONFIG_BASE + offset) & mask; + tmp = readl(addr) & mask; tmp |= val << ((where & 0x3) * 8); - writel(tmp, rockchip->apb_base + PCIE_RC_CONFIG_BASE + offset); + writel(tmp, addr); return PCIBIOS_SUCCESSFUL; } -- cgit v1.2.3 From 33780512d9fe64872b3c46596d2c6d812be98fc9 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Thu, 25 May 2017 16:49:07 +0800 Subject: PCI/PM: Restore the status of PCI devices across hibernation commit e60514bd4485c0c7c5a7cf779b200ce0b95c70d6 upstream. Currently we saw a lot of "No irq handler" errors during hibernation, which caused the system hang finally: ata4.00: qc timeout (cmd 0xec) ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4) ata4.00: revalidation failed (errno=-5) ata4: SATA link up 6.0 Gbps (SStatus 133 SControl 300) do_IRQ: 31.151 No irq handler for vector According to above logs, there is an interrupt triggered and it is dispatched to CPU31 with a vector number 151, but there is no handler for it, thus this IRQ will not get acked and will cause an IRQ flood which kills the system. To be more specific, the 31.151 is an interrupt from the AHCI host controller. After some investigation, the reason why this issue is triggered is because the thaw_noirq() function does not restore the MSI/MSI-X settings across hibernation. The scenario is illustrated below: 1. Before hibernation, IRQ 34 is the handler for the AHCI device, which is bound to CPU31. 2. Hibernation starts, the AHCI device is put into low power state. 3. All the nonboot CPUs are put offline, so IRQ 34 has to be migrated to the last alive one - CPU0. 4. After the snapshot has been created, all the nonboot CPUs are brought up again; IRQ 34 remains bound to CPU0. 5. AHCI devices are put into D0. 6. The snapshot is written to the disk. The issue is triggered in step 6. The AHCI interrupt should be delivered to CPU0, however it is delivered to the original CPU31 instead, which causes the "No irq handler" issue. Ying Huang has provided a clue that, in step 3 it is possible that writing to the register might not take effect as the PCI devices have been suspended. In step 3, the IRQ 34 affinity should be modified from CPU31 to CPU0, but in fact it is not. In __pci_write_msi_msg(), if the device is already in low power state, the low level MSI message entry will not be updated but cached. During the device restore process after a normal suspend/resume, pci_restore_msi_state() writes the cached MSI back to the hardware. But this is not the case for hibernation. pci_restore_msi_state() is not currently called in pci_pm_thaw_noirq(), although pci_save_state() has saved the necessary PCI cached information in pci_pm_freeze_noirq(). Restore the PCI status for the device during hibernation. Otherwise the status might be lost across hibernation (for example, settings for MSI, MSI-X, ATS, ACS, IOV, etc.), which might cause problems during hibernation. Suggested-by: Ying Huang Suggested-by: Rafael J. Wysocki Signed-off-by: Chen Yu [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Cc: Len Brown Cc: Dan Williams Cc: Rui Zhang Cc: Ying Huang Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 1ccce1cd6aca..8a68e2b554e1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -954,6 +954,7 @@ static int pci_pm_thaw_noirq(struct device *dev) return pci_legacy_resume_early(dev); pci_update_current_state(pci_dev, PCI_D0); + pci_restore_state(pci_dev); if (drv && drv->pm && drv->pm->thaw_noirq) error = drv->pm->thaw_noirq(dev); -- cgit v1.2.3 From 445ea10969aade3f9711b262756099717a5fe6bb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 29 Apr 2017 20:33:09 +0300 Subject: ipvs: SNAT packet replies only for NATed connections commit 3c5ab3f395d66a9e4e937fcfdf6ebc63894f028b upstream. We do not check if packet from real server is for NAT connection before performing SNAT. This causes problems for setups that use DR/TUN and allow local clients to access the real server directly, for example: - local client in director creates IPVS-DR/TUN connection CIP->VIP and the request packets are routed to RIP. Talks are finished but IPVS connection is not expired yet. - second local client creates non-IPVS connection CIP->RIP with same reply tuple RIP->CIP and when replies are received on LOCAL_IN we wrongly assign them for the first client connection because RIP->CIP matches the reply direction. As result, IPVS SNATs replies for non-IPVS connections. The problem is more visible to local UDP clients but in rare cases it can happen also for TCP or remote clients when the real server sends the reply traffic via the director. So, better to be more precise for the reply traffic. As replies are not expected for DR/TUN connections, better to not touch them. Reported-by: Nick Moriarty Tested-by: Nick Moriarty Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_core.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2c1b498a7a27..e34d3f60fccd 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, { unsigned int verdict = NF_DROP; - if (IP_VS_FWD_METHOD(cp) != 0) { - pr_err("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); + +ignore_cp: verdict = NF_ACCEPT; out: @@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in */ cp = pp->conn_out_get(ipvs, af, skb, &iph); - if (likely(cp)) + if (likely(cp)) { + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); + } /* Check for real-server-started requests */ if (atomic_read(&ipvs->conn_out_counter)) { @@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } } + +out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; + +ignore_cp: + __ip_vs_conn_put(cp); + goto out; } /* -- cgit v1.2.3 From bf0440882ea94629b198b132ce89c11e25b4b91d Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 20 Jul 2017 14:48:29 +0300 Subject: xhci: fix 20000ms port resume timeout commit a54408d0a004757789863d74e29c2297edae0b4d upstream. A uncleared PLC (port link change) bit will prevent furuther port event interrupts for that port. Leaving it uncleared caused get_port_status() to timeout after 20000ms while waiting to get the final port event interrupt for resume -> U0 state change. This is a targeted fix for a specific case where we get a port resume event racing with xhci resume. The port event interrupt handler notices xHC is not yet running and bails out early, leaving PLC uncleared. The whole xhci port resuming needs more attention, but while working on it it anyways makes sense to always ensure PLC is cleared in get_port_status before setting a new link state and waiting for its completion. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index ff544f20872c..36b7789f8f22 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -783,6 +783,9 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, clear_bit(wIndex, &bus_state->resuming_ports); set_bit(wIndex, &bus_state->rexit_ports); + + xhci_test_and_clear_bit(xhci, port_array, wIndex, + PORT_PLC); xhci_set_link_state(xhci, port_array, wIndex, XDEV_U0); -- cgit v1.2.3 From 01845a8347b73ddc89c0e7a81a108a4786ff07ea Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 20 Jul 2017 14:48:26 +0300 Subject: xhci: Fix NULL pointer dereference when cleaning up streams for removed host commit 4b895868bb2da60a386a17cde3bf9ecbc70c79f4 upstream. This off by one in stream_id indexing caused NULL pointer dereference and soft lockup on machines with USB attached SCSI devices connected to a hotpluggable xhci controller. The code that cleans up pending URBs for dead hosts tried to dereference a stream ring at the invalid stream_id 0. ep->stream_info->stream_rings[0] doesn't point to a ring. Start looping stream_id from 1 like in all the other places in the driver, and check that the ring exists before trying to kill URBs on it. Reported-by: rocko r Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 521c1816a26a..63735b5310bb 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -860,13 +860,16 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci, (ep->ep_state & EP_GETTING_NO_STREAMS)) { int stream_id; - for (stream_id = 0; stream_id < ep->stream_info->num_streams; + for (stream_id = 1; stream_id < ep->stream_info->num_streams; stream_id++) { + ring = ep->stream_info->stream_rings[stream_id]; + if (!ring) + continue; + xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Killing URBs for slot ID %u, ep index %u, stream %u", - slot_id, ep_index, stream_id + 1); - xhci_kill_ring_urbs(xhci, - ep->stream_info->stream_rings[stream_id]); + slot_id, ep_index, stream_id); + xhci_kill_ring_urbs(xhci, ring); } } else { ring = ep->ring; -- cgit v1.2.3 From 24a950e16eb54d42f25e4074bc00ee4a746877ac Mon Sep 17 00:00:00 2001 From: Jiahau Chang Date: Thu, 20 Jul 2017 14:48:27 +0300 Subject: xhci: Bad Ethernet performance plugged in ASM1042A host commit 9da5a1092b13468839b1a864b126cacfb72ad016 upstream. When USB Ethernet is plugged in ASMEDIA ASM1042A xHCI host, bad performance was manifesting in Web browser use (like download large file such as ISO image). It is known limitation of ASM1042A that is not compatible with driver scheduling, As a workaround we can modify flow control handling of ASM1042A. The register we modify is changes the behavior [use quirk bit 28, usleep_range 40-60us, empty non-pci function -Mathias] Signed-off-by: Jiahau Chang Signed-off-by: Ian Pilcher Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 54 +++++++++++++++++++++++++++++++++++++++++++ drivers/usb/host/pci-quirks.h | 2 ++ drivers/usb/host/xhci-pci.c | 6 +++++ drivers/usb/host/xhci.c | 6 +++++ drivers/usb/host/xhci.h | 1 + 5 files changed, 69 insertions(+) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index a9a1e4c40480..c8989c62a262 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -77,6 +77,16 @@ #define USB_INTEL_USB3_PSSEN 0xD8 #define USB_INTEL_USB3PRM 0xDC +/* ASMEDIA quirk use */ +#define ASMT_DATA_WRITE0_REG 0xF8 +#define ASMT_DATA_WRITE1_REG 0xFC +#define ASMT_CONTROL_REG 0xE0 +#define ASMT_CONTROL_WRITE_BIT 0x02 +#define ASMT_WRITEREG_CMD 0x10423 +#define ASMT_FLOWCTL_ADDR 0xFA30 +#define ASMT_FLOWCTL_DATA 0xBA +#define ASMT_PSEUDO_DATA 0 + /* * amd_chipset_gen values represent AMD different chipset generations */ @@ -412,6 +422,50 @@ void usb_amd_quirk_pll_disable(void) } EXPORT_SYMBOL_GPL(usb_amd_quirk_pll_disable); +static int usb_asmedia_wait_write(struct pci_dev *pdev) +{ + unsigned long retry_count; + unsigned char value; + + for (retry_count = 1000; retry_count > 0; --retry_count) { + + pci_read_config_byte(pdev, ASMT_CONTROL_REG, &value); + + if (value == 0xff) { + dev_err(&pdev->dev, "%s: check_ready ERROR", __func__); + return -EIO; + } + + if ((value & ASMT_CONTROL_WRITE_BIT) == 0) + return 0; + + usleep_range(40, 60); + } + + dev_warn(&pdev->dev, "%s: check_write_ready timeout", __func__); + return -ETIMEDOUT; +} + +void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) +{ + if (usb_asmedia_wait_write(pdev) != 0) + return; + + /* send command and address to device */ + pci_write_config_dword(pdev, ASMT_DATA_WRITE0_REG, ASMT_WRITEREG_CMD); + pci_write_config_dword(pdev, ASMT_DATA_WRITE1_REG, ASMT_FLOWCTL_ADDR); + pci_write_config_byte(pdev, ASMT_CONTROL_REG, ASMT_CONTROL_WRITE_BIT); + + if (usb_asmedia_wait_write(pdev) != 0) + return; + + /* send data to device */ + pci_write_config_dword(pdev, ASMT_DATA_WRITE0_REG, ASMT_FLOWCTL_DATA); + pci_write_config_dword(pdev, ASMT_DATA_WRITE1_REG, ASMT_PSEUDO_DATA); + pci_write_config_byte(pdev, ASMT_CONTROL_REG, ASMT_CONTROL_WRITE_BIT); +} +EXPORT_SYMBOL_GPL(usb_asmedia_modifyflowcontrol); + void usb_amd_quirk_pll_enable(void) { usb_amd_quirk_pll(0); diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index c622ddf21c94..6463fdb403c2 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -11,6 +11,7 @@ bool usb_amd_prefetch_quirk(void); void usb_amd_dev_put(void); void usb_amd_quirk_pll_disable(void); void usb_amd_quirk_pll_enable(void); +void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev); void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev); void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); void sb800_prefetch(struct device *dev, int on); @@ -18,6 +19,7 @@ void sb800_prefetch(struct device *dev, int on); struct pci_dev; static inline void usb_amd_quirk_pll_disable(void) {} static inline void usb_amd_quirk_pll_enable(void) {} +static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {} static inline void usb_amd_dev_put(void) {} static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {} static inline void sb800_prefetch(struct device *dev, int on) {} diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 672751ed2ba1..23833448e602 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -59,6 +59,8 @@ #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc +#define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 + static const char hcd_name[] = "xhci_hcd"; static struct hc_driver __read_mostly xhci_pci_hc_driver; @@ -217,6 +219,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x1142) xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && + pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) + xhci->quirks |= XHCI_ASMEDIA_MODIFY_FLOWCONTROL; + if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 34e23c7d7797..82308af5801b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -192,6 +192,9 @@ int xhci_reset(struct xhci_hcd *xhci) if (ret) return ret; + if (xhci->quirks & XHCI_ASMEDIA_MODIFY_FLOWCONTROL) + usb_asmedia_modifyflowcontrol(to_pci_dev(xhci_to_hcd(xhci)->self.controller)); + xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Wait for controller to be ready for doorbell rings"); /* @@ -1122,6 +1125,9 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !comp_timer_running) compliance_mode_recovery_timer_init(xhci); + if (xhci->quirks & XHCI_ASMEDIA_MODIFY_FLOWCONTROL) + usb_asmedia_modifyflowcontrol(to_pci_dev(hcd->self.controller)); + /* Re-enable port polling. */ xhci_dbg(xhci, "%s: starting port polling.\n", __func__); set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 8336e07dc5f9..a0f4a9feb058 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1661,6 +1661,7 @@ struct xhci_hcd { #define XHCI_BROKEN_PORT_PED (1 << 25) #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) #define XHCI_U2_DISABLE_WAKE (1 << 27) +#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) unsigned int num_active_eps; unsigned int limit_active_eps; -- cgit v1.2.3 From 8bc51b4f2a42187b4386974ef92add94acafe135 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Fri, 21 Apr 2017 13:28:37 -0300 Subject: mxl111sf: Fix driver to use heap allocate buffers for USB messages commit d90b336f3f652ff0441e631a06236f785581c8f7 upstream. The recent changes in 4.9 to mandate USB buffers be heap allocated broke this driver, which was allocating the buffers on the stack. This resulted in the device failing at initialization. Introduce dedicated send/receive buffers as part of the state structure, and add a mutex to protect access to them. Note: we also had to tweak the API to mxl111sf_ctrl_msg to pass the pointer to the state struct rather than the device, since we need it inside the function to access the buffers and the mutex. This patch adjusts the callers to match the API change. Signed-off-by: Devin Heitmueller Reported-by: Doug Lung Cc: Michael Ira Krufky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c | 4 ++-- drivers/media/usb/dvb-usb-v2/mxl111sf.c | 32 +++++++++++++++++------------ drivers/media/usb/dvb-usb-v2/mxl111sf.h | 8 +++++++- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c index 283495c84ba3..aab8eeec601f 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c @@ -320,7 +320,7 @@ fail: static int mxl111sf_i2c_send_data(struct mxl111sf_state *state, u8 index, u8 *wdata) { - int ret = mxl111sf_ctrl_msg(state->d, wdata[0], + int ret = mxl111sf_ctrl_msg(state, wdata[0], &wdata[1], 25, NULL, 0); mxl_fail(ret); @@ -330,7 +330,7 @@ static int mxl111sf_i2c_send_data(struct mxl111sf_state *state, static int mxl111sf_i2c_get_data(struct mxl111sf_state *state, u8 index, u8 *wdata, u8 *rdata) { - int ret = mxl111sf_ctrl_msg(state->d, wdata[0], + int ret = mxl111sf_ctrl_msg(state, wdata[0], &wdata[1], 25, rdata, 24); mxl_fail(ret); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c index 5d676b533a3a..f1f448650e6f 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c @@ -24,9 +24,6 @@ #include "lgdt3305.h" #include "lg2160.h" -/* Max transfer size done by I2C transfer functions */ -#define MAX_XFER_SIZE 64 - int dvb_usb_mxl111sf_debug; module_param_named(debug, dvb_usb_mxl111sf_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level " @@ -56,27 +53,34 @@ MODULE_PARM_DESC(rfswitch, "force rf switch position (0=auto, 1=ext, 2=int)."); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); -int mxl111sf_ctrl_msg(struct dvb_usb_device *d, +int mxl111sf_ctrl_msg(struct mxl111sf_state *state, u8 cmd, u8 *wbuf, int wlen, u8 *rbuf, int rlen) { + struct dvb_usb_device *d = state->d; int wo = (rbuf == NULL || rlen == 0); /* write-only */ int ret; - u8 sndbuf[MAX_XFER_SIZE]; - if (1 + wlen > sizeof(sndbuf)) { + if (1 + wlen > MXL_MAX_XFER_SIZE) { pr_warn("%s: len=%d is too big!\n", __func__, wlen); return -EOPNOTSUPP; } pr_debug("%s(wlen = %d, rlen = %d)\n", __func__, wlen, rlen); - memset(sndbuf, 0, 1+wlen); + mutex_lock(&state->msg_lock); + memset(state->sndbuf, 0, 1+wlen); + memset(state->rcvbuf, 0, rlen); + + state->sndbuf[0] = cmd; + memcpy(&state->sndbuf[1], wbuf, wlen); - sndbuf[0] = cmd; - memcpy(&sndbuf[1], wbuf, wlen); + ret = (wo) ? dvb_usbv2_generic_write(d, state->sndbuf, 1+wlen) : + dvb_usbv2_generic_rw(d, state->sndbuf, 1+wlen, state->rcvbuf, + rlen); + + memcpy(rbuf, state->rcvbuf, rlen); + mutex_unlock(&state->msg_lock); - ret = (wo) ? dvb_usbv2_generic_write(d, sndbuf, 1+wlen) : - dvb_usbv2_generic_rw(d, sndbuf, 1+wlen, rbuf, rlen); mxl_fail(ret); return ret; @@ -92,7 +96,7 @@ int mxl111sf_read_reg(struct mxl111sf_state *state, u8 addr, u8 *data) u8 buf[2]; int ret; - ret = mxl111sf_ctrl_msg(state->d, MXL_CMD_REG_READ, &addr, 1, buf, 2); + ret = mxl111sf_ctrl_msg(state, MXL_CMD_REG_READ, &addr, 1, buf, 2); if (mxl_fail(ret)) { mxl_debug("error reading reg: 0x%02x", addr); goto fail; @@ -118,7 +122,7 @@ int mxl111sf_write_reg(struct mxl111sf_state *state, u8 addr, u8 data) pr_debug("W: (0x%02x, 0x%02x)\n", addr, data); - ret = mxl111sf_ctrl_msg(state->d, MXL_CMD_REG_WRITE, buf, 2, NULL, 0); + ret = mxl111sf_ctrl_msg(state, MXL_CMD_REG_WRITE, buf, 2, NULL, 0); if (mxl_fail(ret)) pr_err("error writing reg: 0x%02x, val: 0x%02x", addr, data); return ret; @@ -922,6 +926,8 @@ static int mxl111sf_init(struct dvb_usb_device *d) static u8 eeprom[256]; struct i2c_client c; + mutex_init(&state->msg_lock); + ret = get_chip_info(state); if (mxl_fail(ret)) pr_err("failed to get chip info during probe"); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.h b/drivers/media/usb/dvb-usb-v2/mxl111sf.h index 846260e0eec0..3e6f5880bd1e 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.h @@ -19,6 +19,9 @@ #include #include +/* Max transfer size done by I2C transfer functions */ +#define MXL_MAX_XFER_SIZE 64 + #define MXL_EP1_REG_READ 1 #define MXL_EP2_REG_WRITE 2 #define MXL_EP3_INTERRUPT 3 @@ -86,6 +89,9 @@ struct mxl111sf_state { struct mutex fe_lock; u8 num_frontends; struct mxl111sf_adap_state adap_state[3]; + u8 sndbuf[MXL_MAX_XFER_SIZE]; + u8 rcvbuf[MXL_MAX_XFER_SIZE]; + struct mutex msg_lock; #ifdef CONFIG_MEDIA_CONTROLLER_DVB struct media_entity tuner; struct media_pad tuner_pads[2]; @@ -108,7 +114,7 @@ int mxl111sf_ctrl_program_regs(struct mxl111sf_state *state, /* needed for hardware i2c functions in mxl111sf-i2c.c: * mxl111sf_i2c_send_data / mxl111sf_i2c_get_data */ -int mxl111sf_ctrl_msg(struct dvb_usb_device *d, +int mxl111sf_ctrl_msg(struct mxl111sf_state *state, u8 cmd, u8 *wbuf, int wlen, u8 *rbuf, int rlen); #define mxl_printk(kern, fmt, arg...) \ -- cgit v1.2.3 From 8665f40a06a0794517f226cad694386faf9802c3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Jul 2017 16:06:32 +0100 Subject: usb: storage: return on error to avoid a null pointer dereference commit 446230f52a5bef593554510302465eabab45a372 upstream. When us->extra is null the driver is not initialized, however, a later call to osd200_scsi_to_ata is made that dereferences us->extra, causing a null pointer dereference. The code currently detects and reports that the driver is not initialized; add a return to avoid the subsequent dereference issue in this check. Thanks to Alan Stern for pointing out that srb->result needs setting to DID_ERROR << 16 Detected by CoverityScan, CID#100308 ("Dereference after null check") Signed-off-by: Colin Ian King Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/isd200.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index fba4005dd737..6a7720e66595 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1529,8 +1529,11 @@ static void isd200_ata_command(struct scsi_cmnd *srb, struct us_data *us) /* Make sure driver was initialized */ - if (us->extra == NULL) + if (us->extra == NULL) { usb_stor_dbg(us, "ERROR Driver not initialized\n"); + srb->result = DID_ERROR << 16; + return; + } scsi_set_resid(srb, 0); /* scsi_bufflen might change in protocol translation to ata */ -- cgit v1.2.3 From a74779d8e1726da718e0a71066e5c4de5b07012c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Jul 2017 15:08:39 +0200 Subject: USB: cdc-acm: add device-id for quirky printer commit fe855789d605590e57f9cd968d85ecce46f5c3fd upstream. Add device-id entry for DATECS FP-2000 fiscal printer needing the NO_UNION_NORMAL quirk. Reported-by: Anton Avramov Signed-off-by: Johan Hovold Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index a876d47246dc..f16491c25e73 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1770,6 +1770,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x1576, 0x03b1), /* Maretron USB100 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ + .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, -- cgit v1.2.3 From 5433bfcc85273a89c7c5a565a337697ff31ae499 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 19 Jul 2017 16:16:54 +0900 Subject: usb: renesas_usbhs: fix usbhsc_resume() for !USBHSF_RUNTIME_PWCTRL commit 59a0879a0e17b2e43ecdc5e3299da85b8410d7ce upstream. This patch fixes an issue that some registers may be not initialized after resume if the USBHSF_RUNTIME_PWCTRL is not set. Otherwise, if a cable is not connected, the driver will not enable INTENB0.VBSE after resume. And then, the driver cannot detect the VBUS. Fixes: ca8a282a5373 ("usb: gadget: renesas_usbhs: add suspend/resume support") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 012a37aa3e0d..7994208be5de 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -752,8 +752,10 @@ static int usbhsc_resume(struct device *dev) struct usbhs_priv *priv = dev_get_drvdata(dev); struct platform_device *pdev = usbhs_priv_to_pdev(priv); - if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) + if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) { usbhsc_power_ctrl(priv, 1); + usbhs_mod_autonomy_mode(priv); + } usbhs_platform_call(priv, phy_reset, pdev); -- cgit v1.2.3 From dbc969ca944f1a3f61af083f4126bb5408d37b4c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 19 Jul 2017 16:16:55 +0900 Subject: usb: renesas_usbhs: gadget: disable all eps when the driver stops commit b8b9c974afee685789fcbb191b52d1790be3608c upstream. A gadget driver will not disable eps immediately when ->disconnect() is called. But, since this driver assumes all eps stop after the ->disconnect(), unexpected behavior happens (especially in system suspend). So, this patch disables all eps in usbhsg_try_stop(). After disabling eps by renesas_usbhs driver, since some functions will be called by both a gadget and renesas_usbhs driver, renesas_usbhs driver should protect uep->pipe. To protect uep->pipe easily, this patch adds a new lock in struct usbhsg_uep. Fixes: 2f98382dc ("usb: renesas_usbhs: Add Renesas USBHS Gadget") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/mod_gadget.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 5bc7a6138855..93fba9033b00 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -37,6 +37,7 @@ struct usbhsg_gpriv; struct usbhsg_uep { struct usb_ep ep; struct usbhs_pipe *pipe; + spinlock_t lock; /* protect the pipe */ char ep_name[EP_NAME_SIZE]; @@ -636,10 +637,16 @@ usbhsg_ep_enable_end: static int usbhsg_ep_disable(struct usb_ep *ep) { struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep); - struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep); + struct usbhs_pipe *pipe; + unsigned long flags; + int ret = 0; - if (!pipe) - return -EINVAL; + spin_lock_irqsave(&uep->lock, flags); + pipe = usbhsg_uep_to_pipe(uep); + if (!pipe) { + ret = -EINVAL; + goto out; + } usbhsg_pipe_disable(uep); usbhs_pipe_free(pipe); @@ -647,6 +654,9 @@ static int usbhsg_ep_disable(struct usb_ep *ep) uep->pipe->mod_private = NULL; uep->pipe = NULL; +out: + spin_unlock_irqrestore(&uep->lock, flags); + return 0; } @@ -696,8 +706,11 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) { struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep); struct usbhsg_request *ureq = usbhsg_req_to_ureq(req); - struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep); + struct usbhs_pipe *pipe; + unsigned long flags; + spin_lock_irqsave(&uep->lock, flags); + pipe = usbhsg_uep_to_pipe(uep); if (pipe) usbhs_pkt_pop(pipe, usbhsg_ureq_to_pkt(ureq)); @@ -706,6 +719,7 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) * even if the pipe is NULL. */ usbhsg_queue_pop(uep, ureq, -ECONNRESET); + spin_unlock_irqrestore(&uep->lock, flags); return 0; } @@ -852,10 +866,10 @@ static int usbhsg_try_stop(struct usbhs_priv *priv, u32 status) { struct usbhsg_gpriv *gpriv = usbhsg_priv_to_gpriv(priv); struct usbhs_mod *mod = usbhs_mod_get_current(priv); - struct usbhsg_uep *dcp = usbhsg_gpriv_to_dcp(gpriv); + struct usbhsg_uep *uep; struct device *dev = usbhs_priv_to_dev(priv); unsigned long flags; - int ret = 0; + int ret = 0, i; /******************** spin lock ********************/ usbhs_lock(priv, flags); @@ -887,7 +901,9 @@ static int usbhsg_try_stop(struct usbhs_priv *priv, u32 status) usbhs_sys_set_test_mode(priv, 0); usbhs_sys_function_ctrl(priv, 0); - usbhsg_ep_disable(&dcp->ep); + /* disable all eps */ + usbhsg_for_each_uep_with_dcp(uep, gpriv, i) + usbhsg_ep_disable(&uep->ep); dev_dbg(dev, "stop gadget\n"); @@ -1069,6 +1085,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) ret = -ENOMEM; goto usbhs_mod_gadget_probe_err_gpriv; } + spin_lock_init(&uep->lock); gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED); dev_info(dev, "%stransceiver found\n", -- cgit v1.2.3 From 03c1d9d45582e8a0991747a6f7a3a235b39b3e2b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 7 Jun 2017 19:05:31 -0400 Subject: md: don't use flush_signals in userspace processes commit f9c79bc05a2a91f4fba8bfd653579e066714b1ec upstream. The function flush_signals clears all pending signals for the process. It may be used by kernel threads when we need to prepare a kernel thread for responding to signals. However using this function for an userspaces processes is incorrect - clearing signals without the program expecting it can cause misbehavior. The raid1 and raid5 code uses flush_signals in its request routine because it wants to prepare for an interruptible wait. This patch drops flush_signals and uses sigprocmask instead to block all signals (including SIGKILL) around the schedule() call. The signals are not lost, but the schedule() call won't respond to them. Signed-off-by: Mikulas Patocka Acked-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 5 ++++- drivers/md/raid5.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 29e2df5cd77b..81a78757bc78 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1073,7 +1073,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) */ DEFINE_WAIT(w); for (;;) { - flush_signals(current); + sigset_t full, old; prepare_to_wait(&conf->wait_barrier, &w, TASK_INTERRUPTIBLE); if (bio_end_sector(bio) <= mddev->suspend_lo || @@ -1082,7 +1082,10 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) !md_cluster_ops->area_resyncing(mddev, WRITE, bio->bi_iter.bi_sector, bio_end_sector(bio)))) break; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); } finish_wait(&conf->wait_barrier, &w); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f34ad2be66a1..a613eea4b8a5 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5300,12 +5300,15 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) * userspace, we want an interruptible * wait. */ - flush_signals(current); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_INTERRUPTIBLE); if (logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { + sigset_t full, old; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); do_prepare = true; } goto retry; -- cgit v1.2.3 From 4d3d3a1690c25205b36f6ec015458466e1ea6dae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 26 Jun 2017 14:49:46 +0200 Subject: x86/xen: allow userspace access during hypercalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c54590cac51db8ab5fd30156bdaba34af915e629 upstream. Userspace application can do a hypercall through /dev/xen/privcmd, and some for some hypercalls argument is a pointers to user-provided structure. When SMAP is supported and enabled, hypervisor can't access. So, lets allow it. The same applies to HYPERVISOR_dm_op, where additionally privcmd driver carefully verify buffer addresses. Cc: stable@vger.kernel.org Signed-off-by: Marek Marczykowski-Górecki Reviewed-by: Juergen Gross [HYPERVISOR_dm_op dropped - not present until 4.11] Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/xen/hypercall.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a12a047184ee..8b678af866f7 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -214,10 +215,12 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + stac(); asm volatile("call *%[call]" : __HYPERCALL_5PARAM : [call] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); + clac(); return (long)__res; } -- cgit v1.2.3 From e82672f45ea3dd7511fcbe0098df3f4fe65c20e8 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Sat, 20 Sep 2014 09:23:44 -0300 Subject: cx88: Fix regression in initial video standard setting commit 4e0973a918b9a42e217093f078e04a61e5dd95a5 upstream. Setting initial standard at the top of cx8800_initdev would cause the first call to cx88_set_tvnorm() to return without programming any registers (leaving the driver saying it's set to NTSC but the hardware isn't programmed). Even worse, any subsequent attempt to explicitly set it to NTSC-M will return success but actually fail to program the underlying registers unless first changing the standard to something other than NTSC-M. Set the initial standard later in the process, and make sure the field is zero at the beginning to ensure that the call always goes through. This regression was introduced in the following commit: commit ccd6f1d488e7 ("[media] cx88: move width, height and field to core struct") Author: Hans Verkuil [media] cx88: move width, height and field to core struct Signed-off-by: Devin Heitmueller Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/cx88/cx88-cards.c | 9 ++++++++- drivers/media/pci/cx88/cx88-video.c | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/media/pci/cx88/cx88-cards.c b/drivers/media/pci/cx88/cx88-cards.c index 8f2556ec3971..61611d1682d1 100644 --- a/drivers/media/pci/cx88/cx88-cards.c +++ b/drivers/media/pci/cx88/cx88-cards.c @@ -3691,7 +3691,14 @@ struct cx88_core *cx88_core_create(struct pci_dev *pci, int nr) core->nr = nr; sprintf(core->name, "cx88[%d]", core->nr); - core->tvnorm = V4L2_STD_NTSC_M; + /* + * Note: Setting initial standard here would cause first call to + * cx88_set_tvnorm() to return without programming any registers. Leave + * it blank for at this point and it will get set later in + * cx8800_initdev() + */ + core->tvnorm = 0; + core->width = 320; core->height = 240; core->field = V4L2_FIELD_INTERLACED; diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index d83eb3b10f54..3b140ad598de 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -1422,7 +1422,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, /* initial device configuration */ mutex_lock(&core->lock); - cx88_set_tvnorm(core, core->tvnorm); + cx88_set_tvnorm(core, V4L2_STD_NTSC_M); v4l2_ctrl_handler_setup(&core->video_hdl); v4l2_ctrl_handler_setup(&core->audio_hdl); cx88_video_mux(core, 0); -- cgit v1.2.3 From 891c31e16cb7aab2356625444f3eedebb96cde00 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 29 Jun 2017 16:59:11 -0600 Subject: libnvdimm, btt: fix btt_rw_page not returning errors commit c13c43d54f2c6a3be1c675766778ac1ad8dfbfcc upstream. btt_rw_page was not propagating errors frm btt_do_bvec, resulting in any IO errors via the rw_page path going unnoticed. the pmem driver recently fixed this in e10624f pmem: fail io-requests to known bad blocks but same problem in BTT went neglected. Fixes: 5212e11fde4d ("nd_btt: atomic sector updates") Cc: Toshi Kani Cc: Dan Williams Cc: Jeff Moyer Signed-off-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/btt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 368795aad5c9..94733f73d37f 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1203,10 +1203,13 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { struct btt *btt = bdev->bd_disk->private_data; + int rc; - btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); - page_endio(page, is_write, 0); - return 0; + rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); + if (rc == 0) + page_endio(page, is_write, 0); + + return rc; } -- cgit v1.2.3 From 0fa705dc61ee12e09827ac19225be3731b1cb988 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 7 Jul 2017 17:44:26 -0600 Subject: libnvdimm: fix badblock range handling of ARS range commit 4e3f0701f25ab194c5362576b1146a1e6cc6c2e7 upstream. __add_badblock_range() does not account sector alignment when it sets 'num_sectors'. Therefore, an ARS error record range spanning across two sectors is set to a single sector length, which leaves the 2nd sector unprotected. Change __add_badblock_range() to set 'num_sectors' properly. Fixes: 0caeef63e6d2 ("libnvdimm: Add a poison list and export badblocks") Signed-off-by: Toshi Kani Reviewed-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 7ceba08774b6..18a0bea115df 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -450,14 +450,15 @@ static void set_badblock(struct badblocks *bb, sector_t s, int num) static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) { const unsigned int sector_size = 512; - sector_t start_sector; + sector_t start_sector, end_sector; u64 num_sectors; u32 rem; start_sector = div_u64(ns_offset, sector_size); - num_sectors = div_u64_rem(len, sector_size, &rem); + end_sector = div_u64_rem(ns_offset + len, sector_size, &rem); if (rem) - num_sectors++; + end_sector++; + num_sectors = end_sector - start_sector; if (unlikely(num_sectors > (u64)INT_MAX)) { u64 remaining = num_sectors; -- cgit v1.2.3 From 4d1f97eb59a472a11982d6fff9a722b681225e1a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jun 2017 14:34:15 +0200 Subject: ext2: Don't clear SGID when inheriting ACLs commit a992f2d38e4ce17b8c7d1f7f67b2de0eebdea069 upstream. When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit set, DIR1 is expected to have SGID bit set (and owning group equal to the owning group of 'DIR0'). However when 'DIR0' also has some default ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on 'DIR1' to get cleared if user is not member of the owning group. Fix the problem by creating __ext2_set_acl() function that does not call posix_acl_update_mode() and use it when inheriting ACLs. That prevents SGID bit clearing and the mode has been properly set by posix_acl_create() anyway. Fixes: 073931017b49d9458aa351605b43a7e34598caef CC: linux-ext4@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/acl.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 79dafa71effd..069c0dceda01 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -175,11 +175,8 @@ ext2_get_acl(struct inode *inode, int type) return acl; } -/* - * inode->i_mutex: down - */ -int -ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int +__ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int name_index; void *value = NULL; @@ -189,13 +186,6 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch(type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: @@ -221,6 +211,24 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; } +/* + * inode->i_mutex: down + */ +int +ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int error; + + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (error) + return error; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } + return __ext2_set_acl(inode, acl, type); +} + /* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * @@ -238,12 +246,12 @@ ext2_init_acl(struct inode *inode, struct inode *dir) return error; if (default_acl) { - error = ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) - error = ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } return error; -- cgit v1.2.3 From 1e95148551f3ea57aef1a97d1b2a825be5a7704a Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 5 Jul 2017 17:34:04 +0800 Subject: Raid5 should update rdev->sectors after reshape commit b5d27718f38843a74552e9a93d32e2391fd3999f upstream. The raid5 md device is created by the disks which we don't use the total size. For example, the size of the device is 5G and it just uses 3G of the devices to create one raid5 device. Then change the chunksize and wait reshape to finish. After reshape finishing stop the raid and assemble it again. It fails. mdadm -CR /dev/md0 -l5 -n3 /dev/loop[0-2] --size=3G --chunk=32 --assume-clean mdadm /dev/md0 --grow --chunk=64 wait reshape to finish mdadm -S /dev/md0 mdadm -As The error messages: [197519.814302] md: loop1 does not have a valid v1.2 superblock, not importing! [197519.821686] md: md_import_device returned -22 After reshape the data offset is changed. It selects backwards direction in this condition. In function super_1_load it compares the available space of the underlying device with sb->data_size. The new data offset gets bigger after reshape. So super_1_load returns -EINVAL. rdev->sectors is updated in md_finish_reshape. Then sb->data_size is set in super_1_sync based on rdev->sectors. So add md_finish_reshape in end_reshape. Signed-off-by: Xiao Ni Acked-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a613eea4b8a5..8f117d6372c9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7560,12 +7560,10 @@ static void end_reshape(struct r5conf *conf) { if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { - struct md_rdev *rdev; spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; - rdev_for_each(rdev, conf->mddev) - rdev->data_offset = rdev->new_data_offset; + md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; conf->mddev->reshape_position = MaxSector; -- cgit v1.2.3 From 8302e9d2f50b4358e4419deae2b09e0ccca766c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 29 Jun 2017 11:38:11 +0200 Subject: s390/syscalls: Fix out of bounds arguments access commit c46fc0424ced3fb71208e72bd597d91b9169a781 upstream. Zorro reported following crash while having enabled syscall tracing (CONFIG_FTRACE_SYSCALLS): Unable to handle kernel pointer dereference at virtual ... Oops: 0011 [#1] SMP DEBUG_PAGEALLOC SNIP Call Trace: ([<000000000024d79c>] ftrace_syscall_enter+0xec/0x1d8) [<00000000001099c6>] do_syscall_trace_enter+0x236/0x2f8 [<0000000000730f1c>] sysc_tracesys+0x1a/0x32 [<000003fffcf946a2>] 0x3fffcf946a2 INFO: lockdep is turned off. Last Breaking-Event-Address: [<000000000022dd44>] rb_event_data+0x34/0x40 ---[ end trace 8c795f86b1b3f7b9 ]--- The crash happens in syscall_get_arguments function for syscalls with zero arguments, that will try to access first argument (args[0]) in event entry, but it's not allocated. Bail out of there are no arguments. Reported-by: Zorro Lang Signed-off-by: Jiri Olsa Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/syscall.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6ba0bf928909..6bc941be6921 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -64,6 +64,12 @@ static inline void syscall_get_arguments(struct task_struct *task, { unsigned long mask = -1UL; + /* + * No arguments for this syscall, there's nothing to do. + */ + if (!n) + return; + BUG_ON(i + n > 6); #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) -- cgit v1.2.3 From b85007c9114a05c696c46c2aac6da68aeaed7bbc Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Tue, 23 May 2017 11:35:22 -0400 Subject: drm/amd/amdgpu: Return error if initiating read out of range on vram MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9156e723301c0a7a7def4cde820e018ce791b842 upstream. If you initiate a read that is out of the VRAM address space return ENXIO instead of 0. Reads that begin below that point will read upto the VRAM limit as before. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dcaf691f56b5..264899df9bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1419,6 +1419,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (*pos >= adev->mc.mc_vram_size) + return -ENXIO; + while (size) { unsigned long flags; uint32_t value; -- cgit v1.2.3 From a844f8d2a564d00d0043487d860a82d303b1a313 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:14:14 -0400 Subject: drm/radeon/ci: disable mclk switching for high refresh rates (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ab03d9fe508f4e2914a8f4a9eef1b21051cacd0f upstream. Even if the vblank period would allow it, it still seems to be problematic on some cards. v2: fix logic inversion (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/ci_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index ea36dc4dd5d2..24810492d2c1 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev) u32 vblank_time = r600_dpm_get_vblank_time(rdev); u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (r600_dpm_get_vrefresh(rdev) > 120) + return true; + /* disable mclk switching if the refresh is >120Hz, even if the * blanking period would allow it */ -- cgit v1.2.3 From 6e7b1eff91aa3bcc222e665328bea9988b453218 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 7 Jul 2017 04:57:04 +0200 Subject: drm/radeon: Fix eDP for single-display iMac10,1 (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 564d8a2cf3abf16575af48bdc3e86e92ee8a617d upstream. The late 2009, 27 inch Apple iMac10,1 has an internal eDP display and an external Mini- Displayport output, driven by a DCE-3.2, RV730 Radeon Mobility HD-4670. The machine worked fine in a dual-display setup with eDP panel + externally connected HDMI or DVI-D digital display sink, connected via MiniDP to DVI or HDMI adapter. However, booting the machine single-display with only eDP panel results in a completely black display - even backlight powering off, as soon as the radeon modesetting driver loads. This patch fixes the single dispay eDP case by assigning encoders based on dig->linkb, similar to DCE-4+. While this should not be generally necessary (Alex: "...atom on normal boards should be able to handle any mapping."), Apple seems to use some special routing here. One remaining problem not solved by this patch is that an external Minidisplayport->DP sink does still not work on iMac10,1, whereas external DVI and HDMI sinks continue to work. The problem affects at least all tested kernels since Linux 3.13 - didn't test earlier kernels, so backporting to stable probably makes sense. v2: With the original patch from 2016, Alex was worried it will break other DCE3.2 systems. Use dmi_match() to apply this special encoder assignment only for the Apple iMac 10,1 from late 2009. Signed-off-by: Mario Kleiner Cc: Alex Deucher Cc: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 56bb758f4e33..7bb1e531325b 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -30,6 +30,7 @@ #include "radeon_audio.h" #include "atom.h" #include +#include extern int atom_debug; @@ -2183,9 +2184,17 @@ int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder, int fe_idx) goto assigned; } - /* on DCE32 and encoder can driver any block so just crtc id */ + /* + * On DCE32 any encoder can drive any block so usually just use crtc id, + * but Apple thinks different at least on iMac10,1, so there use linkb, + * otherwise the internal eDP panel will stay dark. + */ if (ASIC_IS_DCE32(rdev)) { - enc_idx = radeon_crtc->crtc_id; + if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1")) + enc_idx = (dig->linkb) ? 1 : 0; + else + enc_idx = radeon_crtc->crtc_id; + goto assigned; } -- cgit v1.2.3 From 685e124ebc7f11de248b68f883cd845523a63fc9 Mon Sep 17 00:00:00 2001 From: Tony Camuso Date: Mon, 19 Jun 2017 13:17:33 -0400 Subject: ipmi: use rcu lock around call to intf->handlers->sender() commit cdea46566bb21ce309725a024208322a409055cc upstream. A vendor with a system having more than 128 CPUs occasionally encounters the following crash during shutdown. This is not an easily reproduceable event, but the vendor was able to provide the following analysis of the crash, which exhibits the same footprint each time. crash> bt PID: 0 TASK: ffff88017c70ce70 CPU: 5 COMMAND: "swapper/5" #0 [ffff88085c143ac8] machine_kexec at ffffffff81059c8b #1 [ffff88085c143b28] __crash_kexec at ffffffff811052e2 #2 [ffff88085c143bf8] crash_kexec at ffffffff811053d0 #3 [ffff88085c143c10] oops_end at ffffffff8168ef88 #4 [ffff88085c143c38] no_context at ffffffff8167ebb3 #5 [ffff88085c143c88] __bad_area_nosemaphore at ffffffff8167ec49 #6 [ffff88085c143cd0] bad_area_nosemaphore at ffffffff8167edb3 #7 [ffff88085c143ce0] __do_page_fault at ffffffff81691d1e #8 [ffff88085c143d40] do_page_fault at ffffffff81691ec5 #9 [ffff88085c143d70] page_fault at ffffffff8168e188 [exception RIP: unknown or invalid address] RIP: ffffffffa053c800 RSP: ffff88085c143e28 RFLAGS: 00010206 RAX: ffff88017c72bfd8 RBX: ffff88017a8dc000 RCX: ffff8810588b5ac8 RDX: ffff8810588b5a00 RSI: ffffffffa053c800 RDI: ffff8810588b5a00 RBP: ffff88085c143e58 R8: ffff88017c70d408 R9: ffff88017a8dc000 R10: 0000000000000002 R11: ffff88085c143da0 R12: ffff8810588b5ac8 R13: 0000000000000100 R14: ffffffffa053c800 R15: ffff8810588b5a00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 [exception RIP: cpuidle_enter_state+82] RIP: ffffffff81514192 RSP: ffff88017c72be50 RFLAGS: 00000202 RAX: 0000001e4c3c6f16 RBX: 000000000000f8a0 RCX: 0000000000000018 RDX: 0000000225c17d03 RSI: ffff88017c72bfd8 RDI: 0000001e4c3c6f16 RBP: ffff88017c72be78 R8: 000000000000237e R9: 0000000000000018 R10: 0000000000002494 R11: 0000000000000001 R12: ffff88017c72be20 R13: ffff88085c14f8e0 R14: 0000000000000082 R15: 0000001e4c3bb400 ORIG_RAX: ffffffffffffff10 CS: 0010 SS: 0018 This is the corresponding stack trace It has crashed because the area pointed with RIP extracted from timer element is already removed during a shutdown process. The function is smi_timeout(). And we think ffff8810588b5a00 in RDX is a parameter struct smi_info crash> rd ffff8810588b5a00 20 ffff8810588b5a00: ffff8810588b6000 0000000000000000 .`.X............ ffff8810588b5a10: ffff880853264400 ffffffffa05417e0 .D&S......T..... ffff8810588b5a20: 24a024a000000000 0000000000000000 .....$.$........ ffff8810588b5a30: 0000000000000000 0000000000000000 ................ ffff8810588b5a30: 0000000000000000 0000000000000000 ................ ffff8810588b5a40: ffffffffa053a040 ffffffffa053a060 @.S.....`.S..... ffff8810588b5a50: 0000000000000000 0000000100000001 ................ ffff8810588b5a60: 0000000000000000 0000000000000e00 ................ ffff8810588b5a70: ffffffffa053a580 ffffffffa053a6e0 ..S.......S..... ffff8810588b5a80: ffffffffa053a4a0 ffffffffa053a250 ..S.....P.S..... ffff8810588b5a90: 0000000500000002 0000000000000000 ................ Unfortunately the top of this area is already detroyed by someone. But because of two reasonns we think this is struct smi_info 1) The address included in between ffff8810588b5a70 and ffff8810588b5a80: are inside of ipmi_si_intf.c see crash> module ffff88085779d2c0 2) We've found the area which point this. It is offset 0x68 of ffff880859df4000 crash> rd ffff880859df4000 100 ffff880859df4000: 0000000000000000 0000000000000001 ................ ffff880859df4010: ffffffffa0535290 dead000000000200 .RS............. ffff880859df4020: ffff880859df4020 ffff880859df4020 @.Y.... @.Y.... ffff880859df4030: 0000000000000002 0000000000100010 ................ ffff880859df4040: ffff880859df4040 ffff880859df4040 @@.Y....@@.Y.... ffff880859df4050: 0000000000000000 0000000000000000 ................ ffff880859df4060: 0000000000000000 ffff8810588b5a00 .........Z.X.... ffff880859df4070: 0000000000000001 ffff880859df4078 ........x@.Y.... If we regards it as struct ipmi_smi in shutdown process it looks consistent. The remedy for this apparent race is affixed below. Signed-off-by: Tony Camuso Signed-off-by: Greg Kroah-Hartman This was first introduced in 7ea0ed2b5be817 ipmi: Make the message handler easier to use for SMI interfaces where some code was moved outside of the rcu_read_lock() and the lock was not added. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index fcdd886819f5..172a9dc06ec9 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3877,6 +3877,9 @@ static void smi_recv_tasklet(unsigned long val) * because the lower layer is allowed to hold locks while calling * message delivery. */ + + rcu_read_lock(); + if (!run_to_completion) spin_lock_irqsave(&intf->xmit_msgs_lock, flags); if (intf->curr_msg == NULL && !intf->in_shutdown) { @@ -3899,6 +3902,8 @@ static void smi_recv_tasklet(unsigned long val) if (newmsg) intf->handlers->sender(intf->send_info, newmsg); + rcu_read_unlock(); + handle_new_recv_msgs(intf); } -- cgit v1.2.3 From 1b9008cdae94e6c4ae42bc90bf031b9fefcf7a93 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 30 Jun 2017 07:18:08 -0500 Subject: ipmi:ssif: Add missing unlock in error branch commit 4495ec6d770e1bca7a04e93ac453ab6720c56c5d upstream. When getting flags, a response to a different message would result in a deadlock because of a missing unlock. Add that unlock and a comment. Found by static analysis. Reported-by: Dan Carpenter Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 6958b5ce9145..510fc104bcdc 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -762,6 +762,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, result, len, data[2]); } else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || data[1] != IPMI_GET_MSG_FLAGS_CMD) { + /* + * Don't abort here, maybe it was a queued + * response to a previous command. + */ + ipmi_ssif_unlock_cond(ssif_info, flags); pr_warn(PFX "Invalid response getting flags: %x %x\n", data[0], data[1]); } else { -- cgit v1.2.3 From 58d2eacd3b0ef6eb7d7a786021f1bcd7f147082d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Jun 2017 08:48:18 -0700 Subject: xfs: Don't clear SGID when inheriting ACLs commit 8ba358756aa08414fa9e65a1a41d28304ed6fd7f upstream. When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit set, DIR1 is expected to have SGID bit set (and owning group equal to the owning group of 'DIR0'). However when 'DIR0' also has some default ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on 'DIR1' to get cleared if user is not member of the owning group. Fix the problem by calling __xfs_set_acl() instead of xfs_set_acl() when setting up inode in xfs_generic_create(). That prevents SGID bit clearing and mode is properly set by posix_acl_create() anyway. We also reorder arguments of __xfs_set_acl() to match the ordering of xfs_set_acl() to make things consistent. Fixes: 073931017b49d9458aa351605b43a7e34598caef CC: Darrick J. Wong CC: linux-xfs@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_acl.c | 6 +++--- fs/xfs/xfs_acl.h | 1 + fs/xfs/xfs_iops.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b468e041f207..7034e17535de 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -170,8 +170,8 @@ xfs_get_acl(struct inode *inode, int type) return acl; } -STATIC int -__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +int +__xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct xfs_inode *ip = XFS_I(inode); unsigned char *ea_name; @@ -268,5 +268,5 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } set_acl: - return __xfs_set_acl(inode, type, acl); + return __xfs_set_acl(inode, acl, type); } diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 286fa89217f5..04327318ef67 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -24,6 +24,7 @@ struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); #else static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) { diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f5e0f608e245..a1247c3c1efb 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -190,12 +190,12 @@ xfs_generic_create( #ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) goto out_cleanup_inode; } if (acl) { - error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) goto out_cleanup_inode; } -- cgit v1.2.3 From 19e117a50135efbb1d7c5ab1f24522162cc30bf0 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 1 Jun 2017 11:18:30 -0700 Subject: f2fs: sanity check size of nat and sit cache commit 21d3f8e1c3b7996ce239ab6fa82e9f7a8c47d84d upstream. Make sure number of entires doesn't exceed max journal size. Signed-off-by: Jin Qian Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a7943f861d68..74a2b444406d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1805,6 +1805,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { + struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; + struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; int type = CURSEG_HOT_DATA; int err; @@ -1831,6 +1833,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) return err; } + /* sanity check for summary blocks */ + if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + return -EINVAL; + return 0; } -- cgit v1.2.3 From f97f9e94f666213b3cc59a6cff70b318154f2b09 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Jul 2017 14:56:49 -0700 Subject: f2fs: Don't clear SGID when inheriting ACLs commit c925dc162f770578ff4a65ec9b08270382dba9e6 upstream. This patch copies commit b7f8a09f80: "btrfs: Don't clear SGID when inheriting ACLs" written by Jan. Fixes: 073931017b49d9458aa351605b43a7e34598caef Signed-off-by: Jan Kara Reviewed-by: Chao Yu Reviewed-by: Jan Kara Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 6fe23af509e1..55aa29c0c78d 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -211,7 +211,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { + if (acl && !ipage) { error = posix_acl_update_mode(inode, &inode->i_mode, &acl); if (error) return error; -- cgit v1.2.3 From 0fb615f9cad7966267cd1904766a1b3059336f76 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Mon, 3 Jul 2017 14:05:34 -0400 Subject: drm/ttm: Fix use-after-free in ttm_bo_clean_mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8046e1955465e3f24e9154d0f2a2e0a8e3f8dccf upstream. We unref the man->move fence in ttm_bo_clean_mm() and then call ttm_bo_force_list_clean() which waits on it, except the refcount is now zero so a warning is generated (or worse): [149492.279301] refcount_t: increment on 0; use-after-free. [149492.279309] ------------[ cut here ]------------ [149492.279315] WARNING: CPU: 3 PID: 18726 at lib/refcount.c:150 refcount_inc+0x2b/0x30 [149492.279315] Modules linked in: vhost_net vhost tun x86_pkg_temp_thermal crc32_pclmul ghash_clmulni_intel efivarfs amdgpu( -) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm [149492.279326] CPU: 3 PID: 18726 Comm: rmmod Not tainted 4.12.0-rc5-drm-next-4.13-ttmpatch+ #1 [149492.279326] Hardware name: Gigabyte Technology Co., Ltd. Z97X-UD3H-BK/Z97X-UD3H-BK-CF, BIOS F6 06/17/2014 [149492.279327] task: ffff8804ddfedcc0 task.stack: ffffc90008d20000 [149492.279329] RIP: 0010:refcount_inc+0x2b/0x30 [149492.279330] RSP: 0018:ffffc90008d23c30 EFLAGS: 00010286 [149492.279331] RAX: 000000000000002b RBX: 0000000000000170 RCX: 0000000000000000 [149492.279331] RDX: 0000000000000000 RSI: ffff88051ecccbe8 RDI: ffff88051ecccbe8 [149492.279332] RBP: ffffc90008d23c30 R08: 0000000000000001 R09: 00000000000003ee [149492.279333] R10: ffffc90008d23bb0 R11: 00000000000003ee R12: ffff88043aaac960 [149492.279333] R13: ffff8805005e28a8 R14: 0000000000000002 R15: ffff88050115e178 [149492.279334] FS: 00007fc540168700(0000) GS:ffff88051ecc0000(0000) knlGS:0000000000000000 [149492.279335] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [149492.279336] CR2: 00007fc3e8654140 CR3: 000000027ba77000 CR4: 00000000001426e0 [149492.279337] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [149492.279337] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [149492.279338] Call Trace: [149492.279345] ttm_bo_force_list_clean+0xb9/0x110 [ttm] [149492.279348] ttm_bo_clean_mm+0x7a/0xe0 [ttm] [149492.279375] amdgpu_ttm_fini+0xc9/0x1f0 [amdgpu] [149492.279392] amdgpu_bo_fini+0x12/0x40 [amdgpu] [149492.279415] gmc_v7_0_sw_fini+0x32/0x40 [amdgpu] [149492.279430] amdgpu_fini+0x2c9/0x490 [amdgpu] [149492.279445] amdgpu_device_fini+0x58/0x1b0 [amdgpu] [149492.279461] amdgpu_driver_unload_kms+0x4f/0xa0 [amdgpu] [149492.279470] drm_dev_unregister+0x3c/0xe0 [drm] [149492.279485] amdgpu_pci_remove+0x19/0x30 [amdgpu] [149492.279487] pci_device_remove+0x39/0xc0 [149492.279490] device_release_driver_internal+0x155/0x210 [149492.279491] driver_detach+0x38/0x70 [149492.279493] bus_remove_driver+0x4c/0xa0 [149492.279494] driver_unregister+0x2c/0x40 [149492.279496] pci_unregister_driver+0x21/0x90 [149492.279520] amdgpu_exit+0x15/0x406 [amdgpu] [149492.279523] SyS_delete_module+0x1a8/0x270 [149492.279525] ? exit_to_usermode_loop+0x92/0xa0 [149492.279528] entry_SYSCALL_64_fastpath+0x13/0x94 [149492.279529] RIP: 0033:0x7fc53fcb68e7 [149492.279529] RSP: 002b:00007ffcfbfaabb8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [149492.279531] RAX: ffffffffffffffda RBX: 0000563117adb200 RCX: 00007fc53fcb68e7 [149492.279531] RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000563117adb268 [149492.279532] RBP: 0000000000000003 R08: 0000000000000000 R09: 1999999999999999 [149492.279533] R10: 0000000000000883 R11: 0000000000000206 R12: 00007ffcfbfa9ba0 [149492.279533] R13: 0000000000000000 R14: 0000000000000000 R15: 0000563117adb200 [149492.279534] Code: 55 48 89 e5 e8 77 fe ff ff 84 c0 74 02 5d c3 80 3d 40 f2 a4 00 00 75 f5 48 c7 c7 20 3c ca 81 c6 05 30 f2 a4 00 01 e8 91 f0 d7 ff <0f> ff 5d c3 90 55 48 89 fe bf 01 00 00 00 48 89 e5 e8 9f fe ff [149492.279557] ---[ end trace 2d4e0ffcb66a1016 ]--- Unref the fence *after* waiting for it. v2: Set man->move to NULL after dropping the last ref (Christian König) Fixes: aff98ba1fdb8 (drm/ttm: wait for eviction in ttm_bo_force_list_clean) Signed-off-by: John Brooks Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 35cc16f9fec9..d09276ec7e90 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1343,7 +1343,6 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) mem_type); return ret; } - fence_put(man->move); man->use_type = false; man->has_type = false; @@ -1355,6 +1354,9 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) ret = (*man->func->takedown)(man); } + fence_put(man->move); + man->move = NULL; + return ret; } EXPORT_SYMBOL(ttm_bo_clean_mm); -- cgit v1.2.3 From fee760fc6c79998efc48bd06afeec31824dfb2d4 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 10 Jan 2017 21:30:21 +0300 Subject: ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials commit 51f8f3c4e22535933ef9aecc00e9a6069e051b57 upstream. If overlay was mounted by root then quota set for upper layer does not work because overlay now always use mounter's credentials for operations. Also overlay might deplete reserved space and inodes in ext4. This patch drops capability SYS_RESOURCE from saved credentials. This affects creation new files, whiteouts, and copy-up operations. Signed-off-by: Konstantin Khlebnikov Fixes: 1175b6b8d963 ("ovl: do operations on underlying file system in mounter's context") Cc: Vivek Goyal Signed-off-by: Miklos Szeredi Cc: Amir Goldstein Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 0e100856c7b8..2a0148957d6a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1146,6 +1146,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int stacklen = 0; unsigned int i; bool remote = false; + struct cred *cred; int err; err = -ENOMEM; @@ -1309,10 +1310,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; - ufs->creator_cred = prepare_creds(); - if (!ufs->creator_cred) + ufs->creator_cred = cred = prepare_creds(); + if (!cred) goto out_put_lower_mnt; + /* Never override disk quota limits or use reserved space */ + cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + err = -ENOMEM; oe = ovl_alloc_entry(numlower); if (!oe) -- cgit v1.2.3 From e91a55790ddfbda7f9f72963e561d9c738f14a52 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 19 Jun 2017 09:10:32 -0600 Subject: vfio: Fix group release deadlock commit 811642d8d8a82c0cce8dc2debfdaf23c5a144839 upstream. If vfio_iommu_group_notifier() acquires a group reference and that reference becomes the last reference to the group, then vfio_group_put introduces a deadlock code path where we're trying to unregister from the iommu notifier chain from within a callout of that chain. Use a work_struct to release this reference asynchronously. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index d1d70e0b011b..8e8b73b05e54 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -419,6 +419,34 @@ static void vfio_group_put(struct vfio_group *group) kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); } +struct vfio_group_put_work { + struct work_struct work; + struct vfio_group *group; +}; + +static void vfio_group_put_bg(struct work_struct *work) +{ + struct vfio_group_put_work *do_work; + + do_work = container_of(work, struct vfio_group_put_work, work); + + vfio_group_put(do_work->group); + kfree(do_work); +} + +static void vfio_group_schedule_put(struct vfio_group *group) +{ + struct vfio_group_put_work *do_work; + + do_work = kmalloc(sizeof(*do_work), GFP_KERNEL); + if (WARN_ON(!do_work)) + return; + + INIT_WORK(&do_work->work, vfio_group_put_bg); + do_work->group = group; + schedule_work(&do_work->work); +} + /* Assume group_lock or group reference is held */ static void vfio_group_get(struct vfio_group *group) { @@ -743,7 +771,14 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, break; } - vfio_group_put(group); + /* + * If we're the last reference to the group, the group will be + * released, which includes unregistering the iommu group notifier. + * We hold a read-lock on that notifier list, unregistering needs + * a write-lock... deadlock. Release our reference asynchronously + * to avoid that situation. + */ + vfio_group_schedule_put(group); return NOTIFY_OK; } -- cgit v1.2.3 From 8f9dec0c2dbb99aa36fa2d242828d853abde8eb0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 28 Jun 2017 13:50:05 -0600 Subject: vfio: New external user group/file match commit 5d6dee80a1e94cc284d03e06d930e60e8d3ecf7d upstream. At the point where the kvm-vfio pseudo device wants to release its vfio group reference, we can't always acquire a new reference to make that happen. The group can be in a state where we wouldn't allow a new reference to be added. This new helper function allows a caller to match a file to a group to facilitate this. Given a file and group, report if they match. Thus the caller needs to already have a group reference to match to the file. This allows the deletion of a group without acquiring a new reference. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Reviewed-by: Paolo Bonzini Tested-by: Eric Auger Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio.c | 9 +++++++++ include/linux/vfio.h | 2 ++ virt/kvm/vfio.c | 27 +++++++++++++++++++-------- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 8e8b73b05e54..881fc3a55edc 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1751,6 +1751,15 @@ void vfio_group_put_external_user(struct vfio_group *group) } EXPORT_SYMBOL_GPL(vfio_group_put_external_user); +bool vfio_external_group_match_file(struct vfio_group *test_group, + struct file *filep) +{ + struct vfio_group *group = filep->private_data; + + return (filep->f_op == &vfio_group_fops) && (group == test_group); +} +EXPORT_SYMBOL_GPL(vfio_external_group_match_file); + int vfio_external_user_iommu_id(struct vfio_group *group) { return iommu_group_id(group->iommu_group); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 0ecae0b1cd34..ed466750e744 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -88,6 +88,8 @@ extern void vfio_unregister_iommu_driver( */ extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern void vfio_group_put_external_user(struct vfio_group *group); +extern bool vfio_external_group_match_file(struct vfio_group *group, + struct file *filep); extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 1dd087da6f31..111e09c3f4bf 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -47,6 +47,22 @@ static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) return vfio_group; } +static bool kvm_vfio_external_group_match_file(struct vfio_group *group, + struct file *filep) +{ + bool ret, (*fn)(struct vfio_group *, struct file *); + + fn = symbol_get(vfio_external_group_match_file); + if (!fn) + return false; + + ret = fn(group, filep); + + symbol_put(vfio_external_group_match_file); + + return ret; +} + static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) { void (*fn)(struct vfio_group *); @@ -171,18 +187,13 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) if (!f.file) return -EBADF; - vfio_group = kvm_vfio_group_get_external_user(f.file); - fdput(f); - - if (IS_ERR(vfio_group)) - return PTR_ERR(vfio_group); - ret = -ENOENT; mutex_lock(&kv->lock); list_for_each_entry(kvg, &kv->group_list, node) { - if (kvg->vfio_group != vfio_group) + if (!kvm_vfio_external_group_match_file(kvg->vfio_group, + f.file)) continue; list_del(&kvg->node); @@ -196,7 +207,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); - kvm_vfio_group_put_external_user(vfio_group); + fdput(f); kvm_vfio_update_coherency(dev); -- cgit v1.2.3 From d17cc7b7a7522c908636b32beee7537f64e3c043 Mon Sep 17 00:00:00 2001 From: Marta Rybczynska Date: Tue, 6 Jun 2017 13:27:21 +0200 Subject: nvme-rdma: remove race conditions from IB signalling commit 5e599d73c1c1816af07f94ddba879499aa39b43c upstream. This patch improves the way the RDMA IB signalling is done by using atomic operations for the signalling variable. This avoids race conditions on sig_count. The signalling interval changes slightly and is now the largest power of two not larger than queue depth / 2. ilog() usage idea by Bart Van Assche. Signed-off-by: Marta Rybczynska Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/rdma.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3222f3e987eb..286fda4ee100 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -88,7 +88,7 @@ enum nvme_rdma_queue_flags { struct nvme_rdma_queue { struct nvme_rdma_qe *rsp_ring; - u8 sig_count; + atomic_t sig_count; int queue_size; size_t cmnd_capsule_len; struct nvme_rdma_ctrl *ctrl; @@ -555,6 +555,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue->cmnd_capsule_len = sizeof(struct nvme_command); queue->queue_size = queue_size; + atomic_set(&queue->sig_count, 0); queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, RDMA_PS_TCP, IB_QPT_RC); @@ -1011,17 +1012,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } -static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) +/* + * We want to signal completion at least every queue depth/2. This returns the + * largest power of two that is not above half of (queue size + 1) to optimize + * (avoid divisions). + */ +static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) { - int sig_limit; + int limit = 1 << ilog2((queue->queue_size + 1) / 2); - /* - * We signal completion every queue depth/2 and also handle the - * degenerated case of a device with queue_depth=1, where we - * would need to signal every message. - */ - sig_limit = max(queue->queue_size / 2, 1); - return (++queue->sig_count % sig_limit) == 0; + return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0; } static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, -- cgit v1.2.3 From 198bd494cebfad07c251d9fd26279393c81d8dec Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Jul 2017 10:35:57 +0300 Subject: ftrace: Fix uninitialized variable in match_records() commit 2e028c4fe12907f226b8221815f16c2486ad3aa7 upstream. My static checker complains that if "func" is NULL then "clear_filter" is uninitialized. This seems like it could be true, although it's possible something subtle is happening that I haven't seen. kernel/trace/ftrace.c:3844 match_records() error: uninitialized symbol 'clear_filter'. Link: http://lkml.kernel.org/r/20170712073556.h6tkpjcdzjaozozs@mwanda Fixes: f0a3b154bd7 ("ftrace: Clarify code for mod command") Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 221eb59272e1..4f7ea8446bb5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3590,7 +3590,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) int exclude_mod = 0; int found = 0; int ret; - int clear_filter; + int clear_filter = 0; if (func) { func_g.type = filter_parse_regex(func, len, &func_g.search, -- cgit v1.2.3 From dd2f83263dd8ecc4e325fe5f1aed5a930edbe643 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 31 May 2017 16:19:47 +0100 Subject: MIPS: Fix mips_atomic_set() retry condition commit 2ec420b26f7b6ff332393f0bb5a7d245f7ad87f0 upstream. The inline asm retry check in the MIPS_ATOMIC_SET operation of the sysmips system call has been backwards since commit f1e39a4a616c ("MIPS: Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler") merged in v2.6.32, resulting in the non R10000_LLSC_WAR case retrying until the operation was inatomic, before returning the new value that was probably just written multiple times instead of the old value. Invert the branch condition to fix that particular issue. Fixes: f1e39a4a616c ("MIPS: Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler") Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16148/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 53a7ef9a8f32..876c5a56931f 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -141,7 +141,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" "2: sc %[tmp], (%[addr]) \n" - " bnez %[tmp], 4f \n" + " beqz %[tmp], 4f \n" "3: \n" " .insn \n" " .subsection 2 \n" -- cgit v1.2.3 From f39f3b5d8260de549887f127ac5e99f0931b4990 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 31 May 2017 16:19:49 +0100 Subject: MIPS: Fix mips_atomic_set() with EVA commit 4915e1b043d6286928207b1f6968197b50407294 upstream. EVA linked loads (LLE) and conditional stores (SCE) should be used on EVA kernels for the MIPS_ATOMIC_SET operation of the sysmips system call, or else the atomic set will apply to the kernel view of the virtual address space (potentially unmapped on EVA kernels) rather than the user view (TLB mapped). Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16151/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/syscall.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 876c5a56931f..8c222d390c48 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -138,9 +139,11 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) __asm__ __volatile__ ( " .set "MIPS_ISA_ARCH_LEVEL" \n" " li %[err], 0 \n" - "1: ll %[old], (%[addr]) \n" + "1: \n" + user_ll("%[old]", "(%[addr])") " move %[tmp], %[new] \n" - "2: sc %[tmp], (%[addr]) \n" + "2: \n" + user_sc("%[tmp]", "(%[addr])") " beqz %[tmp], 4f \n" "3: \n" " .insn \n" -- cgit v1.2.3 From 02131aea6bd4d7a1414333160124b6a078b262fe Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 29 Jun 2017 10:12:34 +0100 Subject: MIPS: Negate error syscall return in trace commit 4f32a39d49b25eaa66d2420f1f03d371ea4cd906 upstream. The sys_exit trace event takes a single return value for the system call, which MIPS passes the value of the $v0 (result) register, however MIPS returns positive error codes in $v0 with $a3 specifying that $v0 contains an error code. As a result erroring system calls are traced returning positive error numbers that can't always be distinguished from success. Use regs_return_value() to negate the error code if $a3 is set. Fixes: 1d7bf993e073 ("MIPS: ftrace: Add support for syscall tracepoints.") Signed-off-by: James Hogan Cc: Steven Rostedt Cc: Ingo Molnar Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16651/ Acked-by: Steven Rostedt (VMware) Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index bf83dc1eecfb..3de026034c35 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -924,7 +924,7 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->regs[2]); + trace_sys_exit(regs, regs_return_value(regs)); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); -- cgit v1.2.3 From ec469b5e2aafdcedbc80d9a7a8a4fa8632e584e3 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 16 Jun 2017 16:21:44 +0200 Subject: ubifs: Don't leak kernel memory to the MTD commit 4acadda74ff8b949c448c0282765ae747e088c87 upstream. When UBIFS prepares data structures which will be written to the MTD it ensues that their lengths are multiple of 8. Since it uses kmalloc() the padded bytes are left uninitialized and we leak a few bytes of kernel memory to the MTD. To make sure that all bytes are initialized, let's switch to kzalloc(). Kzalloc() is fine in this case because the buffers are not huge and in the IO path the performance bottleneck is anyway the MTD. Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 91bc76dc559e..7d764e3b6c79 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -576,7 +576,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, /* Make sure to also account for extended attributes */ len += host_ui->data_len; - dent = kmalloc(len, GFP_NOFS); + dent = kzalloc(len, GFP_NOFS); if (!dent) return -ENOMEM; @@ -952,7 +952,7 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir, if (twoparents) len += plen; - dent1 = kmalloc(len, GFP_NOFS); + dent1 = kzalloc(len, GFP_NOFS); if (!dent1) return -ENOMEM; @@ -1102,7 +1102,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); if (move) len += plen; - dent = kmalloc(len, GFP_NOFS); + dent = kzalloc(len, GFP_NOFS); if (!dent) return -ENOMEM; @@ -1466,7 +1466,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, hlen = host_ui->data_len + UBIFS_INO_NODE_SZ; len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8); - xent = kmalloc(len, GFP_NOFS); + xent = kzalloc(len, GFP_NOFS); if (!xent) return -ENOMEM; @@ -1573,7 +1573,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, aligned_len1 = ALIGN(len1, 8); aligned_len = aligned_len1 + ALIGN(len2, 8); - ino = kmalloc(aligned_len, GFP_NOFS); + ino = kzalloc(aligned_len, GFP_NOFS); if (!ino) return -ENOMEM; -- cgit v1.2.3 From b2966b109b1a2c23e626ad35fc03d4a52efd1aa4 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 12 Jul 2017 11:09:09 +0800 Subject: ACPI / EC: Drop EC noirq hooks to fix a regression commit 662591461c4b9a1e3b9b159dbf37648a585ebaae upstream. According to bug reports, although the busy polling mode can make noirq stages execute faster, it causes abnormal fan blowing up after system resume (see the first link below for a video demonstration) on Lenovo ThinkPad X1 Carbon - the 5th Generation. The problem can be fixed by upgrading the EC firmware on that machine. However, many reporters confirm that the problem can be fixed by stopping busy polling during suspend/resume and for some of them upgrading the EC firmware is not an option. For this reason, drop the noirq stage hooks from the EC driver to fix the regression. Fixes: c3a696b6e8f8 (ACPI / EC: Use busy polling mode when GPE is not enabled) Link: https://youtu.be/9NQ9x-Jm99Q Link: https://bugzilla.kernel.org/show_bug.cgi?id=196129 Reported-by: Andreas Lindhe Tested-by: Gjorgji Jankovski Tested-by: Damjan Georgievski Tested-by: Fernando Chaves Tested-by: Tomislav Ivek Tested-by: Denis P. Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 22ca89242518..3f01f6523a81 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1865,24 +1865,6 @@ error: } #ifdef CONFIG_PM_SLEEP -static int acpi_ec_suspend_noirq(struct device *dev) -{ - struct acpi_ec *ec = - acpi_driver_data(to_acpi_device(dev)); - - acpi_ec_enter_noirq(ec); - return 0; -} - -static int acpi_ec_resume_noirq(struct device *dev) -{ - struct acpi_ec *ec = - acpi_driver_data(to_acpi_device(dev)); - - acpi_ec_leave_noirq(ec); - return 0; -} - static int acpi_ec_suspend(struct device *dev) { struct acpi_ec *ec = @@ -1904,7 +1886,6 @@ static int acpi_ec_resume(struct device *dev) #endif static const struct dev_pm_ops acpi_ec_pm = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend_noirq, acpi_ec_resume_noirq) SET_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend, acpi_ec_resume) }; -- cgit v1.2.3 From 456a997498cb5217a61fba0f1929be7d182b5338 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 12 Jul 2017 11:09:17 +0800 Subject: Revert "ACPI / EC: Enable event freeze mode..." to fix a regression commit 9c40f956ce9b331493347d1b3cb7e384f7dc0581 upstream. On Lenovo ThinkPad X1 Carbon - the 5th Generation, enabling an earlier EC event freezing timing causes acpitz-virtual-0 to report a stuck 48C temparature. And with EC firmware revisioned as 1.14, without reverting back to old EC event freezing timing, the fan still blows up after a system resume. This reverts the culprit change so that the regression can be fixed without upgrading the EC firmware. Fixes: d30283057ecd (ACPI / EC: Enable event freeze mode to improve event handling) Link: https://bugzilla.kernel.org/show_bug.cgi?id=191181#c168 Tested-by: Damjan Georgievski Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 3f01f6523a81..79152dbc5528 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -147,7 +147,7 @@ static unsigned int ec_storm_threshold __read_mostly = 8; module_param(ec_storm_threshold, uint, 0644); MODULE_PARM_DESC(ec_storm_threshold, "Maxim false GPE numbers not considered as GPE storm"); -static bool ec_freeze_events __read_mostly = true; +static bool ec_freeze_events __read_mostly = false; module_param(ec_freeze_events, bool, 0644); MODULE_PARM_DESC(ec_freeze_events, "Disabling event handling during suspend/resume"); -- cgit v1.2.3 From 036d59f40ac94964a1bbc8959f78f34efac71fd5 Mon Sep 17 00:00:00 2001 From: Seunghun Han Date: Tue, 18 Jul 2017 20:03:51 +0900 Subject: x86/acpi: Prevent out of bound access caused by broken ACPI tables commit dad5ab0db8deac535d03e3fe3d8f2892173fa6a4 upstream. The bus_irq argument of mp_override_legacy_irq() is used as the index into the isa_irq_to_gsi[] array. The bus_irq argument originates from ACPI_MADT_TYPE_IO_APIC and ACPI_MADT_TYPE_INTERRUPT items in the ACPI tables, but is nowhere sanity checked. That allows broken or malicious ACPI tables to overwrite memory, which might cause malfunction, panic or arbitrary code execution. Add a sanity check and emit a warning when that triggers. [ tglx: Added warning and rewrote changelog ] Signed-off-by: Seunghun Han Signed-off-by: Thomas Gleixner Cc: security@kernel.org Cc: "Rafael J. Wysocki" Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 931ced8ca345..d3e0d049a0c2 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -337,6 +337,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, int pin; struct mpc_intsrc mp_irq; + /* + * Check bus_irq boundary. + */ + if (bus_irq >= NR_IRQS_LEGACY) { + pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq); + return; + } + /* * Convert 'gsi' to 'ioapic.pin'. */ -- cgit v1.2.3 From c69280e9018eaea65ab18cc9a231d913479a5ee3 Mon Sep 17 00:00:00 2001 From: Seunghun Han Date: Tue, 18 Jul 2017 18:20:44 +0900 Subject: x86/ioapic: Pass the correct data to unmask_ioapic_irq() commit e708e35ba6d89ff785b225cd07dcccab04fa954a upstream. One of the rarely executed code pathes in check_timer() calls unmask_ioapic_irq() passing irq_get_chip_data(0) as argument. That's wrong as unmask_ioapic_irq() expects a pointer to the irq data of interrupt 0. irq_get_chip_data(0) returns NULL, so the following dereference in unmask_ioapic_irq() causes a kernel panic. The issue went unnoticed in the first place because irq_get_chip_data() returns a void pointer so the compiler cannot do a type check on the argument. The code path was added for machines with broken configuration, but it seems that those machines are either not running current kernels or simply do not longer exist. Hand in irq_get_irq_data(0) as argument which provides the correct data. [ tglx: Rewrote changelog ] Fixes: 4467715a44cc ("x86/irq: Move irq_cfg.irq_2_pin into io_apic.c") Signed-off-by: Seunghun Han Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/1500369644-45767-1-git-send-email-kkamagui@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7249f1500bcb..cf89928dbd46 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2116,7 +2116,7 @@ static inline void __init check_timer(void) int idx; idx = find_irq_entry(apic1, pin1, mp_INT); if (idx != -1 && irq_trigger(idx)) - unmask_ioapic_irq(irq_get_chip_data(0)); + unmask_ioapic_irq(irq_get_irq_data(0)); } irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); -- cgit v1.2.3 From a9db2f4f887c2cc9557fc2ae553ea24ecc65c6e0 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 8 Jul 2017 23:24:44 +0100 Subject: MIPS: Fix MIPS I ISA /proc/cpuinfo reporting commit e5f5a5b06e51a36f6ddf31a4a485358263953a3d upstream. Correct a commit 515a6393dbac ("MIPS: kernel: proc: Add MIPS R6 support to /proc/cpuinfo") regression that caused MIPS I systems to show no ISA levels supported in /proc/cpuinfo, e.g.: system type : Digital DECstation 2100/3100 machine : Unknown processor : 0 cpu model : R3000 V2.0 FPU V2.0 BogoMIPS : 10.69 wait instruction : no microsecond timers : no tlb_entries : 64 extra interrupt vector : no hardware watchpoint : no isa : ASEs implemented : shadow register sets : 1 kscratch registers : 0 package : 0 core : 0 VCED exceptions : not available VCEI exceptions : not available and similarly exclude `mips1' from the ISA list for any processors below MIPSr1. This is because the condition to show `mips1' on has been made `cpu_has_mips_r1' rather than newly-introduced `cpu_has_mips_1'. Use the correct condition then. Fixes: 515a6393dbac ("MIPS: kernel: proc: Add MIPS R6 support to /proc/cpuinfo") Signed-off-by: Maciej W. Rozycki Reviewed-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16758/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 4eff2aed7360..4c01ee5b88c9 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -83,7 +83,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) } seq_printf(m, "isa\t\t\t:"); - if (cpu_has_mips_r1) + if (cpu_has_mips_1) seq_printf(m, " mips1"); if (cpu_has_mips_2) seq_printf(m, "%s", " mips2"); -- cgit v1.2.3 From f8c331cbc937d849d93dc9f3f7cdc31d5fbac58e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 31 May 2017 16:19:48 +0100 Subject: MIPS: Save static registers before sysmips commit 49955d84cd9ccdca5a16a495e448e1a06fad9e49 upstream. The MIPS sysmips system call handler may return directly from the MIPS_ATOMIC_SET case (mips_atomic_set()) to syscall_exit. This path restores the static (callee saved) registers, however they won't have been saved on entry to the system call. Use the save_static_function() macro to create a __sys_sysmips wrapper function which saves the static registers before calling sys_sysmips, so that the correct static register state is restored by syscall_exit. Fixes: f1e39a4a616c ("MIPS: Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler") Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16149/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/scall32-o32.S | 2 +- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/mips/kernel/syscall.c | 6 ++++++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index c29d397eee86..e6be1f6210ba 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -371,7 +371,7 @@ EXPORT(sys_call_table) PTR sys_writev PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_ni_syscall /* 4150 */ PTR sys_getsid PTR sys_fdatasync diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 0687f96ee912..aa27dafa1c78 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -311,7 +311,7 @@ EXPORT(sys_call_table) PTR sys_sched_getaffinity PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_io_setup /* 5200 */ PTR sys_io_destroy PTR sys_io_getevents diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 0331ba39a065..37f608f2a76f 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -302,7 +302,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_sched_getaffinity PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR compat_sys_io_setup /* 6200 */ PTR sys_io_destroy PTR compat_sys_io_getevents diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 5a47042dd25f..7913a5cf6806 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -371,7 +371,7 @@ EXPORT(sys32_call_table) PTR compat_sys_writev PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_ni_syscall /* 4150 */ PTR sys_getsid PTR sys_fdatasync diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 8c222d390c48..4234b2d726c5 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -202,6 +202,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) unreachable(); } +/* + * mips_atomic_set() normally returns directly via syscall_exit potentially + * clobbering static registers, so be sure to preserve them. + */ +save_static_function(sys_sysmips); + SYSCALL_DEFINE3(sysmips, long, cmd, long, arg1, long, arg2) { switch (cmd) { -- cgit v1.2.3 From 434c9f2e3b4667859a705b48d53f23f5f629ecf1 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:06:19 +0100 Subject: MIPS: Actually decode JALX in `__compute_return_epc_for_insn' commit a9db101b735a9d49295326ae41f610f6da62b08c upstream. Complement commit fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") and actually decode the regular MIPS JALX major instruction opcode, the handling of which has been added with the said commit for EPC calculation in `__compute_return_epc_for_insn'. Fixes: fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16394/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/branch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index c86b66b57fc6..62806db073d2 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -556,6 +556,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* * These are unconditional and in j_format. */ + case jalx_op: case jal_op: regs->regs[31] = regs->cp0_epc + 8; case j_op: -- cgit v1.2.3 From 040078ad0fe82beff150dfd3b060a2ba7b47ea37 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:07:34 +0100 Subject: MIPS: Fix unaligned PC interpretation in `compute_return_epc' commit 11a3799dbeb620bf0400b1fda5cc2c6bea55f20a upstream. Fix a regression introduced with commit fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") and defer to `__compute_return_epc' if the ISA bit is set in EPC with non-MIPS16, non-microMIPS hardware, which will then arrange for a SIGBUS due to an unaligned instruction reference. Returning EPC here is never correct as the API defines this function's result to be either a negative error code on failure or one of 0 and BRANCH_LIKELY_TAKEN on success. Fixes: fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16395/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/branch.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h index de781cf54bc7..da80878f2c0d 100644 --- a/arch/mips/include/asm/branch.h +++ b/arch/mips/include/asm/branch.h @@ -74,10 +74,7 @@ static inline int compute_return_epc(struct pt_regs *regs) return __microMIPS_compute_return_epc(regs); if (cpu_has_mips16) return __MIPS16e_compute_return_epc(regs); - return regs->cp0_epc; - } - - if (!delay_slot(regs)) { + } else if (!delay_slot(regs)) { regs->cp0_epc += 4; return 0; } -- cgit v1.2.3 From d79354cc1bc912d623765781d570ddc398d792db Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:05:08 +0100 Subject: MIPS: math-emu: Prevent wrong ISA mode instruction emulation commit 13769ebad0c42738831787e27c7c7f982e7da579 upstream. Terminate FPU emulation immediately whenever an ISA mode switch has been observed. This is so that we do not interpret machine code in the wrong mode, for example when a regular MIPS FPU instruction has been placed in a delay slot of a jump that switches into the MIPS16 mode, as with the following code (taken from a GCC test suite case): 00400650 : 400650: 3c020100 lui v0,0x100 400654: 03e00008 jr ra 400658: 44c2f800 ctc1 v0,c1_fcsr 40065c: 00000000 nop [...] 004012d0 <__libc_csu_init>: 4012d0: f000 6a02 li v0,2 4012d4: f150 0b1c la v1,3f9430 <_DYNAMIC-0x6df0> 4012d8: f400 3240 sll v0,16 4012dc: e269 addu v0,v1 4012de: 659a move gp,v0 4012e0: f00c 64f6 save a0-a2,48,ra,s0-s1 4012e4: 673c move s1,gp 4012e6: f010 9978 lw v1,-32744(s1) 4012ea: d204 sw v0,16(sp) 4012ec: eb40 jalr v1 4012ee: 653b move t9,v1 4012f0: f010 997c lw v1,-32740(s1) 4012f4: f030 9920 lw s1,-32736(s1) 4012f8: e32f subu v1,s1 4012fa: 326b sra v0,v1,2 4012fc: d206 sw v0,24(sp) 4012fe: 220c beqz v0,401318 <__libc_csu_init+0x48> 401300: 6800 li s0,0 401302: 99e0 lw a3,0(s1) 401304: 4801 addiu s0,1 401306: 960e lw a2,56(sp) 401308: 4904 addiu s1,4 40130a: 950d lw a1,52(sp) 40130c: 940c lw a0,48(sp) 40130e: ef40 jalr a3 401310: 653f move t9,a3 401312: 9206 lw v0,24(sp) 401314: ea0a cmp v0,s0 401316: 61f5 btnez 401302 <__libc_csu_init+0x32> 401318: 6476 restore 48,ra,s0-s1 40131a: e8a0 jrc ra Here `set_fast_math' is called from `40130e' (`40130f' with the ISA bit) and emulation triggers for the CTC1 instruction. As it is in a jump delay slot emulation continues from `401312' (`401313' with the ISA bit). However we have no path to handle MIPS16 FPU code emulation, because there are no MIPS16 FPU instructions. So the default emulation path is taken, interpreting a 32-bit word fetched by `get_user' from `401313' as a regular MIPS instruction, which is: 401313: f5ea0a92 sdc1 $f10,2706(t7) This makes the FPU emulator proceed with the supposed SDC1 instruction and consequently makes the program considered here terminate with SIGSEGV. A similar although less severe issue exists with pure-microMIPS processors in the case where similarly an FPU instruction is emulated in a delay slot of a register jump that (incorrectly) switches into the regular MIPS mode. A subsequent instruction fetch from the jump's target is supposed to cause an Address Error exception, however instead we proceed with regular MIPS FPU emulation. For simplicity then, always terminate the emulation loop whenever a mode change is detected, denoted by an ISA mode bit flip. As from commit 377cb1b6c16a ("MIPS: Disable MIPS16/microMIPS crap for platforms not supporting these ASEs.") the result of `get_isa16_mode' can be hardcoded to 0, so we need to examine the ISA mode bit by hand. This complements commit 102cedc32a6e ("MIPS: microMIPS: Floating point support.") which added JALX decoding to FPU emulation. Fixes: 102cedc32a6e ("MIPS: microMIPS: Floating point support.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16393/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/cp1emu.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index f8b7bf836437..e9385bcd9723 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -2522,6 +2522,35 @@ dcopuop: return 0; } +/* + * Emulate FPU instructions. + * + * If we use FPU hardware, then we have been typically called to handle + * an unimplemented operation, such as where an operand is a NaN or + * denormalized. In that case exit the emulation loop after a single + * iteration so as to let hardware execute any subsequent instructions. + * + * If we have no FPU hardware or it has been disabled, then continue + * emulating floating-point instructions until one of these conditions + * has occurred: + * + * - a non-FPU instruction has been encountered, + * + * - an attempt to emulate has ended with a signal, + * + * - the ISA mode has been switched. + * + * We need to terminate the emulation loop if we got switched to the + * MIPS16 mode, whether supported or not, so that we do not attempt + * to emulate a MIPS16 instruction as a regular MIPS FPU instruction. + * Similarly if we got switched to the microMIPS mode and only the + * regular MIPS mode is supported, so that we do not attempt to emulate + * a microMIPS instruction as a regular MIPS FPU instruction. Or if + * we got switched to the regular MIPS mode and only the microMIPS mode + * is supported, so that we do not attempt to emulate a regular MIPS + * instruction that should cause an Address Error exception instead. + * For simplicity we always terminate upon an ISA mode switch. + */ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr) { @@ -2607,6 +2636,15 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; if (sig) break; + /* + * We have to check for the ISA bit explicitly here, + * because `get_isa16_mode' may return 0 if support + * for code compression has been globally disabled, + * or otherwise we may produce the wrong signal or + * even proceed successfully where we must not. + */ + if ((xcp->cp0_epc ^ prevepc) & 0x1) + break; cond_resched(); } while (xcp->cp0_epc > prevepc); -- cgit v1.2.3 From 86dd4aa3079058342b99289977cc06c681fb6a2d Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:08:29 +0100 Subject: MIPS: Send SIGILL for BPOSGE32 in `__compute_return_epc_for_insn' commit 7b82c1058ac1f8f8b9f2b8786b1f710a57a870a8 upstream. Fix commit e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") and send SIGILL rather than SIGBUS whenever an unimplemented BPOSGE32 DSP ASE instruction has been encountered in `__compute_return_epc_for_insn' as our Reserved Instruction exception handler would in response to an attempt to actually execute the instruction. Sending SIGBUS only makes sense for the unaligned PC case, since moved to `__compute_return_epc'. Adjust function documentation accordingly, correct formatting and use `pr_info' rather than `printk' as the other exit path already does. Fixes: e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16396/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/branch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 62806db073d2..4e7e3b9c21e9 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -399,7 +399,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) * * @regs: Pointer to pt_regs * @insn: branch instruction to decode - * @returns: -EFAULT on error and forces SIGBUS, and on success + * @returns: -EFAULT on error and forces SIGILL, and on success * returns 0 or BRANCH_LIKELY_TAKEN as appropriate after * evaluating the branch. * @@ -832,8 +832,9 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, return ret; sigill_dsp: - printk("%s: DSP branch but not DSP ASE - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + pr_info("%s: DSP branch but not DSP ASE - sending SIGILL.\n", + current->comm); + force_sig(SIGILL, current); return -EFAULT; sigill_r6: pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n", -- cgit v1.2.3 From 99ce76144dee8dfbedeccfe7fed85f61d31c6379 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:09:23 +0100 Subject: MIPS: Rename `sigill_r6' to `sigill_r2r6' in `__compute_return_epc_for_insn' commit 1f4edde422961397cf4470b347958c13c6a740bb upstream. Use the more accurate `sigill_r2r6' name for the label used in the case of sending SIGILL in the absence of the instruction emulator for an earlier ISA level instruction that has been removed as from the R6 ISA, so that the `sigill_r6' name is freed for the situation where an R6 instruction is not supposed to be interpreted, because the executing processor does not support the R6 ISA. Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16397/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/branch.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 4e7e3b9c21e9..a1cf462b13a9 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -431,7 +431,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* Fall through */ case jr_op: if (NO_R6EMU && insn.r_format.func == jr_op) - goto sigill_r6; + goto sigill_r2r6; regs->cp0_epc = regs->regs[insn.r_format.rs]; break; } @@ -446,7 +446,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, switch (insn.i_format.rt) { case bltzl_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -459,7 +459,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezl_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -574,7 +574,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, */ case beql_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) { @@ -588,7 +588,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bnel_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) { @@ -602,7 +602,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case blezl_op: /* not really i_format */ if (!insn.i_format.rt && NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case blez_op: /* * Compact branches for R6 for the @@ -637,7 +637,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bgtz_op: /* * Compact branches for R6 for the @@ -836,7 +836,7 @@ sigill_dsp: current->comm); force_sig(SIGILL, current); return -EFAULT; -sigill_r6: +sigill_r2r6: pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n", current->comm); force_sig(SIGILL, current); -- cgit v1.2.3 From d4bd6a1df2c816b7027f9cf1a900ae52f528f300 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:12:53 +0100 Subject: MIPS: Send SIGILL for linked branches in `__compute_return_epc_for_insn' commit fef40be6da856afead4177aaa9d869a66fb3381f upstream. Fix commit 319824eabc3f ("MIPS: kernel: branch: Do not emulate the branch likelies on MIPS R6") and also send SIGILL rather than returning -SIGILL for BLTZAL, BLTZALL, BGEZAL and BGEZALL instruction encodings no longer supported in R6, except where emulated. Returning -SIGILL is never correct as the API defines this function's result upon error to be -EFAULT and a signal actually issued. Fixes: 319824eabc3f ("MIPS: kernel: branch: Do not emulate the branch likelies on MIPS R6") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16398/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/branch.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index a1cf462b13a9..ddf0f0270b1c 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -473,10 +473,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bltzal_op: case bltzall_op: if (NO_R6EMU && (insn.i_format.rs || - insn.i_format.rt == bltzall_op)) { - ret = -SIGILL; - break; - } + insn.i_format.rt == bltzall_op)) + goto sigill_r2r6; regs->regs[31] = epc + 8; /* * OK we are here either because we hit a NAL @@ -507,10 +505,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezal_op: case bgezall_op: if (NO_R6EMU && (insn.i_format.rs || - insn.i_format.rt == bgezall_op)) { - ret = -SIGILL; - break; - } + insn.i_format.rt == bgezall_op)) + goto sigill_r2r6; regs->regs[31] = epc + 8; /* * OK we are here either because we hit a BAL -- cgit v1.2.3 From 3330a05c5c6ebb6134479bca4758d9766d1c9663 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:14:12 +0100 Subject: MIPS: Send SIGILL for R6 branches in `__compute_return_epc_for_insn' commit a60b1a5bf88a250f1a77977c0224e502c901c77b upstream. Fix: * commit 8467ca0122e2 ("MIPS: Emulate the new MIPS R6 branch compact (BC) instruction"), * commit 84fef630127a ("MIPS: Emulate the new MIPS R6 BALC instruction"), * commit 69b9a2fd05a3 ("MIPS: Emulate the new MIPS R6 BEQZC and JIC instructions"), * commit 28d6f93d201d ("MIPS: Emulate the new MIPS R6 BNEZC and JIALC instructions"), * commit c893ce38b265 ("MIPS: Emulate the new MIPS R6 BOVC, BEQC and BEQZALC instructions") and send SIGILL rather than returning -SIGILL for R6 branch and jump instructions. Returning -SIGILL is never correct as the API defines this function's result upon error to be -EFAULT and a signal actually issued. Fixes: 8467ca0122e2 ("MIPS: Emulate the new MIPS R6 branch compact (BC) instruction") Fixes: 84fef630127a ("MIPS: Emulate the new MIPS R6 BALC instruction") Fixes: 69b9a2fd05a3 ("MIPS: Emulate the new MIPS R6 BEQZC and JIC instructions") Fixes: 28d6f93d201d ("MIPS: Emulate the new MIPS R6 BNEZC and JIALC instructions") Fixes: c893ce38b265 ("MIPS: Emulate the new MIPS R6 BOVC, BEQC and BEQZALC instructions") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16399/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/branch.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index ddf0f0270b1c..be6d4ba989bd 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -771,35 +771,27 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, #else case bc6_op: /* Only valid for MIPS R6 */ - if (!cpu_has_mips_r6) { - ret = -SIGILL; - break; - } + if (!cpu_has_mips_r6) + goto sigill_r6; regs->cp0_epc += 8; break; case balc6_op: - if (!cpu_has_mips_r6) { - ret = -SIGILL; - break; - } + if (!cpu_has_mips_r6) + goto sigill_r6; /* Compact branch: BALC */ regs->regs[31] = epc + 4; epc += 4 + (insn.i_format.simmediate << 2); regs->cp0_epc = epc; break; case pop66_op: - if (!cpu_has_mips_r6) { - ret = -SIGILL; - break; - } + if (!cpu_has_mips_r6) + goto sigill_r6; /* Compact branch: BEQZC || JIC */ regs->cp0_epc += 8; break; case pop76_op: - if (!cpu_has_mips_r6) { - ret = -SIGILL; - break; - } + if (!cpu_has_mips_r6) + goto sigill_r6; /* Compact branch: BNEZC || JIALC */ if (!insn.i_format.rs) { /* JIALC: set $31/ra */ @@ -811,10 +803,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case pop10_op: case pop30_op: /* Only valid for MIPS R6 */ - if (!cpu_has_mips_r6) { - ret = -SIGILL; - break; - } + if (!cpu_has_mips_r6) + goto sigill_r6; /* * Compact branches: * bovc, beqc, beqzalc, bnvc, bnec, bnezlac @@ -837,6 +827,11 @@ sigill_r2r6: current->comm); force_sig(SIGILL, current); return -EFAULT; +sigill_r6: + pr_info("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n", + current->comm); + force_sig(SIGILL, current); + return -EFAULT; } EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn); -- cgit v1.2.3 From 6d77ac4bc514568b98762f3a35b83a4de3e966b8 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:15:22 +0100 Subject: MIPS: Fix a typo: s/preset/present/ in r2-to-r6 emulation error message commit 27fe2200dad2de8207a694024a7b9037dff1b280 upstream. This is a user-visible message, so we want it to be spelled correctly. Fixes: 5f9f41c474be ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16400/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/branch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index be6d4ba989bd..c3f2fb34751e 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -823,7 +823,7 @@ sigill_dsp: force_sig(SIGILL, current); return -EFAULT; sigill_r2r6: - pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n", + pr_info("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n", current->comm); force_sig(SIGILL, current); return -EFAULT; -- cgit v1.2.3 From 5b50e0e74e2e5f084d18a03f6dedc67cfdb8db49 Mon Sep 17 00:00:00 2001 From: Chen Hong Date: Sun, 2 Jul 2017 15:11:10 -0700 Subject: Input: i8042 - fix crash at boot time commit 340d394a789518018f834ff70f7534fc463d3226 upstream. The driver checks port->exists twice in i8042_interrupt(), first when trying to assign temporary "serio" variable, and second time when deciding whether it should call serio_interrupt(). The value of port->exists may change between the 2 checks, and we may end up calling serio_interrupt() with a NULL pointer: BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 IP: [] _spin_lock_irqsave+0x1f/0x40 PGD 0 Oops: 0002 [#1] SMP last sysfs file: CPU 0 Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.32-358.el6.x86_64 #1 QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:[] [] _spin_lock_irqsave+0x1f/0x40 RSP: 0018:ffff880028203cc0 EFLAGS: 00010082 RAX: 0000000000010000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000282 RSI: 0000000000000098 RDI: 0000000000000050 RBP: ffff880028203cc0 R08: ffff88013e79c000 R09: ffff880028203ee0 R10: 0000000000000298 R11: 0000000000000282 R12: 0000000000000050 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000098 FS: 0000000000000000(0000) GS:ffff880028200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000050 CR3: 0000000001a85000 CR4: 00000000001407f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 1, threadinfo ffff88013e79c000, task ffff88013e79b500) Stack: ffff880028203d00 ffffffff813de186 ffffffffffffff02 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000098 ffff880028203d70 ffffffff813e0162 ffff880028203d20 ffffffff8103b8ac Call Trace: [] serio_interrupt+0x36/0xa0 [] i8042_interrupt+0x132/0x3a0 [] ? kvm_clock_read+0x1c/0x20 [] ? kvm_clock_get_cycles+0x9/0x10 [] handle_IRQ_event+0x60/0x170 [] ? kvm_guest_apic_eoi_write+0x44/0x50 [] handle_edge_irq+0xde/0x180 [] handle_irq+0x49/0xa0 [] do_IRQ+0x6c/0xf0 [] ret_from_intr+0x0/0x11 [] ? __do_softirq+0x73/0x1e0 [] ? hrtimer_interrupt+0x14b/0x260 [] ? call_softirq+0x1c/0x30 [] ? do_softirq+0x65/0xa0 [] ? irq_exit+0x85/0x90 [] ? smp_apic_timer_interrupt+0x70/0x9b [] ? apic_timer_interrupt+0x13/0x20 To avoid the issue let's change the second check to test whether serio is NULL or not. Also, let's take i8042_lock in i8042_start() and i8042_stop() instead of trying to be overly smart and using memory barriers. Signed-off-by: Chen Hong [dtor: take lock in i8042_start()/i8042_stop()] Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 89abfdb539ac..c84c685056b9 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -434,8 +434,10 @@ static int i8042_start(struct serio *serio) { struct i8042_port *port = serio->port_data; + spin_lock_irq(&i8042_lock); port->exists = true; - mb(); + spin_unlock_irq(&i8042_lock); + return 0; } @@ -448,16 +450,20 @@ static void i8042_stop(struct serio *serio) { struct i8042_port *port = serio->port_data; + spin_lock_irq(&i8042_lock); port->exists = false; + port->serio = NULL; + spin_unlock_irq(&i8042_lock); /* + * We need to make sure that interrupt handler finishes using + * our serio port before we return from this function. * We synchronize with both AUX and KBD IRQs because there is * a (very unlikely) chance that AUX IRQ is raised for KBD port * and vice versa. */ synchronize_irq(I8042_AUX_IRQ); synchronize_irq(I8042_KBD_IRQ); - port->serio = NULL; } /* @@ -574,7 +580,7 @@ static irqreturn_t i8042_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&i8042_lock, flags); - if (likely(port->exists && !filtered)) + if (likely(serio && !filtered)) serio_interrupt(serio, data, dfl); out: -- cgit v1.2.3 From 5c2717f4154502779e696b303cf8dcfb94b2e5f5 Mon Sep 17 00:00:00 2001 From: Vladimir Neyelov Date: Sun, 21 May 2017 19:17:31 +0300 Subject: IB/iser: Fix connection teardown race condition commit c8c16d3bae967f1c7af541e8d016e5c51e4f010a upstream. Under heavy iser target(scst) start/stop stress during login/logout on iser intitiator side happened trace call provided below. The function iscsi_iser_slave_alloc iser_conn pointer could be NULL, due to the fact that function iscsi_iser_conn_stop can be called before and free iser connection. Let's protect that flow by introducing global mutex. BUG: unable to handle kernel paging request at 0000000000001018 IP: [] iscsi_iser_slave_alloc+0x1e/0x50 [ib_iser] Call Trace: ? scsi_alloc_sdev+0x242/0x300 scsi_probe_and_add_lun+0x9e1/0xea0 ? kfree_const+0x21/0x30 ? kobject_set_name_vargs+0x76/0x90 ? __pm_runtime_resume+0x5b/0x70 __scsi_scan_target+0xf6/0x250 scsi_scan_target+0xea/0x100 iscsi_user_scan_session.part.13+0x101/0x130 [scsi_transport_iscsi] ? iscsi_user_scan_session.part.13+0x130/0x130 [scsi_transport_iscsi] iscsi_user_scan_session+0x1e/0x30 [scsi_transport_iscsi] device_for_each_child+0x50/0x90 iscsi_user_scan+0x44/0x60 [scsi_transport_iscsi] store_scan+0xa8/0x100 ? common_file_perm+0x5d/0x1c0 dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 kernfs_fop_write+0x12c/0x1c0 __vfs_write+0x18/0x40 vfs_write+0xb5/0x1a0 SyS_write+0x55/0xc0 Fixes: 318d311e8f01 ("iser: Accept arbitrary sg lists mapping if the device supports it") Signed-off-by: Vladimir Neyelov Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/iser/iscsi_iser.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 140f3f354cf3..e46e2b095c18 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -83,6 +83,7 @@ static struct scsi_host_template iscsi_iser_sht; static struct iscsi_transport iscsi_iser_transport; static struct scsi_transport_template *iscsi_iser_scsi_transport; static struct workqueue_struct *release_wq; +static DEFINE_MUTEX(unbind_iser_conn_mutex); struct iser_global ig; int iser_debug_level = 0; @@ -550,12 +551,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) */ if (iser_conn) { mutex_lock(&iser_conn->state_mutex); + mutex_lock(&unbind_iser_conn_mutex); iser_conn_terminate(iser_conn); iscsi_conn_stop(cls_conn, flag); /* unbind */ iser_conn->iscsi_conn = NULL; conn->dd_data = NULL; + mutex_unlock(&unbind_iser_conn_mutex); complete(&iser_conn->stop_completion); mutex_unlock(&iser_conn->state_mutex); @@ -973,13 +976,21 @@ static int iscsi_iser_slave_alloc(struct scsi_device *sdev) struct iser_conn *iser_conn; struct ib_device *ib_dev; + mutex_lock(&unbind_iser_conn_mutex); + session = starget_to_session(scsi_target(sdev))->dd_data; iser_conn = session->leadconn->dd_data; + if (!iser_conn) { + mutex_unlock(&unbind_iser_conn_mutex); + return -ENOTCONN; + } ib_dev = iser_conn->ib_conn.device->ib_device; if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + mutex_unlock(&unbind_iser_conn_mutex); + return 0; } -- cgit v1.2.3 From dd0d6509cbf4ad5624300083ed7f98dcdac55e2a Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 23 May 2017 10:48:44 +0300 Subject: IB/core: Namespace is mandatory input for address resolution commit bebb2a473a43c8f84a8210687d1cbdde503046d7 upstream. In function addr_resolve() the namespace is a required input parameter and not an output. It is passed later for searching the routing table and device addresses. Also, it shouldn't be copied back to the caller. Fixes: 565edd1d5555 ('IB/addr: Pass network namespace as a parameter') Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 8fd108d89527..63e82f8e8308 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -518,6 +518,11 @@ static int addr_resolve(struct sockaddr *src_in, struct dst_entry *dst; int ret; + if (!addr->net) { + pr_warn_ratelimited("%s: missing namespace\n", __func__); + return -EINVAL; + } + if (src_in->sa_family == AF_INET) { struct rtable *rt = NULL; const struct sockaddr_in *dst_in4 = @@ -555,7 +560,6 @@ static int addr_resolve(struct sockaddr *src_in, } addr->bound_dev_if = ndev->ifindex; - addr->net = dev_net(ndev); dev_put(ndev); return ret; -- cgit v1.2.3 From 4dd0aa9ae6d0e948848f87b4c040cea8163a59bf Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 10 Jun 2017 04:59:07 +0200 Subject: sunrpc: use constant time memory comparison for mac commit 15a8b93fd5690de017ce665382ea45e5d61811a4 upstream. Otherwise, we enable a MAC forgery via timing attack. Signed-off-by: Jason A. Donenfeld Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 90115ceefd49..79aec90259cd 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -34,6 +34,7 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ +#include #include #include #include @@ -927,7 +928,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, if (ret) goto out_err; - if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) { + if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) { ret = GSS_S_BAD_SIG; goto out_err; } -- cgit v1.2.3 From 9ebfb4fa3ab7ea99b5d93305b10ee3714501b465 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 5 Jul 2017 12:22:20 +1000 Subject: NFS: only invalidate dentrys that are clearly invalid. commit cc89684c9a265828ce061037f1f79f4a68ccd3f7 upstream. Since commit bafc9b754f75 ("vfs: More precise tests in d_invalidate") in v3.18, a return of '0' from ->d_revalidate() will cause the dentry to be invalidated even if it has filesystems mounted on or it or on a descendant. The mounted filesystem is unmounted. This means we need to be careful not to return 0 unless the directory referred to truly is invalid. So -ESTALE or -ENOENT should invalidate the directory. Other errors such a -EPERM or -ERESTARTSYS should be returned from ->d_revalidate() so they are propagated to the caller. A particular problem can be demonstrated by: 1/ mount an NFS filesystem using NFSv3 on /mnt 2/ mount any other filesystem on /mnt/foo 3/ ls /mnt/foo 4/ turn off network, or otherwise make the server unable to respond 5/ ls /mnt/foo & 6/ cat /proc/$!/stack # note that nfs_lookup_revalidate is in the call stack 7/ kill -9 $! # this results in -ERESTARTSYS being returned 8/ observe that /mnt/foo has been unmounted. This patch changes nfs_lookup_revalidate() to only treat -ESTALE from nfs_lookup_verify_inode() and -ESTALE or -ENOENT from ->lookup() as indicating an invalid inode. Other errors are returned. Also nfs_check_inode_attributes() is changed to return -ESTALE rather than -EIO. This is consistent with the error returned in similar circumstances from nfs_update_inode(). As this bug allows any user to unmount a filesystem mounted on an NFS filesystem, this fix is suitable for stable kernels. Fixes: bafc9b754f75 ("vfs: More precise tests in d_invalidate") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 12 ++++++++---- fs/nfs/inode.c | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 53e02b8bd9bd..d04ec3814779 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1167,11 +1167,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) /* Force a full look up iff the parent directory has changed */ if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { - - if (nfs_lookup_verify_inode(inode, flags)) { + error = nfs_lookup_verify_inode(inode, flags); + if (error) { if (flags & LOOKUP_RCU) return -ECHILD; - goto out_zap_parent; + if (error == -ESTALE) + goto out_zap_parent; + goto out_error; } goto out_valid; } @@ -1195,8 +1197,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) trace_nfs_lookup_revalidate_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); - if (error) + if (error == -ESTALE || error == -ENOENT) goto out_bad; + if (error) + goto out_error; if (nfs_compare_fh(NFS_FH(inode), fhandle)) goto out_bad; if ((error = nfs_refresh_inode(inode, fattr)) != 0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf4ec5ecc97e..76ae25661d3f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1278,9 +1278,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return 0; /* Has the inode gone and changed behind our back? */ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) - return -EIO; + return -ESTALE; if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) - return -EIO; + return -ESTALE; if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ -- cgit v1.2.3 From fa67ac18eff4b59b7937c59e821f8b4be812113c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Jun 2017 16:20:25 +0200 Subject: udf: Fix deadlock between writeback and udf_setsize() commit f2e95355891153f66d4156bf3a142c6489cd78c6 upstream. udf_setsize() called truncate_setsize() with i_data_sem held. Thus truncate_pagecache() called from truncate_setsize() could lock a page under i_data_sem which can deadlock as page lock ranks below i_data_sem - e. g. writeback can hold page lock and try to acquire i_data_sem to map a block. Fix the problem by moving truncate_setsize() calls from under i_data_sem. It is safe for us to change i_size without holding i_data_sem as all the places that depend on i_size being stable already hold inode_lock. Fixes: 7e49b6f2480cb9a9e7322a91592e56a5c85361f5 Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/udf/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 129b18a29c8f..035943501b9f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1243,8 +1243,8 @@ int udf_setsize(struct inode *inode, loff_t newsize) return err; } set_size: - truncate_setsize(inode, newsize); up_write(&iinfo->i_data_sem); + truncate_setsize(inode, newsize); } else { if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { down_write(&iinfo->i_data_sem); @@ -1261,9 +1261,9 @@ set_size: udf_get_block); if (err) return err; + truncate_setsize(inode, newsize); down_write(&iinfo->i_data_sem); udf_clear_extent_cache(inode); - truncate_setsize(inode, newsize); udf_truncate_extents(inode); up_write(&iinfo->i_data_sem); } -- cgit v1.2.3 From 672145dfae842fe31dfaedd965a23e051a4722b5 Mon Sep 17 00:00:00 2001 From: Jiang Yi Date: Sun, 25 Jun 2017 12:28:50 -0700 Subject: target: Fix COMPARE_AND_WRITE caw_sem leak during se_cmd quiesce commit 1d6ef276594a781686058802996e09c8550fd767 upstream. This patch addresses a COMPARE_AND_WRITE se_device->caw_sem leak, that would be triggered during normal se_cmd shutdown or abort via __transport_wait_for_tasks(). This would occur because target_complete_cmd() would catch this early and do complete_all(&cmd->t_transport_stop_comp), but since target_complete_ok_work() or target_complete_failure_work() are never called to invoke se_cmd->transport_complete_callback(), the COMPARE_AND_WRITE specific callbacks never release caw_sem. To address this special case, go ahead and release caw_sem directly from target_complete_cmd(). (Remove '&& success' from check, to release caw_sem regardless of scsi_status - nab) Signed-off-by: Jiang Yi Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_transport.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 1f9bfa4195ea..e8a1f5cadba5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -753,6 +753,15 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) if (cmd->transport_state & CMD_T_ABORTED || cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); + /* + * If COMPARE_AND_WRITE was stopped by __transport_wait_for_tasks(), + * release se_device->caw_sem obtained by sbc_compare_and_write() + * since target_complete_ok_work() or target_complete_failure_work() + * won't be called to invoke the normal CAW completion callbacks. + */ + if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) { + up(&dev->caw_sem); + } complete_all(&cmd->t_transport_stop_comp); return; } else if (!success) { -- cgit v1.2.3 From d5f9cd081c66f0b3187626c1c9f7b89949aae71f Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 29 Jun 2017 22:21:31 -0700 Subject: iser-target: Avoid isert_conn->cm_id dereference in isert_login_recv_done commit fce50a2fa4e9c6e103915c351b6d4a98661341d6 upstream. This patch fixes a NULL pointer dereference in isert_login_recv_done() of isert_conn->cm_id due to isert_cma_handler() -> isert_connect_error() resetting isert_conn->cm_id = NULL during a failed login attempt. As per Sagi, we will always see the completion of all recv wrs posted on the qp (given that we assigned a ->done handler), this is a FLUSH error completion, we just don't get to verify that because we deref NULL before. The issue here, was the assumption that dereferencing the connection cm_id is always safe, which is not true since: commit 4a579da2586bd3b79b025947ea24ede2bbfede62 Author: Sagi Grimberg Date: Sun Mar 29 15:52:04 2015 +0300 iser-target: Fix possible deadlock in RDMA_CM connection error As I see it, we have a direct reference to the isert_device from isert_conn which is the one-liner fix that we actually need like we do in isert_rdma_read_done() and isert_rdma_write_done(). Reported-by: Andrea Righi Tested-by: Andrea Righi Reviewed-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6dd43f63238e..39d28375aa37 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1447,7 +1447,7 @@ static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct isert_conn *isert_conn = wc->qp->qp_context; - struct ib_device *ib_dev = isert_conn->cm_id->device; + struct ib_device *ib_dev = isert_conn->device->ib_device; if (unlikely(wc->status != IB_WC_SUCCESS)) { isert_print_wc(wc, "login recv"); -- cgit v1.2.3 From 61415418cdba50d37db8a474e2c30af1aba30092 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 8 Jun 2017 14:01:44 +0800 Subject: perf annotate: Fix broken arrow at row 0 connecting jmp instruction to its target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 80f62589fa52f530cffc50e78c0b5a2ae572d61e upstream. When the jump instruction is displayed at the row 0 in annotate view, the arrow is broken. An example: 16.86 │ ┌──je 82 0.01 │ movsd (%rsp),%xmm0 │ movsd 0x8(%rsp),%xmm4 │ movsd 0x8(%rsp),%xmm1 │ movsd (%rsp),%xmm3 │ divsd %xmm4,%xmm0 │ divsd %xmm3,%xmm1 │ movsd (%rsp),%xmm2 │ addsd %xmm1,%xmm0 │ addsd %xmm2,%xmm0 │ movsd %xmm0,(%rsp) │82: sub $0x1,%ebx 83.03 │ ↑ jne 38 │ add $0x10,%rsp │ xor %eax,%eax │ pop %rbx │ ← retq The patch increments the row number before checking with 0. Signed-off-by: Yao Jin Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Fixes: 944e1abed9e1 ("perf ui browser: Add method to draw up/down arrow line") Link: http://lkml.kernel.org/r/1496901704-30275-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/ui/browser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 3eb3edb307a4..a1309010d109 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -702,7 +702,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser, ui_browser__gotorc(browser, row, column + 1); SLsmg_draw_hline(2); - if (row++ == 0) + if (++row == 0) goto out; } else row = 0; -- cgit v1.2.3 From a76a032300e26ecbc398dcaaeb10b5a850a95f76 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Jul 2017 10:56:54 +0200 Subject: Revert "perf/core: Drop kernel samples even though :u is specified" commit 6a8a75f3235724c5941a33e287b2f98966ad14c5 upstream. This reverts commit cc1582c231ea041fbc68861dfaf957eaf902b829. This commit introduced a regression that broke rr-project, which uses sampling events to receive a signal on overflow (but does not care about the contents of the sample). These signals are critical to the correct operation of rr. There's been some back and forth about how to fix it - but to not keep applications in limbo queue up a revert. Reported-by: Kyle Huey Acked-by: Kyle Huey Acked-by: Peter Zijlstra Cc: Jin Yao Cc: Vince Weaver Cc: Linus Torvalds Cc: Will Deacon Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Stephane Eranian Cc: Namhyung Kim Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170628105600.GC5981@leverpostej Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 30ccc7029d18..f5a693589d66 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7088,21 +7088,6 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) -{ - /* - * Due to interrupt latency (AKA "skid"), we may enter the - * kernel before taking an overflow, even if the PMU is only - * counting user events. - * To avoid leaking information to userspace, we must always - * reject kernel samples when exclude_kernel is set. - */ - if (event->attr.exclude_kernel && !user_mode(regs)) - return false; - - return true; -} - /* * Generic event overflow handling, sampling. */ @@ -7149,12 +7134,6 @@ static int __perf_event_overflow(struct perf_event *event, perf_adjust_period(event, delta, hwc->last_period, true); } - /* - * For security, drop the skid kernel samples if necessary. - */ - if (!sample_is_allowed(event, regs)) - return ret; - /* * XXX event_limit might not quite work as expected on inherited * events -- cgit v1.2.3 From 964a21a1300c2857483e85f1d013c5867ee4d433 Mon Sep 17 00:00:00 2001 From: Michael Gugino Date: Mon, 17 Jul 2017 13:29:09 -0400 Subject: staging: rtl8188eu: add TL-WN722N v2 support commit 5a1d4c5dd4eb2f1f8a9b30e61762f3b3b564df70 upstream. Add support for USB Device TP-Link TL-WN722N v2. VendorID: 0x2357, ProductID: 0x010c Signed-off-by: Michael Gugino Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 68e1e6bbe87f..b432153a6c5a 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -43,6 +43,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ + {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {} /* Terminating entry */ }; -- cgit v1.2.3 From 2bc52403da8a940d3ecc1b4c770cab9a650167ae Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 30 Jun 2017 12:02:18 +0100 Subject: staging: comedi: ni_mio_common: fix AO timer off-by-one regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 15d5193104a457d5151840247e3bce561c42e3e9 upstream. As reported by Éric Piel on the Comedi mailing list (see ), the analog output asynchronous commands are running too fast with a period 50 ns shorter than it should be. This affects all boards with AO command support that are supported by the "ni_pcimio", "ni_atmio", and "ni_mio_cs" drivers. This is a regression bug introduced by commit 080e6795cba3 ("staging: comedi: ni_mio_common: Cleans up/clarifies ni_ao_cmd"), specifically, this line in `ni_ao_cmd_set_update()`: /* following line: N-1 per STC */ ni_stc_writel(dev, trigvar - 1, NISTC_AO_UI_LOADA_REG); The `trigvar` variable value comes from a call to `ni_ns_to_timer()` which converts a timer period in nanoseconds to a hardware divisor value. The function already reduces the divisor by 1 as required by the hardware, so the above line should not reduce it further by 1. Fix it by replacing `trigvar` by `trigvar - 1` in the above line, and remove the misleading comment. Reported-by: Éric Piel Fixes: 080e6795cba3 ("staging: comedi: ni_mio_common: Cleans up/clarifies ni_ao_cmd") Cc: Éric Piel Cc: Spencer E. Olson Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 1c967c30e4ce..a574885ffba9 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -3078,8 +3078,7 @@ static void ni_ao_cmd_set_update(struct comedi_device *dev, /* following line: 2-1 per STC */ ni_stc_writel(dev, 1, NISTC_AO_UI_LOADA_REG); ni_stc_writew(dev, NISTC_AO_CMD1_UI_LOAD, NISTC_AO_CMD1_REG); - /* following line: N-1 per STC */ - ni_stc_writel(dev, trigvar - 1, NISTC_AO_UI_LOADA_REG); + ni_stc_writel(dev, trigvar, NISTC_AO_UI_LOADA_REG); } else { /* TRIG_EXT */ /* FIXME: assert scan_begin_arg != 0, ret failure otherwise */ devpriv->ao_cmd2 |= NISTC_AO_CMD2_BC_GATE_ENA; -- cgit v1.2.3 From 85643f6f50282d52c8d8cedbf958092b2a03cf7a Mon Sep 17 00:00:00 2001 From: Teddy Wang Date: Fri, 30 Jun 2017 21:57:43 +0100 Subject: staging: sm750fb: avoid conflicting vesafb commit 740c433ec35187b45abe08bb6c45a321a791be8e upstream. If vesafb is enabled in the config then /dev/fb0 is created by vesa and this sm750 driver gets fb1, fb2. But we need to be fb0 and fb1 to effectively work with xorg. So if it has been alloted fb1, then try to remove the other fb0. In the previous send, why #ifdef is used was asked. https://lkml.org/lkml/2017/6/25/57 Answered at: https://lkml.org/lkml/2017/6/25/69 Also pasting here for reference. 'Did a quick research into "why". The patch d8801e4df91e ("x86/PCI: Set IORESOURCE_ROM_SHADOW only for the default VGA device") has started setting IORESOURCE_ROM_SHADOW in flags for a default VGA device and that is being done only for x86. And so, we will need that #ifdef to check IORESOURCE_ROM_SHADOW as that needs to be checked only for a x86 and not for other arch.' Signed-off-by: Teddy Wang Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/staging/sm750fb/sm750.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c index 7d90e250142c..86ace1449309 100644 --- a/drivers/staging/sm750fb/sm750.c +++ b/drivers/staging/sm750fb/sm750.c @@ -1049,6 +1049,26 @@ release_fb: return err; } +static int lynxfb_kick_out_firmware_fb(struct pci_dev *pdev) +{ + struct apertures_struct *ap; + bool primary = false; + + ap = alloc_apertures(1); + if (!ap) + return -ENOMEM; + + ap->ranges[0].base = pci_resource_start(pdev, 0); + ap->ranges[0].size = pci_resource_len(pdev, 0); +#ifdef CONFIG_X86 + primary = pdev->resource[PCI_ROM_RESOURCE].flags & + IORESOURCE_ROM_SHADOW; +#endif + remove_conflicting_framebuffers(ap, "sm750_fb1", primary); + kfree(ap); + return 0; +} + static int lynxfb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1057,6 +1077,10 @@ static int lynxfb_pci_probe(struct pci_dev *pdev, int fbidx; int err; + err = lynxfb_kick_out_firmware_fb(pdev); + if (err) + return err; + /* enable device */ err = pcim_enable_device(pdev); if (err) -- cgit v1.2.3 From d42f9c7435c06c370966281782fd0e33a2eff460 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 15 Jul 2017 11:32:08 -0400 Subject: staging: lustre: ko2iblnd: check copy_from_iter/copy_to_iter return code commit 566e1ce22e04426fa52328b2adcdf1df49acd98e upstream. We now get a helpful warning for code that calls copy_{from,to}_iter without checking the return value, introduced by commit aa28de275a24 ("iov_iter/hardening: move object size checks to inlined part"). drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c: In function 'kiblnd_send': drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c:1643:2: error: ignoring return value of 'copy_from_iter', declared with attribute warn_unused_result [-Werror=unused-result] drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c: In function 'kiblnd_recv': drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c:1744:3: error: ignoring return value of 'copy_to_iter', declared with attribute warn_unused_result [-Werror=unused-result] In case we get short copies here, we may get incorrect behavior. I've added failure handling for both rx and tx now, returning -EFAULT as expected. Signed-off-by: Arnd Bergmann Signed-off-by: James Simmons Signed-off-by: Greg Kroah-Hartman --- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index b27de8888149..995f2dac7f26 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1650,8 +1650,13 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) ibmsg = tx->tx_msg; ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, IBLND_MSG_SIZE, - &from); + rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob, + &from); + if (rc != payload_nob) { + kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list); + return -EFAULT; + } + nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]); kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob); @@ -1751,8 +1756,14 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, break; } - copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, - IBLND_MSG_SIZE, to); + rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen, + to); + if (rc != rlen) { + rc = -EFAULT; + break; + } + + rc = 0; lnet_finalize(ni, lntmsg, 0); break; -- cgit v1.2.3 From acccf01a807abddda5ceb2fe3b72a38429044d0e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 6 Jul 2017 11:12:21 +0800 Subject: ceph: fix race in concurrent readdir commit 84583cfb973c4313955c6231cc9cb3772d280b15 upstream. For a large directory, program needs to issue multiple readdir syscalls to get all dentries. When there are multiple programs read the directory concurrently. Following sequence of events can happen. - program calls readdir with pos = 2. ceph sends readdir request to mds. The reply contains N1 entries. ceph adds these N1 entries to readdir cache. - program calls readdir with pos = N1+2. The readdir is satisfied by the readdir cache, N2 entries are returned. (Other program calls readdir in the middle, which fills the cache) - program calls readdir with pos = N1+N2+2. ceph sends readdir request to mds. The reply contains N3 entries and it reaches directory end. ceph adds these N3 entries to the readdir cache and marks directory complete. The second readdir call does not update fi->readdir_cache_idx. ceph add the last N3 entries to wrong places. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/dir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index aca0d884de73..cec25691cbae 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -292,6 +292,11 @@ out: if (ret < 0) err = ret; dput(last); + /* last_name no longer match cache index */ + if (fi->readdir_cache_idx >= 0) { + fi->readdir_cache_idx = -1; + fi->dir_release_count = 0; + } } return err; } -- cgit v1.2.3 From db29753960904012acfb8958bda5bc53c97f7e56 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Fri, 14 Jul 2017 09:41:31 -0500 Subject: RDMA/core: Initialize port_num in qp_attr commit a62ab66b13a0f9bcb17b7b761f6670941ed5cd62 upstream. Initialize the port_num for iWARP in rdma_init_qp_attr. Fixes: 5ecce4c9b17b("Check port number supplied by user verbs cmds") Reviewed-by: Steve Wise Signed-off-by: Mustafa Ismail Tested-by: Mike Marciniszyn Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f2d40c05ef9e..809a02800102 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -976,6 +976,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); + qp_attr->port_num = id_priv->id.port_num; + *qp_attr_mask |= IB_QP_PORT; } else ret = -ENOSYS; -- cgit v1.2.3 From bc3bd649ebb45c9b8cac19d1703fe012ba067600 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 19 Jul 2017 14:43:28 +0300 Subject: drm/mst: Fix error handling during MST sideband message reception commit 448421b5e93b9177c5698f0cf6f5e72d2995eeca upstream. Handle any error due to partial reads, timeouts etc. to avoid parsing uninitialized data subsequently. Also bail out if the parsing itself fails. Cc: Dave Airlie Cc: Lyude Cc: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Lyude Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170719114330.26540-2-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index f59771da52ee..ce422d8479ba 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2200,11 +2200,17 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) ret = drm_dp_dpcd_read(mgr->aux, basereg + curreply, replyblock, len); if (ret != len) { - DRM_DEBUG_KMS("failed to read a chunk\n"); + DRM_DEBUG_KMS("failed to read a chunk (len %d, ret %d)\n", + len, ret); + return; } + ret = drm_dp_sideband_msg_build(msg, replyblock, len, false); - if (ret == false) + if (!ret) { DRM_DEBUG_KMS("failed to build sideband msg\n"); + return; + } + curreply += len; replylen -= len; } -- cgit v1.2.3 From 48376e6b71a67616db860872c1fa3b2eae4ad359 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 19 Jul 2017 14:43:29 +0300 Subject: drm/mst: Avoid dereferencing a NULL mstb in drm_dp_mst_handle_up_req() commit 7f8b3987da54cb4d41ad2545cd4d7958b9a36bdf upstream. In case of an unknown broadcast message is sent mstb will remain unset, so check for this. Cc: Dave Airlie Cc: Lyude Cc: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Lyude Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170719114330.26540-3-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ce422d8479ba..4f297922ea22 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2328,7 +2328,9 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", msg.u.resource_stat.port_number, msg.u.resource_stat.available_pbn); } - drm_dp_put_mst_branch_device(mstb); + if (mstb) + drm_dp_put_mst_branch_device(mstb); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); } return ret; -- cgit v1.2.3 From a440425194692f9b1d65728615b8599b9b517c0b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 19 Jul 2017 16:46:32 +0300 Subject: drm/mst: Avoid processing partially received up/down message transactions commit 636c4c3e762b62aa93632c645ca65879285b16e3 upstream. Currently we may process up/down message transactions containing uninitialized data. This can happen if there was an error during the reception of any message in the transaction, but we happened to receive the last message correctly with the end-of-message flag set. To avoid this abort the reception of the transaction when the first error is detected, rejecting any messages until a message with the start-of-message flag is received (which will start a new transaction). This is also what the DP 1.4 spec 2.11.8.2 calls for in this case. In addtion this also prevents receiving bogus transactions without the first message with the the start-of-message flag set. v2: - unchanged v3: - git add the part that actually skips messages after an error in drm_dp_sideband_msg_build() Cc: Dave Airlie Cc: Lyude Cc: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Lyude Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170719134632.13366-1-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 4f297922ea22..db7890cb254e 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -330,6 +330,13 @@ static bool drm_dp_sideband_msg_build(struct drm_dp_sideband_msg_rx *msg, return false; } + /* + * ignore out-of-order messages or messages that are part of a + * failed transaction + */ + if (!recv_hdr.somt && !msg->have_somt) + return false; + /* get length contained in this portion */ msg->curchunk_len = recv_hdr.msg_len; msg->curchunk_hdrlen = hdrlen; @@ -2168,7 +2175,7 @@ out_unlock: } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume); -static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) +static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) { int len; u8 replyblock[32]; @@ -2183,12 +2190,12 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) replyblock, len); if (ret != len) { DRM_DEBUG_KMS("failed to read DPCD down rep %d %d\n", len, ret); - return; + return false; } ret = drm_dp_sideband_msg_build(msg, replyblock, len, true); if (!ret) { DRM_DEBUG_KMS("sideband msg build failed %d\n", replyblock[0]); - return; + return false; } replylen = msg->curchunk_len + msg->curchunk_hdrlen; @@ -2202,25 +2209,30 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) if (ret != len) { DRM_DEBUG_KMS("failed to read a chunk (len %d, ret %d)\n", len, ret); - return; + return false; } ret = drm_dp_sideband_msg_build(msg, replyblock, len, false); if (!ret) { DRM_DEBUG_KMS("failed to build sideband msg\n"); - return; + return false; } curreply += len; replylen -= len; } + return true; } static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) { int ret = 0; - drm_dp_get_one_sb_msg(mgr, false); + if (!drm_dp_get_one_sb_msg(mgr, false)) { + memset(&mgr->down_rep_recv, 0, + sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } if (mgr->down_rep_recv.have_eomt) { struct drm_dp_sideband_msg_tx *txmsg; @@ -2276,7 +2288,12 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) { int ret = 0; - drm_dp_get_one_sb_msg(mgr, true); + + if (!drm_dp_get_one_sb_msg(mgr, true)) { + memset(&mgr->up_req_recv, 0, + sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } if (mgr->up_req_recv.have_eomt) { struct drm_dp_sideband_msg_req_body msg; -- cgit v1.2.3 From b33da556255ce9703ab06805200bff244697b014 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 24 Apr 2017 15:15:28 -0700 Subject: mlx5: Avoid that mlx5_ib_sg_to_klms() overflows the klms[] array commit 99975cd4fda52974a767aa44fe0b1a8f74950d9d upstream. ib_map_mr_sg() can pass an SG-list to .map_mr_sg() that is larger than what fits into a single MR. .map_mr_sg() must not attempt to map more SG-list elements than what fits into a single MR. Hence make sure that mlx5_ib_sg_to_klms() does not write outside the MR klms[] array. Fixes: b005d3164713 ("mlx5: Add arbitrary sg list support") Signed-off-by: Bart Van Assche Reviewed-by: Max Gurtovoy Cc: Sagi Grimberg Cc: Leon Romanovsky Cc: Israel Rukshin Acked-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1fb31a47966d..0a260a06876d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1823,7 +1823,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, mr->ndescs = sg_nents; for_each_sg(sgl, sg, sg_nents, i) { - if (unlikely(i > mr->max_descs)) + if (unlikely(i >= mr->max_descs)) break; klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); -- cgit v1.2.3 From 5cf84432b422e92775aef5c43b0dfc58585fabb0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jun 2017 15:02:47 +0200 Subject: hfsplus: Don't clear SGID when inheriting ACLs commit 84969465ddc4f8aeb3b993123b571aa01c5f2683 upstream. When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit set, DIR1 is expected to have SGID bit set (and owning group equal to the owning group of 'DIR0'). However when 'DIR0' also has some default ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on 'DIR1' to get cleared if user is not member of the owning group. Fix the problem by creating __hfsplus_set_posix_acl() function that does not call posix_acl_update_mode() and use it when inheriting ACLs. That prevents SGID bit clearing and the mode has been properly set by posix_acl_create() anyway. Fixes: 073931017b49d9458aa351605b43a7e34598caef Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/hfsplus/posix_acl.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c index 9b92058a1240..6bb5d7c42888 100644 --- a/fs/hfsplus/posix_acl.c +++ b/fs/hfsplus/posix_acl.c @@ -51,8 +51,8 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) return acl; } -int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, - int type) +static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, + int type) { int err; char *xattr_name; @@ -64,12 +64,6 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, switch (type) { case ACL_TYPE_ACCESS: xattr_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - err = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (err) - return err; - } - err = 0; break; case ACL_TYPE_DEFAULT: @@ -105,6 +99,18 @@ end_set_acl: return err; } +int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int err; + + if (type == ACL_TYPE_ACCESS && acl) { + err = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (err) + return err; + } + return __hfsplus_set_posix_acl(inode, acl, type); +} + int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) { int err = 0; @@ -122,15 +128,15 @@ int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) return err; if (default_acl) { - err = hfsplus_set_posix_acl(inode, default_acl, - ACL_TYPE_DEFAULT); + err = __hfsplus_set_posix_acl(inode, default_acl, + ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!err) - err = hfsplus_set_posix_acl(inode, acl, - ACL_TYPE_ACCESS); + err = __hfsplus_set_posix_acl(inode, acl, + ACL_TYPE_ACCESS); posix_acl_release(acl); } return err; -- cgit v1.2.3 From 97de6f34b42b079befb14e58e7f8b0d6e989c014 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 11 Jul 2017 15:58:35 +0300 Subject: ovl: fix random return value on mount commit 8fc646b44385ff0a9853f6590497e43049eeb311 upstream. On failure to prepare_creds(), mount fails with a random return value, as err was last set to an integer cast of a valid lower mnt pointer or set to 0 if inodes index feature is enabled. Reported-by: Dan Carpenter Fixes: 3fe6e52f0626 ("ovl: override creds with the ones from ...") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 2a0148957d6a..e7c8ac41e288 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1310,6 +1310,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; + err = -ENOMEM; ufs->creator_cred = cred = prepare_creds(); if (!cred) goto out_put_lower_mnt; -- cgit v1.2.3 From 6b50bca7cdb0822628f45bea2bb028630999aa13 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 31 May 2017 13:32:00 -0400 Subject: acpi/nfit: Fix memory corruption/Unregister mce decoder on failure commit 7e700d2c59e5853c9126642976b4f5768f64c9b3 upstream. nfit_init() calls nfit_mce_register() on module load. When the module load fails the nfit mce decoder is not unregistered. The module's memory is freed leaving the decoder chain referencing junk. This will cause panics as future registrations will reference the free'd memory. Unregister the nfit mce decoder on module init failure. [v2]: register and then unregister mce handler to avoid losing mce events [v3]: also cleanup nfit workqueue Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Vishal Verma Cc: "Lee, Chun-Yi" Cc: Linda Knippers Cc: lszubowi@redhat.com Acked-by: Jeff Moyer Signed-off-by: Prarit Bhargava Reviewed-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 9ef3941eeff0..f3bc901ac930 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2945,6 +2945,8 @@ static struct acpi_driver acpi_nfit_driver = { static __init int nfit_init(void) { + int ret; + BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40); BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56); BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48); @@ -2972,8 +2974,14 @@ static __init int nfit_init(void) return -ENOMEM; nfit_mce_register(); + ret = acpi_bus_register_driver(&acpi_nfit_driver); + if (ret) { + nfit_mce_unregister(); + destroy_workqueue(nfit_wq); + } + + return ret; - return acpi_bus_register_driver(&acpi_nfit_driver); } static __exit void nfit_exit(void) -- cgit v1.2.3 From a18935b45e9700b5d21fce80ea82c1cb5efb4f9e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 28 Dec 2016 14:56:48 -0800 Subject: of: device: Export of_device_{get_modalias, uvent_modalias} to modules commit 7a3b7cd332db08546f3cdd984f11773e0d1999e7 upstream. The ULPI bus can be built as a module, and it will soon be calling these functions when it supports probing devices from DT. Export them so they can be used by the ULPI module. Acked-by: Rob Herring Cc: Signed-off-by: Stephen Boyd Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/of/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/of/device.c b/drivers/of/device.c index fd5cfad7c403..f7a970120055 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -225,6 +225,7 @@ ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len) return tsize; } +EXPORT_SYMBOL_GPL(of_device_get_modalias); /** * of_device_uevent - Display OF related uevent information @@ -287,3 +288,4 @@ int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) return 0; } +EXPORT_SYMBOL_GPL(of_device_uevent_modalias); -- cgit v1.2.3 From 1c7e5ca092c08bd2fde78a4eaad4a8afc5dc3555 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 29 Jun 2017 14:46:44 -0700 Subject: spmi: Include OF based modalias in device uevent commit d50daa2af2618dab6d21634e65a5fbcf4ae437d6 upstream. Include the OF-based modalias in the uevent sent when registering SPMI devices, so that user space has a chance to autoload the kernel module for the device. Tested-by: Rob Clark Reported-by: Rob Clark Reviewed-by: Stephen Boyd Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/spmi/spmi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/spmi/spmi.c b/drivers/spmi/spmi.c index 2b9b0941d9eb..6d23226e5f69 100644 --- a/drivers/spmi/spmi.c +++ b/drivers/spmi/spmi.c @@ -365,11 +365,23 @@ static int spmi_drv_remove(struct device *dev) return 0; } +static int spmi_drv_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + int ret; + + ret = of_device_uevent_modalias(dev, env); + if (ret != -ENODEV) + return ret; + + return 0; +} + static struct bus_type spmi_bus_type = { .name = "spmi", .match = spmi_device_match, .probe = spmi_drv_probe, .remove = spmi_drv_remove, + .uevent = spmi_drv_uevent, }; /** -- cgit v1.2.3 From 69fbb442144abc4a7288d42edb6eb8a1ee799d87 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Jun 2017 09:32:49 +0200 Subject: reiserfs: Don't clear SGID when inheriting ACLs commit 6883cd7f68245e43e91e5ee583b7550abf14523f upstream. When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit set, DIR1 is expected to have SGID bit set (and owning group equal to the owning group of 'DIR0'). However when 'DIR0' also has some default ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on 'DIR1' to get cleared if user is not member of the owning group. Fix the problem by moving posix_acl_update_mode() out of __reiserfs_set_acl() into reiserfs_set_acl(). That way the function will not be called when inheriting ACLs which is what we want as it prevents SGID bit clearing and the mode has been properly set by posix_acl_create() anyway. Fixes: 073931017b49d9458aa351605b43a7e34598caef CC: reiserfs-devel@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/xattr_acl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 3d2256a425ee..d92a1dc6ee70 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -37,7 +37,14 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) error = journal_begin(&th, inode->i_sb, jcreate_blocks); reiserfs_write_unlock(inode->i_sb); if (error == 0) { + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &inode->i_mode, + &acl); + if (error) + goto unlock; + } error = __reiserfs_set_acl(&th, inode, type, acl); +unlock: reiserfs_write_lock(inode->i_sb); error2 = journal_end(&th); reiserfs_write_unlock(inode->i_sb); @@ -241,11 +248,6 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - } break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; -- cgit v1.2.3 From 7bd804a05eb00954891abf66c6125d054b8a0f93 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 14 Jul 2017 11:51:48 +0100 Subject: PM / Domains: defer dev_pm_domain_set() until genpd->attach_dev succeeds if present commit 975e83cfb8dc16e7a2fdc58188c77c0c605876c2 upstream. If the genpd->attach_dev or genpd->power_on fails, genpd_dev_pm_attach may return -EPROBE_DEFER initially. However genpd_alloc_dev_data sets the PM domain for the device unconditionally. When subsequent attempts are made to call genpd_dev_pm_attach, it may return -EEXISTS checking dev->pm_domain without re-attempting to call attach_dev or power_on. platform_drv_probe then attempts to call drv->probe as the return value -EEXIST != -EPROBE_DEFER, which may end up in a situation where the device is accessed without it's power domain switched on. Fixes: f104e1e5ef57 (PM / Domains: Re-order initialization of generic_pm_domain_data) Signed-off-by: Sudeep Holla Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 951c2140d22e..8c7d0f33bd53 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1029,8 +1029,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, spin_unlock_irq(&dev->power.lock); - dev_pm_domain_set(dev, &genpd->domain); - return gpd_data; err_free: @@ -1044,8 +1042,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, static void genpd_free_dev_data(struct device *dev, struct generic_pm_domain_data *gpd_data) { - dev_pm_domain_set(dev, NULL); - spin_lock_irq(&dev->power.lock); dev->power.subsys_data->domain_data = NULL; @@ -1082,6 +1078,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (ret) goto out; + dev_pm_domain_set(dev, &genpd->domain); + genpd->device_count++; genpd->max_off_time_changed = true; @@ -1143,6 +1141,8 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, if (genpd->detach_dev) genpd->detach_dev(genpd, dev); + dev_pm_domain_set(dev, NULL); + list_del_init(&pdd->list_node); mutex_unlock(&genpd->lock); -- cgit v1.2.3 From 919e481152ce86bf1960a71b39a29ef643aaff03 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Thu, 20 Jul 2017 18:36:09 +0800 Subject: tracing: Fix kmemleak in instance_rmdir commit db9108e054700c96322b0f0028546aa4e643cf0b upstream. Hit the kmemleak when executing instance_rmdir, it forgot releasing mem of tracing_cpumask. With this fix, the warn does not appear any more. unreferenced object 0xffff93a8dfaa7c18 (size 8): comm "mkdir", pid 1436, jiffies 4294763622 (age 9134.308s) hex dump (first 8 bytes): ff ff ff ff ff ff ff ff ........ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] __kmalloc_node+0xf1/0x280 [] alloc_cpumask_var_node+0x23/0x30 [] alloc_cpumask_var+0xe/0x10 [] instance_mkdir+0x90/0x240 [] tracefs_syscall_mkdir+0x40/0x70 [] vfs_mkdir+0x109/0x1b0 [] SyS_mkdir+0xd0/0x100 [] do_syscall_64+0x67/0x150 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff Link: http://lkml.kernel.org/r/1500546969-12594-1-git-send-email-chuhu@redhat.com Fixes: ccfe9e42e451 ("tracing: Make tracing_cpumask available for all instances") Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 52ee2c51f4b3..53c308068e39 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7162,6 +7162,7 @@ static int instance_rmdir(const char *name) } kfree(tr->topts); + free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); -- cgit v1.2.3 From 91af5f04cd5b498ccafa1fa554be8d22e953bc82 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 24 Jul 2017 10:19:24 -0700 Subject: alarmtimer: don't rate limit one-shot timers Commit ff86bf0c65f1 ("alarmtimer: Rate limit periodic intervals") sets a minimum bound on the alarm timer interval. This minimum bound shouldn't be applied if the interval is 0. Otherwise, one-shot timers will be converted into periodic ones. Fixes: ff86bf0c65f1 ("alarmtimer: Rate limit periodic intervals") Reported-by: Ben Fennema Signed-off-by: Greg Hackmann Cc: stable@vger.kernel.org Cc: John Stultz Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/alarmtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 9ba04aa740b9..d67ef56ca9bc 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -629,7 +629,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, * Rate limit to the tick as a hot fix to prevent DOS. Will be * mopped up later. */ - if (ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC) + if (timr->it.alarm.interval.tv64 && + ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC) timr->it.alarm.interval = ktime_set(0, TICK_NSEC); exp = timespec_to_ktime(new_setting->it_value); -- cgit v1.2.3 From efcfbfb1d8bf756d1b58fe215bf4e419d176435b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Jul 2017 15:08:24 -0700 Subject: Linux 4.9.40 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0a2c352ba887..d9397a912c31 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 39 +SUBLEVEL = 40 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From 0f94b36de37c73bdb3affe7352acac819082d360 Mon Sep 17 00:00:00 2001 From: Yuejie Shi Date: Fri, 31 Mar 2017 15:10:20 +0800 Subject: af_key: Add lock to key dump commit 89e357d83c06b6fac581c3ca7f0ee3ae7e67109e upstream. A dump may come in the middle of another dump, modifying its dump structure members. This race condition will result in NULL pointer dereference in kernel. So add a lock to prevent that race. Fixes: 83321d6b9872 ("[AF_KEY]: Dump SA/SP entries non-atomically") Signed-off-by: Yuejie Shi Signed-off-by: Steffen Klassert Signed-off-by: Mark Salyzyn Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index d8d95b6415e4..2e1050ec2cf0 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -63,6 +63,7 @@ struct pfkey_sock { } u; struct sk_buff *skb; } dump; + struct mutex dump_lock; }; static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, @@ -143,6 +144,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; + struct pfkey_sock *pfk; int err; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -157,6 +159,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, if (sk == NULL) goto out; + pfk = pfkey_sk(sk); + mutex_init(&pfk->dump_lock); + sock->ops = &pfkey_ops; sock_init_data(sock, sk); @@ -285,13 +290,23 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) struct sadb_msg *hdr; int rc; + mutex_lock(&pfk->dump_lock); + if (!pfk->dump.dump) { + rc = 0; + goto out; + } + rc = pfk->dump.dump(pfk); - if (rc == -ENOBUFS) - return 0; + if (rc == -ENOBUFS) { + rc = 0; + goto out; + } if (pfk->dump.skb) { - if (!pfkey_can_dump(&pfk->sk)) - return 0; + if (!pfkey_can_dump(&pfk->sk)) { + rc = 0; + goto out; + } hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; @@ -302,6 +317,9 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) } pfkey_terminate_dump(pfk); + +out: + mutex_unlock(&pfk->dump_lock); return rc; } @@ -1806,19 +1824,26 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); - if (pfk->dump.dump != NULL) + mutex_lock(&pfk->dump_lock); + if (pfk->dump.dump != NULL) { + mutex_unlock(&pfk->dump_lock); return -EBUSY; + } proto = pfkey_satype2proto(hdr->sadb_msg_satype); - if (proto == 0) + if (proto == 0) { + mutex_unlock(&pfk->dump_lock); return -EINVAL; + } if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; filter = kmalloc(sizeof(*filter), GFP_KERNEL); - if (filter == NULL) + if (filter == NULL) { + mutex_unlock(&pfk->dump_lock); return -ENOMEM; + } memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, sizeof(xfrm_address_t)); @@ -1834,6 +1859,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); + mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } @@ -2693,14 +2719,18 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb { struct pfkey_sock *pfk = pfkey_sk(sk); - if (pfk->dump.dump != NULL) + mutex_lock(&pfk->dump_lock); + if (pfk->dump.dump != NULL) { + mutex_unlock(&pfk->dump_lock); return -EBUSY; + } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); + mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } -- cgit v1.2.3 From d97aff4f9770582a14461ed57c0985a7288d3480 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 20 Oct 2016 00:34:00 -0700 Subject: pstore: Make spinlock per zone instead of global commit 109704492ef637956265ec2eb72ae7b3b39eb6f4 upstream. Currently pstore has a global spinlock for all zones. Since the zones are independent and modify different areas of memory, there's no need to have a global lock, so we should use a per-zone lock as introduced here. Also, when ramoops's ftrace use-case has a FTRACE_PER_CPU flag introduced later, which splits the ftrace memory area into a single zone per CPU, it will eliminate the need for locking. In preparation for this, make the locking optional. Signed-off-by: Joel Fernandes [kees: updated commit message] Signed-off-by: Kees Cook Cc: Leo Yan Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 11 +++++------ include/linux/pstore_ram.h | 1 + 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 3975deec02f8..cb92055e6016 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -48,8 +48,6 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz) return atomic_read(&prz->buffer->start); } -static DEFINE_RAW_SPINLOCK(buffer_lock); - /* increase and wrap the start pointer, returning the old value */ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) { @@ -57,7 +55,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) int new; unsigned long flags; - raw_spin_lock_irqsave(&buffer_lock, flags); + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->start); new = old + a; @@ -65,7 +63,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) new -= prz->buffer_size; atomic_set(&prz->buffer->start, new); - raw_spin_unlock_irqrestore(&buffer_lock, flags); + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); return old; } @@ -77,7 +75,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) size_t new; unsigned long flags; - raw_spin_lock_irqsave(&buffer_lock, flags); + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->size); if (old == prz->buffer_size) @@ -89,7 +87,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) atomic_set(&prz->buffer->size, new); exit: - raw_spin_unlock_irqrestore(&buffer_lock, flags); + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); } static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, @@ -493,6 +491,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, prz->buffer->sig = sig; persistent_ram_zap(prz); + prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); return 0; } diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index c668c861c96c..244d2423dbaf 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -40,6 +40,7 @@ struct persistent_ram_zone { void *vaddr; struct persistent_ram_buffer *buffer; size_t buffer_size; + raw_spinlock_t buffer_lock; /* ECC correction */ char *par_buffer; -- cgit v1.2.3 From 26d01aa8a110b9809d2bd138be42ee39ed4e3183 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2017 10:20:32 -0800 Subject: net: reduce skb_warn_bad_offload() noise commit b2504a5dbef3305ef41988ad270b0e8ec289331c upstream. Dmitry reported warnings occurring in __skb_gso_segment() [1] All SKB_GSO_DODGY producers can allow user space to feed packets that trigger the current check. We could prevent them from doing so, rejecting packets, but this might add regressions to existing programs. It turns out our SKB_GSO_DODGY handlers properly set up checksum information that is needed anyway when packets needs to be segmented. By checking again skb_needs_check() after skb_mac_gso_segment(), we should remove these pesky warnings, at a very minor cost. With help from Willem de Bruijn [1] WARNING: CPU: 1 PID: 6768 at net/core/dev.c:2439 skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 lo: caps=(0x000000a2803b7c69, 0x0000000000000000) len=138 data_len=0 gso_size=15883 gso_type=4 ip_summed=0 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 6768 Comm: syz-executor1 Not tainted 4.9.0 #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ffff8801c063ecd8 ffffffff82346bdf ffffffff00000001 1ffff100380c7d2e ffffed00380c7d26 0000000041b58ab3 ffffffff84b37e38 ffffffff823468f1 ffffffff84820740 ffffffff84f289c0 dffffc0000000000 ffff8801c063ee20 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 [] panic+0x1fb/0x412 kernel/panic.c:179 [] __warn+0x1c4/0x1e0 kernel/panic.c:542 [] warn_slowpath_fmt+0xc5/0x100 kernel/panic.c:565 [] skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 [] __skb_gso_segment+0x482/0x780 net/core/dev.c:2706 [] skb_gso_segment include/linux/netdevice.h:3985 [inline] [] validate_xmit_skb+0x5c9/0xc20 net/core/dev.c:2969 [] __dev_queue_xmit+0xe6b/0x1e70 net/core/dev.c:3383 [] dev_queue_xmit+0x17/0x20 net/core/dev.c:3424 [] packet_snd net/packet/af_packet.c:2930 [inline] [] packet_sendmsg+0x32ed/0x4d30 net/packet/af_packet.c:2955 [] sock_sendmsg_nosec net/socket.c:621 [inline] [] sock_sendmsg+0xca/0x110 net/socket.c:631 [] ___sys_sendmsg+0x8fa/0x9f0 net/socket.c:1954 [] __sys_sendmsg+0x138/0x300 net/socket.c:1988 [] SYSC_sendmsg net/socket.c:1999 [inline] [] SyS_sendmsg+0x2d/0x50 net/socket.c:1995 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Willem de Bruijn Signed-off-by: David S. Miller Cc: Mark Salyzyn Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index c17952b6e0b6..69d604e7dff5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2723,11 +2723,12 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) { + struct sk_buff *segs; + if (unlikely(skb_needs_check(skb, tx_path))) { int err; - skb_warn_bad_offload(skb); - + /* We're going to init ->check field in TCP or UDP header */ err = skb_cow_head(skb, 0); if (err < 0) return ERR_PTR(err); @@ -2755,7 +2756,12 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_reset_mac_len(skb); - return skb_mac_gso_segment(skb, features); + segs = skb_mac_gso_segment(skb, features); + + if (unlikely(skb_needs_check(skb, tx_path))) + skb_warn_bad_offload(skb); + + return segs; } EXPORT_SYMBOL(__skb_gso_segment); -- cgit v1.2.3 From 3a79e1c8e709db0174dbafc5dcd9785e4bf36250 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Jun 2017 15:31:10 +0200 Subject: jfs: Don't clear SGID when inheriting ACLs commit 9bcf66c72d726322441ec82962994e69157613e4 upstream. When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit set, DIR1 is expected to have SGID bit set (and owning group equal to the owning group of 'DIR0'). However when 'DIR0' also has some default ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on 'DIR1' to get cleared if user is not member of the owning group. Fix the problem by moving posix_acl_update_mode() out of __jfs_set_acl() into jfs_set_acl(). That way the function will not be called when inheriting ACLs which is what we want as it prevents SGID bit clearing and the mode has been properly set by posix_acl_create() anyway. Fixes: 073931017b49d9458aa351605b43a7e34598caef CC: jfs-discussion@lists.sourceforge.net Signed-off-by: Jan Kara Signed-off-by: Dave Kleikamp Signed-off-by: Greg Kroah-Hartman --- fs/jfs/acl.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 7bc186f4ed4d..1be45c8d460d 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -77,13 +77,6 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: ea_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (rc) - return rc; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: ea_name = XATTR_NAME_POSIX_ACL_DEFAULT; @@ -118,9 +111,17 @@ int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) tid = txBegin(inode->i_sb, 0); mutex_lock(&JFS_IP(inode)->commit_mutex); + if (type == ACL_TYPE_ACCESS && acl) { + rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (rc) + goto end_tx; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } rc = __jfs_set_acl(tid, inode, type, acl); if (!rc) rc = txCommit(tid, 1, &inode, 0); +end_tx: txEnd(tid); mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; -- cgit v1.2.3 From 3d955095c597a6deac43ab2f388da12d11af7aee Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 16 Jul 2017 21:40:03 +0300 Subject: ALSA: fm801: Initialize chip after IRQ handler is registered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 610e1ae9b533be82b3aa118b907e0a703256913d upstream. The commit b56fa687e02b ("ALSA: fm801: detect FM-only card earlier") rearranged initialization calls, i.e. it makes snd_fm801_chip_init() to be called before we register interrupt handler and set PCI bus mastering. Somehow it prevents FM801-AU to work properly. Thus, partially revert initialization order changed by commit mentioned above. Fixes: b56fa687e02b ("ALSA: fm801: detect FM-only card earlier") Reported-by: Émeric MASCHINO Tested-by: Émeric MASCHINO Signed-off-by: Andy Shevchenko Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/fm801.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index c47287d79306..a178e0d03088 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1235,8 +1235,6 @@ static int snd_fm801_create(struct snd_card *card, } } - snd_fm801_chip_init(chip); - if ((chip->tea575x_tuner & TUNER_ONLY) == 0) { if (devm_request_irq(&pci->dev, pci->irq, snd_fm801_interrupt, IRQF_SHARED, KBUILD_MODNAME, chip)) { @@ -1248,6 +1246,8 @@ static int snd_fm801_create(struct snd_card *card, pci_set_master(pci); } + snd_fm801_chip_init(chip); + if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops)) < 0) { snd_fm801_free(chip); return err; -- cgit v1.2.3 From 5f8bdd5edc82987b40db73e8af93c1f0b9e10048 Mon Sep 17 00:00:00 2001 From: Daniel Dadap Date: Thu, 13 Jul 2017 19:27:39 -0500 Subject: ALSA: hda - Add missing NVIDIA GPU codec IDs to patch table commit 74ec118152ea494a25ebb677cbc83a75c982ac5f upstream. Add codec IDs for several recently released, pending, and historical NVIDIA GPU audio controllers to the patch table, to allow the correct patch functions to be selected for them. Signed-off-by: Daniel Dadap Reviewed-by: Andy Ritger Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4bf48336b0fc..775c67818bf1 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3600,11 +3600,15 @@ HDA_CODEC_ENTRY(0x1002aa01, "R6xx HDMI", patch_atihdmi), HDA_CODEC_ENTRY(0x10951390, "SiI1390 HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x10951392, "SiI1392 HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x17e80047, "Chrontel HDMI", patch_generic_hdmi), +HDA_CODEC_ENTRY(0x10de0001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de0002, "MCP77/78 HDMI", patch_nvhdmi_8ch_7x), HDA_CODEC_ENTRY(0x10de0003, "MCP77/78 HDMI", patch_nvhdmi_8ch_7x), +HDA_CODEC_ENTRY(0x10de0004, "GPU 04 HDMI", patch_nvhdmi_8ch_7x), HDA_CODEC_ENTRY(0x10de0005, "MCP77/78 HDMI", patch_nvhdmi_8ch_7x), HDA_CODEC_ENTRY(0x10de0006, "MCP77/78 HDMI", patch_nvhdmi_8ch_7x), HDA_CODEC_ENTRY(0x10de0007, "MCP79/7A HDMI", patch_nvhdmi_8ch_7x), +HDA_CODEC_ENTRY(0x10de0008, "GPU 08 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0009, "GPU 09 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de000a, "GPU 0a HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de000b, "GPU 0b HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de000c, "MCP89 HDMI", patch_nvhdmi), @@ -3631,17 +3635,40 @@ HDA_CODEC_ENTRY(0x10de0041, "GPU 41 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0042, "GPU 42 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0043, "GPU 43 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0044, "GPU 44 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0045, "GPU 45 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0050, "GPU 50 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0051, "GPU 51 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0052, "GPU 52 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0060, "GPU 60 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0061, "GPU 61 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0062, "GPU 62 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0067, "MCP67 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0073, "GPU 73 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0074, "GPU 74 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0076, "GPU 76 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de007b, "GPU 7b HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de007c, "GPU 7c HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de007e, "GPU 7e HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0080, "GPU 80 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0081, "GPU 81 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0082, "GPU 82 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0084, "GPU 84 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0090, "GPU 90 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0091, "GPU 91 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0092, "GPU 92 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0093, "GPU 93 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0094, "GPU 94 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0095, "GPU 95 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), +HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi), HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP", patch_via_hdmi), HDA_CODEC_ENTRY(0x11069f84, "VX11 HDMI/DP", patch_generic_hdmi), -- cgit v1.2.3 From afe9fc012bc1ba720d83959af3918de501fda3d5 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 25 Jul 2017 17:11:26 -0400 Subject: parisc: Prevent TLB speculation on flushed pages on CPUs that only support equivalent aliases commit ae7a609c34b6fb12328c553b5f9aab26ae74a28e upstream. Helge noticed that we flush the TLB page in flush_cache_page but not in flush_cache_range or flush_cache_mm. For a long time, we have had random segmentation faults building packages on machines with PA8800/8900 processors. These machines only support equivalent aliases. We don't see these faults on machines that don't require strict coherency. So, it appears TLB speculation sometimes leads to cache corruption on machines that require coherency. This patch adds TLB flushes to flush_cache_range and flush_cache_mm when coherency is required. We only flush the TLB in flush_cache_page when coherency is required. The patch also optimizes flush_cache_range. It turns out we always have the right context to use flush_user_dcache_range_asm and flush_user_icache_range_asm. The patch has been tested for some time on rp3440, rp3410 and A500-44. It's been boot tested on c8000. No random segmentation faults were observed during testing. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/cache.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 53ec75f8e237..139803a5d8cd 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -538,6 +538,10 @@ void flush_cache_mm(struct mm_struct *mm) struct vm_area_struct *vma; pgd_t *pgd; + /* Flush the TLB to avoid speculation if coherency is required. */ + if (parisc_requires_coherency()) + flush_tlb_all(); + /* Flushing the whole cache on each cpu takes forever on rp3440, etc. So, avoid it if the mm isn't too big. */ if (mm_total_size(mm) >= parisc_cache_flush_threshold) { @@ -594,33 +598,22 @@ flush_user_icache_range(unsigned long start, unsigned long end) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long addr; - pgd_t *pgd; - BUG_ON(!vma->vm_mm->context); + /* Flush the TLB to avoid speculation if coherency is required. */ + if (parisc_requires_coherency()) + flush_tlb_range(vma, start, end); + if ((end - start) >= parisc_cache_flush_threshold) { flush_cache_all(); return; } - if (vma->vm_mm->context == mfsp(3)) { - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - return; - } + BUG_ON(vma->vm_mm->context != mfsp(3)); - pgd = vma->vm_mm->pgd; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long pfn; - pte_t *ptep = get_ptep(pgd, addr); - if (!ptep) - continue; - pfn = pte_pfn(*ptep); - if (pfn_valid(pfn)) - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); - } + flush_user_dcache_range_asm(start, end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(start, end); } void @@ -629,7 +622,8 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long BUG_ON(!vma->vm_mm->context); if (pfn_valid(pfn)) { - flush_tlb_page(vma, vmaddr); + if (parisc_requires_coherency()) + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } -- cgit v1.2.3 From f0d23fa632430cee981ea2b662b52c62915fc9bc Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 25 Jul 2017 17:23:35 -0400 Subject: parisc: Extend disabled preemption in copy_user_page commit 56008c04ebc099940021b714da2d7779117cf6a7 upstream. It's always bothered me that we only disable preemption in copy_user_page around the call to flush_dcache_page_asm. This patch extends this to after the copy. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 139803a5d8cd..c721ea2fdbd8 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -452,8 +452,8 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, before it can be accessed through the kernel mapping. */ preempt_disable(); flush_dcache_page_asm(__pa(vfrom), vaddr); - preempt_enable(); copy_page_asm(vto, vfrom); + preempt_enable(); } EXPORT_SYMBOL(copy_user_page); -- cgit v1.2.3 From fa2aa76efe7d07b8920defea92072bf1df2df7b1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 25 Jul 2017 21:41:41 +0200 Subject: parisc: Suspend lockup detectors before system halt commit 56188832a50f09998cb570ba3771a1d25c193c0e upstream. Some machines can't power off the machine, so disable the lockup detectors to avoid this watchdog BUG to show up every few seconds: watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [systemd-shutdow:1] Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index e7ffde2758fc..7593787ed4c3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,7 @@ void machine_power_off(void) /* prevent soft lockup/stalled CPU messages for endless loop. */ rcu_sysrq_start(); + lockup_detector_suspend(); for (;;); } -- cgit v1.2.3 From 6d3d93ca2ef8ae97f68f1aa03ec5b826a7f29f1e Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Jul 2017 16:51:39 +0200 Subject: powerpc/pseries: Fix of_node_put() underflow during reconfig remove commit 4fd1bd443e80b12f0a01a45fb9a793206b41cb72 upstream. As for commit 68baf692c435 ("powerpc/pseries: Fix of_node_put() underflow during DLPAR remove"), the call to of_node_put() must be removed from pSeries_reconfig_remove_node(). dlpar_detach_node() and pSeries_reconfig_remove_node() both call of_detach_node(), and thus the node should not be released in both cases. Fixes: 0829f6d1f69e ("of: device_node kobject lifecycle fixes") Signed-off-by: Laurent Vivier Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/reconfig.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index cc66c49f07aa..666ad0611e63 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np) of_detach_node(np); of_node_put(parent); - of_node_put(np); /* Must decrement the refcount */ return 0; } -- cgit v1.2.3 From b087b8b11e52f3ae9278d01f62db7cddb7236fee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 24 Jul 2017 13:18:50 +1000 Subject: NFS: invalidate file size when taking a lock. commit 442ce0499c0535f8972b68fa1fda357357a5c953 upstream. Prior to commit ca0daa277aca ("NFS: Cache aggressively when file is open for writing"), NFS would revalidate, or invalidate, the file size when taking a lock. Since that commit it only invalidates the file content. If the file size is changed on the server while wait for the lock, the client will have an incorrect understanding of the file size and could corrupt data. This particularly happens when writing beyond the (supposed) end of file and can be easily be demonstrated with posix_fallocate(). If an application opens an empty file, waits for a write lock, and then calls posix_fallocate(), glibc will determine that the underlying filesystem doesn't support fallocate (assuming version 4.1 or earlier) and will write out a '0' byte at the end of each 4K page in the region being fallocated that is after the end of the file. NFS will (usually) detect that these writes are beyond EOF and will expand them to cover the whole page, and then will merge the pages. Consequently, NFS will write out large blocks of zeroes beyond where it thought EOF was. If EOF had moved, the pre-existing part of the file will be over-written. Locking should have protected against this, but it doesn't. This patch restores the use of nfs_zap_caches() which invalidated the cached attributes. When posix_fallocate() asks for the file size, the request will go to the server and get a correct answer. Fixes: ca0daa277aca ("NFS: Cache aggressively when file is open for writing") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a1de8ef63e56..84c1cb9237d0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -757,7 +757,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) */ nfs_sync_mapping(filp->f_mapping); if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - nfs_zap_mapping(inode, filp->f_mapping); + nfs_zap_caches(inode); out: return status; } -- cgit v1.2.3 From 7d2a354861e9257824a201d39a11e6a4d2a60cb4 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 28 Jul 2017 12:33:54 -0400 Subject: NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails to wake a waiter commit b7dbcc0e433f0f61acb89ed9861ec996be4f2b38 upstream. nfs4_retry_setlk() sets the task's state to TASK_INTERRUPTIBLE within the same region protected by the wait_queue's lock after checking for a notification from CB_NOTIFY_LOCK callback. However, after releasing that lock, a wakeup for that task may race in before the call to freezable_schedule_timeout_interruptible() and set TASK_WAKING, then freezable_schedule_timeout_interruptible() will set the state back to TASK_INTERRUPTIBLE before the task will sleep. The result is that the task will sleep for the entire duration of the timeout. Since we've already set TASK_INTERRUPTIBLE in the locked section, just use freezable_schedule_timout() instead. Fixes: a1d617d8f134 ("nfs: allow blocking locks to be awoken by lock callbacks") Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 401ea6e4cab8..46ca7881d80d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6419,7 +6419,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&q->lock, flags); - freezable_schedule_timeout_interruptible(NFS4_LOCK_MAXTIMEOUT); + freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT); } finish_wait(q, &wait); -- cgit v1.2.3 From 9eb088e57edd9aa38e7d7a6c86424b4e6d5400eb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 Jul 2017 15:32:30 +0800 Subject: crypto: authencesn - Fix digest_null crash commit 41cdf7a45389e01991ee31e3301ed83cb3e3f7dc upstream. When authencesn is used together with digest_null a crash will occur on the decrypt path. This is because normally we perform a special setup to preserve the ESN, but this is skipped if there is no authentication. However, on the post-authentication path it always expects the preservation to be in place, thus causing a crash when digest_null is used. This patch fixes this by also skipping the post-processing when there is no authentication. Fixes: 104880a6b470 ("crypto: authencesn - Convert to new AEAD...") Reported-by: Jan Tluka Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/authencesn.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 121010ac9962..18c94e1c31d1 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -248,6 +248,9 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, u8 *ihash = ohash + crypto_ahash_digestsize(auth); u32 tmp[2]; + if (!authsize) + goto decrypt; + /* Move high-order bits of sequence number back. */ scatterwalk_map_and_copy(tmp, dst, 4, 4, 0); scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0); @@ -256,6 +259,8 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, if (crypto_memneq(ihash, ohash, authsize)) return -EBADMSG; +decrypt: + sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, dst, assoclen); -- cgit v1.2.3 From d745f0f67b70afafd4d5f300ec422dae26bbd938 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Jul 2017 13:57:14 +1000 Subject: KVM: PPC: Book3S HV: Enable TM before accessing TM registers commit e47057151422a67ce08747176fa21cb3b526a2c9 upstream. Commit 46a704f8409f ("KVM: PPC: Book3S HV: Preserve userspace HTM state properly", 2017-06-15) added code to read transactional memory (TM) registers but forgot to enable TM before doing so. The result is that if userspace does have live values in the TM registers, a KVM_RUN ioctl will cause a host kernel crash like this: [ 181.328511] Unrecoverable TM Unavailable Exception f60 at d00000001e7d9980 [ 181.328605] Oops: Unrecoverable TM Unavailable Exception, sig: 6 [#1] [ 181.328613] SMP NR_CPUS=2048 [ 181.328613] NUMA [ 181.328618] PowerNV [ 181.328646] Modules linked in: vhost_net vhost tap nfs_layout_nfsv41_files rpcsec_gss_krb5 nfsv4 dns_resolver nfs +fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat +nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun ebtable_filter ebtables +ip6table_filter ip6_tables iptable_filter bridge stp llc kvm_hv kvm nfsd ses enclosure scsi_transport_sas ghash_generic +auth_rpcgss gf128mul xts sg ctr nfs_acl lockd vmx_crypto shpchp ipmi_powernv i2c_opal grace ipmi_devintf i2c_core +powernv_rng sunrpc ipmi_msghandler ibmpowernv uio_pdrv_genirq uio leds_powernv powernv_op_panel ip_tables xfs sd_mod +lpfc ipr bnx2x libata mdio ptp pps_core scsi_transport_fc libcrc32c dm_mirror dm_region_hash dm_log dm_mod [ 181.329278] CPU: 40 PID: 9926 Comm: CPU 0/KVM Not tainted 4.12.0+ #1 [ 181.329337] task: c000003fc6980000 task.stack: c000003fe4d80000 [ 181.329396] NIP: d00000001e7d9980 LR: d00000001e77381c CTR: d00000001e7d98f0 [ 181.329465] REGS: c000003fe4d837e0 TRAP: 0f60 Not tainted (4.12.0+) [ 181.329523] MSR: 9000000000009033 [ 181.329527] CR: 24022448 XER: 00000000 [ 181.329608] CFAR: d00000001e773818 SOFTE: 1 [ 181.329608] GPR00: d00000001e77381c c000003fe4d83a60 d00000001e7ef410 c000003fdcfe0000 [ 181.329608] GPR04: c000003fe4f00000 0000000000000000 0000000000000000 c000003fd7954800 [ 181.329608] GPR08: 0000000000000001 c000003fc6980000 0000000000000000 d00000001e7e2880 [ 181.329608] GPR12: d00000001e7d98f0 c000000007b19000 00000001295220e0 00007fffc0ce2090 [ 181.329608] GPR16: 0000010011886608 00007fff8c89f260 0000000000000001 00007fff8c080028 [ 181.329608] GPR20: 0000000000000000 00000100118500a6 0000010011850000 0000010011850000 [ 181.329608] GPR24: 00007fffc0ce1b48 0000010011850000 00000000d673b901 0000000000000000 [ 181.329608] GPR28: 0000000000000000 c000003fdcfe0000 c000003fdcfe0000 c000003fe4f00000 [ 181.330199] NIP [d00000001e7d9980] kvmppc_vcpu_run_hv+0x90/0x6b0 [kvm_hv] [ 181.330264] LR [d00000001e77381c] kvmppc_vcpu_run+0x2c/0x40 [kvm] [ 181.330322] Call Trace: [ 181.330351] [c000003fe4d83a60] [d00000001e773478] kvmppc_set_one_reg+0x48/0x340 [kvm] (unreliable) [ 181.330437] [c000003fe4d83b30] [d00000001e77381c] kvmppc_vcpu_run+0x2c/0x40 [kvm] [ 181.330513] [c000003fe4d83b50] [d00000001e7700b4] kvm_arch_vcpu_ioctl_run+0x114/0x2a0 [kvm] [ 181.330586] [c000003fe4d83bd0] [d00000001e7642f8] kvm_vcpu_ioctl+0x598/0x7a0 [kvm] [ 181.330658] [c000003fe4d83d40] [c0000000003451b8] do_vfs_ioctl+0xc8/0x8b0 [ 181.330717] [c000003fe4d83de0] [c000000000345a64] SyS_ioctl+0xc4/0x120 [ 181.330776] [c000003fe4d83e30] [c00000000000b004] system_call+0x58/0x6c [ 181.330833] Instruction dump: [ 181.330869] e92d0260 e9290b50 e9290108 792807e3 41820058 e92d0260 e9290b50 e9290108 [ 181.330941] 792ae8a4 794a1f87 408204f4 e92d0260 <7d4022a6> f9490ff0 e92d0260 7d4122a6 [ 181.331013] ---[ end trace 6f6ddeb4bfe92a92 ]--- The fix is just to turn on the TM bit in the MSR before accessing the registers. Fixes: 46a704f8409f ("KVM: PPC: Book3S HV: Preserve userspace HTM state properly") Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5c0298422300..e2bcf0c2d6df 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2828,6 +2828,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } + /* Enable TM so we can read the TM SPRs */ + mtmsr(mfmsr() | MSR_TM); current->thread.tm_tfhar = mfspr(SPRN_TFHAR); current->thread.tm_tfiar = mfspr(SPRN_TFIAR); current->thread.tm_texasr = mfspr(SPRN_TEXASR); -- cgit v1.2.3 From fabc7dffe9e123e5be2456139853a7e12c5adce8 Mon Sep 17 00:00:00 2001 From: Ofer Heifetz Date: Mon, 24 Jul 2017 09:17:40 +0300 Subject: md/raid5: add thread_group worker async_tx_issue_pending_all commit 7e96d559634b73a8158ee99a7abece2eacec2668 upstream. Since thread_group worker and raid5d kthread are not in sync, if worker writes stripe before raid5d then requests will be waiting for issue_pendig. Issue observed when building raid5 with ext4, in some build runs jbd2 would get hung and requests were waiting in the HW engine waiting to be issued. Fix this by adding a call to async_tx_issue_pending_all in the raid5_do_work. Signed-off-by: Ofer Heifetz Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8f117d6372c9..383f19c6bf24 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5843,6 +5843,8 @@ static void raid5_do_work(struct work_struct *work) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); + + async_tx_issue_pending_all(); blk_finish_plug(&plug); pr_debug("--- raid5worker inactive\n"); -- cgit v1.2.3 From e41779886b90cf928cdc3e790dd31104fdab00fa Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Mon, 17 Jul 2017 23:28:36 -0700 Subject: drm/vmwgfx: Fix gcc-7.1.1 warning commit fcfffdd8f98ac305285dca568b5065ef86be6458 upstream. The current code does not look correct, and the reason for it is probably lost. Since this now generates a compiler warning, fix it to what makes sense. Reported-by: Arnd Bergmann Reported-by: Linus Torvalds Signed-off-by: Sinclair Yeh Reviewed-by: Brian Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index c7b53d987f06..fefb9d995d2c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -519,7 +519,7 @@ static int vmw_cmd_invalid(struct vmw_private *dev_priv, struct vmw_sw_context *sw_context, SVGA3dCmdHeader *header) { - return capable(CAP_SYS_ADMIN) ? : -EINVAL; + return -EINVAL; } static int vmw_cmd_ok(struct vmw_private *dev_priv, -- cgit v1.2.3 From 34da5f74eb5caf1ae3873463aa5e2b801fa450c3 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 28 Jun 2017 08:24:45 -0400 Subject: drm/nouveau/disp/nv50-: bump max chans to 21 commit a90e049cacd965dade4dae7263b4d3fd550e78b6 upstream. GP102's cursors go from chan 17..20. Increase the array size to hold their data properly. Fixes: e50fcff15f ("drm/nouveau/disp/gp102: fix cursor/overlay immediate channel indices") Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index 1e1de6bfe85a..5893be9788d3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -27,7 +27,7 @@ struct nv50_disp { u8 type[3]; } pior; - struct nv50_disp_chan *chan[17]; + struct nv50_disp_chan *chan[21]; }; int nv50_disp_root_scanoutpos(NV50_DISP_MTHD_V0); -- cgit v1.2.3 From ae8faca6e2e727105f8ed485783ea25a25a425ee Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 25 Jul 2017 11:06:47 +1000 Subject: drm/nouveau/bar/gf100: fix access to upper half of BAR2 commit 38bcb208f60924a031b9f809f7cd252ea4a94e5f upstream. Bit 30 being set causes the upper half of BAR2 to stay in physical mode, mapped over the end of VRAM, even when the rest of the BAR has been set to virtual mode. We inherited our initial value from RM, but I'm not aware of any reason we need to keep it that way. This fixes severe GPU hang/lockup issues revealed by Wayland on F26. Shout-out to NVIDIA for the quick response with the potential cause! Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c index c794b2c2d21e..6d8f21290aa2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c @@ -129,7 +129,7 @@ gf100_bar_init(struct nvkm_bar *base) if (bar->bar[0].mem) { addr = nvkm_memory_addr(bar->bar[0].mem) >> 12; - nvkm_wr32(device, 0x001714, 0xc0000000 | addr); + nvkm_wr32(device, 0x001714, 0x80000000 | addr); } return 0; -- cgit v1.2.3 From e5cd34d10459e20a78514efcd7a45cfc370743c2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 15 Jun 2017 15:43:17 +1000 Subject: KVM: PPC: Book3S HV: Restore critical SPRs to host values on guest exit commit 4c3bb4ccd074e1a0552078c0bf94c662367a1658 upstream. This restores several special-purpose registers (SPRs) to sane values on guest exit that were missed before. TAR and VRSAVE are readable and writable by userspace, and we need to save and restore them to prevent the guest from potentially affecting userspace execution (not that TAR or VRSAVE are used by any known program that run uses the KVM_RUN ioctl). We save/restore these in kvmppc_vcpu_run_hv() rather than on every guest entry/exit. FSCR affects userspace execution in that it can prohibit access to certain facilities by userspace. We restore it to the normal value for the task on exit from the KVM_RUN ioctl. IAMR is normally 0, and is restored to 0 on guest exit. However, with a radix host on POWER9, it is set to a value that prevents the kernel from executing user-accessible memory. On POWER9, we save IAMR on guest entry and restore it on guest exit to the saved value rather than 0. On POWER8 we continue to set it to 0 on guest exit. PSPB is normally 0. We restore it to 0 on guest exit to prevent userspace taking advantage of the guest having set it non-zero (which would allow userspace to set its SMT priority to high). UAMOR is normally 0. We restore it to 0 on guest exit to prevent the AMR from being used as a covert channel between userspace processes, since the AMR is not context-switched at present. [paulus@ozlabs.org - removed IAMR bits that are only needed on POWER9] Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv.c | 11 +++++++++-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e2bcf0c2d6df..218cba2f5699 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2808,6 +2808,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) int r; int srcu_idx; unsigned long ebb_regs[3] = {}; /* shut up GCC */ + unsigned long user_tar = 0; + unsigned int user_vrsave; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -2858,12 +2860,14 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_all_to_thread(current); - /* Save userspace EBB register values */ + /* Save userspace EBB and other register values */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { ebb_regs[0] = mfspr(SPRN_EBBHR); ebb_regs[1] = mfspr(SPRN_EBBRR); ebb_regs[2] = mfspr(SPRN_BESCR); + user_tar = mfspr(SPRN_TAR); } + user_vrsave = mfspr(SPRN_VRSAVE); vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; @@ -2887,12 +2891,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) r = kvmppc_xics_rm_complete(vcpu, 0); } while (is_kvmppc_resume_guest(r)); - /* Restore userspace EBB register values */ + /* Restore userspace EBB and other register values */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { mtspr(SPRN_EBBHR, ebb_regs[0]); mtspr(SPRN_EBBRR, ebb_regs[1]); mtspr(SPRN_BESCR, ebb_regs[2]); + mtspr(SPRN_TAR, user_tar); + mtspr(SPRN_FSCR, current->thread.fscr); } + mtspr(SPRN_VRSAVE, user_vrsave); out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6f81adb112f1..b0e30e2de6f9 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1363,6 +1363,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 mtspr SPRN_TCSCR, r0 mtspr SPRN_WORT, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ @@ -1378,6 +1379,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 + mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ mfspr r8, SPRN_DSCR -- cgit v1.2.3 From c39c3aeb2b417f0f12ab729f9631d9f90cd830f3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 16 Jun 2017 11:53:19 +1000 Subject: KVM: PPC: Book3S HV: Save/restore host values of debug registers commit 7ceaa6dcd8c6f59588428cec37f3c8093dd1011f upstream. At present, HV KVM on POWER8 and POWER9 machines loses any instruction or data breakpoint set in the host whenever a guest is run. Instruction breakpoints are currently only used by xmon, but ptrace and the perf_event subsystem can set data breakpoints as well as xmon. To fix this, we save the host values of the debug registers (CIABR, DAWR and DAWRX) before entering the guest and restore them on exit. To provide space to save them in the stack frame, we expand the stack frame allocated by kvmppc_hv_entry() from 112 to 144 bytes. [paulus@ozlabs.org - Adjusted stack offsets since we aren't saving POWER9-specific registers.] Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 39 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b0e30e2de6f9..0447a22a4df6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -37,6 +37,13 @@ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +/* Stack frame offsets for kvmppc_hv_entry */ +#define SFS 112 +#define STACK_SLOT_TRAP (SFS-4) +#define STACK_SLOT_CIABR (SFS-16) +#define STACK_SLOT_DAWR (SFS-24) +#define STACK_SLOT_DAWRX (SFS-32) + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -289,10 +296,10 @@ kvm_novcpu_exit: bl kvmhv_accumulate_time #endif 13: mr r3, r12 - stw r12, 112-4(r1) + stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, 112-4(r1) + lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -537,7 +544,7 @@ kvmppc_hv_entry: */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -112(r1) + stdu r1, -SFS(r1) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) @@ -698,6 +705,16 @@ kvmppc_got_guest: mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + /* Save host values of some registers */ +BEGIN_FTR_SECTION + mfspr r5, SPRN_CIABR + mfspr r6, SPRN_DAWR + mfspr r7, SPRN_DAWRX + std r5, STACK_SLOT_CIABR(r1) + std r6, STACK_SLOT_DAWR(r1) + std r7, STACK_SLOT_DAWRX(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -1361,8 +1378,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) */ li r0, 0 mtspr SPRN_IAMR, r0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 mtspr SPRN_PSPB, r0 mtspr SPRN_TCSCR, r0 mtspr SPRN_WORT, r0 @@ -1521,6 +1536,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) slbia ptesync + /* Restore host values of some registers */ +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_CIABR(r1) + ld r6, STACK_SLOT_DAWR(r1) + ld r7, STACK_SLOT_DAWRX(r1) + mtspr SPRN_CIABR, r5 + mtspr SPRN_DAWR, r6 + mtspr SPRN_DAWRX, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do @@ -1654,8 +1679,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - ld r0, 112+PPC_LR_STKOFF(r1) - addi r1, r1, 112 + ld r0, SFS+PPC_LR_STKOFF(r1) + addi r1, r1, SFS mtlr r0 blr -- cgit v1.2.3 From 0f31691508c64a139da303f17b31ed3dcbf63de1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Aug 2017 12:26:04 -0700 Subject: Revert "powerpc/numa: Fix percpu allocations to be NUMA aware" This reverts commit b4624ff952ec7d268a9651cd9184a1995befc271 which is commit ba4a648f12f4cd0a8003dd229b6ca8a53348ee4b upstream. Michal Hocko writes: JFYI. We have encountered a regression after applying this patch on a large ppc machine. While the patch is the right thing to do it doesn't work well with the current vmalloc area size on ppc and large machines where NUMA nodes are very far from each other. Just for the reference the boot fails on such a machine with bunch of warning preceeding it. See http://lkml.kernel.org/r/20170724134240.GL25221@dhcp22.suse.cz It seems the right thing to do is to enlarge the vmalloc space on ppc but this is not the case in the upstream kernel yet AFAIK. It is also questionable whether that is a stable material but I will decision on you here. We have reverted this patch from our 4.4 based kernel. Newer kernels do not have enlarged vmalloc space yet AFAIK so they won't work properly eiter. This bug is quite rare though because you need a specific HW configuration to trigger the issue - namely NUMA nodes have to be far away from each other in the physical memory space. Cc: Michal Hocko Cc: Michael Ellerman Cc: Nicholas Piggin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/topology.h | 14 -------------- arch/powerpc/kernel/setup_64.c | 4 ++-- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 329771559cbb..8b3b46b7b0f2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,22 +44,8 @@ extern void __init dump_numa_cpu_topology(void); extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); -static inline int early_cpu_to_node(int cpu) -{ - int nid; - - nid = numa_cpu_lookup_table[cpu]; - - /* - * Fall back to node 0 if nid is unset (it should be, except bugs). - * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)). - */ - return (nid < 0) ? 0 : nid; -} #else -static inline int early_cpu_to_node(int cpu) { return 0; } - static inline void dump_numa_cpu_topology(void) {} static inline int sysfs_add_device_to_node(struct device *dev, int nid) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index ada71bee176d..a12be60181bf 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -595,7 +595,7 @@ void __init emergency_stack_init(void) static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align, + return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, __pa(MAX_DMA_ADDRESS)); } @@ -606,7 +606,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size) static int pcpu_cpu_distance(unsigned int from, unsigned int to) { - if (early_cpu_to_node(from) == early_cpu_to_node(to)) + if (cpu_to_node(from) == cpu_to_node(to)) return LOCAL_DISTANCE; else return REMOTE_DISTANCE; -- cgit v1.2.3 From 9bf0d78bf640c95a3e9e7e1eeb9e0596a651bbe7 Mon Sep 17 00:00:00 2001 From: Cheah Kok Cheong Date: Fri, 30 Dec 2016 19:25:52 +0800 Subject: Staging: comedi: comedi_fops: Avoid orphaned proc entry commit bf279ece37d2a3eaaa9813fcd7a1d8a81eb29c20 upstream. Move comedi_proc_init to the end to avoid orphaned proc entry if module loading failed. Signed-off-by: Cheah Kok Cheong Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index a34fd5afb9a8..ec9979070479 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2898,9 +2898,6 @@ static int __init comedi_init(void) comedi_class->dev_groups = comedi_dev_groups; - /* XXX requires /proc interface */ - comedi_proc_init(); - /* create devices files for legacy/manual use */ for (i = 0; i < comedi_num_legacy_minors; i++) { struct comedi_device *dev; @@ -2918,6 +2915,9 @@ static int __init comedi_init(void) mutex_unlock(&dev->mutex); } + /* XXX requires /proc interface */ + comedi_proc_init(); + return 0; } module_init(comedi_init); -- cgit v1.2.3 From 755f65501f704219eab609601c9fa043f2131fb9 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 19 Oct 2016 00:51:35 +0300 Subject: drm: rcar-du: Simplify and fix probe error handling commit 4f7b0d263833928e947e172eff2d2997179c5cb9 upstream. It isn't safe to call drm_dev_unregister() without first initializing mode setting with drm_mode_config_init(). This leads to a crash if either IO memory can't be remapped or vblank initialization fails. Fix this by reordering the initialization sequence. Move vblank initialization after the drm_mode_config_init() call, and move IO remapping before drm_dev_alloc() to avoid the need to perform clean up in case of failure. While at it remove the explicit drm_vblank_cleanup() call from rcar_du_remove() as the drm_dev_unregister() function already cleans up vblank. Signed-off-by: Laurent Pinchart Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_drv.c | 30 ++++++++++-------------------- drivers/gpu/drm/rcar-du/rcar_du_kms.c | 7 +++++++ 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 73c971e39b1c..ae125d070212 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -285,7 +285,6 @@ static int rcar_du_remove(struct platform_device *pdev) drm_kms_helper_poll_fini(ddev); drm_mode_config_cleanup(ddev); - drm_vblank_cleanup(ddev); drm_dev_unref(ddev); @@ -305,7 +304,7 @@ static int rcar_du_probe(struct platform_device *pdev) return -ENODEV; } - /* Allocate and initialize the DRM and R-Car device structures. */ + /* Allocate and initialize the R-Car device structure. */ rcdu = devm_kzalloc(&pdev->dev, sizeof(*rcdu), GFP_KERNEL); if (rcdu == NULL) return -ENOMEM; @@ -315,31 +314,22 @@ static int rcar_du_probe(struct platform_device *pdev) rcdu->dev = &pdev->dev; rcdu->info = of_match_device(rcar_du_of_table, rcdu->dev)->data; - ddev = drm_dev_alloc(&rcar_du_driver, &pdev->dev); - if (IS_ERR(ddev)) - return PTR_ERR(ddev); - - rcdu->ddev = ddev; - ddev->dev_private = rcdu; - platform_set_drvdata(pdev, rcdu); /* I/O resources */ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); rcdu->mmio = devm_ioremap_resource(&pdev->dev, mem); - if (IS_ERR(rcdu->mmio)) { - ret = PTR_ERR(rcdu->mmio); - goto error; - } - - /* Initialize vertical blanking interrupts handling. Start with vblank - * disabled for all CRTCs. - */ - ret = drm_vblank_init(ddev, (1 << rcdu->info->num_crtcs) - 1); - if (ret < 0) - goto error; + if (IS_ERR(rcdu->mmio)) + return PTR_ERR(rcdu->mmio); /* DRM/KMS objects */ + ddev = drm_dev_alloc(&rcar_du_driver, &pdev->dev); + if (IS_ERR(ddev)) + return PTR_ERR(ddev); + + rcdu->ddev = ddev; + ddev->dev_private = rcdu; + ret = rcar_du_modeset_init(rcdu); if (ret < 0) { if (ret != -EPROBE_DEFER) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 392c7e6de042..cfc302c65b0b 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -567,6 +567,13 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) if (ret < 0) return ret; + /* Initialize vertical blanking interrupts handling. Start with vblank + * disabled for all CRTCs. + */ + ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1); + if (ret < 0) + return ret; + /* Initialize the groups. */ num_groups = DIV_ROUND_UP(rcdu->num_crtcs, 2); -- cgit v1.2.3 From 7b4e4b18ea64f95a8502199e792cbe75297c66ef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Jul 2017 22:20:23 +0200 Subject: smp/hotplug: Move unparking of percpu threads to the control CPU commit 9cd4f1a4e7a858849e889a081a99adff83e08e4c upstream. Vikram reported the following backtrace: BUG: scheduling while atomic: swapper/7/0/0x00000002 CPU: 7 PID: 0 Comm: swapper/7 Not tainted 4.9.32-perf+ #680 schedule schedule_hrtimeout_range_clock schedule_hrtimeout wait_task_inactive __kthread_bind_mask __kthread_bind __kthread_unpark kthread_unpark cpuhp_online_idle cpu_startup_entry secondary_start_kernel He analyzed correctly that a parked cpu hotplug thread of an offlined CPU was still on the runqueue when the CPU came back online and tried to unpark it. This causes the thread which invoked kthread_unpark() to call wait_task_inactive() and subsequently schedule() with preemption disabled. His proposed workaround was to "make sure" that a parked thread has scheduled out when the CPU goes offline, so the situation cannot happen. But that's still wrong because the root cause is not the fact that the percpu thread is still on the runqueue and neither that preemption is disabled, which could be simply solved by enabling preemption before calling kthread_unpark(). The real issue is that the calling thread is the idle task of the upcoming CPU, which is not supposed to call anything which might sleep. The moron, who wrote that code, missed completely that kthread_unpark() might end up in schedule(). The solution is simpler than expected. The thread which controls the hotplug operation is waiting for the CPU to call complete() on the hotplug state completion. So the idle task of the upcoming CPU can set its state to CPUHP_AP_ONLINE_IDLE and invoke complete(). This in turn wakes the control task on a different CPU, which then can safely do the unpark and kick the now unparked hotplug thread of the upcoming CPU to complete the bringup to the final target state. Control CPU AP bringup_cpu(); __cpu_up() ------------> bringup_ap(); bringup_wait_for_ap() wait_for_completion(); cpuhp_online_idle(); <------------ complete(); unpark(AP->stopper); unpark(AP->hotplugthread); while(1) do_idle(); kick(AP->hotplugthread); wait_for_completion(); hotplug_thread() run_online_callbacks(); complete(); Fixes: 8df3e07e7f21 ("cpu/hotplug: Let upcoming cpu bring itself fully up") Reported-by: Vikram Mulukutla Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Sebastian Sewior Cc: Rusty Russell Cc: Tejun Heo Cc: Andrew Morton Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707042218020.2131@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 8f52977aad59..b4b27629373c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -410,11 +410,25 @@ static int notify_online(unsigned int cpu) return 0; } +static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st); + static int bringup_wait_for_ap(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ wait_for_completion(&st->done); + BUG_ON(!cpu_online(cpu)); + + /* Unpark the stopper thread and the hotplug thread of the target cpu */ + stop_machine_unpark(cpu); + kthread_unpark(st->thread); + + /* Should we go further up ? */ + if (st->target > CPUHP_AP_ONLINE_IDLE) { + __cpuhp_kick_ap_work(st); + wait_for_completion(&st->done); + } return st->result; } @@ -437,9 +451,7 @@ static int bringup_cpu(unsigned int cpu) cpu_notify(CPU_UP_CANCELED, cpu); return ret; } - ret = bringup_wait_for_ap(cpu); - BUG_ON(!cpu_online(cpu)); - return ret; + return bringup_wait_for_ap(cpu); } /* @@ -974,31 +986,20 @@ void notify_cpu_starting(unsigned int cpu) } /* - * Called from the idle task. We need to set active here, so we can kick off - * the stopper thread and unpark the smpboot threads. If the target state is - * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the - * cpu further. + * Called from the idle task. Wake up the controlling task which brings the + * stopper and the hotplug thread of the upcoming CPU up and then delegates + * the rest of the online bringup to the hotplug thread. */ void cpuhp_online_idle(enum cpuhp_state state) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); - unsigned int cpu = smp_processor_id(); /* Happens for the boot cpu */ if (state != CPUHP_AP_ONLINE_IDLE) return; st->state = CPUHP_AP_ONLINE_IDLE; - - /* Unpark the stopper thread and the hotplug thread of this cpu */ - stop_machine_unpark(cpu); - kthread_unpark(st->thread); - - /* Should we go further up ? */ - if (st->target > CPUHP_AP_ONLINE_IDLE) - __cpuhp_kick_ap_work(st); - else - complete(&st->done); + complete(&st->done); } /* Requires cpu_add_remove_lock to be held */ -- cgit v1.2.3 From 6b3d13fe67da15aca3186c11c016b6abd00f0469 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 11 Jul 2017 22:06:24 +0200 Subject: smp/hotplug: Replace BUG_ON and react useful commit dea1d0f5f1284e3defee4b8484d9fc230686cd42 upstream. The move of the unpark functions to the control thread moved the BUG_ON() there as well. While it made some sense in the idle thread of the upcoming CPU, it's bogus to crash the control thread on the already online CPU, especially as the function has a return value and the callsite is prepared to handle an error return. Replace it with a WARN_ON_ONCE() and return a proper error code. Fixes: 9cd4f1a4e7a8 ("smp/hotplug: Move unparking of percpu threads to the control CPU") Rightfully-ranted-at-by: Linux Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index b4b27629373c..26a4f74bff83 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -418,7 +418,8 @@ static int bringup_wait_for_ap(unsigned int cpu) /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ wait_for_completion(&st->done); - BUG_ON(!cpu_online(cpu)); + if (WARN_ON_ONCE((!cpu_online(cpu)))) + return -ECANCELED; /* Unpark the stopper thread and the hotplug thread of the target cpu */ stop_machine_unpark(cpu); -- cgit v1.2.3 From 35bdf9a61dc9bd8e47f988b729e038a1ac8b7c9d Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sat, 4 Feb 2017 10:16:56 +0900 Subject: nfc: Fix hangup of RC-S380* in port100_send_ack() commit 2497128133f8169b24b928852ba6eae34fc495e5 upstream. If port100_send_ack() was called twice or more, it has race to hangup. port100_send_ack() port100_send_ack() init_completion() [...] dev->cmd_cancel = true /* this removes previous from completion */ init_completion() [...] dev->cmd_cancel = true wait_for_completion() /* never be waked up */ wait_for_completion() Like above race, this code is not assuming port100_send_ack() is called twice or more. To fix, this checks dev->cmd_cancel to know if prior cancel is in-flight or not. And never be remove prior task from completion by using reinit_completion(), so this guarantees to be waked up properly soon or later. Signed-off-by: OGAWA Hirofumi Signed-off-by: Samuel Ortiz Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/port100.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 2b2330b235e6..073e4a478c89 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -725,23 +725,33 @@ static int port100_submit_urb_for_ack(struct port100 *dev, gfp_t flags) static int port100_send_ack(struct port100 *dev) { - int rc; + int rc = 0; mutex_lock(&dev->out_urb_lock); - init_completion(&dev->cmd_cancel_done); + /* + * If prior cancel is in-flight (dev->cmd_cancel == true), we + * can skip to send cancel. Then this will wait the prior + * cancel, or merged into the next cancel rarely if next + * cancel was started before waiting done. In any case, this + * will be waked up soon or later. + */ + if (!dev->cmd_cancel) { + reinit_completion(&dev->cmd_cancel_done); - usb_kill_urb(dev->out_urb); + usb_kill_urb(dev->out_urb); - dev->out_urb->transfer_buffer = ack_frame; - dev->out_urb->transfer_buffer_length = sizeof(ack_frame); - rc = usb_submit_urb(dev->out_urb, GFP_KERNEL); + dev->out_urb->transfer_buffer = ack_frame; + dev->out_urb->transfer_buffer_length = sizeof(ack_frame); + rc = usb_submit_urb(dev->out_urb, GFP_KERNEL); - /* Set the cmd_cancel flag only if the URB has been successfully - * submitted. It will be reset by the out URB completion callback - * port100_send_complete(). - */ - dev->cmd_cancel = !rc; + /* + * Set the cmd_cancel flag only if the URB has been + * successfully submitted. It will be reset by the out + * URB completion callback port100_send_complete(). + */ + dev->cmd_cancel = !rc; + } mutex_unlock(&dev->out_urb_lock); @@ -928,8 +938,8 @@ static void port100_send_complete(struct urb *urb) struct port100 *dev = urb->context; if (dev->cmd_cancel) { + complete_all(&dev->cmd_cancel_done); dev->cmd_cancel = false; - complete(&dev->cmd_cancel_done); } switch (urb->status) { @@ -1543,6 +1553,7 @@ static int port100_probe(struct usb_interface *interface, PORT100_COMM_RF_HEAD_MAX_LEN; dev->skb_tailroom = PORT100_FRAME_TAIL_LEN; + init_completion(&dev->cmd_cancel_done); INIT_WORK(&dev->cmd_complete_work, port100_wq_cmd_complete); /* The first thing to do with the Port-100 is to set the command type -- cgit v1.2.3 From 57154f0302cd39a7ff5f3f4ceef23bda4112c77e Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 20 Dec 2016 21:09:04 +0000 Subject: nfc: fdp: fix NULL pointer dereference commit b6355fb3f5f40bbce165847d277e64896cab8f95 upstream. We are checking phy after dereferencing it. We can print the debug information after checking it. If phy is NULL then we will get a good stack trace to tell us that we are in this irq handler. Signed-off-by: Sudip Mukherjee Signed-off-by: Samuel Ortiz Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/fdp/i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index 5e797d5c38ed..712936f5d2d6 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -210,14 +210,14 @@ static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id) struct sk_buff *skb; int r; - client = phy->i2c_dev; - dev_dbg(&client->dev, "%s\n", __func__); - if (!phy || irq != phy->i2c_dev->irq) { WARN_ON_ONCE(1); return IRQ_NONE; } + client = phy->i2c_dev; + dev_dbg(&client->dev, "%s\n", __func__); + r = fdp_nci_i2c_read(phy, &skb); if (r == -EREMOTEIO) -- cgit v1.2.3 From 6c78197e4a69c19e61dfe904fdc661b2aee8ec20 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Mar 2017 12:34:49 -0800 Subject: net: phy: Do not perform software reset for Generic PHY commit 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 upstream. The Generic PHY driver is a catch-all PHY driver and it should preserve whatever prior initialization has been done by boot loader or firmware agents. For specific PHY device configuration it is expected that a specialized PHY driver would take over that role. Resetting the generic PHY was a bad idea that has lead to several complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()"). Reported-by: Felix Fietkau Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 32b555a72e13..9e7b7836774f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1792,7 +1792,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", - .soft_reset = genphy_soft_reset, + .soft_reset = genphy_no_soft_reset, .config_init = genphy_config_init, .features = PHY_GBIT_FEATURES | SUPPORTED_MII | SUPPORTED_AUI | SUPPORTED_FIBRE | diff --git a/include/linux/phy.h b/include/linux/phy.h index 6c9b1e0006ee..8431c8c0c320 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -799,6 +799,10 @@ int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); +static inline int genphy_no_soft_reset(struct phy_device *phydev) +{ + return 0; +} void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); -- cgit v1.2.3 From b756862459d2b8a977538e5f61345e3633c51d0f Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 31 May 2017 09:40:11 +0800 Subject: isdn: Fix a sleep-in-atomic bug commit e8f4ae85439f34bec3b0ab69223a41809dab28c9 upstream. The driver may sleep under a spin lock, the function call path is: isdn_ppp_mp_receive (acquire the lock) isdn_ppp_mp_reassembly isdn_ppp_push_higher isdn_ppp_decompress isdn_ppp_ccp_reset_trans isdn_ppp_ccp_reset_alloc_state kzalloc(GFP_KERNEL) --> may sleep To fixed it, the "GFP_KERNEL" is replaced with "GFP_ATOMIC". Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_ppp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 9c1e8adaf4fc..bf3fbd00a091 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -2364,7 +2364,7 @@ static struct ippp_ccp_reset_state *isdn_ppp_ccp_reset_alloc_state(struct ippp_s id); return NULL; } else { - rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_KERNEL); + rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_ATOMIC); if (!rs) return NULL; rs->state = CCPResetIdle; -- cgit v1.2.3 From 7b3a66739ff01fcd9b8007a18ddd29edd2cb74f7 Mon Sep 17 00:00:00 2001 From: Annie Cherkaev Date: Sat, 15 Jul 2017 15:08:58 -0600 Subject: isdn/i4l: fix buffer overflow commit 9f5af546e6acc30f075828cb58c7f09665033967 upstream. This fixes a potential buffer overflow in isdn_net.c caused by an unbounded strcpy. [ ISDN seems to be effectively unmaintained, and the I4L driver in particular is long deprecated, but in case somebody uses this.. - Linus ] Signed-off-by: Jiten Thakkar Signed-off-by: Annie Cherkaev Cc: Karsten Keil Cc: Kees Cook Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_common.c | 1 + drivers/isdn/i4l/isdn_net.c | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index 9b856e1890d1..e4c43a17b333 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -1379,6 +1379,7 @@ isdn_ioctl(struct file *file, uint cmd, ulong arg) if (arg) { if (copy_from_user(bname, argp, sizeof(bname) - 1)) return -EFAULT; + bname[sizeof(bname)-1] = 0; } else return -EINVAL; ret = mutex_lock_interruptible(&dev->mtx); diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index c151c6daa67e..f63a110b7bcb 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -2611,10 +2611,9 @@ isdn_net_newslave(char *parm) char newname[10]; if (p) { - /* Slave-Name MUST not be empty */ - if (!strlen(p + 1)) + /* Slave-Name MUST not be empty or overflow 'newname' */ + if (strscpy(newname, p + 1, sizeof(newname)) <= 0) return NULL; - strcpy(newname, p + 1); *p = 0; /* Master must already exist */ if (!(n = isdn_net_findif(parm))) -- cgit v1.2.3 From 59153e6589366f09eb42b92c8bc8c2fce72fe8fe Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Mon, 14 Nov 2016 14:25:23 +0100 Subject: ath10k: fix null deref on wmi-tlv when trying spectral scan commit 18ae68fff392e445af3c2d8be9bef8a16e1c72a7 upstream. WMI ops wrappers did not properly check for null function pointers for spectral scan. This caused null dereference crash with WMI-TLV based firmware which doesn't implement spectral scan. The crash could be triggered with: ip link set dev wlan0 up echo background > /sys/kernel/debug/ieee80211/phy0/ath10k/spectral_scan_ctl The crash looked like this: [ 168.031989] BUG: unable to handle kernel NULL pointer dereference at (null) [ 168.037406] IP: [< (null)>] (null) [ 168.040395] PGD cdd4067 PUD fa0f067 PMD 0 [ 168.043303] Oops: 0010 [#1] SMP [ 168.045377] Modules linked in: ath10k_pci(O) ath10k_core(O) ath mac80211 cfg80211 [last unloaded: cfg80211] [ 168.051560] CPU: 1 PID: 1380 Comm: bash Tainted: G W O 4.8.0 #78 [ 168.054336] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 168.059183] task: ffff88000c460c00 task.stack: ffff88000d4bc000 [ 168.061736] RIP: 0010:[<0000000000000000>] [< (null)>] (null) ... [ 168.100620] Call Trace: [ 168.101910] [] ? ath10k_spectral_scan_config+0x96/0x200 [ath10k_core] [ 168.104871] [] ? filemap_fault+0xb2/0x4a0 [ 168.106696] [] write_file_spec_scan_ctl+0x116/0x280 [ath10k_core] [ 168.109618] [] full_proxy_write+0x51/0x80 [ 168.111443] [] __vfs_write+0x28/0x120 [ 168.113090] [] ? security_file_permission+0x3d/0xc0 [ 168.114932] [] ? percpu_down_read+0x12/0x60 [ 168.116680] [] vfs_write+0xb8/0x1a0 [ 168.118293] [] SyS_write+0x46/0xa0 [ 168.119912] [] entry_SYSCALL_64_fastpath+0x1a/0xa4 [ 168.121737] Code: Bad RIP value. [ 168.123318] RIP [< (null)>] (null) Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi-ops.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index c9a8bb1186f2..c7956e181f80 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -660,6 +660,9 @@ ath10k_wmi_vdev_spectral_conf(struct ath10k *ar, struct sk_buff *skb; u32 cmd_id; + if (!ar->wmi.ops->gen_vdev_spectral_conf) + return -EOPNOTSUPP; + skb = ar->wmi.ops->gen_vdev_spectral_conf(ar, arg); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -675,6 +678,9 @@ ath10k_wmi_vdev_spectral_enable(struct ath10k *ar, u32 vdev_id, u32 trigger, struct sk_buff *skb; u32 cmd_id; + if (!ar->wmi.ops->gen_vdev_spectral_enable) + return -EOPNOTSUPP; + skb = ar->wmi.ops->gen_vdev_spectral_enable(ar, vdev_id, trigger, enable); if (IS_ERR(skb)) -- cgit v1.2.3 From 2f16bcd4dba9980a1d237b0fa0c697aa7547c2d6 Mon Sep 17 00:00:00 2001 From: Lior David Date: Wed, 23 Nov 2016 16:06:41 +0200 Subject: wil6210: fix deadlock when using fw_no_recovery option commit dfb5b098e0f40b68aa07f2ec55f4dd762efefbfa upstream. When FW crashes with no_fw_recovery option, driver waits for manual recovery with wil->mutex held, this can easily create deadlocks. Fix the problem by moving the wait outside the lock. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/main.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index e7130b54d1d8..24b07a0ce6f7 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -384,18 +384,19 @@ static void wil_fw_error_worker(struct work_struct *work) wil->last_fw_recovery = jiffies; + wil_info(wil, "fw error recovery requested (try %d)...\n", + wil->recovery_count); + if (!no_fw_recovery) + wil->recovery_state = fw_recovery_running; + if (wil_wait_for_recovery(wil) != 0) + return; + mutex_lock(&wil->mutex); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: - wil_info(wil, "fw error recovery requested (try %d)...\n", - wil->recovery_count); - if (!no_fw_recovery) - wil->recovery_state = fw_recovery_running; - if (0 != wil_wait_for_recovery(wil)) - break; - + /* silent recovery, upper layers will see disconnect */ __wil_down(wil); __wil_up(wil); break; -- cgit v1.2.3 From a23fba8182039d7162d6b19c9030a414f43f7f62 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 21 Mar 2017 11:30:14 +0000 Subject: mailbox: always wait in mbox_send_message for blocking Tx mode commit c61b781ee084e69855477d23dd33e7e6caad652c upstream. There exists a race when msg_submit return immediately as there was an active request being processed which may have completed just before it's checked again in mbox_send_message. This will result in return to the caller without waiting in mbox_send_message even when it's blocking Tx. This patch fixes the issue by waiting for the completion always if Tx is in blocking mode. Fixes: 2b6d83e2b8b7 ("mailbox: Introduce framework for mailbox") Reported-by: Alexey Klimov Signed-off-by: Sudeep Holla Reviewed-by: Alexey Klimov Signed-off-by: Jassi Brar Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/mailbox/mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 4a36632c236f..e9e6d6672967 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -261,7 +261,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) msg_submit(chan); - if (chan->cl->tx_block && chan->active_req) { + if (chan->cl->tx_block) { unsigned long wait; int ret; -- cgit v1.2.3 From abe9090ac07b1294c88686336ea881e833312a9e Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 21 Mar 2017 11:30:15 +0000 Subject: mailbox: skip complete wait event if timer expired commit cc6eeaa3029a6dbcb4ad41b1f92876483bd88965 upstream. If a wait_for_completion_timeout() call returns due to a timeout, complete() can get called after returning from the wait which is incorrect and can cause subsequent transmissions on a channel to fail. Since the wait_for_completion_timeout() sees the completion variable is non-zero caused by the erroneous/spurious complete() call, and it immediately returns without waiting for the time as expected by the client. This patch fixes the issue by skipping complete() call for the timer expiry. Fixes: 2b6d83e2b8b7 ("mailbox: Introduce framework for mailbox") Reported-by: Alexey Klimov Signed-off-by: Sudeep Holla Signed-off-by: Jassi Brar Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/mailbox/mailbox.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index e9e6d6672967..77ac049e51b6 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -108,7 +108,7 @@ static void tx_tick(struct mbox_chan *chan, int r) if (mssg && chan->cl->tx_done) chan->cl->tx_done(chan->cl, mssg, r); - if (chan->cl->tx_block) + if (r != -ETIME && chan->cl->tx_block) complete(&chan->tx_complete); } @@ -272,8 +272,8 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) ret = wait_for_completion_timeout(&chan->tx_complete, wait); if (ret == 0) { - t = -EIO; - tx_tick(chan, -EIO); + t = -ETIME; + tx_tick(chan, t); } } -- cgit v1.2.3 From 016a638a518c828719147e2f2d9cd9a0941fe6dd Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 21 Mar 2017 11:30:16 +0000 Subject: mailbox: handle empty message in tx_tick commit cb710ab1d8a23f68ff8f45aedf3e552bb90e70de upstream. We already check if the message is empty before calling the client tx_done callback. Calling completion on a wait event is also invalid if the message is empty. This patch moves the existing empty message check earlier. Fixes: 2b6d83e2b8b7 ("mailbox: Introduce framework for mailbox") Signed-off-by: Sudeep Holla Signed-off-by: Jassi Brar Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/mailbox/mailbox.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 77ac049e51b6..87ef465c6947 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -104,8 +104,11 @@ static void tx_tick(struct mbox_chan *chan, int r) /* Submit next message */ msg_submit(chan); + if (!mssg) + return; + /* Notify the client */ - if (mssg && chan->cl->tx_done) + if (chan->cl->tx_done) chan->cl->tx_done(chan->cl, mssg, r); if (r != -ETIME && chan->cl->tx_block) -- cgit v1.2.3 From 62b5776c8c86bc91acf7693a00d1ca82d3aa6a9c Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 8 Feb 2017 14:27:27 +0300 Subject: sched/cgroup: Move sched_online_group() back into css_online() to fix crash commit 96b777452d8881480fd5be50112f791c17db4b6b upstream. Commit: 2f5177f0fd7e ("sched/cgroup: Fix/cleanup cgroup teardown/init") .. moved sched_online_group() from css_online() to css_alloc(). It exposes half-baked task group into global lists before initializing generic cgroup stuff. LTP testcase (third in cgroup_regression_test) written for testing similar race in kernels 2.6.26-2.6.28 easily triggers this oops: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: kernfs_path_from_node_locked+0x260/0x320 CPU: 1 PID: 30346 Comm: cat Not tainted 4.10.0-rc5-test #4 Call Trace: ? kernfs_path_from_node+0x4f/0x60 kernfs_path_from_node+0x3e/0x60 print_rt_rq+0x44/0x2b0 print_rt_stats+0x7a/0xd0 print_cpu+0x2fc/0xe80 ? __might_sleep+0x4a/0x80 sched_debug_show+0x17/0x30 seq_read+0xf2/0x3b0 proc_reg_read+0x42/0x70 __vfs_read+0x28/0x130 ? security_file_permission+0x9b/0xc0 ? rw_verify_area+0x4e/0xb0 vfs_read+0xa5/0x170 SyS_read+0x46/0xa0 entry_SYSCALL_64_fastpath+0x1e/0xad Here the task group is already linked into the global RCU-protected 'task_groups' list, but the css->cgroup pointer is still NULL. This patch reverts this chunk and moves online back to css_online(). Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Fixes: 2f5177f0fd7e ("sched/cgroup: Fix/cleanup cgroup teardown/init") Link: http://lkml.kernel.org/r/148655324740.424917.5302984537258726349.stgit@buzz Signed-off-by: Ingo Molnar Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d177b21d04ce..2098954c690f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8376,11 +8376,20 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); - sched_online_group(tg, parent); - return &tg->css; } +/* Expose task group only after completing cgroup initialization */ +static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) +{ + struct task_group *tg = css_tg(css); + struct task_group *parent = css_tg(css->parent); + + if (parent) + sched_online_group(tg, parent); + return 0; +} + static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); @@ -8783,6 +8792,7 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgrp_subsys = { .css_alloc = cpu_cgroup_css_alloc, + .css_online = cpu_cgroup_css_online, .css_released = cpu_cgroup_css_released, .css_free = cpu_cgroup_css_free, .fork = cpu_cgroup_fork, -- cgit v1.2.3 From 196553666dced915c535ee6fb6cacd98ad80cdff Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Fri, 14 Jul 2017 09:41:30 -0500 Subject: RDMA/uverbs: Fix the check for port number commit 5a7a88f1b488e4ee49eb3d5b82612d4d9ffdf2c3 upstream. The port number is only valid if IB_QP_PORT is set in the mask. So only check port number if it is valid to prevent modify_qp from failing due to an invalid port number. Fixes: 5ecce4c9b17b("Check port number supplied by user verbs cmds") Reviewed-by: Steve Wise Signed-off-by: Mustafa Ismail Tested-by: Mike Marciniszyn Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_cmd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 01e3a37b0aef..d118ffe0bfb6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2342,8 +2342,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - if (cmd.port_num < rdma_start_port(ib_dev) || - cmd.port_num > rdma_end_port(ib_dev)) + if ((cmd.attr_mask & IB_QP_PORT) && + (cmd.port_num < rdma_start_port(ib_dev) || + cmd.port_num > rdma_end_port(ib_dev))) return -EINVAL; INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, -- cgit v1.2.3 From d933777b1b1b15c61bb464f715afdc249606ac00 Mon Sep 17 00:00:00 2001 From: Valentin Vidic Date: Fri, 5 May 2017 21:07:33 +0200 Subject: ipmi/watchdog: fix watchdog timeout set on reboot commit 860f01e96981a68553f3ca49f574ff14fe955e72 upstream. systemd by default starts watchdog on reboot and sets the timer to ShutdownWatchdogSec=10min. Reboot handler in ipmi_watchdog than reduces the timer to 120s which is not enough time to boot a Xen machine with a lot of RAM. As a result the machine is rebooted the second time during the long run of (XEN) Scrubbing Free RAM..... Fix this by setting the timer to 120s only if it was previously set to a low value. Signed-off-by: Valentin Vidic Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_watchdog.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 4facc7517a6a..909311016108 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -1162,10 +1162,11 @@ static int wdog_reboot_handler(struct notifier_block *this, ipmi_watchdog_state = WDOG_TIMEOUT_NONE; ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { - /* Set a long timer to let the reboot happens, but - reboot if it hangs, but only if the watchdog + /* Set a long timer to let the reboot happen or + reset if it hangs, but only if the watchdog timer was already running. */ - timeout = 120; + if (timeout < 120) + timeout = 120; pretimeout = 0; ipmi_watchdog_state = WDOG_TIMEOUT_RESET; ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); -- cgit v1.2.3 From ad25f11ed216d5ce3b5566b2f187b59fa3061b40 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Jul 2017 14:51:19 -0400 Subject: dentry name snapshots commit 49d31c2f389acfe83417083e1208422b4091cd9e upstream. take_dentry_name_snapshot() takes a safe snapshot of dentry name; if the name is a short one, it gets copied into caller-supplied structure, otherwise an extra reference to external name is grabbed (those are never modified). In either case the pointer to stable string is stored into the same structure. dentry must be held by the caller of take_dentry_name_snapshot(), but may be freely dropped afterwards - the snapshot will stay until destroyed by release_dentry_name_snapshot(). Intended use: struct name_snapshot s; take_dentry_name_snapshot(&s, dentry); ... access s.name ... release_dentry_name_snapshot(&s); Replaces fsnotify_oldname_...(), gets used in fsnotify to obtain the name to pass down with event. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 27 +++++++++++++++++++++++++++ fs/debugfs/inode.c | 10 +++++----- fs/namei.c | 8 ++++---- fs/notify/fsnotify.c | 8 ++++++-- include/linux/dcache.h | 6 ++++++ include/linux/fsnotify.h | 31 ------------------------------- 6 files changed, 48 insertions(+), 42 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 1dbc6b560fef..67957f5b325c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -277,6 +277,33 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } +void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + if (unlikely(dname_external(dentry))) { + struct external_name *p = external_name(dentry); + atomic_inc(&p->u.count); + spin_unlock(&dentry->d_lock); + name->name = p->name; + } else { + memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN); + spin_unlock(&dentry->d_lock); + name->name = name->inline_name; + } +} +EXPORT_SYMBOL(take_dentry_name_snapshot); + +void release_dentry_name_snapshot(struct name_snapshot *name) +{ + if (unlikely(name->name != name->inline_name)) { + struct external_name *p; + p = container_of(name->name, struct external_name, name[0]); + if (unlikely(atomic_dec_and_test(&p->u.count))) + kfree_rcu(p, u.head); + } +} +EXPORT_SYMBOL(release_dentry_name_snapshot); + static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 1e30f74a9527..3d7de9f4f545 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -730,7 +730,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, { int error; struct dentry *dentry = NULL, *trap; - const char *old_name; + struct name_snapshot old_name; trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ @@ -745,19 +745,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry)) goto exit; - old_name = fsnotify_oldname_init(old_dentry->d_name.name); + take_dentry_name_snapshot(&old_name, old_dentry); error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir), dentry, 0); if (error) { - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); goto exit; } d_move(old_dentry, dentry); - fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name, + fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name, d_is_dir(old_dentry), NULL, old_dentry); - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); unlock_rename(new_dir, old_dir); dput(dentry); return old_dentry; diff --git a/fs/namei.c b/fs/namei.c index d5e5140c1045..66209f720146 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4336,11 +4336,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, { int error; bool is_dir = d_is_dir(old_dentry); - const unsigned char *old_name; struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; + struct name_snapshot old_name; /* * Check source == target. @@ -4391,7 +4391,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - old_name = fsnotify_oldname_init(old_dentry->d_name.name); + take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); if (!is_dir || (flags & RENAME_EXCHANGE)) lock_two_nondirectories(source, target); @@ -4446,14 +4446,14 @@ out: inode_unlock(target); dput(new_dentry); if (!error) { - fsnotify_move(old_dir, new_dir, old_name, is_dir, + fsnotify_move(old_dir, new_dir, old_name.name, is_dir, !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); if (flags & RENAME_EXCHANGE) { fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, new_is_dir, NULL, new_dentry); } } - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); return error; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index db39de2dd4cb..a64adc2fced9 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -104,16 +104,20 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) if (unlikely(!fsnotify_inode_watches_children(p_inode))) __fsnotify_update_child_dentry_flags(p_inode); else if (p_inode->i_fsnotify_mask & mask) { + struct name_snapshot name; + /* we are notifying a parent so come up with the new mask which * specifies these are events which came from a child. */ mask |= FS_EVENT_ON_CHILD; + take_dentry_name_snapshot(&name, dentry); if (path) ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - dentry->d_name.name, 0); + name.name, 0); else ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - dentry->d_name.name, 0); + name.name, 0); + release_dentry_name_snapshot(&name); } dput(parent); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5beed7b30561..ff295e166b2c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -590,5 +590,11 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); } +struct name_snapshot { + const char *name; + char inline_name[DNAME_INLINE_LEN]; +}; +void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); +void release_dentry_name_snapshot(struct name_snapshot *); #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b8bcc058e031..e5f03a4d8430 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -293,35 +293,4 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) } } -#if defined(CONFIG_FSNOTIFY) /* notify helpers */ - -/* - * fsnotify_oldname_init - save off the old filename before we change it - */ -static inline const unsigned char *fsnotify_oldname_init(const unsigned char *name) -{ - return kstrdup(name, GFP_KERNEL); -} - -/* - * fsnotify_oldname_free - free the name we got from fsnotify_oldname_init - */ -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ - kfree(old_name); -} - -#else /* CONFIG_FSNOTIFY */ - -static inline const char *fsnotify_oldname_init(const unsigned char *name) -{ - return NULL; -} - -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ -} - -#endif /* CONFIG_FSNOTIFY */ - #endif /* _LINUX_FS_NOTIFY_H */ -- cgit v1.2.3 From 5463a3dccf10204577b8c1c170ca66efa7b65e3e Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 5 Jan 2017 10:34:07 -0200 Subject: v4l: s5c73m3: fix negation operator commit a2370ba2752538404e363346b339869c9973aeac upstream. Bool values should be negated using logical operators. Using bitwise operators results in unexpected and possibly incorrect results. Reported-by: David Binderman Signed-off-by: Andrzej Hajda Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c b/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c index 0a060339e516..2e7185030741 100644 --- a/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c +++ b/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c @@ -211,7 +211,7 @@ static int s5c73m3_3a_lock(struct s5c73m3 *state, struct v4l2_ctrl *ctrl) } if ((ctrl->val ^ ctrl->cur.val) & V4L2_LOCK_FOCUS) - ret = s5c73m3_af_run(state, ~af_lock); + ret = s5c73m3_af_run(state, !af_lock); return ret; } -- cgit v1.2.3 From 4693080316e671580459875723795fdb805a6cf4 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 20 Oct 2016 00:34:01 -0700 Subject: pstore: Allow prz to control need for locking commit 663deb47880f2283809669563c5a52ac7c6aef1a upstream. In preparation of not locking at all for certain buffers depending on if there's contention, make locking optional depending on the initialization of the prz. Signed-off-by: Joel Fernandes [kees: moved locking flag into prz instead of via caller arguments] Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 5 +++-- fs/pstore/ram_core.c | 24 +++++++++++++++--------- include/linux/pstore_ram.h | 10 +++++++++- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 6ad831b9d1b8..8b09271e5d66 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -434,7 +434,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, for (i = 0; i < cxt->max_dump_cnt; i++) { cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0, &cxt->ecc_info, - cxt->memtype); + cxt->memtype, 0); if (IS_ERR(cxt->przs[i])) { err = PTR_ERR(cxt->przs[i]); dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", @@ -471,7 +471,8 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return -ENOMEM; } - *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, + cxt->memtype, 0); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index cb92055e6016..a857338b7dab 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -53,9 +53,10 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) { int old; int new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&prz->buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->start); new = old + a; @@ -63,7 +64,8 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) new -= prz->buffer_size; atomic_set(&prz->buffer->start, new); - raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); return old; } @@ -73,9 +75,10 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) { size_t old; size_t new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&prz->buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->size); if (old == prz->buffer_size) @@ -87,7 +90,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) atomic_set(&prz->buffer->size, new); exit: - raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); } static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, @@ -463,7 +467,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, } static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, - struct persistent_ram_ecc_info *ecc_info) + struct persistent_ram_ecc_info *ecc_info, + unsigned long flags) { int ret; @@ -492,6 +497,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, prz->buffer->sig = sig; persistent_ram_zap(prz); prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + prz->flags = flags; return 0; } @@ -516,7 +522,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype) + unsigned int memtype, u32 flags) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -531,7 +537,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, if (ret) goto err; - ret = persistent_ram_post_init(prz, sig, ecc_info); + ret = persistent_ram_post_init(prz, sig, ecc_info, flags); if (ret) goto err; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 244d2423dbaf..4058bf991868 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -24,6 +24,13 @@ #include #include +/* + * Choose whether access to the RAM zone requires locking or not. If a zone + * can be written to from different CPUs like with ftrace for example, then + * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. + */ +#define PRZ_FLAG_NO_LOCK BIT(0) + struct persistent_ram_buffer; struct rs_control; @@ -40,6 +47,7 @@ struct persistent_ram_zone { void *vaddr; struct persistent_ram_buffer *buffer; size_t buffer_size; + u32 flags; raw_spinlock_t buffer_lock; /* ECC correction */ @@ -56,7 +64,7 @@ struct persistent_ram_zone { struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype); + unsigned int memtype, u32 flags); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.3 From a0840275e3ebddd5d1349cd4908777e11ba50311 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 9 Feb 2017 15:43:44 -0800 Subject: pstore: Correctly initialize spinlock and flags commit 76d5692a58031696e282384cbd893832bc92bd76 upstream. The ram backend wasn't always initializing its spinlock correctly. Since it was coming from kzalloc memory, though, it was harmless on architectures that initialize unlocked spinlocks to 0 (at least x86 and ARM). This also fixes a possibly ignored flag setting too. When running under CONFIG_DEBUG_SPINLOCK, the following Oops was visible: [ 0.760836] persistent_ram: found existing buffer, size 29988, start 29988 [ 0.765112] persistent_ram: found existing buffer, size 30105, start 30105 [ 0.769435] persistent_ram: found existing buffer, size 118542, start 118542 [ 0.785960] persistent_ram: found existing buffer, size 0, start 0 [ 0.786098] persistent_ram: found existing buffer, size 0, start 0 [ 0.786131] pstore: using zlib compression [ 0.790716] BUG: spinlock bad magic on CPU#0, swapper/0/1 [ 0.790729] lock: 0xffffffc0d1ca9bb0, .magic: 00000000, .owner: /-1, .owner_cpu: 0 [ 0.790742] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-rc2+ #913 [ 0.790747] Hardware name: Google Kevin (DT) [ 0.790750] Call trace: [ 0.790768] [] dump_backtrace+0x0/0x2bc [ 0.790780] [] show_stack+0x20/0x28 [ 0.790794] [] dump_stack+0xa4/0xcc [ 0.790809] [] spin_dump+0xe0/0xf0 [ 0.790821] [] spin_bug+0x30/0x3c [ 0.790834] [] do_raw_spin_lock+0x50/0x1b8 [ 0.790846] [] _raw_spin_lock_irqsave+0x54/0x6c [ 0.790862] [] buffer_size_add+0x48/0xcc [ 0.790875] [] persistent_ram_write+0x60/0x11c [ 0.790888] [] ramoops_pstore_write_buf+0xd4/0x2a4 [ 0.790900] [] pstore_console_write+0xf0/0x134 [ 0.790912] [] console_unlock+0x48c/0x5e8 [ 0.790923] [] register_console+0x3b0/0x4d4 [ 0.790935] [] pstore_register+0x1a8/0x234 [ 0.790947] [] ramoops_probe+0x6b8/0x7d4 [ 0.790961] [] platform_drv_probe+0x7c/0xd0 [ 0.790972] [] driver_probe_device+0x1b4/0x3bc [ 0.790982] [] __device_attach_driver+0xc8/0xf4 [ 0.790996] [] bus_for_each_drv+0xb4/0xe4 [ 0.791006] [] __device_attach+0xd0/0x158 [ 0.791016] [] device_initial_probe+0x24/0x30 [ 0.791026] [] bus_probe_device+0x50/0xe4 [ 0.791038] [] device_add+0x3a4/0x76c [ 0.791051] [] of_device_add+0x74/0x84 [ 0.791062] [] of_platform_device_create_pdata+0xc0/0x100 [ 0.791073] [] of_platform_device_create+0x34/0x40 [ 0.791086] [] of_platform_default_populate_init+0x58/0x78 [ 0.791097] [] do_one_initcall+0x88/0x160 [ 0.791109] [] kernel_init_freeable+0x264/0x31c [ 0.791123] [] kernel_init+0x18/0x11c [ 0.791133] [] ret_from_fork+0x10/0x50 [ 0.793717] console [pstore-1] enabled [ 0.797845] pstore: Registered ramoops as persistent store backend [ 0.804647] ramoops: attached 0x100000@0xf7edc000, ecc: 0/0 Fixes: 663deb47880f ("pstore: Allow prz to control need for locking") Fixes: 109704492ef6 ("pstore: Make spinlock per zone instead of global") Reported-by: Brian Norris Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index a857338b7dab..bc927e30bdcc 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -467,8 +467,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, } static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, - struct persistent_ram_ecc_info *ecc_info, - unsigned long flags) + struct persistent_ram_ecc_info *ecc_info) { int ret; @@ -494,10 +493,9 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, prz->buffer->sig); } + /* Rewind missing or invalid memory area. */ prz->buffer->sig = sig; persistent_ram_zap(prz); - prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); - prz->flags = flags; return 0; } @@ -533,11 +531,15 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, goto err; } + /* Initialize general buffer state. */ + prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + prz->flags = flags; + ret = persistent_ram_buffer_map(start, size, prz, memtype); if (ret) goto err; - ret = persistent_ram_post_init(prz, sig, ecc_info, flags); + ret = persistent_ram_post_init(prz, sig, ecc_info); if (ret) goto err; -- cgit v1.2.3 From e10f7bd6a6b8484ebe140a793d30b0cc56603620 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 5 Mar 2017 22:08:58 -0800 Subject: pstore: Use dynamic spinlock initializer commit e9a330c4289f2ba1ca4bf98c2b430ab165a8931b upstream. The per-prz spinlock should be using the dynamic initializer so that lockdep can correctly track it. Without this, under lockdep, we get a warning at boot that the lock is in non-static memory. Fixes: 109704492ef6 ("pstore: Make spinlock per zone instead of global") Fixes: 76d5692a5803 ("pstore: Correctly initialize spinlock and flags") Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index bc927e30bdcc..e11672aa4575 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -532,7 +532,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, } /* Initialize general buffer state. */ - prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + raw_spin_lock_init(&prz->buffer_lock); prz->flags = flags; ret = persistent_ram_buffer_map(start, size, prz, memtype); -- cgit v1.2.3 From 48a70be0de9e014994c4bd4749c9ccc15bc7731a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Feb 2017 14:29:42 -0800 Subject: net: skb_needs_check() accepts CHECKSUM_NONE for tx commit 6e7bc478c9a006c701c14476ec9d389a484b4864 upstream. My recent change missed fact that UFO would perform a complete UDP checksum before segmenting in frags. In this case skb->ip_summed is set to CHECKSUM_NONE. We need to add this valid case to skb_needs_check() Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 69d604e7dff5..0af019dfe846 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2702,9 +2702,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment); static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; - else - return skb->ip_summed == CHECKSUM_NONE; + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_NONE; + + return skb->ip_summed == CHECKSUM_NONE; } /** -- cgit v1.2.3 From a3ff46097a1d05fc95e34f9c4ca493488d8fe766 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 18 Jul 2017 17:49:14 -0700 Subject: device-dax: fix sysfs duplicate warnings commit bbb3be170ac2891526ad07b18af7db226879a8e7 upstream. Fix warnings of the form... WARNING: CPU: 10 PID: 4983 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x62/0x80 sysfs: cannot create duplicate filename '/class/dax/dax12.0' Call Trace: dump_stack+0x63/0x86 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5a/0x80 ? kernfs_path_from_node+0x4f/0x60 sysfs_warn_dup+0x62/0x80 sysfs_do_create_link_sd.isra.2+0x97/0xb0 sysfs_create_link+0x25/0x40 device_add+0x266/0x630 devm_create_dax_dev+0x2cf/0x340 [dax] dax_pmem_probe+0x1f5/0x26e [dax_pmem] nvdimm_bus_probe+0x71/0x120 ...by reusing the namespace id for the device-dax instance name. Now that we have decided that there will never by more than one device-dax instance per libnvdimm-namespace parent device [1], we can directly reuse the namepace ids. There are some possible follow-on cleanups, but those are saved for a later patch to simplify the -stable backport. [1]: https://lists.01.org/pipermail/linux-nvdimm/2016-December/008266.html Fixes: 98a29c39dc68 ("libnvdimm, namespace: allow creation of multiple pmem...") Cc: Jeff Moyer Reported-by: Dariusz Dokupil Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/dax/dax.c | 24 ++++++++++++++++-------- drivers/dax/dax.h | 2 +- drivers/dax/pmem.c | 12 +++++++----- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 586f9543de73..40be3747724d 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -546,7 +546,8 @@ static void dax_dev_release(struct device *dev) struct dax_dev *dax_dev = to_dax_dev(dev); struct dax_region *dax_region = dax_dev->region; - ida_simple_remove(&dax_region->ida, dax_dev->id); + if (dax_dev->id >= 0) + ida_simple_remove(&dax_region->ida, dax_dev->id); ida_simple_remove(&dax_minor_ida, MINOR(dev->devt)); dax_region_put(dax_region); iput(dax_dev->inode); @@ -581,7 +582,7 @@ static void unregister_dax_dev(void *dev) } struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, - struct resource *res, int count) + int id, struct resource *res, int count) { struct device *parent = dax_region->dev; struct dax_dev *dax_dev; @@ -608,10 +609,16 @@ struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, if (i < count) goto err_id; - dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); - if (dax_dev->id < 0) { - rc = dax_dev->id; - goto err_id; + if (id < 0) { + id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); + dax_dev->id = id; + if (id < 0) { + rc = id; + goto err_id; + } + } else { + /* region provider owns @id lifetime */ + dax_dev->id = -1; } minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL); @@ -650,7 +657,7 @@ struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, dev->parent = parent; dev->groups = dax_attribute_groups; dev->release = dax_dev_release; - dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id); + dev_set_name(dev, "dax%d.%d", dax_region->id, id); rc = device_add(dev); if (rc) { kill_dax_dev(dax_dev); @@ -669,7 +676,8 @@ struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, err_inode: ida_simple_remove(&dax_minor_ida, minor); err_minor: - ida_simple_remove(&dax_region->ida, dax_dev->id); + if (dax_dev->id >= 0) + ida_simple_remove(&dax_region->ida, dax_dev->id); err_id: kfree(dax_dev); diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h index ddd829ab58c0..b5ed85036b2a 100644 --- a/drivers/dax/dax.h +++ b/drivers/dax/dax.h @@ -21,5 +21,5 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct resource *res, unsigned int align, void *addr, unsigned long flags); struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, - struct resource *res, int count); + int id, struct resource *res, int count); #endif /* __DAX_H__ */ diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 73c6ce93a0d9..eebb35720398 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -58,13 +58,12 @@ static void dax_pmem_percpu_kill(void *data) static int dax_pmem_probe(struct device *dev) { - int rc; void *addr; struct resource res; struct dax_dev *dax_dev; + int rc, id, region_id; struct nd_pfn_sb *pfn_sb; struct dax_pmem *dax_pmem; - struct nd_region *nd_region; struct nd_namespace_io *nsio; struct dax_region *dax_region; struct nd_namespace_common *ndns; @@ -122,14 +121,17 @@ static int dax_pmem_probe(struct device *dev) /* adjust the dax_region resource to the start of data */ res.start += le64_to_cpu(pfn_sb->dataoff); - nd_region = to_nd_region(dev->parent); - dax_region = alloc_dax_region(dev, nd_region->id, &res, + rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", ®ion_id, &id); + if (rc != 2) + return -EINVAL; + + dax_region = alloc_dax_region(dev, region_id, &res, le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP); if (!dax_region) return -ENOMEM; /* TODO: support for subdividing a dax region... */ - dax_dev = devm_create_dax_dev(dax_region, &res, 1); + dax_dev = devm_create_dax_dev(dax_region, id, &res, 1); /* child dax_dev instances now own the lifetime of the dax_region */ dax_region_put(dax_region); -- cgit v1.2.3 From 34fae9c906ec93d7c783d17b365cf1b84db415ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Dec 2016 22:58:20 +0100 Subject: x86/mce/AMD: Make the init code more robust [ Upstream commit 0dad3a3014a0b9e72521ff44f17e0054f43dcdea ] If mce_device_init() fails then the mce device pointer is NULL and the AMD mce code happily dereferences it. Add a sanity check. Reported-by: Markus Trippelsdorf Reported-by: Boris Ostrovsky Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 3dfca7b302dc..a5b47c1361a0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -955,6 +955,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) const char *name = get_name(bank, NULL); int err = 0; + if (!dev) + return -ENODEV; + if (is_shared_bank(bank)) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); -- cgit v1.2.3 From 82338e9ffba55e611613f6a5dfd17697fd76e1e2 Mon Sep 17 00:00:00 2001 From: Chun-Hao Lin Date: Tue, 27 Dec 2016 16:29:43 +0800 Subject: r8169: add support for RTL8168 series add-on card. [ Upstream commit 610c908773d30907c950ca3b2ee8ac4b2813537b ] This chip is the same as RTL8168, but its device id is 0x8161. Signed-off-by: Chun-Hao Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index bf000d819a21..2c4350a1c629 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -326,6 +326,7 @@ enum cfg_version { static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, -- cgit v1.2.3 From 0686a9bc42adff7e70f345685a059ccd2de6f42d Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Thu, 8 Dec 2016 09:40:30 +0530 Subject: ARM: omap2+: fixing wrong strcat for Non-NULL terminated string [ Upstream commit 5066d5296ff2db20625e5f46e7338872c90c649f ] Issue caught with static analysis tool: "Dangerous usage of 'name' (strncpy doesn't always 0-terminate it)" Use strlcpy _includes_ the NUL terminator, and strlcat() which ensures that it won't overflow the buffer. Reported-by: Maninder Singh Signed-off-by: Vaneet Narang CC: Russell King Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap_hwmod.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 1052b29697b8..b5c1714ebfdd 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -790,14 +790,14 @@ static int _init_main_clk(struct omap_hwmod *oh) int ret = 0; char name[MOD_CLK_MAX_NAME_LEN]; struct clk *clk; + static const char modck[] = "_mod_ck"; - /* +7 magic comes from '_mod_ck' suffix */ - if (strlen(oh->name) + 7 > MOD_CLK_MAX_NAME_LEN) + if (strlen(oh->name) >= MOD_CLK_MAX_NAME_LEN - strlen(modck)) pr_warn("%s: warning: cropping name for %s\n", __func__, oh->name); - strncpy(name, oh->name, MOD_CLK_MAX_NAME_LEN - 7); - strcat(name, "_mod_ck"); + strlcpy(name, oh->name, MOD_CLK_MAX_NAME_LEN - strlen(modck)); + strlcat(name, modck, MOD_CLK_MAX_NAME_LEN); clk = clk_get(NULL, name); if (!IS_ERR(clk)) { -- cgit v1.2.3 From b1164693f770988e6a164dd813d3408cb7c1762e Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Fri, 9 Dec 2016 15:28:32 +0900 Subject: dt-bindings: power/supply: Update TPS65217 properties [ Upstream commit 81d7358d7038dd1001547950087e5b0641732f3f ] Add interrupt specifiers for USB and AC charger input. Interrupt numbers are from the datasheet. Fix wrong property for compatible string. Signed-off-by: Milo Kim Acked-by: Rob Herring Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/power/supply/tps65217_charger.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt b/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt index 98d131acee95..a11072c5a866 100644 --- a/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt +++ b/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt @@ -2,11 +2,16 @@ TPS65217 Charger Required Properties: -compatible: "ti,tps65217-charger" +-interrupts: TPS65217 interrupt numbers for the AC and USB charger input change. + Should be <0> for the USB charger and <1> for the AC adapter. +-interrupt-names: Should be "USB" and "AC" This node is a subnode of the tps65217 PMIC. Example: tps65217-charger { - compatible = "ti,tps65090-charger"; + compatible = "ti,tps65217-charger"; + interrupts = <0>, <1>; + interrupt-names = "USB", "AC"; }; -- cgit v1.2.3 From 2bc4d1c9578454d1fae8fb731c50ad1f30656f69 Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Fri, 9 Dec 2016 15:28:33 +0900 Subject: dt-bindings: input: Specify the interrupt number of TPS65217 power button [ Upstream commit 820381572fc015baa4f5744f5d4583ec0c0f1b82 ] Specify the power button interrupt number which is from the datasheet. Signed-off-by: Milo Kim Acked-by: Rob Herring Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt b/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt index 3e5b9793341f..8682ab6d4a50 100644 --- a/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt +++ b/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt @@ -8,8 +8,9 @@ This driver provides a simple power button event via an Interrupt. Required properties: - compatible: should be "ti,tps65217-pwrbutton" or "ti,tps65218-pwrbutton" -Required properties for TPS65218: +Required properties: - interrupts: should be one of the following + - <2>: For controllers compatible with tps65217 - <3 IRQ_TYPE_EDGE_BOTH>: For controllers compatible with tps65218 Examples: @@ -17,6 +18,7 @@ Examples: &tps { tps65217-pwrbutton { compatible = "ti,tps65217-pwrbutton"; + interrupts = <2>; }; }; -- cgit v1.2.3 From 0ce84ef6a29655b9f985827e149dbf0d23b02dc2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 12 Dec 2016 11:58:05 +0200 Subject: ARM: dts: am57xx-idk: Put USB2 port in peripheral mode [ Upstream commit 5acd016c88937be3667ba4e6b60f0f74455b5e80 ] USB2 port can be operated in dual-role mode but till we have dual-role support in dwc3 driver let's limit this port to peripheral mode. If we don't do so it defaults to host mode. USB1 port is meant for host only operation and we don't want both ports in host only mode. Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am57xx-idk-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index 03cec62260e1..db858fff4e18 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -294,7 +294,7 @@ }; &usb2 { - dr_mode = "otg"; + dr_mode = "peripheral"; }; &mmc2 { -- cgit v1.2.3 From a0a4dd4469048544407e5c6f7466af4dc48a2571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 14 Dec 2016 22:29:44 +0100 Subject: ARM: dts: n900: Mark eMMC slot with no-sdio and no-sd flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4cf48f1d7520a4d325af58eded4d8090e1b40be7 ] Trying to initialize eMMC slot as SDIO or SD cause failure in n900 port of qemu. eMMC itself is not detected and is not working. Real Nokia N900 harware does not have this problem. As eMMC is really not SDIO or SD based such change is harmless and will fix support for qemu. Signed-off-by: Pali Rohár Acked-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap3-n900.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 87ca50b53002..4d448f145ed1 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -734,6 +734,8 @@ vmmc_aux-supply = <&vsim>; bus-width = <8>; non-removable; + no-sdio; + no-sd; }; &mmc3 { -- cgit v1.2.3 From ea703cb0140bdeed8b5576816d3cbe6cf09c44d5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 28 Dec 2016 14:58:31 +0200 Subject: net/mlx5: Disable RoCE on the e-switch management port under switchdev mode [ Upstream commit 9da34cd34e85aacc55af8774b81b1f23e86014f9 ] Under the switchdev/offloads mode, packets that don't match any e-switch steering rule are sent towards the e-switch management port. We use a NIC HW steering rule set per vport (uplink and VFs) to make them be received into the host OS through the respective vport representor netdevice. Currnetly such missed RoCE packets will not get to this NIC steering rule, and hence VF RoCE will not work over the slow path of the offloads mode. This is b/c these packets will be matched by a steering rule added by the firmware that serves RoCE traffic set on the PF NIC vport which is also the e-switch management port under SRIOV. Disabling RoCE on the e-switch management vport when we are in the offloads mode, will signal to the firmware to remove their RoCE rule, and then the missed RoCE packets will be matched by the representor NIC steering rule as any other missed packets. To achieve that, we disable RoCE on the PF vport. We do that by removing (hot-unplugging) the IB device instance associated with the PF. This is also required by our current model where the PF serves as the uplink representor and hence only SW switching (TC, bridge, OVS) applications and slow path vport mlx5e net-device should be running over that vport. Fixes: c930a3ad7453 ('net/mlx5e: Add devlink based SRIOV mode changes') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b08b9e2c6a76..6ffd5d2a70aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -672,6 +672,12 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto err_reps; } + + /* disable PF RoCE so missed packets don't go through RoCE steering */ + mlx5_dev_list_lock(); + mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return 0; err_reps: @@ -695,6 +701,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { -- cgit v1.2.3 From aeb230318ddbfc4b06f0d4befd755090ec0c4d6f Mon Sep 17 00:00:00 2001 From: Zheng Li Date: Wed, 28 Dec 2016 23:23:46 +0800 Subject: ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output [ Upstream commit e4c5e13aa45c23692e4acf56f0b3533f328199b2 ] There is an inconsistent conditional judgement between __ip6_append_data and ip6_finish_output functions, the variable length in __ip6_append_data just include the length of application's payload and udp6 header, don't include the length of ipv6 header, but in ip6_finish_output use (skb->len > ip6_skb_dst_mtu(skb)) as judgement, and skb->len include the length of ipv6 header. That causes some particular application's udp6 payloads whose length are between (MTU - IPv6 Header) and MTU were fragmented by ip6_fragment even though the rst->dev support UFO feature. Add the length of ipv6 header to length in __ip6_append_data to keep consistent conditional judgement as ip6_finish_output for ip6 fragment. Signed-off-by: Zheng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fd649599620e..5a4b8e7bcedd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1376,7 +1376,7 @@ emsgsize: */ cork->length += length; - if (((length > mtu) || + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && -- cgit v1.2.3 From a8820678afe1289cd35431ff847d817cdeffcadf Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:09 +0200 Subject: net/mlx4_core: Use-after-free causes a resource leak in flow-steering detach [ Upstream commit 3b01fe7f91c8e4f9afc4fae3c5af72c14958d2d8 ] mlx4_QP_FLOW_STEERING_DETACH_wrapper first removes the steering rule (which results in freeing the rule structure), and then references a field in this struct (the qp number) when releasing the busy-status on the rule's qp. Since this memory was freed, it could reallocated and changed. Therefore, the qp number in the struct may be incorrect, so that we are releasing the incorrect qp. This leaves the rule's qp in the busy state (and could possibly release an incorrect qp as well). Fix this by saving the qp number in a local variable, for use after removing the steering rule. Fixes: 2c473ae7e582 ("net/mlx4_core: Disallow releasing VF QPs which have steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 32f76bf018c3..f1eb74a61bce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4474,6 +4474,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct res_qp *rqp; struct res_fs_rule *rrule; u64 mirr_reg_id; + int qpn; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4490,10 +4491,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, } mirr_reg_id = rrule->mirr_rule_id; kfree(rrule->mirr_mbox); + qpn = rrule->qpn; /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); - err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; @@ -4518,7 +4520,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, if (!err) atomic_dec(&rqp->ref_count); out: - put_res(dev, slave, rrule->qpn, RES_QP); + put_res(dev, slave, qpn, RES_QP); return err; } -- cgit v1.2.3 From 7e150f79134e7eabb5315975ae34bf3169beff4c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Dec 2016 18:37:11 +0200 Subject: net/mlx4: Remove BUG_ON from ICM allocation routine [ Upstream commit c1d5f8ff80ea84768f5fae1ca9d1abfbb5e6bbaa ] This patch removes BUG_ON() macro from mlx4_alloc_icm_coherent() by checking DMA address alignment in advance and performing proper folding in case of error. Fixes: 5b0bf5e25efe ("mlx4_core: Support ICM tables in coherent memory") Reported-by: Ozgur Karatas Signed-off-by: Leon Romanovsky Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 2a9dd460a95f..e1f9e7cebf8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } -- cgit v1.2.3 From 237652fae500c0ecacdd28281db18878ba46f521 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:13 +0200 Subject: net/mlx4_core: Fix raw qp flow steering rules under SRIOV [ Upstream commit 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 ] Demoting simple flow steering rule priority (for DPDK) was achieved by wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper function for the PF and all VFs. In function mlx4_ib_create_flow(), this change caused the main rule creation for the PF to be wrapped, while it left the associated tunnel steering rule creation unwrapped for the PF. This mismatch caused rule deletion failures in mlx4_ib_destroy_flow() for the PF when the detach wrapper function did not find the associated tunnel-steering rule (since creation of that rule for the PF did not go through the wrapper function). Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native" (so that the PF invocation does not go through the wrapper), and perform the required priority demotion for the PF in the mlx4_ib_create_flow() code path. Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 14 ++++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 18 ++++++++++++++++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 22 +--------------------- include/linux/mlx4/device.h | 2 ++ 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f2a885eee4bb..8059b7eaf3a8 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1680,9 +1680,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att size += ret; } + if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR && + flow_attr->num_of_specs == 1) { + struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1); + enum ib_flow_spec_type header_spec = + ((union ib_flow_spec *)(flow_attr + 1))->type; + + if (header_spec == IB_FLOW_SPEC_ETH) + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); + } + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (ret == -ENOMEM) pr_err("mcg table is full. Fail to register network rule.\n"); else if (ret == -ENXIO) @@ -1699,7 +1709,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", reg_id); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b2ca8a635b2e..551786f58e59 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index f1eb74a61bce..1822382212ee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4165,22 +4165,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } -static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, - struct _rule_hw *eth_header) -{ - if (is_multicast_ether_addr(eth_header->eth.dst_mac) || - is_broadcast_ether_addr(eth_header->eth.dst_mac)) { - struct mlx4_net_trans_rule_hw_eth *eth = - (struct mlx4_net_trans_rule_hw_eth *)eth_header; - struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); - bool last_rule = next_rule->size == 0 && next_rule->id == 0 && - next_rule->rsvd == 0; - - if (last_rule) - ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); - } -} - /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. @@ -4364,10 +4348,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) - handle_eth_header_mcast_prio(ctrl, rule_header); - - if (slave == dev->caps.function) - goto execute; + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -4395,7 +4376,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put_qp; } -execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c9f379689dd0..80faf44b8887 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -- cgit v1.2.3 From e0d5bb92c07bee2205212ad7aa6f1a85451db6ed Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 20 Dec 2016 08:54:29 -0700 Subject: drm/msm: Ensure that the hardware write pointer is valid [ Upstream commit 88b333b0ed790f9433ff542b163bf972953b74d3 ] Currently the value written to CP_RB_WPTR is calculated on the fly as (rb->next - rb->start). But as the code is designed rb->next is wrapped before writing the commands so if a series of commands happened to fit perfectly in the ringbuffer, rb->next would end up being equal to rb->size / 4 and thus result in an out of bounds address to CP_RB_WPTR. The easiest way to fix this is to mask WPTR when writing it to the hardware; it makes the hardware happy and the rest of the ringbuffer math appears to work and there isn't any point in upsetting anything. Signed-off-by: Jordan Crouse [squash in is_power_of_2() check] Signed-off-by: Rob Clark Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 9 ++++++++- drivers/gpu/drm/msm/msm_ringbuffer.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f386f463278d..a904082ed206 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -210,7 +210,14 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, void adreno_flush(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr; + + /* + * Mask wptr value that we calculate to fit in the HW range. This is + * to account for the possibility that the last command fit exactly into + * the ringbuffer and rb->next hasn't wrapped to zero yet + */ + wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); /* ensure writes to ringbuffer have hit system memory: */ mb(); diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index f326cf6a32e6..67b34e069abf 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -23,7 +23,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) struct msm_ringbuffer *ring; int ret; - size = ALIGN(size, 4); /* size should be dword aligned */ + if (WARN_ON(!is_power_of_2(size))) + return ERR_PTR(-EINVAL); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { -- cgit v1.2.3 From 50e3950d77ba7d897c5509ca6192f1aaed426dd0 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 20 Dec 2016 08:54:30 -0700 Subject: drm/msm: Put back the vaddr in submit_reloc() [ Upstream commit 6490abc4bc35fa4f3bdb9c7e49096943c50e29ea ] The error cases in submit_reloc() need to put back the virtual address of the bo before failling. Add a single failure path for the function. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_gem_submit.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index b6a0f37a65f3..427898a96325 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -290,7 +290,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob { uint32_t i, last_offset = 0; uint32_t *ptr; - int ret; + int ret = 0; if (offset % 4) { DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset); @@ -317,12 +317,13 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob ret = copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc)); if (ret) - return -EFAULT; + goto out; if (submit_reloc.submit_offset % 4) { DRM_ERROR("non-aligned reloc offset: %u\n", submit_reloc.submit_offset); - return -EINVAL; + ret = -EINVAL; + goto out; } /* offset in dwords: */ @@ -331,12 +332,13 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob if ((off >= (obj->base.size / 4)) || (off < last_offset)) { DRM_ERROR("invalid offset %u at reloc %u\n", off, i); - return -EINVAL; + ret = -EINVAL; + goto out; } ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid); if (ret) - return ret; + goto out; if (valid) continue; @@ -353,9 +355,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob last_offset = off; } +out: msm_gem_put_vaddr_locked(&obj->base); - return 0; + return ret; } static void submit_cleanup(struct msm_gem_submit *submit) -- cgit v1.2.3 From 7d33b41d34f57547e66c27e96075f8210a47b1b5 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 20 Dec 2016 08:54:31 -0700 Subject: drm/msm: Verify that MSM_SUBMIT_BO_FLAGS are set [ Upstream commit a6cb3b864b21b7345f824a4faa12b723c8aaf099 ] For every submission buffer object one of MSM_SUBMIT_BO_WRITE and MSM_SUBMIT_BO_READ must be set (and nothing else). If we allowed zero then the buffer object would never get queued to be unreferenced. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_gem_submit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 427898a96325..393973016b52 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -106,7 +106,8 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, pagefault_disable(); } - if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) { + if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) || + !(submit_bo.flags & MSM_SUBMIT_BO_FLAGS)) { DRM_ERROR("invalid flags: %x\n", submit_bo.flags); ret = -EINVAL; goto out_unlock; -- cgit v1.2.3 From c7d0c0d84808783740b69a06f5512b2c50200f5f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Dec 2016 08:13:47 -0700 Subject: vfio-pci: use 32-bit comparisons for register address for gcc-4.5 [ Upstream commit 45e869714489431625c569d21fc952428d761476 ] Using ancient compilers (gcc-4.5 or older) on ARM, we get a link failure with the vfio-pci driver: ERROR: "__aeabi_lcmp" [drivers/vfio/pci/vfio-pci.ko] undefined! The reason is that the compiler tries to do a comparison of a 64-bit range. This changes it to convert to a 32-bit number explicitly first, as newer compilers do for themselves. Signed-off-by: Arnd Bergmann Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_rdwr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 5ffd1d9ad4bd..357243d76f10 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -193,7 +193,10 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, if (!vdev->has_vga) return -EINVAL; - switch (pos) { + if (pos > 0xbfffful) + return -EINVAL; + + switch ((u32)pos) { case 0xa0000 ... 0xbffff: count = min(count, (size_t)(0xc0000 - pos)); iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); -- cgit v1.2.3 From ca40b2d039dc443ed82957260dbcc3097f8f80d6 Mon Sep 17 00:00:00 2001 From: "Strashko, Grygorii" Date: Thu, 8 Dec 2016 17:33:10 -0600 Subject: irqchip/keystone: Fix "scheduling while atomic" on rt [ Upstream commit 2f884e6e688a0deb69e6c9552e51aef8b7e3f5f1 ] The below call chain generates "scheduling while atomic" backtrace and causes system crash when Keystone 2 IRQ chip driver is used with RT-kernel: gic_handle_irq() |-__handle_domain_irq() |-generic_handle_irq() |-keystone_irq_handler() |-regmap_read() |-regmap_lock_spinlock() |-rt_spin_lock() The reason is that Keystone driver dispatches IRQ using chained IRQ handler and accesses I/O memory through syscon->regmap(mmio) which is implemented as fast_io regmap and uses regular spinlocks for synchronization, but spinlocks transformed to rt_mutexes on RT. Hence, convert Keystone 2 IRQ driver to use generic irq handler instead of chained IRQ handler. This way it will be compatible with RT kernel where it will be forced thread IRQ handler while in non-RT kernel it still will be executed in HW IRQ context. Cc: Suman Anna Signed-off-by: Grygorii Strashko Tested-by: Suman Anna Link: https://lkml.kernel.org/r/20161208233310.10329-1-grygorii.strashko@ti.com Signed-off-by: Jason Cooper Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-keystone.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index 54a5e870a8f5..efbcf8435185 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -19,9 +19,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -39,6 +39,7 @@ struct keystone_irq_device { struct irq_domain *irqd; struct regmap *devctrl_regs; u32 devctrl_offset; + raw_spinlock_t wa_lock; }; static inline u32 keystone_irq_readl(struct keystone_irq_device *kirq) @@ -83,17 +84,15 @@ static void keystone_irq_ack(struct irq_data *d) /* nothing to do here */ } -static void keystone_irq_handler(struct irq_desc *desc) +static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq) { - unsigned int irq = irq_desc_get_irq(desc); - struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc); + struct keystone_irq_device *kirq = keystone_irq; + unsigned long wa_lock_flags; unsigned long pending; int src, virq; dev_dbg(kirq->dev, "start irq %d\n", irq); - chained_irq_enter(irq_desc_get_chip(desc), desc); - pending = keystone_irq_readl(kirq); keystone_irq_writel(kirq, pending); @@ -111,13 +110,15 @@ static void keystone_irq_handler(struct irq_desc *desc) if (!virq) dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n", src, virq); + raw_spin_lock_irqsave(&kirq->wa_lock, wa_lock_flags); generic_handle_irq(virq); + raw_spin_unlock_irqrestore(&kirq->wa_lock, + wa_lock_flags); } } - chained_irq_exit(irq_desc_get_chip(desc), desc); - dev_dbg(kirq->dev, "end irq %d\n", irq); + return IRQ_HANDLED; } static int keystone_irq_map(struct irq_domain *h, unsigned int virq, @@ -182,9 +183,16 @@ static int keystone_irq_probe(struct platform_device *pdev) return -ENODEV; } + raw_spin_lock_init(&kirq->wa_lock); + platform_set_drvdata(pdev, kirq); - irq_set_chained_handler_and_data(kirq->irq, keystone_irq_handler, kirq); + ret = request_irq(kirq->irq, keystone_irq_handler, + 0, dev_name(dev), kirq); + if (ret) { + irq_domain_remove(kirq->irqd); + return ret; + } /* clear all source bits */ keystone_irq_writel(kirq, ~0x0); @@ -199,6 +207,8 @@ static int keystone_irq_remove(struct platform_device *pdev) struct keystone_irq_device *kirq = platform_get_drvdata(pdev); int hwirq; + free_irq(kirq->irq, kirq); + for (hwirq = 0; hwirq < KEYSTONE_N_IRQ; hwirq++) irq_dispose_mapping(irq_find_mapping(kirq->irqd, hwirq)); -- cgit v1.2.3 From ed788dc6fa65f74a5f82f9fe5d7ea46d17cdaf87 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 23 Dec 2016 11:21:10 +0200 Subject: ASoC: tlv320aic3x: Mark the RESET register as volatile [ Upstream commit 63c3194b82530bd71fd49db84eb7ab656b8d404a ] The RESET register only have one self clearing bit and it should not be cached. If it is cached, when we sync the registers back to the chip we will initiate a software reset as well, which is not desirable. Signed-off-by: Peter Ujfalusi Reviewed-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tlv320aic3x.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 5a8d96ec058c..fe45a16a5142 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -126,6 +126,16 @@ static const struct reg_default aic3x_reg[] = { { 108, 0x00 }, { 109, 0x00 }, }; +static bool aic3x_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case AIC3X_RESET: + return true; + default: + return false; + } +} + static const struct regmap_config aic3x_regmap = { .reg_bits = 8, .val_bits = 8, @@ -133,6 +143,9 @@ static const struct regmap_config aic3x_regmap = { .max_register = DAC_ICC_ADJ, .reg_defaults = aic3x_reg, .num_reg_defaults = ARRAY_SIZE(aic3x_reg), + + .volatile_reg = aic3x_volatile_reg, + .cache_type = REGCACHE_RBTREE, }; -- cgit v1.2.3 From 14e5c8c61791d03e69de94affc149dcb6735aa66 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 22 Dec 2016 17:18:12 +0800 Subject: spi: dw: Make debugfs name unique between instances [ Upstream commit 13288bdf4adbaa6bd1267f10044c1bc25d90ce7f ] Some system have multiple dw devices. Currently the driver uses a fixed name for the debugfs dir. Append dev name to the debugfs dir name to make it unique. Signed-off-by: Phil Reid Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-dw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 27960e46135d..c4226c07e091 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -107,7 +107,10 @@ static const struct file_operations dw_spi_regs_ops = { static int dw_spi_debugfs_init(struct dw_spi *dws) { - dws->debugfs = debugfs_create_dir("dw_spi", NULL); + char name[128]; + + snprintf(name, 128, "dw_spi-%s", dev_name(&dws->master->dev)); + dws->debugfs = debugfs_create_dir(name, NULL); if (!dws->debugfs) return -ENOMEM; -- cgit v1.2.3 From c612bba54b8c412ee81c8dd071fa81e4ab689505 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 20 Dec 2016 12:03:09 +0800 Subject: ASoC: nau8825: fix invalid configuration in Pre-Scalar of FLL [ Upstream commit a1792cda51300e15b03549cccf0b09f3be82e697 ] The clk_ref_div is not configured in the correct position of the register. The patch fixes that clk_ref_div, Pre-Scalar, is assigned the wrong value. Signed-off-by: John Hsu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/nau8825.c | 3 ++- sound/soc/codecs/nau8825.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index e643be91d762..f9f2737c4ad2 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -1928,7 +1928,8 @@ static void nau8825_fll_apply(struct nau8825 *nau8825, NAU8825_FLL_INTEGER_MASK, fll_param->fll_int); /* FLL pre-scaler */ regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL4, - NAU8825_FLL_REF_DIV_MASK, fll_param->clk_ref_div); + NAU8825_FLL_REF_DIV_MASK, + fll_param->clk_ref_div << NAU8825_FLL_REF_DIV_SFT); /* select divided VCO input */ regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL5, NAU8825_FLL_CLK_SW_MASK, NAU8825_FLL_CLK_SW_REF); diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h index 1c63e2abafa9..574d6f936135 100644 --- a/sound/soc/codecs/nau8825.h +++ b/sound/soc/codecs/nau8825.h @@ -129,7 +129,8 @@ #define NAU8825_FLL_CLK_SRC_FS (0x3 << NAU8825_FLL_CLK_SRC_SFT) /* FLL4 (0x07) */ -#define NAU8825_FLL_REF_DIV_MASK (0x3 << 10) +#define NAU8825_FLL_REF_DIV_SFT 10 +#define NAU8825_FLL_REF_DIV_MASK (0x3 << NAU8825_FLL_REF_DIV_SFT) /* FLL5 (0x08) */ #define NAU8825_FLL_PDB_DAC_EN (0x1 << 15) -- cgit v1.2.3 From 033d5ce4ad8d7b56bd562ff1107ac5780fb85eb7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 27 Dec 2016 18:29:57 +0000 Subject: irqchip/mxs: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND [ Upstream commit 88e20c74ee020f9e0c99dfce0dd9aa61c3f0cca0 ] The ICOLL controller doesn't provide any facility to configure the wakeup sources. That's the reason why this implementation lacks the irq_set_wake implementation. But this prevent us from properly entering power management states like "suspend to idle". So enable the flags IRQCHIP_SKIP_SET_WAKE and IRQCHIP_MASK_ON_SUSPEND to let the irqchip core allows and handles the power management. Signed-off-by: Stefan Wahren Reviewed-by: Fabio Estevam Link: https://lkml.kernel.org/r/1482863397-11400-1-git-send-email-stefan.wahren@i2se.com Signed-off-by: Jason Cooper Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-mxs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index 17304705f2cf..05fa9f7af53c 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -131,12 +131,16 @@ static struct irq_chip mxs_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = icoll_mask_irq, .irq_unmask = icoll_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; static struct irq_chip asm9260_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = asm9260_mask_irq, .irq_unmask = asm9260_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs) -- cgit v1.2.3 From 88a86f8dfaad6b7185ad1658a8283f12dc4a351b Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Wed, 14 Dec 2016 21:27:57 +0900 Subject: openrisc: Add _text symbol to fix ksym build error [ Upstream commit 086cc1c31a0ec075dac02425367c871bb65bc2c9 ] The build robot reports: .tmp_kallsyms1.o: In function `kallsyms_relative_base': >> (.rodata+0x8a18): undefined reference to `_text' This is when using 'make alldefconfig'. Adding this _text symbol to mark the start of the kernel as in other architecture fixes this. Signed-off-by: Stafford Horne Acked-by: Jonas Bonn Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index d68b9ede8423..c50609aead35 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -38,6 +38,8 @@ SECTIONS /* Read-only sections, merged into text segment: */ . = LOAD_BASE ; + _text = .; + /* _s_kernel_ro must be page aligned */ . = ALIGN(PAGE_SIZE); _s_kernel_ro = .; -- cgit v1.2.3 From 2987ce159fb9b0894d9bf000b8317e07cfed3fa8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 13 Dec 2016 11:15:21 -0700 Subject: dmaengine: ioatdma: Add Skylake PCI Dev ID [ Upstream commit 1594c18fd297a8edcc72bc4b161f3f52603ebb92 ] Adding Skylake Xeon PCI device ids for ioatdma and related bits. Signed-off-by: Dave Jiang Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/hw.h | 2 ++ drivers/dma/ioat/init.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 8e67895bcca3..abcc51b343ce 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -64,6 +64,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e #define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f +#define PCI_DEVICE_ID_INTEL_IOAT_SKX 0x2021 + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index d235fbe2564f..b45b0363f7f6 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -106,6 +106,8 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) }, + /* I/OAT v3.3 platforms */ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, @@ -243,10 +245,15 @@ static bool is_bdx_ioat(struct pci_dev *pdev) } } +static inline bool is_skx_ioat(struct pci_dev *pdev) +{ + return (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX) ? true : false; +} + static bool is_xeon_cb32(struct pci_dev *pdev) { return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev) || is_bdx_ioat(pdev); + is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev); } bool is_bwd_ioat(struct pci_dev *pdev) -- cgit v1.2.3 From c2804b21f2e7f46f76500aa58850127b31054c84 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 13 Dec 2016 11:15:27 -0700 Subject: dmaengine: ioatdma: workaround SKX ioatdma version [ Upstream commit 34a31f0af84158955a9747fb5c6712da5bbb5331 ] The Skylake ioatdma is technically CBDMA 3.2+ and contains the same hardware bits with some additional 3.3 features, but it's not really 3.3 where the driver is concerned. Signed-off-by: Dave Jiang Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index b45b0363f7f6..0dea6d55f0ff 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1357,6 +1357,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) device->version = readb(device->reg_base + IOAT_VER_OFFSET); if (device->version >= IOAT_VER_3_0) { + if (is_skx_ioat(pdev)) + device->version = IOAT_VER_3_2; err = ioat3_dma_probe(device, ioat_dca_enabled); if (device->version >= IOAT_VER_3_3) -- cgit v1.2.3 From 000224c1106c2abc21a376c5bb851b1ffe4b4458 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Dec 2016 19:48:19 +0100 Subject: l2tp: consider '::' as wildcard address in l2tp_ip6 socket lookup [ Upstream commit 97b84fd6d91766ea57dcc350d78f42639e011c30 ] An L2TP socket bound to the unspecified address should match with any address. If not, it can't receive any packet and __l2tp_ip6_bind_lookup() can't prevent another socket from binding on the same device/tunnel ID. While there, rename the 'addr' variable to 'sk_laddr' (local addr), to make following patch clearer. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b10abef6b0a0..1d522ce833e6 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -64,7 +64,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { - const struct in6_addr *addr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -72,7 +72,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && - (!addr || ipv6_addr_equal(addr, laddr)) && + (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; -- cgit v1.2.3 From 29e0adf8bacd3cb91893ec90eb6a61d938209a24 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 19 Dec 2016 06:33:51 +0100 Subject: dmaengine: ti-dma-crossbar: Add some 'of_node_put()' in error path. [ Upstream commit 75bdc7f31a3a6e9a12e218b31a44a1f54a91554c ] Add some missing 'of_node_put()' in early exit error path. Signed-off-by: Christophe JAILLET Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ti-dma-crossbar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 3f24aeb48c0e..2403475a37cf 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -149,6 +149,7 @@ static int ti_am335x_xbar_probe(struct platform_device *pdev) match = of_match_node(ti_am335x_master_match, dma_node); if (!match) { dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); return -EINVAL; } @@ -339,6 +340,7 @@ static int ti_dra7_xbar_probe(struct platform_device *pdev) match = of_match_node(ti_dra7_master_match, dma_node); if (!match) { dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); return -EINVAL; } -- cgit v1.2.3 From 8af0937aee50d1d1f41f1caf56aefcf364d3b1b9 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 12 Dec 2016 13:37:52 -0600 Subject: usb: dwc3: omap: fix race of pm runtime with irq handler in probe [ Upstream commit 12a7f17fac5b370bec87259e4c718faf563ce900 ] Now races can happen between interrupt handler execution and PM runtime in error handling code path in probe and in dwc3_omap_remove() which will lead to system crash: in probe: ... err1: pm_runtime_put_sync(dev); ^^ PM runtime can race with IRQ handler when deferred probing happening due to extcon pm_runtime_disable(dev); return ret; in dwc3_omap_remove: ... dwc3_omap_disable_irqs(omap); ^^ IRQs are disabled in HW, but handler may still run of_platform_depopulate(omap->dev); pm_runtime_put_sync(&pdev->dev); ^^ PM runtime can race with IRQ handler pm_runtime_disable(&pdev->dev); return 0; So, OMAP DWC3 IRQ need to be disabled before calling pm_runtime_put() in probe and in dwc3_omap_remove(). Acked-by: Tony Lindgren Signed-off-by: Grygorii Strashko Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-omap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 5dd1832564c7..35b63518baf6 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -511,7 +512,7 @@ static int dwc3_omap_probe(struct platform_device *pdev) /* check the DMA Status */ reg = dwc3_omap_readl(omap->base, USBOTGSS_SYSCONFIG); - + irq_set_status_flags(omap->irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(dev, omap->irq, dwc3_omap_interrupt, dwc3_omap_interrupt_thread, IRQF_SHARED, "dwc3-omap", omap); @@ -532,7 +533,7 @@ static int dwc3_omap_probe(struct platform_device *pdev) } dwc3_omap_enable_irqs(omap); - + enable_irq(omap->irq); return 0; err2: @@ -553,6 +554,7 @@ static int dwc3_omap_remove(struct platform_device *pdev) extcon_unregister_notifier(omap->edev, EXTCON_USB, &omap->vbus_nb); extcon_unregister_notifier(omap->edev, EXTCON_USB_HOST, &omap->id_nb); dwc3_omap_disable_irqs(omap); + disable_irq(omap->irq); of_platform_depopulate(omap->dev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); -- cgit v1.2.3 From e3603533ae6394f377bf3a767787cddecc176696 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 15 Nov 2016 14:53:13 +0100 Subject: ARM64: zynqmp: Fix W=1 dtc 1.4 warnings [ Upstream commit 4ea2a6be9565455f152c12f80222af1582ede0c7 ] The patch removes these warnings reported by dtc 1.4: Warning (unit_address_vs_reg): Node /amba_apu has a reg or ranges property, but no unit name Warning (unit_address_vs_reg): Node /memory has a reg or ranges property, but no unit name Signed-off-by: Michal Simek Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts | 2 +- arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts index 358089687a69..ef1b9e573af0 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts +++ b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts @@ -27,7 +27,7 @@ stdout-path = "serial0:115200n8"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x0 0x0 0x0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 68a908334c7b..83791eadff41 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -72,7 +72,7 @@ <1 10 0xf08>; }; - amba_apu { + amba_apu: amba_apu@0 { compatible = "simple-bus"; #address-cells = <2>; #size-cells = <1>; -- cgit v1.2.3 From 3209f3f69eeb187d5174f253b1b91fbdbf264f33 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 22 Dec 2016 09:19:25 -0800 Subject: ARM64: zynqmp: Fix i2c node's compatible string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c415f9e8304a1d235ef118d912f374ee2e46c45d ] The Zynq Ultrascale MP uses version 1.4 of the Cadence IP core which fixes some silicon bugs that needed software workarounds in Version 1.0 that was used on Zynq systems. Signed-off-by: Moritz Fischer Cc: Michal Simek Cc: Sören Brinkmann Cc: Rob Herring Acked-by: Sören Brinkmann Signed-off-by: Michal Simek Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 83791eadff41..54dc28351c8c 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -175,7 +175,7 @@ }; i2c0: i2c@ff020000 { - compatible = "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 17 4>; @@ -185,7 +185,7 @@ }; i2c1: i2c@ff030000 { - compatible = "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 18 4>; -- cgit v1.2.3 From 69f3df1fd01322d90a35bac071c53b9497ea3601 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Jan 2017 00:20:49 +0900 Subject: perf probe: Fix to get correct modname from elf header [ Upstream commit 1f2ed153b916c95a49a1ca9d7107738664224b7f ] Since 'perf probe' supports cross-arch probes, it is possible to analyze different arch kernel image which has different bits-per-long. In that case, it fails to get the module name because it uses the MOD_NAME_OFFSET macro based on the host machine bits-per-long, instead of the target arch bits-per-long. This fixes above issue by changing modname-offset based on the target archs bit width. This is ok because linux kernel uses LP64 model on 64bit arch. E.g. without this (on x86_64, and target module is arm32): $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup p:probe/configfs_lookup :configfs_lookup+0 ^-Here is an empty module name. With this fix, you can see correct module name: $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup p:probe/configfs_lookup configfs:configfs_lookup+0 Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148337043836.6752.383495516397005695.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/probe-event.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7ea13f44178d..6c50d9f8e210 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -267,21 +267,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address) return true; } -/* - * NOTE: - * '.gnu.linkonce.this_module' section of kernel module elf directly - * maps to 'struct module' from linux/module.h. This section contains - * actual module name which will be used by kernel after loading it. - * But, we cannot use 'struct module' here since linux/module.h is not - * exposed to user-space. Offset of 'name' has remained same from long - * time, so hardcoding it here. - */ -#ifdef __LP64__ -#define MOD_NAME_OFFSET 24 -#else -#define MOD_NAME_OFFSET 12 -#endif - /* * @module can be module name of module file path. In case of path, * inspect elf and find out what is actual module name. @@ -296,6 +281,7 @@ static char *find_module_name(const char *module) Elf_Data *data; Elf_Scn *sec; char *mod_name = NULL; + int name_offset; fd = open(module, O_RDONLY); if (fd < 0) @@ -317,7 +303,21 @@ static char *find_module_name(const char *module) if (!data || !data->d_buf) goto ret_err; - mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET); + /* + * NOTE: + * '.gnu.linkonce.this_module' section of kernel module elf directly + * maps to 'struct module' from linux/module.h. This section contains + * actual module name which will be used by kernel after loading it. + * But, we cannot use 'struct module' here since linux/module.h is not + * exposed to user-space. Offset of 'name' has remained same from long + * time, so hardcoding it here. + */ + if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) + name_offset = 12; + else /* expect ELFCLASS64 by default */ + name_offset = 24; + + mod_name = strdup((char *)data->d_buf + name_offset); ret_err: elf_end(elf); -- cgit v1.2.3 From ebd4642ee412ba3719274d80658a4c785fc43678 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 29 Dec 2016 14:41:05 +0200 Subject: ARM: s3c2410_defconfig: Fix invalid values for NF_CT_PROTO_* [ Upstream commit 3ef01c968fbfb21c2f16281445d30a865ee4412c ] NF_CT_PROTO_DCCP/SCTP/UDPLITE were switched from tristate to boolean so defconfig needs to be adjusted to silence warnings: warning: symbol value 'm' invalid for NF_CT_PROTO_DCCP warning: symbol value 'm' invalid for NF_CT_PROTO_SCTP warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/s3c2410_defconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index bc4bfe02e611..60d3fecd7a22 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -86,9 +86,9 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m -- cgit v1.2.3 From a15a3d92eca8dd20f55ba574ff5cd1c527af4272 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 30 Dec 2016 02:27:31 +0100 Subject: ACPI / scan: Prefer devices without _HID/_CID for _ADR matching [ Upstream commit c2a6bbaf0c5f90463a7011a295bbdb7e33c80b51 ] The way acpi_find_child_device() works currently is that, if there are two (or more) devices with the same _ADR value in the same namespace scope (which is not specifically allowed by the spec and the OS behavior in that case is not defined), the first one of them found to be present (with the help of _STA) will be returned. This covers the majority of cases, but is not sufficient if some of the devices in question have a _HID (or _CID) returning some valid ACPI/PNP device IDs (which is disallowed by the spec) and the ASL writers' expectation appears to be that the OS will match devices without a valid ACPI/PNP device ID against a given bus address first. To cover this special case as well, modify find_child_checks() to prefer devices without ACPI/PNP device IDs over devices that have them. Suggested-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Tested-by: Hans de Goede Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 5ea5dc219f56..73c9c7fa9001 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -98,7 +98,15 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) if (check_children && list_empty(&adev->children)) return -ENODEV; - return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; + /* + * If the device has a _HID (or _CID) returning a valid ACPI/PNP + * device ID, it is better to make it look less attractive here, so that + * the other device with the same _ADR value (that may not have a valid + * device ID) can be matched going forward. [This means a second spec + * violation in a row, so whatever we do here is best effort anyway.] + */ + return sta_present && list_empty(&adev->pnp.ids) ? + FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } struct acpi_device *acpi_find_child_device(struct acpi_device *parent, -- cgit v1.2.3 From 92d6a813b03a576dd821667b6566a1ddf7617137 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 2 Jan 2017 17:28:39 -0600 Subject: usb: gadget: Fix copy/pasted error message [ Upstream commit 43aef5c2ca90535b3227e97e71604291875444ed ] This fixes an error message that was probably copied and pasted. The same message is used for both the in and out endpoints, so it makes it impossible to know which one actually failed because both cases say "IN". Make the out endpoint error message say "OUT". Signed-off-by: David Lechner Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index b0f71957d00b..b6d4b484c51a 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -582,7 +582,7 @@ static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) } status = usb_ep_enable(hidg->out_ep); if (status < 0) { - ERROR(cdev, "Enable IN endpoint FAILED!\n"); + ERROR(cdev, "Enable OUT endpoint FAILED!\n"); goto fail; } hidg->out_ep->driver_data = hidg; -- cgit v1.2.3 From 78418b86733462c913aea82e081594b1005751ba Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 30 Nov 2016 16:11:04 -0800 Subject: Btrfs: use down_read_nested to make lockdep silent [ Upstream commit e321f8a801d7b4c40da8005257b05b9c2b51b072 ] If @block_group is not @used_bg, it'll try to get @used_bg's lock without droping @block_group 's lock and lockdep has throwed a scary deadlock warning about it. Fix it by using down_read_nested. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e46e7fbe1b34..14a37ff0b9e3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7401,7 +7401,8 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, spin_unlock(&cluster->refill_lock); - down_read(&used_bg->data_rwsem); + /* We should only have one-level nested. */ + down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING); spin_lock(&cluster->refill_lock); if (used_bg == cluster->block_group) -- cgit v1.2.3 From 6731212836059c7ac3575e21c499380e795516a9 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 30 Nov 2016 16:20:25 -0800 Subject: Btrfs: fix lockdep warning about log_mutex [ Upstream commit 781feef7e6befafd4d9787d1f7ada1f9ccd504e4 ] While checking INODE_REF/INODE_EXTREF for a corner case, we may acquire a different inode's log_mutex with holding the current inode's log_mutex, and lockdep has complained this with a possilble deadlock warning. Fix this by using mutex_lock_nested() when processing the other inode's log_mutex. Reviewed-by: Filipe Manana Signed-off-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b89004513c09..309313b71617 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -37,6 +37,7 @@ */ #define LOG_INODE_ALL 0 #define LOG_INODE_EXISTS 1 +#define LOG_OTHER_INODE 2 /* * directory trouble cases @@ -4623,7 +4624,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (S_ISDIR(inode->i_mode) || (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags) && - inode_only == LOG_INODE_EXISTS)) + inode_only >= LOG_INODE_EXISTS)) max_key.type = BTRFS_XATTR_ITEM_KEY; else max_key.type = (u8)-1; @@ -4647,7 +4648,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, return ret; } - mutex_lock(&BTRFS_I(inode)->log_mutex); + if (inode_only == LOG_OTHER_INODE) { + inode_only = LOG_INODE_EXISTS; + mutex_lock_nested(&BTRFS_I(inode)->log_mutex, + SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&BTRFS_I(inode)->log_mutex); + } /* * a brute force approach to making sure we get the most uptodate @@ -4799,7 +4806,7 @@ again: * unpin it. */ err = btrfs_log_inode(trans, root, other_inode, - LOG_INODE_EXISTS, + LOG_OTHER_INODE, 0, LLONG_MAX, ctx); iput(other_inode); if (err) -- cgit v1.2.3 From 49fc90b443c71b3cdbfaf15c0c15fc7e3ffc5731 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 3 Jan 2017 16:26:04 +0100 Subject: benet: stricter vxlan offloading check in be_features_check [ Upstream commit 096de2f83ebc8e0404c5b7e847a4abd27b9739da ] When VXLAN offloading is enabled, be_features_check() tries to check if an encapsulated packet is indeed a VXLAN packet. The check is not strict enough, and considers any UDP-encapsulated ethernet frame with a 8-byte tunnel header as being VXLAN. Unfortunately, both GENEVE and VXLAN-GPE have a 8-byte header, so they get through this check. Force the UDP destination port to be the one that has been offloaded to hardware. Without this, GENEVE-encapsulated packets can end up having an incorrect checksum when both a GENEVE and a VXLAN (offloaded) tunnel are configured. This is similar to commit a547224dceed ("mlx4e: Do not attempt to offload VXLAN ports that are unrecognized"). Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b3c9cbef766e..5626908f3f7a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5186,7 +5186,9 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + sizeof(struct udphdr) + sizeof(struct vxlanhdr) || + !adapter->vxlan_port || + udp_hdr(skb)->dest != adapter->vxlan_port) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; -- cgit v1.2.3 From f76ddff6c5215131483efed20acc896e021abb9f Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 22 Dec 2016 17:13:54 -0800 Subject: Btrfs: adjust outstanding_extents counter properly when dio write is split [ Upstream commit c2931667c83ded6504b3857e99cc45b21fa496fb ] Currently how btrfs dio deals with split dio write is not good enough if dio write is split into several segments due to the lack of contiguous space, a large dio write like 'dd bs=1G count=1' can end up with incorrect outstanding_extents counter and endio would complain loudly with an assertion. This fixes the problem by compensating the outstanding_extents counter in inode if a large dio write gets split. Reported-by: Anand Jain Tested-by: Anand Jain Signed-off-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a2a014b19f18..8a05fa7e2152 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7648,11 +7648,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode, * within our reservation, otherwise we need to adjust our inode * counter appropriately. */ - if (dio_data->outstanding_extents) { + if (dio_data->outstanding_extents >= num_extents) { dio_data->outstanding_extents -= num_extents; } else { + /* + * If dio write length has been split due to no large enough + * contiguous space, we need to compensate our inode counter + * appropriately. + */ + u64 num_needed = num_extents - dio_data->outstanding_extents; + spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents += num_extents; + BTRFS_I(inode)->outstanding_extents += num_needed; spin_unlock(&BTRFS_I(inode)->lock); } } -- cgit v1.2.3 From 704a6d719d4c4228c7f0b1fbaf6bc35fe2ccf1d8 Mon Sep 17 00:00:00 2001 From: Jiandi An Date: Tue, 27 Dec 2016 18:47:32 -0600 Subject: Xen: ARM: Zero reserved fields of xatp before making hypervisor call [ Upstream commit 0b47a6bd1150f4846b1d61925a4cc5a96593a541 ] Ensure all reserved fields of xatp are zero before making hypervisor call to XEN in xen_map_device_mmio(). xenmem_add_to_physmap_one() in XEN fails the mapping request if extra.res reserved field in xatp is not zero for XENMAPSPACE_dev_mmio request. Signed-off-by: Jiandi An Reviewed-by: Stefano Stabellini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/arm-device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/xen/arm-device.c b/drivers/xen/arm-device.c index 778acf80aacb..85dd20e05726 100644 --- a/drivers/xen/arm-device.c +++ b/drivers/xen/arm-device.c @@ -58,9 +58,13 @@ static int xen_map_device_mmio(const struct resource *resources, xen_pfn_t *gpfns; xen_ulong_t *idxs; int *errs; - struct xen_add_to_physmap_range xatp; for (i = 0; i < count; i++) { + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .space = XENMAPSPACE_dev_mmio + }; + r = &resources[i]; nr = DIV_ROUND_UP(resource_size(r), XEN_PAGE_SIZE); if ((resource_type(r) != IORESOURCE_MEM) || (nr == 0)) @@ -87,9 +91,7 @@ static int xen_map_device_mmio(const struct resource *resources, idxs[j] = XEN_PFN_DOWN(r->start) + j; } - xatp.domid = DOMID_SELF; xatp.size = nr; - xatp.space = XENMAPSPACE_dev_mmio; set_xen_guest_handle(xatp.gpfns, gpfns); set_xen_guest_handle(xatp.idxs, idxs); -- cgit v1.2.3 From ab5e7df9e064d54f442108544d5dfb8116cb2f41 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 3 Jan 2017 12:42:42 +0100 Subject: tools lib traceevent: Fix prev/next_prio for deadline tasks [ Upstream commit 074859184d770824f4437dca716bdeb625ae8b1c ] Currently, the sched:sched_switch tracepoint reports deadline tasks with priority -1. But when reading the trace via perf script I've got the following output: # ./d & # (d is a deadline task, see [1]) # perf record -e sched:sched_switch -a sleep 1 # perf script ... swapper 0 [000] 2146.962441: sched:sched_switch: swapper/0:0 [120] R ==> d:2593 [4294967295] d 2593 [000] 2146.972472: sched:sched_switch: d:2593 [4294967295] R ==> g:2590 [4294967295] The task d reports the wrong priority [4294967295]. This happens because the "int prio" is stored in an unsigned long long val. Although it is set as a %lld, as int is shorter than unsigned long long, trace_seq_printf prints it as a positive number. The fix is just to cast the val as an int, and print it as a %d, as in the sched:sched_switch tracepoint's "format". The output with the fix is: # ./d & # perf record -e sched:sched_switch -a sleep 1 # perf script ... swapper 0 [000] 4306.374037: sched:sched_switch: swapper/0:0 [120] R ==> d:10941 [-1] d 10941 [000] 4306.383823: sched:sched_switch: d:10941 [-1] R ==> swapper/0:0 [120] [1] d.c --- #include #include #include #include #include struct sched_attr { __u32 size, sched_policy; __u64 sched_flags; __s32 sched_nice; __u32 sched_priority; __u64 sched_runtime, sched_deadline, sched_period; }; int sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) { return syscall(__NR_sched_setattr, pid, attr, flags); } int main(void) { struct sched_attr attr = { .size = sizeof(attr), .sched_policy = SCHED_DEADLINE, /* This creates a 10ms/30ms reservation */ .sched_runtime = 10 * 1000 * 1000, .sched_period = attr.sched_deadline = 30 * 1000 * 1000, }; if (sched_setattr(0, &attr, 0) < 0) { perror("sched_setattr"); return -1; } for(;;); } --- Committer notes: Got the program from the provided URL, http://bristot.me/lkml/d.c, trimmed it and included in the cset log above, so that we have everything needed to test it in one place. Signed-off-by: Daniel Bristot de Oliveira Acked-by: Steven Rostedt Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Daniel Bristot de Oliveira Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/866ef75bcebf670ae91c6a96daa63597ba981f0d.1483443552.git.bristot@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/lib/traceevent/plugin_sched_switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index f1ce60065258..ec30c2fcbac0 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld ", val); if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) - trace_seq_printf(s, "[%lld] ", val); + trace_seq_printf(s, "[%d] ", (int) val); if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0) write_state(s, val); @@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld", val); if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0) - trace_seq_printf(s, " [%lld]", val); + trace_seq_printf(s, " [%d]", (int) val); return 0; } -- cgit v1.2.3 From 0b8656414e29578f00e40e7d19c411039b9ab71e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 14 Feb 2017 07:43:56 +0100 Subject: xfrm: Don't use sk_family for socket policy lookups commit 4c86d77743a54fb2d8a4d18a037a074c892bb3be upstream. On IPv4-mapped IPv6 addresses sk_family is AF_INET6, but the flow informations are created based on AF_INET. So the routing set up 'struct flowi4' but we try to access 'struct flowi6' what leads to an out of bounds access. Fix this by using the family we get with the dst_entry, like we do it for the standard policy lookup. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8da67f7c9c5a..e26b515f7794 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1248,7 +1248,7 @@ static inline int policy_to_flow_dir(int dir) } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, - const struct flowi *fl) + const struct flowi *fl, u16 family) { struct xfrm_policy *pol; @@ -1256,8 +1256,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); + bool match = xfrm_selector_match(&pol->selector, fl, family); int err = 0; if (match) { @@ -2206,7 +2205,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; - pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2485,7 +2484,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, family); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; -- cgit v1.2.3 From d8086c3bd3a556e7598c93e5d303f771356adf9f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Jan 2017 12:03:59 -0300 Subject: perf tools: Install tools/lib/traceevent plugins with install-bin [ Upstream commit 30a9c6444810429aa2b7cbfbd453ce339baaadbf ] Those are binaries as well, so should be installed by: make -C tools/perf install-bin' too. Cc: Alexander Shishkin Cc: Daniel Bristot de Oliveira Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-3841b37u05evxrs1igkyu6ks@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile.perf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 982d6439bb07..ef52d1e3d431 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -729,9 +729,9 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' -install-bin: install-tools install-tests +install-bin: install-tools install-tests install-traceevent-plugins -install: install-bin try-install-man install-traceevent-plugins +install: install-bin try-install-man install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' -- cgit v1.2.3 From ddc0ec3be8077a689e0136c873519bf851a741e6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Jan 2017 15:19:21 -0300 Subject: perf symbols: Robustify reading of build-id from sysfs [ Upstream commit 7934c98a6e04028eb34c1293bfb5a6b0ab630b66 ] Markus reported that perf segfaults when reading /sys/kernel/notes from a kernel linked with GNU gold, due to what looks like a gold bug, so do some bounds checking to avoid crashing in that case. Reported-by: Markus Trippelsdorf Report-Link: http://lkml.kernel.org/r/20161219161821.GA294@x4 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ryhgs6a6jxvz207j2636w31c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/symbol-elf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 99400b0e8f2a..adbc6c02c3aa 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) break; } else { int n = namesz + descsz; + + if (n > (int)sizeof(bf)) { + n = sizeof(bf); + pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n", + __func__, filename, nhdr.n_namesz, nhdr.n_descsz); + } if (read(fd, bf, n) != n) break; } -- cgit v1.2.3 From a417ea5b9d22dc5733f6fac5cee72735cb7f8d99 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 13 Dec 2016 13:50:52 +0530 Subject: video: fbdev: cobalt_lcdfb: Handle return NULL error from devm_ioremap [ Upstream commit 4dcd19bfabaee8f9f4bcf203afba09b98ccbaf76 ] Here, If devm_ioremap will fail. It will return NULL. Kernel can run into a NULL-pointer dereference. This error check will avoid NULL pointer dereference. Signed-off-by: Arvind Yadav Acked-by: Yoichi Yuasa Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/cobalt_lcdfb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c index 2d3b691f3fc4..038ac6934fe9 100644 --- a/drivers/video/fbdev/cobalt_lcdfb.c +++ b/drivers/video/fbdev/cobalt_lcdfb.c @@ -308,6 +308,11 @@ static int cobalt_lcdfb_probe(struct platform_device *dev) info->screen_size = resource_size(res); info->screen_base = devm_ioremap(&dev->dev, res->start, info->screen_size); + if (!info->screen_base) { + framebuffer_release(info); + return -ENOMEM; + } + info->fbops = &cobalt_lcd_fbops; info->fix = cobalt_lcdfb_fix; info->fix.smem_start = res->start; -- cgit v1.2.3 From 812a7df6556faae25deb42dbcc9e47829855556f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 3 Jan 2017 17:26:46 +0530 Subject: vfio-pci: Handle error from pci_iomap [ Upstream commit e19f32da5ded958238eac1bbe001192acef191a2 ] Here, pci_iomap can fail, handle this case release selected pci regions and return -ENOMEM. Signed-off-by: Arvind Yadav Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 031bc08d000d..43559bed7822 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1173,6 +1173,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) return ret; vdev->barmap[index] = pci_iomap(pdev, index, 0); + if (!vdev->barmap[index]) { + pci_release_selected_regions(pdev, 1 << index); + return -ENOMEM; + } } vma->vm_private_data = vdev; -- cgit v1.2.3 From e95ec3582a295d95de8d9ebf5e31426599563c7f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 3 Jan 2017 14:27:26 +0000 Subject: arm64: mm: fix show_pte KERN_CONT fallout [ Upstream commit 6ef4fb387d50fa8f3bffdffc868b57e981cdd709 ] Recent changes made KERN_CONT mandatory for continued lines. In the absence of KERN_CONT, a newline may be implicit inserted by the core printk code. In show_pte, we (erroneously) use printk without KERN_CONT for continued prints, resulting in output being split across a number of lines, and not matching the intended output, e.g. [ff000000000000] *pgd=00000009f511b003 , *pud=00000009f4a80003 , *pmd=0000000000000000 Fix this by using pr_cont() for all the continuations. Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8b8ac3db4092..0e90c7e0279c 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -101,21 +101,21 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pud = pud_offset(pgd, addr); - printk(", *pud=%016llx", pud_val(*pud)); + pr_cont(", *pud=%016llx", pud_val(*pud)); if (pud_none(*pud) || pud_bad(*pud)) break; pmd = pmd_offset(pud, addr); - printk(", *pmd=%016llx", pmd_val(*pmd)); + pr_cont(", *pmd=%016llx", pmd_val(*pmd)); if (pmd_none(*pmd) || pmd_bad(*pmd)) break; pte = pte_offset_map(pmd, addr); - printk(", *pte=%016llx", pte_val(*pte)); + pr_cont(", *pte=%016llx", pte_val(*pte)); pte_unmap(pte); } while(0); - printk("\n"); + pr_cont("\n"); } #ifdef CONFIG_ARM64_HW_AFDBM -- cgit v1.2.3 From a7526723b7fced9fc0dfa033c4227f040e351e78 Mon Sep 17 00:00:00 2001 From: Daniel Schultz Date: Wed, 4 Jan 2017 16:18:10 +0000 Subject: nvmem: imx-ocotp: Fix wrong register size [ Upstream commit 14ba972842f9e84e6d3264bc0302101b8a792288 ] All i.MX6 SoCs have an OCOTP Controller with 4kbit fuses. The i.MX6SL is an exception and has only 2kbit fuses. In the TRM for the i.MX6DQ (IMX6QDRM - Rev 2, 06/2014) the fuses size is described in chapter 46.1.1 with: "32-bit word restricted program and read to 4Kbits of eFuse OTP(512x8)." In the TRM for the i.MX6SL (IMX6SLRM - Rev 2, 06/2015) the fuses size is described in chapter 34.1.1 with: "32-bit word restricted program and read to 2 kbit of eFuse OTP(128x8)." Since the Freescale Linux kernel OCOTP driver works with a fuses size of 2 kbit for the i.MX6SL, it looks like the TRM is wrong and the formula to calculate the correct fuses size has to be 256x8. Signed-off-by: Daniel Schultz Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/imx-ocotp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index ac27b9bac3b9..8e7b120696fa 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -71,7 +71,7 @@ static struct nvmem_config imx_ocotp_nvmem_config = { static const struct of_device_id imx_ocotp_dt_ids[] = { { .compatible = "fsl,imx6q-ocotp", (void *)128 }, - { .compatible = "fsl,imx6sl-ocotp", (void *)32 }, + { .compatible = "fsl,imx6sl-ocotp", (void *)64 }, { .compatible = "fsl,imx6sx-ocotp", (void *)128 }, { }, }; -- cgit v1.2.3 From 2548d893a542bab079ef72266ff297ff61a20ef3 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 3 Jan 2017 17:22:20 +0800 Subject: net: usb: asix_devices: add .reset_resume for USB PM [ Upstream commit 63dfb0dac9055145db85ce764355aef2f563739a ] The USB core may call reset_resume when it fails to resume asix device. And USB core can recovery this abnormal resume at low level driver, the same .resume at asix driver can work too. Add .reset_resume can avoid disconnecting after backing from system resume, and NFS can still be mounted after this commit. Signed-off-by: Peter Chen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/asix_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index dc7b6392e75a..50737def774c 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1369,6 +1369,7 @@ static struct usb_driver asix_driver = { .probe = usbnet_probe, .suspend = asix_suspend, .resume = asix_resume, + .reset_resume = asix_resume, .disconnect = usbnet_disconnect, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, -- cgit v1.2.3 From 2909c9c2d7138f1900bddf16d0515025306a8e05 Mon Sep 17 00:00:00 2001 From: Caleb Crome Date: Tue, 3 Jan 2017 10:22:57 -0800 Subject: ASoC: fsl_ssi: set fifo watermark to more reliable value [ Upstream commit 4ee437fbf626b5ad756889d8bc0fcead3d66dde7 ] The fsl_ssi fifo watermark is by default set to 2 free spaces (i.e. activate DMA on FIFO when only 2 spaces are left.) This means the DMA must service the fifo within 2 audio samples, which is just not enough time for many use cases with high data rate. In many configurations the audio channel slips (causing l/r swap in stereo configurations, or channel slipping in multi-channel configurations). This patch gives more breathing room and allows the SSI to operate reliably by changing the fifio refill watermark to 8. There is no change in behavior for older chips (with an 8-deep fifo). Only the newer chips with a 15-deep fifo get the new behavior. I suspect a new fifo depth setting could be optimized on the older chips too, but I have not tested. Signed-off-by: Caleb Crome Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/fsl_ssi.c | 74 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 50349437d961..fde08660b63b 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -224,6 +224,12 @@ struct fsl_ssi_soc_data { * @dbg_stats: Debugging statistics * * @soc: SoC specific data + * + * @fifo_watermark: the FIFO watermark setting. Notifies DMA when + * there are @fifo_watermark or fewer words in TX fifo or + * @fifo_watermark or more empty words in RX fifo. + * @dma_maxburst: max number of words to transfer in one go. So far, + * this is always the same as fifo_watermark. */ struct fsl_ssi_private { struct regmap *regs; @@ -263,6 +269,9 @@ struct fsl_ssi_private { const struct fsl_ssi_soc_data *soc; struct device *dev; + + u32 fifo_watermark; + u32 dma_maxburst; }; /* @@ -1051,21 +1060,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, regmap_write(regs, CCSR_SSI_SRCR, srcr); regmap_write(regs, CCSR_SSI_SCR, scr); - /* - * Set the watermark for transmit FIFI 0 and receive FIFO 0. We don't - * use FIFO 1. We program the transmit water to signal a DMA transfer - * if there are only two (or fewer) elements left in the FIFO. Two - * elements equals one frame (left channel, right channel). This value, - * however, depends on the depth of the transmit buffer. - * - * We set the watermark on the same level as the DMA burstsize. For - * fiq it is probably better to use the biggest possible watermark - * size. - */ - if (ssi_private->use_dma) - wm = ssi_private->fifo_depth - 2; - else - wm = ssi_private->fifo_depth; + wm = ssi_private->fifo_watermark; regmap_write(regs, CCSR_SSI_SFCSR, CCSR_SSI_SFCSR_TFWM0(wm) | CCSR_SSI_SFCSR_RFWM0(wm) | @@ -1373,12 +1368,8 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, dev_dbg(&pdev->dev, "could not get baud clock: %ld\n", PTR_ERR(ssi_private->baudclk)); - /* - * We have burstsize be "fifo_depth - 2" to match the SSI - * watermark setting in fsl_ssi_startup(). - */ - ssi_private->dma_params_tx.maxburst = ssi_private->fifo_depth - 2; - ssi_private->dma_params_rx.maxburst = ssi_private->fifo_depth - 2; + ssi_private->dma_params_tx.maxburst = ssi_private->dma_maxburst; + ssi_private->dma_params_rx.maxburst = ssi_private->dma_maxburst; ssi_private->dma_params_tx.addr = ssi_private->ssi_phys + CCSR_SSI_STX0; ssi_private->dma_params_rx.addr = ssi_private->ssi_phys + CCSR_SSI_SRX0; @@ -1543,6 +1534,47 @@ static int fsl_ssi_probe(struct platform_device *pdev) /* Older 8610 DTs didn't have the fifo-depth property */ ssi_private->fifo_depth = 8; + /* + * Set the watermark for transmit FIFO 0 and receive FIFO 0. We don't + * use FIFO 1 but set the watermark appropriately nontheless. + * We program the transmit water to signal a DMA transfer + * if there are N elements left in the FIFO. For chips with 15-deep + * FIFOs, set watermark to 8. This allows the SSI to operate at a + * high data rate without channel slipping. Behavior is unchanged + * for the older chips with a fifo depth of only 8. A value of 4 + * might be appropriate for the older chips, but is left at + * fifo_depth-2 until sombody has a chance to test. + * + * We set the watermark on the same level as the DMA burstsize. For + * fiq it is probably better to use the biggest possible watermark + * size. + */ + switch (ssi_private->fifo_depth) { + case 15: + /* + * 2 samples is not enough when running at high data + * rates (like 48kHz @ 16 bits/channel, 16 channels) + * 8 seems to split things evenly and leave enough time + * for the DMA to fill the FIFO before it's over/under + * run. + */ + ssi_private->fifo_watermark = 8; + ssi_private->dma_maxburst = 8; + break; + case 8: + default: + /* + * maintain old behavior for older chips. + * Keeping it the same because I don't have an older + * board to test with. + * I suspect this could be changed to be something to + * leave some more space in the fifo. + */ + ssi_private->fifo_watermark = ssi_private->fifo_depth - 2; + ssi_private->dma_maxburst = ssi_private->fifo_depth - 2; + break; + } + dev_set_drvdata(&pdev->dev, ssi_private); if (ssi_private->soc->imx) { -- cgit v1.2.3 From f10e2065ce76b8ca54301d2de40aa9b351daf407 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 4 Jan 2017 23:10:23 +0300 Subject: sh_eth: enable RX descriptor word 0 shift on SH7734 [ Upstream commit 71eae1ca77fd6be218d8a952d97bba827e56516d ] The RX descriptor word 0 on SH7734 has the RFS[9:0] field in bits 16-25 (bits 0-15 usually used for that are occupied by the packet checksum). Thus we need to set the 'shift_rd0' field in the SH7734 SoC data... Fixes: f0e81fecd4f8 ("net: sh_eth: Add support SH7734") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index a2d218b28c0e..12be259394c6 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -819,6 +819,7 @@ static struct sh_eth_cpu_data sh7734_data = { .tsu = 1, .hw_crc = 1, .select_mii = 1, + .shift_rd0 = 1, }; /* SH7763 */ -- cgit v1.2.3 From 301681d60003b172af1a70fdd833567e16f55d9a Mon Sep 17 00:00:00 2001 From: Yuriy Kolerov Date: Wed, 28 Dec 2016 11:46:25 +0300 Subject: ARCv2: IRQ: Call entry/exit functions for chained handlers in MCIP [ Upstream commit e51d5d02f688c45b6f644f472f0c80fdfa73f0cb ] It is necessary to call entry/exit functions for parent interrupt controllers for proper masking/unmasking of interrupt lines. Signed-off-by: Yuriy Kolerov Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/mcip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index f39142acc89e..be131b296a55 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -221,10 +222,13 @@ static irq_hw_number_t idu_first_hwirq; static void idu_cascade_isr(struct irq_desc *desc) { struct irq_domain *idu_domain = irq_desc_get_handler_data(desc); + struct irq_chip *core_chip = irq_desc_get_chip(desc); irq_hw_number_t core_hwirq = irqd_to_hwirq(irq_desc_get_irq_data(desc)); irq_hw_number_t idu_hwirq = core_hwirq - idu_first_hwirq; + chained_irq_enter(core_chip, desc); generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq)); + chained_irq_exit(core_chip, desc); } static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq) -- cgit v1.2.3 From 12dac5fcf5ec156f9f056aff1a8cbed04fde5538 Mon Sep 17 00:00:00 2001 From: Ioan-Adrian Ratiu Date: Thu, 5 Jan 2017 00:37:47 +0200 Subject: ALSA: usb-audio: test EP_FLAG_RUNNING at urb completion [ Upstream commit 13a6c8328e6056932dc680e447d4c5e8ad9add17 ] Testing EP_FLAG_RUNNING in snd_complete_urb() before running the completion logic allows us to save a few cpu cycles by returning early, skipping the pending urb in case the stream was stopped; the stop logic handles the urb and sets the completion callbacks to NULL. Signed-off-by: Ioan-Adrian Ratiu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index c5251aaad844..b8044c6034b3 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -384,6 +384,9 @@ static void snd_complete_urb(struct urb *urb) if (unlikely(atomic_read(&ep->chip->shutdown))) goto exit_clear; + if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + goto exit_clear; + if (usb_pipeout(ep->pipe)) { retire_outbound_urb(ep, ctx); /* can be stopped during retire callback */ -- cgit v1.2.3 From efa225254a4a57b871b279cc38fd4048a0fbfa76 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Jan 2017 11:24:50 +0200 Subject: x86/platform/intel-mid: Rename 'spidev' to 'mrfld_spidev' [ Upstream commit 159d3726db12b3476bc59ea0ab0a702103d466b5 ] The current implementation supports only Intel Merrifield platforms. Don't mess with the rest of the Intel MID family by not registering device with wrong properties. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170102092450.87229-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 +- .../intel-mid/device_libs/platform_mrfld_spidev.c | 54 ++++++++++++++++++++++ .../intel-mid/device_libs/platform_spidev.c | 50 -------------------- 3 files changed, 55 insertions(+), 51 deletions(-) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c delete mode 100644 arch/x86/platform/intel-mid/device_libs/platform_spidev.c diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index dd6cfa4ad3ac..75029d0cfa15 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # SPI Devices -obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o +obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c new file mode 100644 index 000000000000..27186ad654c9 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c @@ -0,0 +1,54 @@ +/* + * spidev platform data initilization file + * + * (C) Copyright 2014, 2016 Intel Corporation + * Authors: Andy Shevchenko + * Dan O'Donovan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include + +#include + +#define MRFLD_SPI_DEFAULT_DMA_BURST 8 +#define MRFLD_SPI_DEFAULT_TIMEOUT 500 + +/* GPIO pin for spidev chipselect */ +#define MRFLD_SPIDEV_GPIO_CS 111 + +static struct pxa2xx_spi_chip spidev_spi_chip = { + .dma_burst_size = MRFLD_SPI_DEFAULT_DMA_BURST, + .timeout = MRFLD_SPI_DEFAULT_TIMEOUT, + .gpio_cs = MRFLD_SPIDEV_GPIO_CS, +}; + +static void __init *spidev_platform_data(void *info) +{ + struct spi_board_info *spi_info = info; + + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return ERR_PTR(-ENODEV); + + spi_info->mode = SPI_MODE_0; + spi_info->controller_data = &spidev_spi_chip; + + return NULL; +} + +static const struct devs_id spidev_dev_id __initconst = { + .name = "spidev", + .type = SFI_DEV_TYPE_SPI, + .delay = 0, + .get_platform_data = &spidev_platform_data, +}; + +sfi_device(spidev_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_spidev.c deleted file mode 100644 index 30c601b399ee..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * spidev platform data initilization file - * - * (C) Copyright 2014, 2016 Intel Corporation - * Authors: Andy Shevchenko - * Dan O'Donovan - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include -#include -#include -#include - -#include - -#define MRFLD_SPI_DEFAULT_DMA_BURST 8 -#define MRFLD_SPI_DEFAULT_TIMEOUT 500 - -/* GPIO pin for spidev chipselect */ -#define MRFLD_SPIDEV_GPIO_CS 111 - -static struct pxa2xx_spi_chip spidev_spi_chip = { - .dma_burst_size = MRFLD_SPI_DEFAULT_DMA_BURST, - .timeout = MRFLD_SPI_DEFAULT_TIMEOUT, - .gpio_cs = MRFLD_SPIDEV_GPIO_CS, -}; - -static void __init *spidev_platform_data(void *info) -{ - struct spi_board_info *spi_info = info; - - spi_info->mode = SPI_MODE_0; - spi_info->controller_data = &spidev_spi_chip; - - return NULL; -} - -static const struct devs_id spidev_dev_id __initconst = { - .name = "spidev", - .type = SFI_DEV_TYPE_SPI, - .delay = 0, - .get_platform_data = &spidev_platform_data, -}; - -sfi_device(spidev_dev_id); -- cgit v1.2.3 From b899142ce21d53b496fe7d1534ba7ca62538ca37 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Thu, 22 Dec 2016 17:17:40 -0800 Subject: perf/x86: Set pmu->module in Intel PMU modules [ Upstream commit 74545f63890e38520eb4d1dbedcadaa9c0dbc824 ] The conversion of Intel PMU drivers into modules did not include reference counting. The machine will crash when attempting to access deleted code if an event from a module PMU is started and the module removed before the event is destroyed. i.e. this crashes the machine: $ insmod intel-rapl-perf.ko $ perf stat -e power/energy-cores/ -C 0 & $ rmmod intel-rapl-perf.ko Set THIS_MODULE to pmu->module in Intel module PMUs so that generic code can handle reference counting and deny rmmod while an event still exists. Signed-off-by: David Carrillo-Cisneros Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1482455860-116269-1-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/cstate.c | 2 ++ arch/x86/events/intel/rapl.c | 1 + arch/x86/events/intel/uncore.c | 1 + 3 files changed, 4 insertions(+) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index fec8a461bdef..1076c9a77292 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static struct pmu cstate_pkg_pmu = { @@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static const struct cstate_model nhm_cstates __initconst = { diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 8b902b67342a..970c1de3b86e 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.read = rapl_pmu_event_read; + rapl_pmus->pmu.module = THIS_MODULE; return 0; } diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 19d646a783fd..aec6cc925af8 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) .start = uncore_pmu_event_start, .stop = uncore_pmu_event_stop, .read = uncore_pmu_event_read, + .module = THIS_MODULE, }; } else { pmu->pmu = *pmu->type->pmu; -- cgit v1.2.3 From 3f0dfa11cc0da608fc513f53b50efebb9827f371 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Jan 2017 15:44:52 -0600 Subject: ASoC: Intel: bytcr-rt5640: fix settings in internal clock mode [ Upstream commit 60448b077ed93d227e6c117a9e87db76ff0c1911 ] Frequency value of zero did not make sense, use same 24.576MHz setting and only change the clock source in idle mode Suggested-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/boards/bytcr_rt5640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index d5873eeae1aa..bd19fad2d91b 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -142,7 +142,7 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, * for Jack detection and button press */ ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_RCCLK, - 0, + 48000 * 512, SND_SOC_CLOCK_IN); if (!ret) { if ((byt_rt5640_quirk & BYT_RT5640_MCLK_EN) && priv->mclk) -- cgit v1.2.3 From 3b6f4e292cbdd46c03303a20417e0c05ddb73bda Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 5 Jan 2017 14:25:59 +0100 Subject: HID: ignore Petzl USB headlamp [ Upstream commit 08f9572671c8047e7234cbf150869aa3c3d59a97 ] This headlamp contains a dummy HID descriptor which pretends to be a mouse-like device, but can't be used as a mouse at all. Reported-by: Lukas Ocilka Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index a5dd7e63ada3..4f3f5749b0c1 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2484,6 +2484,7 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) }, + { HID_USB_DEVICE(USB_VENDOR_ID_PETZL, USB_DEVICE_ID_PETZL_HEADLAMP) }, { HID_USB_DEVICE(USB_VENDOR_ID_PHILIPS, USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE) }, { HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) }, #if IS_ENABLED(CONFIG_MOUSE_SYNAPTICS_USB) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index cfca43f635a6..08fd3f831d62 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -819,6 +819,9 @@ #define USB_VENDOR_ID_PETALYNX 0x18b1 #define USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE 0x0037 +#define USB_VENDOR_ID_PETZL 0x2122 +#define USB_DEVICE_ID_PETZL_HEADLAMP 0x1234 + #define USB_VENDOR_ID_PHILIPS 0x0471 #define USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE 0x0617 -- cgit v1.2.3 From 2803ea76366a735f69d81dbaeae19b8dbf44a48d Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 14 Dec 2016 13:20:41 -0800 Subject: scsi: fnic: Avoid sending reset to firmware when another reset is in progress [ Upstream commit 9698b6f473555a722bf81a3371998427d5d27bde ] This fix is to avoid calling fnic_fw_reset_handler through fnic_host_reset when a finc reset is alreay in progress. Signed-off-by: Satish Kharat Signed-off-by: Sesidhar Baddela Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fnic/fnic.h | 1 + drivers/scsi/fnic/fnic_scsi.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 9ddc9200e0a4..9e4b7709043e 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -248,6 +248,7 @@ struct fnic { struct completion *remove_wait; /* device remove thread blocks */ atomic_t in_flight; /* io counter */ + bool internal_reset_inprogress; u32 _reserved; /* fill hole */ unsigned long state_flags; /* protected by host lock */ enum fnic_state state; diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index d9fd2f841585..44dd372aa7d3 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2573,6 +2573,19 @@ int fnic_host_reset(struct scsi_cmnd *sc) unsigned long wait_host_tmo; struct Scsi_Host *shost = sc->device->host; struct fc_lport *lp = shost_priv(shost); + struct fnic *fnic = lport_priv(lp); + unsigned long flags; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->internal_reset_inprogress == 0) { + fnic->internal_reset_inprogress = 1; + } else { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "host reset in progress skipping another host reset\n"); + return SUCCESS; + } + spin_unlock_irqrestore(&fnic->fnic_lock, flags); /* * If fnic_reset is successful, wait for fabric login to complete @@ -2593,6 +2606,9 @@ int fnic_host_reset(struct scsi_cmnd *sc) } } + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->internal_reset_inprogress = 0; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); return ret; } -- cgit v1.2.3 From 5f704071920299073d23be138939545158ce42d1 Mon Sep 17 00:00:00 2001 From: Burak Ok Date: Wed, 21 Dec 2016 14:45:53 +0100 Subject: scsi: snic: Return error code on memory allocation failure [ Upstream commit 0371adcdaca92912baaa3256ed13e058a016e62d ] If a call to mempool_create_slab_pool() in snic_probe() returns NULL, return -ENOMEM to indicate failure. mempool_creat_slab_pool() only fails if it cannot allocate memory. https://bugzilla.kernel.org/show_bug.cgi?id=189061 Reported-by: bianpan2010@ruc.edu.cn Signed-off-by: Burak Ok Signed-off-by: Andreas Schaertl Acked-by: Narsimhulu Musini Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/snic/snic_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c index 396b32dca074..7cf70aaec0ba 100644 --- a/drivers/scsi/snic/snic_main.c +++ b/drivers/scsi/snic/snic_main.c @@ -591,6 +591,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "dflt sgl pool creation failed\n"); + ret = -ENOMEM; goto err_free_res; } @@ -601,6 +602,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "max sgl pool creation failed\n"); + ret = -ENOMEM; goto err_free_dflt_sgl_pool; } @@ -611,6 +613,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "snic tmreq info pool creation failed.\n"); + ret = -ENOMEM; goto err_free_max_sgl_pool; } -- cgit v1.2.3 From 23ab274ddf83931ecc21968d11c773a2a972e462 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 23 Dec 2016 20:40:19 -0800 Subject: scsi: bfa: Increase requested firmware version to 3.2.5.1 [ Upstream commit 2d1148f0f45079d25a0fa0d67e4fdb2a656d12fb ] bna & bfa firmware version 3.2.5.1 was submitted to linux-firmware on Feb 17 19:10:20 2015 -0500 in 0ab54ff1dc ("linux-firmware: Add QLogic BR Series Adapter Firmware"). bna was updated to use the newer firmware on Feb 19 16:02:32 2015 -0500 in 3f307c3d70 ("bna: Update the Driver and Firmware Version") bfa was not updated. I presume this was an oversight but it broke support for bfa+bna cards such as the following 04:00.0 Fibre Channel [0c04]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) 04:00.1 Fibre Channel [0c04]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) 04:00.2 Ethernet controller [0200]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) 04:00.3 Ethernet controller [0200]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) Currently, if the bfa module is loaded first, bna fails to probe the respective devices with [ 215.026787] bna: QLogic BR-series 10G Ethernet driver - version: 3.2.25.1 [ 215.043707] bna 0000:04:00.2: bar0 mapped to ffffc90001fc0000, len 262144 [ 215.060656] bna 0000:04:00.2: initialization failed err=1 [ 215.073893] bna 0000:04:00.3: bar0 mapped to ffffc90002040000, len 262144 [ 215.090644] bna 0000:04:00.3: initialization failed err=1 Whereas if bna is loaded first, bfa fails with [ 249.592109] QLogic BR-series BFA FC/FCOE SCSI driver - version: 3.2.25.0 [ 249.610738] bfa 0000:04:00.0: Running firmware version is incompatible with the driver version [ 249.833513] bfa 0000:04:00.0: bfa init failed [ 249.833919] scsi host6: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.0 driver: 3.2.25.0 [ 249.841446] bfa 0000:04:00.1: Running firmware version is incompatible with the driver version [ 250.045449] bfa 0000:04:00.1: bfa init failed [ 250.045962] scsi host7: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.1 driver: 3.2.25.0 Increase bfa's requested firmware version. Also increase the driver version. I only tested that all of the devices probe without error. Reported-by: Tim Ehlers Signed-off-by: Benjamin Poirier Acked-by: Rasesh Mody Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bfa/bfad.c | 6 +++--- drivers/scsi/bfa/bfad_drv.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 9d253cb83ee7..e70410beb83a 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -64,9 +64,9 @@ int max_rport_logins = BFA_FCS_MAX_RPORT_LOGINS; u32 bfi_image_cb_size, bfi_image_ct_size, bfi_image_ct2_size; u32 *bfi_image_cb, *bfi_image_ct, *bfi_image_ct2; -#define BFAD_FW_FILE_CB "cbfw-3.2.3.0.bin" -#define BFAD_FW_FILE_CT "ctfw-3.2.3.0.bin" -#define BFAD_FW_FILE_CT2 "ct2fw-3.2.3.0.bin" +#define BFAD_FW_FILE_CB "cbfw-3.2.5.1.bin" +#define BFAD_FW_FILE_CT "ctfw-3.2.5.1.bin" +#define BFAD_FW_FILE_CT2 "ct2fw-3.2.5.1.bin" static u32 *bfad_load_fwimg(struct pci_dev *pdev); static void bfad_free_fwimg(void); diff --git a/drivers/scsi/bfa/bfad_drv.h b/drivers/scsi/bfa/bfad_drv.h index f9e862093a25..cfcfff48e8e1 100644 --- a/drivers/scsi/bfa/bfad_drv.h +++ b/drivers/scsi/bfa/bfad_drv.h @@ -58,7 +58,7 @@ #ifdef BFA_DRIVER_VERSION #define BFAD_DRIVER_VERSION BFA_DRIVER_VERSION #else -#define BFAD_DRIVER_VERSION "3.2.25.0" +#define BFAD_DRIVER_VERSION "3.2.25.1" #endif #define BFAD_PROTO_NAME FCPI_NAME -- cgit v1.2.3 From 27ef0283b60ecb0445a7530db1f3a997ce89715a Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Mon, 2 Jan 2017 09:50:05 +0530 Subject: ASoC: Intel: Skylake: Release FW ctx in cleanup [ Upstream commit bc65a326c579e93a5c2120a65ede72f11369ee5a ] Saved firmware ctx was not never released, so release Firmware ctx in cleanup routine. Signed-off-by: Jeeja KP Acked-by: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-sst.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c index 8fc3178bc79c..b30bd384c8d3 100644 --- a/sound/soc/intel/skylake/skl-sst.c +++ b/sound/soc/intel/skylake/skl-sst.c @@ -515,6 +515,9 @@ EXPORT_SYMBOL_GPL(skl_sst_init_fw); void skl_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx) { + + if (ctx->dsp->fw) + release_firmware(ctx->dsp->fw); skl_clear_module_table(ctx->dsp); skl_freeup_uuid_list(ctx); skl_ipc_free(&ctx->ipc); -- cgit v1.2.3 From 007dffc661cf20f9e924eccc7b161ce2419ba81b Mon Sep 17 00:00:00 2001 From: Patrick Lai Date: Sat, 31 Dec 2016 22:44:39 -0800 Subject: ASoC: dpcm: Avoid putting stream state to STOP when FE stream is paused [ Upstream commit 9f169b9f52a4afccdab7a7d2311b0c53a78a1e6b ] When multiple front-ends are using the same back-end, putting state of a front-end to STOP state upon receiving pause command will result in backend stream getting released by DPCM framework unintentionally. In order to avoid backend to be released when another active front-end stream is present, put the stream state to PAUSED state instead of STOP state. Signed-off-by: Patrick Lai Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index d56a16a0f6fa..21c3ef01c438 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2184,9 +2184,11 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP; break; + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED; + break; } out: -- cgit v1.2.3 From db397d9c6e66afdd34ae430172db122632b5f8a7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 6 Aug 2017 19:00:06 -0700 Subject: Linux 4.9.41 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d9397a912c31..82eb3d1ee801 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 40 +SUBLEVEL = 41 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From 5d23e4f3a333639d36723ac9fe027749e96c9844 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 30 Jul 2017 16:20:19 -0400 Subject: parisc: Handle vma's whose context is not current in flush_cache_range commit 13d57093c141db2036364d6be35e394fc5b64728 upstream. In testing James' patch to drivers/parisc/pdc_stable.c, I hit the BUG statement in flush_cache_range() during a system shutdown: kernel BUG at arch/parisc/kernel/cache.c:595! CPU: 2 PID: 6532 Comm: kworker/2:0 Not tainted 4.13.0-rc2+ #1 Workqueue: events free_ioctx IAOQ[0]: flush_cache_range+0x144/0x148 IAOQ[1]: flush_cache_page+0x0/0x1a8 RP(r2): flush_cache_range+0xec/0x148 Backtrace: [<00000000402910ac>] unmap_page_range+0x84/0x880 [<00000000402918f4>] unmap_single_vma+0x4c/0x60 [<0000000040291a18>] zap_page_range_single+0x110/0x160 [<0000000040291c34>] unmap_mapping_range+0x174/0x1a8 [<000000004026ccd8>] truncate_pagecache+0x50/0xa8 [<000000004026cd84>] truncate_setsize+0x54/0x70 [<000000004033d534>] put_aio_ring_file+0x44/0xb0 [<000000004033d5d8>] aio_free_ring+0x38/0x140 [<000000004033d714>] free_ioctx+0x34/0xa8 [<00000000401b0028>] process_one_work+0x1b8/0x4d0 [<00000000401b04f4>] worker_thread+0x1b4/0x648 [<00000000401b9128>] kthread+0x1b0/0x208 [<0000000040150020>] end_fault_vector+0x20/0x28 [<0000000040639518>] nf_ip_reroute+0x50/0xa8 [<0000000040638ed0>] nf_ip_route+0x10/0x78 [<0000000040638c90>] xfrm4_mode_tunnel_input+0x180/0x1f8 CPU: 2 PID: 6532 Comm: kworker/2:0 Not tainted 4.13.0-rc2+ #1 Workqueue: events free_ioctx Backtrace: [<0000000040163bf0>] show_stack+0x20/0x38 [<0000000040688480>] dump_stack+0xa8/0x120 [<0000000040163dc4>] die_if_kernel+0x19c/0x2b0 [<0000000040164d0c>] handle_interruption+0xa24/0xa48 This patch modifies flush_cache_range() to handle non current contexts. In as much as this occurs infrequently, the simplest approach is to flush the entire cache when this happens. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/cache.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c721ea2fdbd8..df757c9675e6 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -604,13 +604,12 @@ void flush_cache_range(struct vm_area_struct *vma, if (parisc_requires_coherency()) flush_tlb_range(vma, start, end); - if ((end - start) >= parisc_cache_flush_threshold) { + if ((end - start) >= parisc_cache_flush_threshold + || vma->vm_mm->context != mfsp(3)) { flush_cache_all(); return; } - BUG_ON(vma->vm_mm->context != mfsp(3)); - flush_user_dcache_range_asm(start, end); if (vma->vm_flags & VM_EXEC) flush_user_icache_range_asm(start, end); -- cgit v1.2.3 From 4a99eac8d2b3f5e45336894ac16caa87f67d2199 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jul 2017 17:57:46 -0400 Subject: cgroup: create dfl_root files on subsys registration commit 7af608e4f9530372aec6e940552bf76595f2e265 upstream. On subsystem registration, css_populate_dir() is not called on the new root css, so the interface files for the subsystem on cgrp_dfl_root aren't created on registration. This is a residue from the days when cgrp_dfl_root was used only as the parking spot for unused subsystems, which no longer is true as it's used as the root for cgroup2. This is often fine as later operations tend to create them as a part of mount (cgroup1) or subtree_control operations (cgroup2); however, it's not difficult to mount cgroup2 with the controller interface files missing as Waiman found out. Fix it by invoking css_populate_dir() on the root css on subsys registration. Signed-off-by: Tejun Heo Reported-and-tested-by: Waiman Long Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1fde8eec9529..e97826a8b708 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5718,6 +5718,10 @@ int __init cgroup_init(void) if (ss->bind) ss->bind(init_css_set.subsys[ssid]); + + mutex_lock(&cgroup_mutex); + css_populate_dir(init_css_set.subsys[ssid]); + mutex_unlock(&cgroup_mutex); } /* init_css_set.subsys[] has been updated, re-hash */ -- cgit v1.2.3 From 445ee6cdd91ea6b2f65653ca05f3e951d8458ebc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jul 2017 08:14:15 -0400 Subject: cgroup: fix error return value from cgroup_subtree_control() commit 3c74541777302eec43a0d1327c4d58b8659a776b upstream. While refactoring, f7b2814bb9b6 ("cgroup: factor out cgroup_{apply|finalize}_control() from cgroup_subtree_control_write()") broke error return value from the function. The return value from the last operation is always overridden to zero. Fix it. Signed-off-by: Tejun Heo Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e97826a8b708..4c233437ee1a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3487,11 +3487,11 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, cgrp->subtree_control &= ~disable; ret = cgroup_apply_control(cgrp); - cgroup_finalize_control(cgrp, ret); + if (ret) + goto out_unlock; kernfs_activate(cgrp->kn); - ret = 0; out_unlock: cgroup_kn_unlock(of->kn); return ret ?: nbytes; -- cgit v1.2.3 From 804b1a9f0aeabc002b162c5b1861d65e1316a53a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 Jul 2017 13:06:41 +0300 Subject: libata: array underflow in ata_find_dev() commit 59a5e266c3f5c1567508888dd61a45b86daed0fa upstream. My static checker complains that "devno" can be negative, meaning that we read before the start of the loop. I've looked at the code, and I think the warning is right. This come from /proc so it's root only or it would be quite a quite a serious bug. The call tree looks like this: proc_scsi_write() <- gets id and channel from simple_strtoul() -> scsi_add_single_device() <- calls shost->transportt->user_scan() -> ata_scsi_user_scan() -> ata_find_dev() Signed-off-by: Dan Carpenter Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 8e575fbdf31d..e3e10e8f6f6a 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2971,10 +2971,12 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) static struct ata_device *ata_find_dev(struct ata_port *ap, int devno) { if (!sata_pmp_attached(ap)) { - if (likely(devno < ata_link_max_devices(&ap->link))) + if (likely(devno >= 0 && + devno < ata_link_max_devices(&ap->link))) return &ap->link.device[devno]; } else { - if (likely(devno < ap->nr_pmp_links)) + if (likely(devno >= 0 && + devno < ap->nr_pmp_links)) return &ap->pmp_link[devno].device[0]; } -- cgit v1.2.3 From 61a0adbfaad70713c45964e48b72e5e5b6ef30b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jul 2017 18:41:52 -0400 Subject: workqueue: restore WQ_UNBOUND/max_active==1 to be ordered commit 5c0338c68706be53b3dc472e4308961c36e4ece1 upstream. The combination of WQ_UNBOUND and max_active == 1 used to imply ordered execution. After NUMA affinity 4c16bd327c74 ("workqueue: implement NUMA affinity for unbound workqueues"), this is no longer true due to per-node worker pools. While the right way to create an ordered workqueue is alloc_ordered_workqueue(), the documentation has been misleading for a long time and people do use WQ_UNBOUND and max_active == 1 for ordered workqueues which can lead to subtle bugs which are very difficult to trigger. It's unlikely that we'd see noticeable performance impact by enforcing ordering on WQ_UNBOUND / max_active == 1 workqueues. Let's automatically set __WQ_ORDERED for those workqueues. Signed-off-by: Tejun Heo Reported-by: Christoph Hellwig Reported-by: Alexei Potashnik Fixes: 4c16bd327c74 ("workqueue: implement NUMA affinity for unbound workqueues") Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 479d840db286..e5335c27d72e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3915,6 +3915,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, struct workqueue_struct *wq; struct pool_workqueue *pwq; + /* + * Unbound && max_active == 1 used to imply ordered, which is no + * longer the case on NUMA machines due to per-node pools. While + * alloc_ordered_workqueue() is the right way to create an ordered + * workqueue, keep the previous behavior to avoid subtle breakages + * on NUMA. + */ + if ((flags & WQ_UNBOUND) && max_active == 1) + flags |= __WQ_ORDERED; + /* see the comment above the definition of WQ_POWER_EFFICIENT */ if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) flags |= WQ_UNBOUND; -- cgit v1.2.3 From de8c33290f1c8fdd037b6e3d43ea888928f21e8c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 8 Jun 2017 10:55:26 +0300 Subject: iwlwifi: dvm: prevent an out of bounds access commit 0b0f934e92a8eaed2e6c48a50eae6f84661f74f3 upstream. iwlagn_check_ratid_empty takes the tid as a parameter, but it doesn't check that it is not IWL_TID_NON_QOS. Since IWL_TID_NON_QOS = 8 and iwl_priv::tid_data is an array with 8 entries, accessing iwl_priv::tid_data[IWL_TID_NON_QOS] is a bad idea. This happened in iwlagn_rx_reply_tx. Since iwlagn_check_ratid_empty is relevant only to check whether we can open A-MPDU, this flow is irrelevant if tid is IWL_TID_NON_QOS. Call iwlagn_check_ratid_empty only inside the if (tid != IWL_TID_NON_QOS) a few lines earlier in the function. Reported-by: Seraphime Kirkovski Tested-by: Seraphime Kirkovski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/dvm/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c index 4b97371c3b42..838946d17b59 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c @@ -1190,11 +1190,11 @@ void iwlagn_rx_reply_tx(struct iwl_priv *priv, struct iwl_rx_cmd_buffer *rxb) next_reclaimed; IWL_DEBUG_TX_REPLY(priv, "Next reclaimed packet:%d\n", next_reclaimed); + iwlagn_check_ratid_empty(priv, sta_id, tid); } iwl_trans_reclaim(priv->trans, txq_id, ssn, &skbs); - iwlagn_check_ratid_empty(priv, sta_id, tid); freed = 0; /* process frames */ -- cgit v1.2.3 From f5214eb4baf87b0134acf98c29150071fa3b815c Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 26 Jul 2017 13:09:24 +0100 Subject: brcmfmac: fix memleak due to calling brcmf_sdiod_sgtable_alloc() twice commit 5f5d03143de5e0c593da4ab18fc6393c2815e108 upstream. Due to a bugfix in wireless tree and the commit mentioned below a merge was needed which went haywire. So the submitted change resulted in the function brcmf_sdiod_sgtable_alloc() being called twice during the probe thus leaking the memory of the first call. Fixes: 4d7928959832 ("brcmfmac: switch to new platform data") Reported-by: Stefan Wahren Tested-by: Stefan Wahren Reviewed-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 8744b9beda33..8e3c6f4bdaa0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4161,11 +4161,6 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) goto fail; } - /* allocate scatter-gather table. sg support - * will be disabled upon allocation failure. - */ - brcmf_sdiod_sgtable_alloc(bus->sdiodev); - /* Query the F2 block size, set roundup accordingly */ bus->blocksize = bus->sdiodev->func[2]->cur_blksize; bus->roundup = min(max_roundup, bus->blocksize); -- cgit v1.2.3 From f7d3e54fb40cfc6f829b8f8ad3aab026714f2ffc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 16:02:47 -0400 Subject: NFSv4: Fix EXCHANGE_ID corrupt verifier issue commit fd40559c8657418385e42f797e0b04bfc0add748 upstream. The verifier is allocated on the stack, but the EXCHANGE_ID RPC call was changed to be asynchronous by commit 8d89bd70bc939. If we interrrupt the call to rpc_wait_for_completion_task(), we can therefore end up transmitting random stack contents in lieu of the verifier. Fixes: 8d89bd70bc939 ("NFS setup async exchange_id") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 11 ++++------- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 46ca7881d80d..a53b8e0c896a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7410,7 +7410,7 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data) cdata->res.server_scope = NULL; } /* Save the EXCHANGE_ID verifier session trunk tests */ - memcpy(clp->cl_confirm.data, cdata->args.verifier->data, + memcpy(clp->cl_confirm.data, cdata->args.verifier.data, sizeof(clp->cl_confirm.data)); } out: @@ -7447,7 +7447,6 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, u32 sp4_how, struct rpc_xprt *xprt) { - nfs4_verifier verifier; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], .rpc_cred = cred, @@ -7470,8 +7469,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (!calldata) goto out; - if (!xprt) - nfs4_init_boot_verifier(clp, &verifier); + nfs4_init_boot_verifier(clp, &calldata->args.verifier); status = nfs4_init_uniform_client_string(clp); if (status) @@ -7516,9 +7514,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.rpc_xprt = xprt; task_setup_data.flags = RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC; - calldata->args.verifier = &clp->cl_confirm; - } else { - calldata->args.verifier = &verifier; + memcpy(calldata->args.verifier.data, clp->cl_confirm.data, + sizeof(calldata->args.verifier.data)); } calldata->args.client = clp; #ifdef CONFIG_NFS_V4_1_MIGRATION diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c9c4d9855976..5e2724a928ed 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1761,7 +1761,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, int len = 0; encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); - encode_nfs4_verifier(xdr, args->verifier); + encode_nfs4_verifier(xdr, &args->verifier); encode_string(xdr, strlen(args->client->cl_owner_id), args->client->cl_owner_id); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index beb1e10f446e..3bf867a0c3b3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1199,7 +1199,7 @@ struct nfs41_state_protection { struct nfs41_exchange_id_args { struct nfs_client *client; - nfs4_verifier *verifier; + nfs4_verifier verifier; u32 flags; struct nfs41_state_protection state_protect; }; -- cgit v1.2.3 From 347be00b56f72a5a6759930dc7269cb3b1fbbad6 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Wed, 26 Jul 2017 16:02:46 +0200 Subject: mmc: sdhci-of-at91: force card detect value for non removable devices commit 7a1e3f143176e8ebdb2f5a9b3b47abc18b879d90 upstream. When the device is non removable, the card detect signal is often used for another purpose i.e. muxed to another SoC peripheral or used as a GPIO. It could lead to wrong behaviors depending the default value of this signal if not muxed to the SDHCI controller. Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC") Signed-off-by: Ludovic Desroches Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-at91.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index a8b430ff117b..83b84ffec27d 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -31,6 +31,7 @@ #define SDMMC_MC1R 0x204 #define SDMMC_MC1R_DDR BIT(3) +#define SDMMC_MC1R_FCD BIT(7) #define SDMMC_CACR 0x230 #define SDMMC_CACR_CAPWREN BIT(0) #define SDMMC_CACR_KEY (0x46 << 8) @@ -43,6 +44,15 @@ struct sdhci_at91_priv { struct clk *mainck; }; +static void sdhci_at91_set_force_card_detect(struct sdhci_host *host) +{ + u8 mc1r; + + mc1r = readb(host->ioaddr + SDMMC_MC1R); + mc1r |= SDMMC_MC1R_FCD; + writeb(mc1r, host->ioaddr + SDMMC_MC1R); +} + static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) { u16 clk; @@ -112,10 +122,18 @@ void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) sdhci_set_uhs_signaling(host, timing); } +static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) +{ + sdhci_reset(host, mask); + + if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + sdhci_at91_set_force_card_detect(host); +} + static const struct sdhci_ops sdhci_at91_sama5d2_ops = { .set_clock = sdhci_at91_set_clock, .set_bus_width = sdhci_set_bus_width, - .reset = sdhci_reset, + .reset = sdhci_at91_reset, .set_uhs_signaling = sdhci_at91_set_uhs_signaling, .set_power = sdhci_at91_set_power, }; @@ -322,6 +340,21 @@ static int sdhci_at91_probe(struct platform_device *pdev) host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; } + /* + * If the device attached to the MMC bus is not removable, it is safer + * to set the Force Card Detect bit. People often don't connect the + * card detect signal and use this pin for another purpose. If the card + * detect pin is not muxed to SDHCI controller, a default value is + * used. This value can be different from a SoC revision to another + * one. Problems come when this default value is not card present. To + * avoid this case, if the device is non removable then the card + * detection procedure using the SDMCC_CD signal is bypassed. + * This bit is reset when a software reset for all command is performed + * so we need to implement our own reset function to set back this bit. + */ + if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + sdhci_at91_set_force_card_detect(host); + pm_runtime_put_autosuspend(&pdev->dev); return 0; -- cgit v1.2.3 From 1f32e67adac40508e9c8753151effbe0bbf06bdb Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 28 Mar 2017 10:52:24 +0300 Subject: device property: Make dev_fwnode() public commit e44bb0cbdc88686c21e2175a990b40bf6db5d005 upstream. The function to obtain a fwnode related to a struct device is useful for drivers that use the fwnode property API: it allows not being aware of the underlying firmware implementation. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Cc: Chris Metcalf Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 3 ++- include/linux/property.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 43a36d68c3fd..06f66687fe0b 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -182,11 +182,12 @@ static int pset_prop_read_string(struct property_set *pset, return 0; } -static inline struct fwnode_handle *dev_fwnode(struct device *dev) +struct fwnode_handle *dev_fwnode(struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? &dev->of_node->fwnode : dev->fwnode; } +EXPORT_SYMBOL_GPL(dev_fwnode); /** * device_property_present - check if a property of a device is present diff --git a/include/linux/property.h b/include/linux/property.h index 856e50b2140c..338f9b76914b 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -33,6 +33,8 @@ enum dev_dma_attr { DEV_DMA_COHERENT, }; +struct fwnode_handle *dev_fwnode(struct device *dev); + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); -- cgit v1.2.3 From 943281eb4ecfd5485abf41338c967d9ebf168ead Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 1 Mar 2017 14:11:47 -0800 Subject: mmc: core: Fix access to HS400-ES devices commit 773dc118756b1f38766063e90e582016be868f09 upstream. HS400-ES devices fail to initialize with the following error messages. mmc1: power class selection to bus width 8 ddr 0 failed mmc1: error -110 whilst initialising MMC card This was seen on Samsung Chromebook Plus. Code analysis points to commit 3d4ef329757c ("mmc: core: fix multi-bit bus width without high-speed mode"), which attempts to set the bus width for all but HS200 devices unconditionally. However, for HS400-ES, the bus width is already selected. Cc: Anssi Hannula Cc: Douglas Anderson Cc: Brian Norris Fixes: 3d4ef329757c ("mmc: core: fix multi-bit bus width ...") Signed-off-by: Guenter Roeck Reviewed-by: Douglas Anderson Reviewed-by: Shawn Lin Tested-by: Heiko Stuebner Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f57700c4b8f0..323dba35bc9a 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1690,7 +1690,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, err = mmc_select_hs400(card); if (err) goto free_card; - } else { + } else if (!mmc_card_hs400es(card)) { /* Select the desired bus width optionally */ err = mmc_select_bus_width(card); if (err > 0 && mmc_card_hs(card)) { -- cgit v1.2.3 From 5a1eef71aa2aaa46892a07a0cfc94dd38d24b040 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 2 Aug 2017 13:31:52 -0700 Subject: mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries commit 3ea277194daaeaa84ce75180ec7c7a2075027a68 upstream. Nadav Amit identified a theoritical race between page reclaim and mprotect due to TLB flushes being batched outside of the PTL being held. He described the race as follows: CPU0 CPU1 ---- ---- user accesses memory using RW PTE [PTE now cached in TLB] try_to_unmap_one() ==> ptep_get_and_clear() ==> set_tlb_ubc_flush_pending() mprotect(addr, PROT_READ) ==> change_pte_range() ==> [ PTE non-present - no flush ] user writes using cached RW PTE ... try_to_unmap_flush() The same type of race exists for reads when protecting for PROT_NONE and also exists for operations that can leave an old TLB entry behind such as munmap, mremap and madvise. For some operations like mprotect, it's not necessarily a data integrity issue but it is a correctness issue as there is a window where an mprotect that limits access still allows access. For munmap, it's potentially a data integrity issue although the race is massive as an munmap, mmap and return to userspace must all complete between the window when reclaim drops the PTL and flushes the TLB. However, it's theoritically possible so handle this issue by flushing the mm if reclaim is potentially currently batching TLB flushes. Other instances where a flush is required for a present pte should be ok as either the page lock is held preventing parallel reclaim or a page reference count is elevated preventing a parallel free leading to corruption. In the case of page_mkclean there isn't an obvious path that userspace could take advantage of without using the operations that are guarded by this patch. Other users such as gup as a race with reclaim looks just at PTEs. huge page variants should be ok as they don't race with reclaim. mincore only looks at PTEs. userfault also should be ok as if a parallel reclaim takes place, it will either fault the page back in or read some of the data before the flush occurs triggering a fault. Note that a variant of this patch was acked by Andy Lutomirski but this was for the x86 parts on top of his PCID work which didn't make the 4.13 merge window as expected. His ack is dropped from this version and there will be a follow-on patch on top of PCID that will include his ack. [akpm@linux-foundation.org: tweak comments] [akpm@linux-foundation.org: fix spello] Link: http://lkml.kernel.org/r/20170717155523.emckq2esjro6hf3z@suse.de Reported-by: Nadav Amit Signed-off-by: Mel Gorman Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm_types.h | 4 ++++ mm/internal.h | 5 ++++- mm/madvise.c | 2 ++ mm/memory.c | 1 + mm/mprotect.c | 1 + mm/mremap.c | 1 + mm/rmap.c | 36 ++++++++++++++++++++++++++++++++++++ 7 files changed, 49 insertions(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 08d947fc4c59..e8471c2ca83a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -507,6 +507,10 @@ struct mm_struct { * PROT_NONE or PROT_NUMA mapped page. */ bool tlb_flush_pending; +#endif +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* See flush_tlb_batched_pending() */ + bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_X86_INTEL_MPX diff --git a/mm/internal.h b/mm/internal.h index 537ac9951f5f..34a5459e5989 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -472,6 +472,7 @@ struct tlbflush_unmap_batch; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); +void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { @@ -479,7 +480,9 @@ static inline void try_to_unmap_flush(void) static inline void try_to_unmap_flush_dirty(void) { } - +static inline void flush_tlb_batched_pending(struct mm_struct *mm) +{ +} #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; diff --git a/mm/madvise.c b/mm/madvise.c index 93fb63e88b5e..253b1533fba5 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -21,6 +21,7 @@ #include #include #include +#include "internal.h" #include @@ -282,6 +283,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, return 0; orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; diff --git a/mm/memory.c b/mm/memory.c index e6a5a1f20492..9bf3da0d0e14 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1124,6 +1124,7 @@ again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; diff --git a/mm/mprotect.c b/mm/mprotect.c index 11936526b08b..ae740c9b1f9b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -74,6 +74,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (!pte) return 0; + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { oldpte = *pte; diff --git a/mm/mremap.c b/mm/mremap.c index 30d7d2482eea..15976716dd40 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -142,6 +142,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, diff --git a/mm/rmap.c b/mm/rmap.c index cd37c1c7e21b..94488b0362f8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -616,6 +616,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm)); tlb_ubc->flush_required = true; + /* + * Ensure compiler does not re-order the setting of tlb_flush_batched + * before the PTE is cleared. + */ + barrier(); + mm->tlb_flush_batched = true; + /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() @@ -643,6 +650,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) return should_defer; } + +/* + * Reclaim unmaps pages under the PTL but do not flush the TLB prior to + * releasing the PTL if TLB flushes are batched. It's possible for a parallel + * operation such as mprotect or munmap to race between reclaim unmapping + * the page and flushing the page. If this race occurs, it potentially allows + * access to data via a stale TLB entry. Tracking all mm's that have TLB + * batching in flight would be expensive during reclaim so instead track + * whether TLB batching occurred in the past and if so then do a flush here + * if required. This will cost one additional flush per reclaim cycle paid + * by the first operation at risk such as mprotect and mumap. + * + * This must be called under the PTL so that an access to tlb_flush_batched + * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise + * via the PTL. + */ +void flush_tlb_batched_pending(struct mm_struct *mm) +{ + if (mm->tlb_flush_batched) { + flush_tlb_mm(mm); + + /* + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. + */ + barrier(); + mm->tlb_flush_batched = false; + } +} #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, struct page *page, bool writable) -- cgit v1.2.3 From 45a636ec1849f842feb0716c7e25176a8b53af69 Mon Sep 17 00:00:00 2001 From: Dima Zavin Date: Wed, 2 Aug 2017 13:32:18 -0700 Subject: cpuset: fix a deadlock due to incomplete patching of cpusets_enabled() commit 89affbf5d9ebb15c6460596822e8857ea2f9e735 upstream. In codepaths that use the begin/retry interface for reading mems_allowed_seq with irqs disabled, there exists a race condition that stalls the patch process after only modifying a subset of the static_branch call sites. This problem manifested itself as a deadlock in the slub allocator, inside get_any_partial. The loop reads mems_allowed_seq value (via read_mems_allowed_begin), performs the defrag operation, and then verifies the consistency of mem_allowed via the read_mems_allowed_retry and the cookie returned by xxx_begin. The issue here is that both begin and retry first check if cpusets are enabled via cpusets_enabled() static branch. This branch can be rewritted dynamically (via cpuset_inc) if a new cpuset is created. The x86 jump label code fully synchronizes across all CPUs for every entry it rewrites. If it rewrites only one of the callsites (specifically the one in read_mems_allowed_retry) and then waits for the smp_call_function(do_sync_core) to complete while a CPU is inside the begin/retry section with IRQs off and the mems_allowed value is changed, we can hang. This is because begin() will always return 0 (since it wasn't patched yet) while retry() will test the 0 against the actual value of the seq counter. The fix is to use two different static keys: one for begin (pre_enable_key) and one for retry (enable_key). In cpuset_inc(), we first bump the pre_enable key to ensure that cpuset_mems_allowed_begin() always return a valid seqcount if are enabling cpusets. Similarly, when disabling cpusets via cpuset_dec(), we first ensure that callers of cpuset_mems_allowed_retry() will start ignoring the seqcount value before we let cpuset_mems_allowed_begin() return 0. The relevant stack traces of the two stuck threads: CPU: 1 PID: 1415 Comm: mkdir Tainted: G L 4.9.36-00104-g540c51286237 #4 Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017 task: ffff8817f9c28000 task.stack: ffffc9000ffa4000 RIP: smp_call_function_many+0x1f9/0x260 Call Trace: smp_call_function+0x3b/0x70 on_each_cpu+0x2f/0x90 text_poke_bp+0x87/0xd0 arch_jump_label_transform+0x93/0x100 __jump_label_update+0x77/0x90 jump_label_update+0xaa/0xc0 static_key_slow_inc+0x9e/0xb0 cpuset_css_online+0x70/0x2e0 online_css+0x2c/0xa0 cgroup_apply_control_enable+0x27f/0x3d0 cgroup_mkdir+0x2b7/0x420 kernfs_iop_mkdir+0x5a/0x80 vfs_mkdir+0xf6/0x1a0 SyS_mkdir+0xb7/0xe0 entry_SYSCALL_64_fastpath+0x18/0xad ... CPU: 2 PID: 1 Comm: init Tainted: G L 4.9.36-00104-g540c51286237 #4 Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017 task: ffff8818087c0000 task.stack: ffffc90000030000 RIP: int3+0x39/0x70 Call Trace: <#DB> ? ___slab_alloc+0x28b/0x5a0 ? copy_process.part.40+0xf7/0x1de0 __slab_alloc.isra.80+0x54/0x90 copy_process.part.40+0xf7/0x1de0 copy_process.part.40+0xf7/0x1de0 kmem_cache_alloc_node+0x8a/0x280 copy_process.part.40+0xf7/0x1de0 _do_fork+0xe7/0x6c0 _raw_spin_unlock_irq+0x2d/0x60 trace_hardirqs_on_caller+0x136/0x1d0 entry_SYSCALL_64_fastpath+0x5/0xad do_syscall_64+0x27/0x350 SyS_clone+0x19/0x20 do_syscall_64+0x60/0x350 entry_SYSCALL64_slow_path+0x25/0x25 Link: http://lkml.kernel.org/r/20170731040113.14197-1-dmitriyz@waymo.com Fixes: 46e700abc44c ("mm, page_alloc: remove unnecessary taking of a seqlock when cpusets are disabled") Signed-off-by: Dima Zavin Reported-by: Cliff Spradlin Acked-by: Vlastimil Babka Cc: Peter Zijlstra Cc: Christopher Lameter Cc: Li Zefan Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuset.h | 19 +++++++++++++++++-- kernel/cpuset.c | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index bfc204e70338..cd32a49ae81e 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -16,6 +16,19 @@ #ifdef CONFIG_CPUSETS +/* + * Static branch rewrites can happen in an arbitrary order for a given + * key. In code paths where we need to loop with read_mems_allowed_begin() and + * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need + * to ensure that begin() always gets rewritten before retry() in the + * disabled -> enabled transition. If not, then if local irqs are disabled + * around the loop, we can deadlock since retry() would always be + * comparing the latest value of the mems_allowed seqcount against 0 as + * begin() still would see cpusets_enabled() as false. The enabled -> disabled + * transition should happen in reverse order for the same reasons (want to stop + * looking at real value of mems_allowed.sequence in retry() first). + */ +extern struct static_key_false cpusets_pre_enable_key; extern struct static_key_false cpusets_enabled_key; static inline bool cpusets_enabled(void) { @@ -30,12 +43,14 @@ static inline int nr_cpusets(void) static inline void cpuset_inc(void) { + static_branch_inc(&cpusets_pre_enable_key); static_branch_inc(&cpusets_enabled_key); } static inline void cpuset_dec(void) { static_branch_dec(&cpusets_enabled_key); + static_branch_dec(&cpusets_pre_enable_key); } extern int cpuset_init(void); @@ -113,7 +128,7 @@ extern void cpuset_print_current_mems_allowed(void); */ static inline unsigned int read_mems_allowed_begin(void) { - if (!cpusets_enabled()) + if (!static_branch_unlikely(&cpusets_pre_enable_key)) return 0; return read_seqcount_begin(¤t->mems_allowed_seq); @@ -127,7 +142,7 @@ static inline unsigned int read_mems_allowed_begin(void) */ static inline bool read_mems_allowed_retry(unsigned int seq) { - if (!cpusets_enabled()) + if (!static_branch_unlikely(&cpusets_enabled_key)) return false; return read_seqcount_retry(¤t->mems_allowed_seq, seq); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 24d175d2b62d..247afb108343 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -61,6 +61,7 @@ #include #include +DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); /* See "Frequency meter" comments, below. */ -- cgit v1.2.3 From c531a24057acb51fe60450c48c7f2a7bb80178f1 Mon Sep 17 00:00:00 2001 From: "Sergei A. Trusov" Date: Wed, 2 Aug 2017 20:23:48 +1000 Subject: ALSA: hda - Fix speaker output from VAIO VPCL14M1R commit 3f3c371421e601fa93b6cb7fb52da9ad59ec90b4 upstream. Sony VAIO VPCL14M1R needs the quirk to make the speaker working properly. Tested-by: Dmitriy Signed-off-by: Sergei A. Trusov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bb1aad39d987..6f337f00ba58 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2233,6 +2233,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), + SND_PCI_QUIRK(0x104d, 0x9060, "Sony Vaio VPCL14M1R", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP), -- cgit v1.2.3 From 69f15fff03e886e306fb4d7218403d1cb04e4840 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 30 Jul 2017 10:18:25 +0200 Subject: drm/amdgpu: Fix undue fallthroughs in golden registers initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5694785cf09bf0e7bd8e5f62361ea34fa162a4a0 upstream. As I was staring at the si_init_golden_registers code, I noticed that the Pitcairn initialization silently falls through the Cape Verde initialization, and the Oland initialization falls through the Hainan initialization. However there is no comment stating that this is intentional, and the radeon driver doesn't have any such fallthrough, so I suspect this is not supposed to happen. Signed-off-by: Jean Delvare Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10") Cc: Ken Wang Cc: Alex Deucher Cc: "Marek Olšák" Cc: "Christian König" Cc: Flora Cui Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/si.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index dc9511c5ecb8..327bdf13e8bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1301,6 +1301,7 @@ static void si_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, pitcairn_mgcg_cgcg_init, (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init)); + break; case CHIP_VERDE: amdgpu_program_register_sequence(adev, verde_golden_registers, @@ -1325,6 +1326,7 @@ static void si_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, oland_mgcg_cgcg_init, (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init)); + break; case CHIP_HAINAN: amdgpu_program_register_sequence(adev, hainan_golden_registers, -- cgit v1.2.3 From 650c763ea5ea68c136334f0e51ed2cd3938e09f5 Mon Sep 17 00:00:00 2001 From: Banajit Goswami Date: Fri, 14 Jul 2017 23:15:05 -0700 Subject: ASoC: do not close shared backend dailink commit b1cd2e34c69a2f3988786af451b6e17967c293a0 upstream. Multiple frontend dailinks may be connected to a backend dailink at the same time. When one of frontend dailinks is closed, the associated backend dailink should not be closed if it is connected to other active frontend dailinks. Change ensures that backend dailink is closed only after all connected frontend dailinks are closed. Signed-off-by: Gopikrishnaiah Anandan Signed-off-by: Banajit Goswami Signed-off-by: Patrick Lai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 21c3ef01c438..80088c98ce27 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -181,6 +181,10 @@ int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, dev_dbg(be->dev, "ASoC: BE %s event %d dir %d\n", be->dai_link->name, event, dir); + if ((event == SND_SOC_DAPM_STREAM_STOP) && + (be->dpcm[dir].users >= 1)) + continue; + snd_soc_dapm_stream_event(be, dir, event); } -- cgit v1.2.3 From 0c609d3e1fbdc82ca9b628835b8480de98b7f448 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 1 Aug 2017 05:20:03 -0700 Subject: KVM: async_pf: make rcu irq exit if not triggered from idle task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 337c017ccdf2653d0040099433fc1a2b1beb5926 upstream. WARNING: CPU: 5 PID: 1242 at kernel/rcu/tree_plugin.h:323 rcu_note_context_switch+0x207/0x6b0 CPU: 5 PID: 1242 Comm: unity-settings- Not tainted 4.13.0-rc2+ #1 RIP: 0010:rcu_note_context_switch+0x207/0x6b0 Call Trace: __schedule+0xda/0xba0 ? kvm_async_pf_task_wait+0x1b2/0x270 schedule+0x40/0x90 kvm_async_pf_task_wait+0x1cc/0x270 ? prepare_to_swait+0x22/0x70 do_async_page_fault+0x77/0xb0 ? do_async_page_fault+0x77/0xb0 async_page_fault+0x28/0x30 RIP: 0010:__d_lookup_rcu+0x90/0x1e0 I encounter this when trying to stress the async page fault in L1 guest w/ L2 guests running. Commit 9b132fbe5419 (Add rcu user eqs exception hooks for async page fault) adds rcu_irq_enter/exit() to kvm_async_pf_task_wait() to exit cpu idle eqs when needed, to protect the code that needs use rcu. However, we need to call the pair even if the function calls schedule(), as seen from the above backtrace. This patch fixes it by informing the RCU subsystem exit/enter the irq towards/away from idle for both n.halted and !n.halted. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Paul E. McKenney Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kvm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9cf697ceedbf..55ffd9dc2258 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -152,6 +152,8 @@ void kvm_async_pf_task_wait(u32 token) if (hlist_unhashed(&n.link)) break; + rcu_irq_exit(); + if (!n.halted) { local_irq_enable(); schedule(); @@ -160,11 +162,11 @@ void kvm_async_pf_task_wait(u32 token) /* * We cannot reschedule. So halt. */ - rcu_irq_exit(); native_safe_halt(); local_irq_disable(); - rcu_irq_enter(); } + + rcu_irq_enter(); } if (!n.halted) finish_swait(&n.wq, &wait); -- cgit v1.2.3 From 7b95b74563627b95da435f5c120af63451e95380 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 25 Oct 2016 09:51:14 -0500 Subject: mm/page_alloc: Remove kernel address exposure in free_reserved_area() commit adb1fe9ae2ee6ef6bc10f3d5a588020e7664dfa7 upstream. Linus suggested we try to remove some of the low-hanging fruit related to kernel address exposure in dmesg. The only leaks I see on my local system are: Freeing SMP alternatives memory: 32K (ffffffff9e309000 - ffffffff9e311000) Freeing initrd memory: 10588K (ffffa0b736b42000 - ffffa0b737599000) Freeing unused kernel memory: 3592K (ffffffff9df87000 - ffffffff9e309000) Freeing unused kernel memory: 1352K (ffffa0b7288ae000 - ffffa0b728a00000) Freeing unused kernel memory: 632K (ffffa0b728d62000 - ffffa0b728e00000) Linus says: "I suspect we should just remove [the addresses in the 'Freeing' messages]. I'm sure they are useful in theory, but I suspect they were more useful back when the whole "free init memory" was originally done. These days, if we have a use-after-free, I suspect the init-mem situation is the easiest situation by far. Compared to all the dynamic allocations which are much more likely to show it anyway. So having debug output for that case is likely not all that productive." With this patch the freeing messages now look like this: Freeing SMP alternatives memory: 32K Freeing initrd memory: 10588K Freeing unused kernel memory: 3592K Freeing unused kernel memory: 1352K Freeing unused kernel memory: 632K Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/6836ff90c45b71d38e5d4405aec56fa9e5d1d4b2.1477405374.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Cc: Kees Cook Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 56df8c24689d..d4f860cc7163 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6445,8 +6445,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s) } if (pages && s) - pr_info("Freeing %s memory: %ldK (%p - %p)\n", - s, pages << (PAGE_SHIFT - 10), start, end); + pr_info("Freeing %s memory: %ldK\n", + s, pages << (PAGE_SHIFT - 10)); return pages; } -- cgit v1.2.3 From 9ef8b23b94b98ec9b270e6fca5eadb97c96d809a Mon Sep 17 00:00:00 2001 From: Matija Glavinic Pecotic Date: Tue, 1 Aug 2017 09:11:52 +0200 Subject: timers: Fix overflow in get_next_timer_interrupt commit 34f41c0316ed52b0b44542491d89278efdaa70e4 upstream. For e.g. HZ=100, timer being 430 jiffies in the future, and 32 bit unsigned int, there is an overflow on unsigned int right-hand side of the expression which results with wrong values being returned. Type cast the multiplier to 64bit to avoid that issue. Fixes: 46c8f0b077a8 ("timers: Fix get_next_timer_interrupt() computation") Signed-off-by: Matija Glavinic Pecotic Signed-off-by: Thomas Gleixner Reviewed-by: Alexander Sverdlin Cc: khilman@baylibre.com Cc: akpm@linux-foundation.org Link: http://lkml.kernel.org/r/a7900f04-2a21-c9fd-67be-ab334d459ee5@nokia.com Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index c611c47de884..944ad64277a6 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1536,7 +1536,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) base->is_idle = false; } else { if (!is_max_delta) - expires = basem + (nextevt - basej) * TICK_NSEC; + expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* * If we expect to sleep more than a tick, mark the base idle: */ -- cgit v1.2.3 From 8164692a2f66ecc08bcdc42955b6ce62d47637ec Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Wed, 19 Jul 2017 01:44:13 -0400 Subject: powerpc/tm: Fix saving of TM SPRs in core dump commit cd63f3cf1d59b7ad8419eba1cac8f9126e79cc43 upstream. Currently flush_tmregs_to_thread() does not save the TM SPRs (TFHAR, TFIAR, TEXASR) to the thread struct, unless the process is currently inside a suspended transaction. If the process is core dumping, and the TM SPRs have changed since the last time the process was context switched, then we will save stale values of the TM SPRs to the core dump. Fix it by saving the live register state to the thread struct in that case. Fixes: 08e1c01d6aed ("powerpc/ptrace: Enable support for TM SPR state") Signed-off-by: Gustavo Romero Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ptrace.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 5c8f12fe9721..dcbb9144c16d 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) * If task is not current, it will have been flushed already to * it's thread_struct during __switch_to(). * - * A reclaim flushes ALL the state. + * A reclaim flushes ALL the state or if not in TM save TM SPRs + * in the appropriate thread structures from live. */ - if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) - tm_reclaim_current(TM_CAUSE_SIGNAL); + if (tsk != current) + return; + if (MSR_TM_SUSPENDED(mfmsr())) { + tm_reclaim_current(TM_CAUSE_SIGNAL); + } else { + tm_enable(); + tm_save_sprs(&(tsk->thread)); + } } #else static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } -- cgit v1.2.3 From 03ebdd1eb95afc3d4524540ecb1226567e655e5e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Aug 2017 23:59:28 +1000 Subject: powerpc/64: Fix __check_irq_replay missing decrementer interrupt commit 3db40c312c2c1eb2187c5731102fa8ff380e6e40 upstream. If the decrementer wraps again and de-asserts the decrementer exception while hard-disabled, __check_irq_replay() has a test to notice the wrap when interrupts are re-enabled. The decrementer check must be done when clearing the PACA_IRQ_HARD_DIS flag, not when the PACA_IRQ_DEC flag is tested. Previously this worked because the decrementer interrupt was always the first one checked after clearing the hard disable flag, but HMI check was moved ahead of that, which introduced this bug. This can cause a missed decrementer interrupt if we soft-disable interrupts then take an HMI which is recorded in irq_happened, then hard-disable interrupts for > 4s to wrap the decrementer. Fixes: e0e0d6b7390b ("powerpc/64: Replay hypervisor maintenance interrupt first") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/irq.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3c05c311e35e..028a22bfa90c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -146,6 +146,19 @@ notrace unsigned int __check_irq_replay(void) /* Clear bit 0 which we wouldn't clear otherwise */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + if (happened & PACA_IRQ_HARD_DIS) { + /* + * We may have missed a decrementer interrupt if hard disabled. + * Check the decrementer register in case we had a rollover + * while hard disabled. + */ + if (!(happened & PACA_IRQ_DEC)) { + if (decrementer_check_overflow()) { + local_paca->irq_happened |= PACA_IRQ_DEC; + happened |= PACA_IRQ_DEC; + } + } + } /* * Force the delivery of pending soft-disabled interrupts on PS3. @@ -171,7 +184,7 @@ notrace unsigned int __check_irq_replay(void) * in case we also had a rollover while hard disabled */ local_paca->irq_happened &= ~PACA_IRQ_DEC; - if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow()) + if (happened & PACA_IRQ_DEC) return 0x900; /* Finally check if an external interrupt happened */ -- cgit v1.2.3 From be5c6efc996e439205c298331c755f007f66484b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 5 Jul 2017 21:29:59 -0500 Subject: iommu/amd: Enable ga_log_intr when enabling guest_mode commit efe6f241602cb61466895f6816b8ea6b90f04d4e upstream. IRTE[GALogIntr] bit should set when enabling guest_mode, which enables IOMMU to generate entry in GALog when IRTE[IsRun] is not set, and send an interrupt to notify IOMMU driver. Signed-off-by: Suravee Suthikulpanit Cc: Joerg Roedel Fixes: d98de49a53e48 ('iommu/amd: Enable vAPIC interrupt remapping mode by default') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 41800b6d492e..c380b7e8f1c6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4294,6 +4294,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) /* Setting */ irte->hi.fields.ga_root_ptr = (pi_data->base >> 12); irte->hi.fields.vector = vcpu_pi_info->vector; + irte->lo.fields_vapic.ga_log_intr = 1; irte->lo.fields_vapic.guest_mode = 1; irte->lo.fields_vapic.ga_tag = pi_data->ga_tag; -- cgit v1.2.3 From b680e22fca767103ea4f86e21a8cca927f9d1ca7 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 3 Jul 2017 11:12:03 +0200 Subject: gpiolib: skip unwanted events, don't convert them to opposite edge commit df1e76f28ffe87d1b065eecab2d0fbb89e6bdee5 upstream. The previous fix for filtering out of unwatched events was not entirely correct. Instead of skipping the events we don't want, they are now interpreted as events with opposing edge. In order to fix it: always read the GPIO line value on interrupt and only emit the event if it corresponds with the event type we requested. Fixes: ad537b822577 ("gpiolib: fix filtering out unwanted events") Signed-off-by: Bartosz Golaszewski Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f2bb5122d2c2..063d176baa24 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -703,24 +703,23 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p) { struct lineevent_state *le = p; struct gpioevent_data ge; - int ret; + int ret, level; ge.timestamp = ktime_get_real_ns(); + level = gpiod_get_value_cansleep(le->desc); if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) { - int level = gpiod_get_value_cansleep(le->desc); - if (level) /* Emit low-to-high event */ ge.id = GPIOEVENT_EVENT_RISING_EDGE; else /* Emit high-to-low event */ ge.id = GPIOEVENT_EVENT_FALLING_EDGE; - } else if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE) { + } else if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE && level) { /* Emit low-to-high event */ ge.id = GPIOEVENT_EVENT_RISING_EDGE; - } else if (le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) { + } else if (le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE && !level) { /* Emit high-to-low event */ ge.id = GPIOEVENT_EVENT_FALLING_EDGE; } else { -- cgit v1.2.3 From 0814c3a9447ba284ef91b2d27755bf5a4514ca64 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 5 Aug 2017 17:43:24 -0400 Subject: ext4: fix SEEK_HOLE/SEEK_DATA for blocksize < pagesize commit fcf5ea10992fbac3c7473a1db33d56a139333cd1 upstream. ext4_find_unwritten_pgoff() does not properly handle a situation when starting index is in the middle of a page and blocksize < pagesize. The following command shows the bug on filesystem with 1k blocksize: xfs_io -f -c "falloc 0 4k" \ -c "pwrite 1k 1k" \ -c "pwrite 3k 1k" \ -c "seek -a -r 0" foo In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048, SEEK_DATA) will return the correct result. Fix the problem by neglecting buffers in a page before starting offset. Reported-by: Andreas Gruenbacher Signed-off-by: Theodore Ts'o Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9e77c089e8cb..d17d12ed6f73 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -469,6 +469,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = page_offset(page); bh = head = page_buffers(page); do { + if (lastoff + bh->b_size <= startoff) + goto next; if (buffer_uptodate(bh) || buffer_unwritten(bh)) { if (whence == SEEK_DATA) @@ -483,6 +485,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); goto out; } +next: lastoff += bh->b_size; bh = bh->b_this_page; } while (bh != head); -- cgit v1.2.3 From 12353a00df25d5c8da8de8c60b705e5ccc436cfe Mon Sep 17 00:00:00 2001 From: Jerry Lee Date: Sun, 6 Aug 2017 01:18:31 -0400 Subject: ext4: fix overflow caused by missing cast in ext4_resize_fs() commit aec51758ce10a9c847a62a48a168f8c804c6e053 upstream. On a 32-bit platform, the value of n_blcoks_count may be wrong during the file system is resized to size larger than 2^32 blocks. This may caused the superblock being corrupted with zero blocks count. Fixes: 1c6bd7173d66 Signed-off-by: Jerry Lee Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index cf681004b196..95bf46654153 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1926,7 +1926,8 @@ retry: n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); - n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_blocks_count = (ext4_fsblk_t)n_group * + EXT4_BLOCKS_PER_GROUP(sb); n_group--; /* set to last group number */ } -- cgit v1.2.3 From ff603e017bee376dbc730924bbf4e554a719100f Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 12 Jul 2017 13:23:11 +0200 Subject: ARM: dts: armada-38x: Fix irq type for pca955 commit 8d4514173211586c6238629b1ef1e071927735f5 upstream. As written in the datasheet the PCA955 can only handle low level irq and not edge irq. Without this fix the interrupt is not usable for pca955: the gpio-pca953x driver already set the irq type as low level which is incompatible with edge type, then the kernel prevents using the interrupt: "irq: type mismatch, failed to map hwirq-18 for /soc/internal-regs/gpio@18100!" Fixes: 928413bd859c ("ARM: mvebu: Add Armada 388 General Purpose Development Board support") Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-388-gp.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index 895fa6cfa15a..563901e0ec07 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -75,7 +75,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pca0_pins>; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; @@ -87,7 +87,7 @@ compatible = "nxp,pca9555"; pinctrl-names = "default"; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; -- cgit v1.2.3 From 4f3544816937b0b59f81838295f018a2367e7e53 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Fri, 28 Jul 2017 15:27:49 +0200 Subject: ARM: dts: tango4: Request RGMII RX and TX clock delays commit 985333b0eef8603b02181c4ec0a722b82be9642d upstream. RX and TX clock delays are required. Request them explicitly. Fixes: cad008b8a77e6 ("ARM: dts: tango4: Initial device trees") Signed-off-by: Marc Gonzalez Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/tango4-vantage-1172.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tango4-vantage-1172.dts b/arch/arm/boot/dts/tango4-vantage-1172.dts index 4cab64cb581e..e3a51e3538b7 100644 --- a/arch/arm/boot/dts/tango4-vantage-1172.dts +++ b/arch/arm/boot/dts/tango4-vantage-1172.dts @@ -21,7 +21,7 @@ }; ð0 { - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; phy-handle = <ð0_phy>; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3 From 733d7ab6c5bccef9a644afdcffc1e2369f0d5b6e Mon Sep 17 00:00:00 2001 From: Prabhakar Lad Date: Thu, 20 Jul 2017 08:02:09 -0400 Subject: media: platform: davinci: return -EINVAL for VPFE_CMD_S_CCDC_RAW_PARAMS ioctl commit da05d52d2f0f6bd61094a0cd045fed94bf7d673a upstream. this patch makes sure VPFE_CMD_S_CCDC_RAW_PARAMS ioctl no longer works for vpfe_capture driver with a minimal patch suitable for backporting. - This ioctl was never in public api and was only defined in kernel header. - The function set_params constantly mixes up pointers and phys_addr_t numbers. - This is part of a 'VPFE_CMD_S_CCDC_RAW_PARAMS' ioctl command that is described as an 'experimental ioctl that will change in future kernels'. - The code to allocate the table never gets called after we copy_from_user the user input over the kernel settings, and then compare them for inequality. - We then go on to use an address provided by user space as both the __user pointer for input and pass it through phys_to_virt to come up with a kernel pointer to copy the data to. This looks like a trivially exploitable root hole. Due to these reasons we make sure this ioctl now returns -EINVAL and backport this patch as far as possible. Fixes: 5f15fbb68fd7 ("V4L/DVB (12251): v4l: dm644x ccdc module for vpfe capture driver") Signed-off-by: Lad, Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/davinci/vpfe_capture.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c index 6efb2f1631c4..bdb7a0a00932 100644 --- a/drivers/media/platform/davinci/vpfe_capture.c +++ b/drivers/media/platform/davinci/vpfe_capture.c @@ -1725,27 +1725,9 @@ static long vpfe_param_handler(struct file *file, void *priv, switch (cmd) { case VPFE_CMD_S_CCDC_RAW_PARAMS: + ret = -EINVAL; v4l2_warn(&vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS: experimental ioctl\n"); - if (ccdc_dev->hw_ops.set_params) { - ret = ccdc_dev->hw_ops.set_params(param); - if (ret) { - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "Error setting parameters in CCDC\n"); - goto unlock_out; - } - ret = vpfe_get_ccdc_image_format(vpfe_dev, - &vpfe_dev->fmt); - if (ret < 0) { - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "Invalid image format at CCDC\n"); - goto unlock_out; - } - } else { - ret = -EINVAL; - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); - } + "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); break; default: ret = -ENOTTY; -- cgit v1.2.3 From bdabf097f05b9ef78c58622e3dfdec39b11a2ee5 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 24 May 2017 21:47:09 -0700 Subject: iscsi-target: Fix initial login PDU asynchronous socket close OOPs commit 25cdda95fda78d22d44157da15aa7ea34be3c804 upstream. This patch fixes a OOPs originally introduced by: commit bb048357dad6d604520c91586334c9c230366a14 Author: Nicholas Bellinger Date: Thu Sep 5 14:54:04 2013 -0700 iscsi-target: Add sk->sk_state_change to cleanup after TCP failure which would trigger a NULL pointer dereference when a TCP connection was closed asynchronously via iscsi_target_sk_state_change(), but only when the initial PDU processing in iscsi_target_do_login() from iscsi_np process context was blocked waiting for backend I/O to complete. To address this issue, this patch makes the following changes. First, it introduces some common helper functions used for checking socket closing state, checking login_flags, and atomically checking socket closing state + setting login_flags. Second, it introduces a LOGIN_FLAGS_INITIAL_PDU bit to know when a TCP connection has dropped via iscsi_target_sk_state_change(), but the initial PDU processing within iscsi_target_do_login() in iscsi_np context is still running. For this case, it sets LOGIN_FLAGS_CLOSED, but doesn't invoke schedule_delayed_work(). The original NULL pointer dereference case reported by MNC is now handled by iscsi_target_do_login() doing a iscsi_target_sk_check_close() before transitioning to FFP to determine when the socket has already closed, or iscsi_target_start_negotiation() if the login needs to exchange more PDUs (eg: iscsi_target_do_login returned 0) but the socket has closed. For both of these cases, the cleanup up of remaining connection resources will occur in iscsi_target_start_negotiation() from iscsi_np process context once the failure is detected. Finally, to handle to case where iscsi_target_sk_state_change() is called after the initial PDU procesing is complete, it now invokes conn->login_work -> iscsi_target_do_login_rx() to perform cleanup once existing iscsi_target_sk_check_close() checks detect connection failure. For this case, the cleanup of remaining connection resources will occur in iscsi_target_do_login_rx() from delayed workqueue process context once the failure is detected. Reported-by: Mike Christie Reviewed-by: Mike Christie Tested-by: Mike Christie Cc: Mike Christie Reported-by: Hannes Reinecke Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Varun Prakash Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_nego.c | 204 +++++++++++++++++++++---------- include/target/iscsi/iscsi_target_core.h | 1 + 2 files changed, 138 insertions(+), 67 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 6693d7c69f97..e8efb4299a95 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -490,14 +490,60 @@ static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); -static bool iscsi_target_sk_state_check(struct sock *sk) +static bool __iscsi_target_sk_check_close(struct sock *sk) { if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { - pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE," "returning FALSE\n"); - return false; + return true; } - return true; + return false; +} + +static bool iscsi_target_sk_check_close(struct iscsi_conn *conn) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_flag(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = test_bit(flag, &conn->login_flags); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + if (!state) + clear_bit(flag, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + return state; } static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) @@ -537,6 +583,20 @@ static void iscsi_target_do_login_rx(struct work_struct *work) pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", conn, current->comm, current->pid); + /* + * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready() + * before initial PDU processing in iscsi_target_start_negotiation() + * has completed, go ahead and retry until it's cleared. + * + * Otherwise if the TCP connection drops while this is occuring, + * iscsi_target_start_negotiation() will detect the failure, call + * cancel_delayed_work_sync(&conn->login_work), and cleanup the + * remaining iscsi connection resources from iscsi_np process context. + */ + if (iscsi_target_sk_check_flag(conn, LOGIN_FLAGS_INITIAL_PDU)) { + schedule_delayed_work(&conn->login_work, msecs_to_jiffies(10)); + return; + } spin_lock(&tpg->tpg_state_lock); state = (tpg->tpg_state == TPG_STATE_ACTIVE); @@ -544,26 +604,12 @@ static void iscsi_target_do_login_rx(struct work_struct *work) if (!state) { pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; + goto err; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (iscsi_target_sk_check_close(conn)) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + goto err; } conn->login_kworker = current; @@ -581,34 +627,29 @@ static void iscsi_target_do_login_rx(struct work_struct *work) flush_signals(current); conn->login_kworker = NULL; - if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (rc < 0) + goto err; pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", conn, current->comm, current->pid); rc = iscsi_target_do_login(conn, login); if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); + goto err; } else if (!rc) { - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE)) + goto err; } else if (rc == 1) { iscsi_target_nego_release(conn); iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } + return; + +err: + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); } static void iscsi_target_do_cleanup(struct work_struct *work) @@ -656,31 +697,54 @@ static void iscsi_target_sk_state_change(struct sock *sk) orig_state_change(sk); return; } + state = __iscsi_target_sk_check_close(sk); + pr_debug("__iscsi_target_sk_close_change: state: %d\n", state); + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" " conn: %p\n", conn); + if (state) + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } - if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", conn); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } + /* + * If the TCP connection has dropped, go ahead and set LOGIN_FLAGS_CLOSED, + * but only queue conn->login_work -> iscsi_target_do_login_rx() + * processing if LOGIN_FLAGS_INITIAL_PDU has already been cleared. + * + * When iscsi_target_do_login_rx() runs, iscsi_target_sk_check_close() + * will detect the dropped TCP connection from delayed workqueue context. + * + * If LOGIN_FLAGS_INITIAL_PDU is still set, which means the initial + * iscsi_target_start_negotiation() is running, iscsi_target_do_login() + * via iscsi_target_sk_check_close() or iscsi_target_start_negotiation() + * via iscsi_target_sk_check_and_clear() is responsible for detecting the + * dropped TCP connection in iscsi_np process context, and cleaning up + * the remaining iscsi connection resources. + */ + if (state) { + pr_debug("iscsi_target_sk_state_change got failed state\n"); + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); + state = test_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - write_unlock_bh(&sk->sk_callback_lock); - - pr_debug("iscsi_target_sk_state_change: state: %d\n", state); + orig_state_change(sk); - if (!state) { - pr_debug("iscsi_target_sk_state_change got failed state\n"); - schedule_delayed_work(&conn->login_cleanup_work, 0); + if (!state) + schedule_delayed_work(&conn->login_work, 0); return; } + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); } @@ -945,6 +1009,15 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (iscsi_target_handle_csg_one(conn, login) < 0) return -1; if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { + /* + * Check to make sure the TCP connection has not + * dropped asynchronously while session reinstatement + * was occuring in this kthread context, before + * transitioning to full feature phase operation. + */ + if (iscsi_target_sk_check_close(conn)) + return -1; + login->tsih = conn->sess->tsih; login->login_complete = 1; iscsi_target_restore_sock_callbacks(conn); @@ -971,21 +1044,6 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - bool state; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login() failed state for" - " conn: %p\n", conn); - return -1; - } - } - return 0; } @@ -1252,13 +1310,25 @@ int iscsi_target_start_negotiation( if (conn->sock) { struct sock *sk = conn->sock->sk; - write_lock_bh(&sk->sk_callback_lock); - set_bit(LOGIN_FLAGS_READY, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + /* + * If iscsi_target_do_login returns zero to signal more PDU + * exchanges are required to complete the login, go ahead and + * clear LOGIN_FLAGS_INITIAL_PDU but only if the TCP connection + * is still active. + * + * Otherwise if TCP connection dropped asynchronously, go ahead + * and perform connection cleanup now. + */ + ret = iscsi_target_do_login(conn, login); + if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU)) + ret = -1; - ret = iscsi_target_do_login(conn, login); - if (ret < 0) { + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); iscsi_target_restore_sock_callbacks(conn); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 33b2e75bf2eb..c8132b419148 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -563,6 +563,7 @@ struct iscsi_conn { #define LOGIN_FLAGS_READ_ACTIVE 1 #define LOGIN_FLAGS_CLOSED 2 #define LOGIN_FLAGS_READY 4 +#define LOGIN_FLAGS_INITIAL_PDU 8 unsigned long login_flags; struct delayed_work login_work; struct delayed_work login_cleanup_work; -- cgit v1.2.3 From baf4ad0e51620f88216a6b11cdc868ae861983a9 Mon Sep 17 00:00:00 2001 From: David Woods Date: Fri, 26 May 2017 17:53:20 -0400 Subject: mmc: dw_mmc: Use device_property_read instead of of_property_read commit 852ff5fea9eb6a9799f1881d6df2cd69a9e6eed5 upstream. Using the device_property interfaces allows the dw_mmc driver to work on platforms which run on either device tree or ACPI. Signed-off-by: David Woods Reviewed-by: Chris Metcalf Acked-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/dw_mmc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index df478ae72e23..f81f4175f49a 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2610,8 +2610,8 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) host->slot[id] = slot; mmc->ops = &dw_mci_ops; - if (of_property_read_u32_array(host->dev->of_node, - "clock-freq-min-max", freq, 2)) { + if (device_property_read_u32_array(host->dev, "clock-freq-min-max", + freq, 2)) { mmc->f_min = DW_MCI_FREQ_MIN; mmc->f_max = DW_MCI_FREQ_MAX; } else { @@ -2709,7 +2709,6 @@ static void dw_mci_init_dma(struct dw_mci *host) { int addr_config; struct device *dev = host->dev; - struct device_node *np = dev->of_node; /* * Check tansfer mode from HCON[17:16] @@ -2770,8 +2769,9 @@ static void dw_mci_init_dma(struct dw_mci *host) dev_info(host->dev, "Using internal DMA controller.\n"); } else { /* TRANS_MODE_EDMAC: check dma bindings again */ - if ((of_property_count_strings(np, "dma-names") < 0) || - (!of_find_property(np, "dmas", NULL))) { + if ((device_property_read_string_array(dev, "dma-names", + NULL, 0) < 0) || + !device_property_present(dev, "dmas")) { goto no_dma; } host->dma_ops = &dw_mci_edmac_ops; @@ -2931,7 +2931,6 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host) { struct dw_mci_board *pdata; struct device *dev = host->dev; - struct device_node *np = dev->of_node; const struct dw_mci_drv_data *drv_data = host->drv_data; int ret; u32 clock_frequency; @@ -2948,15 +2947,16 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host) } /* find out number of slots supported */ - of_property_read_u32(np, "num-slots", &pdata->num_slots); + device_property_read_u32(dev, "num-slots", &pdata->num_slots); - if (of_property_read_u32(np, "fifo-depth", &pdata->fifo_depth)) + if (device_property_read_u32(dev, "fifo-depth", &pdata->fifo_depth)) dev_info(dev, "fifo-depth property not found, using value of FIFOTH register as default\n"); - of_property_read_u32(np, "card-detect-delay", &pdata->detect_delay_ms); + device_property_read_u32(dev, "card-detect-delay", + &pdata->detect_delay_ms); - if (!of_property_read_u32(np, "clock-frequency", &clock_frequency)) + if (!device_property_read_u32(dev, "clock-frequency", &clock_frequency)) pdata->bus_hz = clock_frequency; if (drv_data && drv_data->parse_dt) { -- cgit v1.2.3 From e92add299fee14a3042960d723e074e8bd5c8412 Mon Sep 17 00:00:00 2001 From: David Woods Date: Fri, 26 May 2017 17:53:21 -0400 Subject: mmc: core: Use device_property_read instead of of_property_read commit 73a47a9bb3e2c4a9c553c72456e63ab991b1a4d9 upstream. Using the device_property interfaces allows mmc drivers to work on platforms which run on either device tree or ACPI. Signed-off-by: David Woods Reviewed-by: Chris Metcalf Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/host.c | 70 ++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 98f25ffb4258..848b3453517e 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -179,19 +179,17 @@ static void mmc_retune_timer(unsigned long data) */ int mmc_of_parse(struct mmc_host *host) { - struct device_node *np; + struct device *dev = host->parent; u32 bus_width; int ret; bool cd_cap_invert, cd_gpio_invert = false; bool ro_cap_invert, ro_gpio_invert = false; - if (!host->parent || !host->parent->of_node) + if (!dev || !dev_fwnode(dev)) return 0; - np = host->parent->of_node; - /* "bus-width" is translated to MMC_CAP_*_BIT_DATA flags */ - if (of_property_read_u32(np, "bus-width", &bus_width) < 0) { + if (device_property_read_u32(dev, "bus-width", &bus_width) < 0) { dev_dbg(host->parent, "\"bus-width\" property is missing, assuming 1 bit.\n"); bus_width = 1; @@ -213,7 +211,7 @@ int mmc_of_parse(struct mmc_host *host) } /* f_max is obtained from the optional "max-frequency" property */ - of_property_read_u32(np, "max-frequency", &host->f_max); + device_property_read_u32(dev, "max-frequency", &host->f_max); /* * Configure CD and WP pins. They are both by default active low to @@ -228,12 +226,12 @@ int mmc_of_parse(struct mmc_host *host) */ /* Parse Card Detection */ - if (of_property_read_bool(np, "non-removable")) { + if (device_property_read_bool(dev, "non-removable")) { host->caps |= MMC_CAP_NONREMOVABLE; } else { - cd_cap_invert = of_property_read_bool(np, "cd-inverted"); + cd_cap_invert = device_property_read_bool(dev, "cd-inverted"); - if (of_property_read_bool(np, "broken-cd")) + if (device_property_read_bool(dev, "broken-cd")) host->caps |= MMC_CAP_NEEDS_POLL; ret = mmc_gpiod_request_cd(host, "cd", 0, true, @@ -259,7 +257,7 @@ int mmc_of_parse(struct mmc_host *host) } /* Parse Write Protection */ - ro_cap_invert = of_property_read_bool(np, "wp-inverted"); + ro_cap_invert = device_property_read_bool(dev, "wp-inverted"); ret = mmc_gpiod_request_ro(host, "wp", 0, false, 0, &ro_gpio_invert); if (!ret) @@ -267,62 +265,62 @@ int mmc_of_parse(struct mmc_host *host) else if (ret != -ENOENT && ret != -ENOSYS) return ret; - if (of_property_read_bool(np, "disable-wp")) + if (device_property_read_bool(dev, "disable-wp")) host->caps2 |= MMC_CAP2_NO_WRITE_PROTECT; /* See the comment on CD inversion above */ if (ro_cap_invert ^ ro_gpio_invert) host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; - if (of_property_read_bool(np, "cap-sd-highspeed")) + if (device_property_read_bool(dev, "cap-sd-highspeed")) host->caps |= MMC_CAP_SD_HIGHSPEED; - if (of_property_read_bool(np, "cap-mmc-highspeed")) + if (device_property_read_bool(dev, "cap-mmc-highspeed")) host->caps |= MMC_CAP_MMC_HIGHSPEED; - if (of_property_read_bool(np, "sd-uhs-sdr12")) + if (device_property_read_bool(dev, "sd-uhs-sdr12")) host->caps |= MMC_CAP_UHS_SDR12; - if (of_property_read_bool(np, "sd-uhs-sdr25")) + if (device_property_read_bool(dev, "sd-uhs-sdr25")) host->caps |= MMC_CAP_UHS_SDR25; - if (of_property_read_bool(np, "sd-uhs-sdr50")) + if (device_property_read_bool(dev, "sd-uhs-sdr50")) host->caps |= MMC_CAP_UHS_SDR50; - if (of_property_read_bool(np, "sd-uhs-sdr104")) + if (device_property_read_bool(dev, "sd-uhs-sdr104")) host->caps |= MMC_CAP_UHS_SDR104; - if (of_property_read_bool(np, "sd-uhs-ddr50")) + if (device_property_read_bool(dev, "sd-uhs-ddr50")) host->caps |= MMC_CAP_UHS_DDR50; - if (of_property_read_bool(np, "cap-power-off-card")) + if (device_property_read_bool(dev, "cap-power-off-card")) host->caps |= MMC_CAP_POWER_OFF_CARD; - if (of_property_read_bool(np, "cap-mmc-hw-reset")) + if (device_property_read_bool(dev, "cap-mmc-hw-reset")) host->caps |= MMC_CAP_HW_RESET; - if (of_property_read_bool(np, "cap-sdio-irq")) + if (device_property_read_bool(dev, "cap-sdio-irq")) host->caps |= MMC_CAP_SDIO_IRQ; - if (of_property_read_bool(np, "full-pwr-cycle")) + if (device_property_read_bool(dev, "full-pwr-cycle")) host->caps2 |= MMC_CAP2_FULL_PWR_CYCLE; - if (of_property_read_bool(np, "keep-power-in-suspend")) + if (device_property_read_bool(dev, "keep-power-in-suspend")) host->pm_caps |= MMC_PM_KEEP_POWER; - if (of_property_read_bool(np, "wakeup-source") || - of_property_read_bool(np, "enable-sdio-wakeup")) /* legacy */ + if (device_property_read_bool(dev, "wakeup-source") || + device_property_read_bool(dev, "enable-sdio-wakeup")) /* legacy */ host->pm_caps |= MMC_PM_WAKE_SDIO_IRQ; - if (of_property_read_bool(np, "mmc-ddr-1_8v")) + if (device_property_read_bool(dev, "mmc-ddr-1_8v")) host->caps |= MMC_CAP_1_8V_DDR; - if (of_property_read_bool(np, "mmc-ddr-1_2v")) + if (device_property_read_bool(dev, "mmc-ddr-1_2v")) host->caps |= MMC_CAP_1_2V_DDR; - if (of_property_read_bool(np, "mmc-hs200-1_8v")) + if (device_property_read_bool(dev, "mmc-hs200-1_8v")) host->caps2 |= MMC_CAP2_HS200_1_8V_SDR; - if (of_property_read_bool(np, "mmc-hs200-1_2v")) + if (device_property_read_bool(dev, "mmc-hs200-1_2v")) host->caps2 |= MMC_CAP2_HS200_1_2V_SDR; - if (of_property_read_bool(np, "mmc-hs400-1_8v")) + if (device_property_read_bool(dev, "mmc-hs400-1_8v")) host->caps2 |= MMC_CAP2_HS400_1_8V | MMC_CAP2_HS200_1_8V_SDR; - if (of_property_read_bool(np, "mmc-hs400-1_2v")) + if (device_property_read_bool(dev, "mmc-hs400-1_2v")) host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR; - if (of_property_read_bool(np, "mmc-hs400-enhanced-strobe")) + if (device_property_read_bool(dev, "mmc-hs400-enhanced-strobe")) host->caps2 |= MMC_CAP2_HS400_ES; - if (of_property_read_bool(np, "no-sdio")) + if (device_property_read_bool(dev, "no-sdio")) host->caps2 |= MMC_CAP2_NO_SDIO; - if (of_property_read_bool(np, "no-sd")) + if (device_property_read_bool(dev, "no-sd")) host->caps2 |= MMC_CAP2_NO_SD; - if (of_property_read_bool(np, "no-mmc")) + if (device_property_read_bool(dev, "no-mmc")) host->caps2 |= MMC_CAP2_NO_MMC; - host->dsr_req = !of_property_read_u32(np, "dsr", &host->dsr); + host->dsr_req = !device_property_read_u32(dev, "dsr", &host->dsr); if (host->dsr_req && (host->dsr & ~0xffff)) { dev_err(host->parent, "device tree specified broken value for DSR: 0x%x, ignoring\n", -- cgit v1.2.3 From c14e327bee7018659e0b64db265e19f62cc2bd72 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Fri, 7 Jul 2017 18:49:18 -0300 Subject: media: lirc: LIRC_GET_REC_RESOLUTION should return microseconds commit 9f5039ba440e499d85c29b1ddbc3cbc9dc90e44b upstream. Since commit e8f4818895b3 ("[media] lirc: advertise LIRC_CAN_GET_REC_RESOLUTION and improve") lircd uses the ioctl LIRC_GET_REC_RESOLUTION to determine the shortest pulse or space that the hardware can detect. This breaks decoding in lirc because lircd expects the answer in microseconds, but nanoseconds is returned. Reported-by: Derek Tested-by: Derek Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/ir-lirc-codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/ir-lirc-codec.c b/drivers/media/rc/ir-lirc-codec.c index c3277308a70b..b49f80cb49c9 100644 --- a/drivers/media/rc/ir-lirc-codec.c +++ b/drivers/media/rc/ir-lirc-codec.c @@ -254,7 +254,7 @@ static long ir_lirc_ioctl(struct file *filep, unsigned int cmd, return 0; case LIRC_GET_REC_RESOLUTION: - val = dev->rx_resolution; + val = dev->rx_resolution / 1000; break; case LIRC_SET_WIDEBAND_RECEIVER: -- cgit v1.2.3 From 0f442c5b2e4ac0b65027ed3374462f1c38675f7e Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 15 May 2017 10:45:08 -0700 Subject: f2fs: sanity check checkpoint segno and blkoff commit 15d3042a937c13f5d9244241c7a9c8416ff6e82a upstream. Make sure segno and blkoff read from raw image are valid. Cc: stable@vger.kernel.org Signed-off-by: Jin Qian [Jaegeuk Kim: adjust minor coding style] Signed-off-by: Jaegeuk Kim [AmitP: Found in Android Security bulletin for Aug'17, fixes CVE-2017-10663] Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7e0c002c12e9..eb20b8767f3c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1424,6 +1424,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int total, fsmeta; struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned int main_segs, blocks_per_seg; + int i; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -1435,6 +1437,20 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta >= total)) return 1; + main_segs = le32_to_cpu(raw_super->segment_count_main); + blocks_per_seg = sbi->blocks_per_seg; + + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) + return 1; + } + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; -- cgit v1.2.3 From 3a63729427acd0ee9f6265d20cf3b9890e8d7a96 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Jul 2017 15:10:35 -0700 Subject: Btrfs: fix early ENOSPC due to delalloc commit 17024ad0a0fdfcfe53043afb969b813d3e020c21 upstream. If a lot of metadata is reserved for outstanding delayed allocations, we rely on shrink_delalloc() to reclaim metadata space in order to fulfill reservation tickets. However, shrink_delalloc() has a shortcut where if it determines that space can be overcommitted, it will stop early. This made sense before the ticketed enospc system, but now it means that shrink_delalloc() will often not reclaim enough space to fulfill any tickets, leading to an early ENOSPC. (Reservation tickets don't care about being able to overcommit, they need every byte accounted for.) Fix it by getting rid of the shortcut so that shrink_delalloc() reclaims all of the metadata it is supposed to. This fixes early ENOSPCs we were seeing when doing a btrfs receive to populate a new filesystem, as well as early ENOSPCs Christoph saw when doing a big cp -r onto Btrfs. Fixes: 957780eb2788 ("Btrfs: introduce ticketed enospc infrastructure") Tested-by: Christoph Anton Mitterer Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Nikolay Borisov Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 14a37ff0b9e3..705bb5f5a87f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4759,10 +4759,6 @@ skip_async: else flush = BTRFS_RESERVE_NO_FLUSH; spin_lock(&space_info->lock); - if (can_overcommit(root, space_info, orig, flush)) { - spin_unlock(&space_info->lock); - break; - } if (list_empty(&space_info->tickets) && list_empty(&space_info->priority_tickets)) { spin_unlock(&space_info->lock); -- cgit v1.2.3 From 12d17d78e3f74b5022f61eee7d6de082e472a401 Mon Sep 17 00:00:00 2001 From: Steven Toth Date: Tue, 6 Jun 2017 09:30:27 -0300 Subject: saa7164: fix double fetch PCIe access condition commit 6fb05e0dd32e566facb96ea61a48c7488daa5ac3 upstream. Avoid a double fetch by reusing the values from the prior transfer. Originally reported via https://bugzilla.kernel.org/show_bug.cgi?id=195559 Thanks to Pengfei Wang for reporting. Signed-off-by: Steven Toth Reported-by: Pengfei Wang Signed-off-by: Mauro Carvalho Chehab Cc: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7164/saa7164-bus.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/media/pci/saa7164/saa7164-bus.c b/drivers/media/pci/saa7164/saa7164-bus.c index a18fe5d47238..b4857cd7069e 100644 --- a/drivers/media/pci/saa7164/saa7164-bus.c +++ b/drivers/media/pci/saa7164/saa7164-bus.c @@ -393,11 +393,11 @@ int saa7164_bus_get(struct saa7164_dev *dev, struct tmComResInfo* msg, msg_tmp.size = le16_to_cpu((__force __le16)msg_tmp.size); msg_tmp.command = le32_to_cpu((__force __le32)msg_tmp.command); msg_tmp.controlselector = le16_to_cpu((__force __le16)msg_tmp.controlselector); + memcpy(msg, &msg_tmp, sizeof(*msg)); /* No need to update the read positions, because this was a peek */ /* If the caller specifically want to peek, return */ if (peekonly) { - memcpy(msg, &msg_tmp, sizeof(*msg)); goto peekout; } @@ -442,21 +442,15 @@ int saa7164_bus_get(struct saa7164_dev *dev, struct tmComResInfo* msg, space_rem = bus->m_dwSizeGetRing - curr_grp; if (space_rem < sizeof(*msg)) { - /* msg wraps around the ring */ - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, space_rem); - memcpy_fromio((u8 *)msg + space_rem, bus->m_pdwGetRing, - sizeof(*msg) - space_rem); if (buf) memcpy_fromio(buf, bus->m_pdwGetRing + sizeof(*msg) - space_rem, buf_size); } else if (space_rem == sizeof(*msg)) { - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, sizeof(*msg)); if (buf) memcpy_fromio(buf, bus->m_pdwGetRing, buf_size); } else { /* Additional data wraps around the ring */ - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, sizeof(*msg)); if (buf) { memcpy_fromio(buf, bus->m_pdwGetRing + curr_grp + sizeof(*msg), space_rem - sizeof(*msg)); @@ -469,15 +463,10 @@ int saa7164_bus_get(struct saa7164_dev *dev, struct tmComResInfo* msg, } else { /* No wrapping */ - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, sizeof(*msg)); if (buf) memcpy_fromio(buf, bus->m_pdwGetRing + curr_grp + sizeof(*msg), buf_size); } - /* Convert from little endian to CPU */ - msg->size = le16_to_cpu((__force __le16)msg->size); - msg->command = le32_to_cpu((__force __le32)msg->command); - msg->controlselector = le16_to_cpu((__force __le16)msg->controlselector); /* Update the read positions, adjusting the ring */ saa7164_writel(bus->m_dwGetReadPos, new_grp); -- cgit v1.2.3 From 73520d3814b6fd37483dc79e578906301122bbb3 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:21 -0400 Subject: tcp_bbr: cut pacing rate only if filled pipe [ Upstream commit 4aea287e90dd61a48268ff2994b56f9799441b62 ] In bbr_set_pacing_rate(), which decides whether to cut the pacing rate, there was some code that considered exiting STARTUP to be equivalent to the notion of filling the pipe (i.e., bbr_full_bw_reached()). Specifically, as the code was structured, exiting STARTUP and going into PROBE_RTT could cause us to cut the pacing rate down to something silly and low, based on whatever bandwidth samples we've had so far, when it's possible that all of them have been small app-limited bandwidth samples that are not representative of the bandwidth available in the path. (The code was correct at the time it was written, but the state machine changed without this spot being adjusted correspondingly.) Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 0ea66c2c9344..9e3d438faef8 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -191,12 +191,11 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) */ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { - struct bbr *bbr = inet_csk_ca(sk); u64 rate = bw; rate = bbr_rate_bytes_per_sec(sk, rate, gain); rate = min_t(u64, rate, sk->sk_max_pacing_rate); - if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) sk->sk_pacing_rate = rate; } -- cgit v1.2.3 From ec789686d79b5d95c1623d2bbd6f0bb8657f6dcd Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:22 -0400 Subject: tcp_bbr: introduce bbr_bw_to_pacing_rate() helper [ Upstream commit f19fd62dafaf1ed6cf615dba655b82fa9df59074 ] Introduce a helper to convert a BBR bandwidth and gain factor to a pacing rate in bytes per second. This is a pure refactor, but is needed for two following fixes. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 9e3d438faef8..5c5c9f1ef913 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -182,6 +182,16 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) return rate >> BW_SCALE; } +/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ +static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + return rate; +} + /* Pace using current bw estimate and a gain factor. In order to help drive the * network toward lower queues while maintaining high utilization and low * latency, the average pacing rate aims to be slightly (~1%) lower than the @@ -191,10 +201,8 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) */ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { - u64 rate = bw; + u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); - rate = bbr_rate_bytes_per_sec(sk, rate, gain); - rate = min_t(u64, rate, sk->sk_max_pacing_rate); if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) sk->sk_pacing_rate = rate; } -- cgit v1.2.3 From 0c0ede36b9e0a6b35c48c14203432c8ec0619b12 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:23 -0400 Subject: tcp_bbr: introduce bbr_init_pacing_rate_from_rtt() helper [ Upstream commit 79135b89b8af304456bd67916b80116ddf03d7b6 ] Introduce a helper to initialize the BBR pacing rate unconditionally, based on the current cwnd and RTT estimate. This is a pure refactor, but is needed for two following fixes. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 5c5c9f1ef913..c7a4c3f95016 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -192,6 +192,23 @@ static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) return rate; } +/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +static void bbr_init_pacing_rate_from_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + u64 bw; + u32 rtt_us; + + if (tp->srtt_us) { /* any RTT sample yet? */ + rtt_us = max(tp->srtt_us >> 3, 1U); + } else { /* no RTT sample yet */ + rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ + } + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, rtt_us); + sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); +} + /* Pace using current bw estimate and a gain factor. In order to help drive the * network toward lower queues while maintaining high utilization and low * latency, the average pacing rate aims to be slightly (~1%) lower than the @@ -776,7 +793,6 @@ static void bbr_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw; bbr->prior_cwnd = 0; bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ @@ -792,11 +808,8 @@ static void bbr_init(struct sock *sk) minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ - /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ - bw = (u64)tp->snd_cwnd * BW_UNIT; - do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC); sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ - bbr_set_pacing_rate(sk, bw, bbr_high_gain); + bbr_init_pacing_rate_from_rtt(sk); bbr->restore_cwnd = 0; bbr->round_start = 0; -- cgit v1.2.3 From 2e6b237dc03be379435de3c3f19841fe38096e51 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:24 -0400 Subject: tcp_bbr: remove sk_pacing_rate=0 transient during init [ Upstream commit 1d3648eb5d1fe9ed3d095ed8fa19ad11ca4c8bc0 ] Fix a corner case noticed by Eric Dumazet, where BBR's setting sk->sk_pacing_rate to 0 during initialization could theoretically cause packets in the sending host to hang if there were packets "in flight" in the pacing infrastructure at the time the BBR congestion control state is initialized. This could occur if the pacing infrastructure happened to race with bbr_init() in a way such that the pacer read the 0 rather than the immediately following non-zero pacing rate. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Reported-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index c7a4c3f95016..c798788877e2 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -808,7 +808,6 @@ static void bbr_init(struct sock *sk) minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ - sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ bbr_init_pacing_rate_from_rtt(sk); bbr->restore_cwnd = 0; -- cgit v1.2.3 From 0cd73c42b7a2d9ab96632ccb895ddb521060c7a9 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:25 -0400 Subject: tcp_bbr: init pacing rate on first RTT sample [ Upstream commit 32984565574da7ed3afa10647bb4020d7a9e6c93 ] Fixes the following behavior: for connections that had no RTT sample at the time of initializing congestion control, BBR was initializing the pacing rate to a high nominal rate (based an a guess of RTT=1ms, in case this is LAN traffic). Then BBR never adjusted the pacing rate downward upon obtaining an actual RTT sample, if the connection never filled the pipe (e.g. all sends were small app-limited writes()). This fix adjusts the pacing rate upon obtaining the first RTT sample. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index c798788877e2..cb8db347c680 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -83,7 +83,8 @@ struct bbr { cwnd_gain:10, /* current gain for setting cwnd */ full_bw_cnt:3, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ - unused_b:6; + has_seen_rtt:1, /* have we seen an RTT sample yet? */ + unused_b:5; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ }; @@ -196,11 +197,13 @@ static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) static void bbr_init_pacing_rate_from_rtt(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); u64 bw; u32 rtt_us; if (tp->srtt_us) { /* any RTT sample yet? */ rtt_us = max(tp->srtt_us >> 3, 1U); + bbr->has_seen_rtt = 1; } else { /* no RTT sample yet */ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ } @@ -218,8 +221,12 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) */ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); + if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) + bbr_init_pacing_rate_from_rtt(sk); if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) sk->sk_pacing_rate = rate; } @@ -808,6 +815,7 @@ static void bbr_init(struct sock *sk) minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); bbr->restore_cwnd = 0; -- cgit v1.2.3 From a62bc8d336e3e93538c3c5dd64a36db5efcf2fa8 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 17 Jul 2017 12:35:58 +0200 Subject: ipv4: ipv6: initialize treq->txhash in cookie_v[46]_check() [ Upstream commit 18bcf2907df935981266532e1e0d052aff2e6fae ] KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(), which originated from the TCP request socket created in cookie_v6_check(): ================================================================== BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0 CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies. Check SNMP counters. Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 skb_set_hash_from_sk ./include/net/sock.h:2011 tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983 tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493 tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284 tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309 call_timer_fn+0x240/0x520 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 __run_timers+0xc13/0xf10 kernel/time/timer.c:1601 run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614 __do_softirq+0x485/0x942 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 irq_exit+0x1fa/0x230 kernel/softirq.c:405 exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657 smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966 apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489 RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36 RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77 RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440 RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 0000000000000005 RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770 RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004 R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810 R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4 poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293 SYSC_select+0x4b4/0x4e0 fs/select.c:653 SyS_select+0x76/0xa0 fs/select.c:634 entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204 RIP: 0033:0x4597e7 RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20 R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003 chained origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_save_stack mm/kmsan/kmsan.c:317 kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547 __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259 tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472 tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103 tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212 cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245 tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337 kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766 reqsk_alloc ./include/net/request_sock.h:87 inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200 cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169 tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 ================================================================== Similar error is reported for cookie_v4_check(). Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets") Signed-off-by: Alexander Potapenko Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/syncookies.c | 1 + net/ipv6/syncookies.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3c4043c27de..b6f710d515d0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->txhash = net_tx_rndhash(); req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 59c483937aec..7a86433d8896 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -209,6 +209,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->txhash = net_tx_rndhash(); /* * We need to lookup the dst_entry to get the correct window size. -- cgit v1.2.3 From 0b83249a57adc1afd19c7e4818731d61d3a8ce7e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 Jul 2017 13:33:24 -0700 Subject: net: Zero terminate ifr_name in dev_ifname(). [ Upstream commit 63679112c536289826fec61c917621de95ba2ade ] The ifr.ifr_name is passed around and assumed to be NULL terminated. Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b94b1d293506..151e047ce072 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; + ifr.ifr_name[IFNAMSIZ-1] = 0; error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); if (error) -- cgit v1.2.3 From 4a2ffe1707e3787f93a7d0ff2dec682a57ba25ad Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 19 Jul 2017 22:28:55 +0200 Subject: ipv6: avoid overflow of offset in ip6_find_1stfragopt [ Upstream commit 6399f1fae4ec29fab5ec76070435555e256ca3a6 ] In some cases, offset can overflow and can cause an infinite loop in ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and cap it at IPV6_MAXPLEN, since packets larger than that should be invalid. This problem has been here since before the beginning of git history. Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/output_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index e9065b8d3af8..abb2c307fbe8 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { - u16 offset = sizeof(struct ipv6hdr); + unsigned int offset = sizeof(struct ipv6hdr); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; @@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; + unsigned int len; switch (**nexthdr) { @@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); - offset += ipv6_optlen(exthdr); + len = ipv6_optlen(exthdr); + if (len + offset >= IPV6_MAXPLEN) + return -EINVAL; + offset += len; *nexthdr = &exthdr->nexthdr; } -- cgit v1.2.3 From 5a2d511a95ccaa164005991944d7564e2e21bca2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Jul 2017 12:25:22 -0700 Subject: net: dsa: b53: Add missing ARL entries for BCM53125 [ Upstream commit be35e8c516c1915a3035d266a2015b41f73ba3f9 ] The BCM53125 entry was missing an arl_entries member which would basically prevent the ARL search from terminating properly. This switch has 4 ARL entries, so add that. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/b53/b53_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 947adda3397d..3ec573c13dac 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1558,6 +1558,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .dev_name = "BCM53125", .vlans = 4096, .enabled_ports = 0xff, + .arl_entries = 4, .cpu_port = B53_CPU_PORT, .vta_regs = B53_VTA_REGS, .duplex_reg = B53_DUPLEX_STAT_GE, -- cgit v1.2.3 From 0c47f11bc4a5ddd37962b2ba5a4d53f6caeeaec1 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 19 Jul 2017 15:41:33 -0700 Subject: ipv4: initialize fib_trie prior to register_netdev_notifier call. [ Upstream commit 8799a221f5944a7d74516ecf46d58c28ec1d1f75 ] Net stack initialization currently initializes fib-trie after the first call to netdevice_notifier() call. In fact fib_trie initialization needs to happen before first rtnl_register(). It does not cause any problem since there are no devices UP at this moment, but trying to bring 'lo' UP at initialization would make this assumption wrong and exposes the issue. Fixes following crash Call Trace: ? alternate_node_alloc+0x76/0xa0 fib_table_insert+0x1b7/0x4b0 fib_magic.isra.17+0xea/0x120 fib_add_ifaddr+0x7b/0x190 fib_netdev_event+0xc0/0x130 register_netdevice_notifier+0x1c1/0x1d0 ip_fib_init+0x72/0x85 ip_rt_init+0x187/0x1e9 ip_init+0xe/0x1a inet_init+0x171/0x26c ? ipv4_offload_init+0x66/0x66 do_one_initcall+0x43/0x160 kernel_init_freeable+0x191/0x219 ? rest_init+0x80/0x80 kernel_init+0xe/0x150 ret_from_fork+0x22/0x30 Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08 RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28 CR2: 0000000000000014 Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.") Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization") Signed-off-by: Mahesh Bandewar Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3d92534c4450..968d8e165e3d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1319,13 +1319,14 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + fib_trie_init(); register_pernet_subsys(&fib_net_ops); + register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_trie_init(); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); } -- cgit v1.2.3 From cda2bc91baf7b6849733145331726a1364f20512 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 20 Jul 2017 11:27:57 -0700 Subject: rtnetlink: allocate more memory for dev_set_mac_address() [ Upstream commit 153711f9421be5dbc973dc57a4109dc9d54c89b1 ] virtnet_set_mac_address() interprets mac address as struct sockaddr, but upper layer only allocates dev->addr_len which is ETH_ALEN + sizeof(sa_family_t) in this case. We lack a unified definition for mac address, so just fix the upper layer, this also allows drivers to interpret it to struct sockaddr freely. Reported-by: David Ahern Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c6fd7f83a4a..4d2629781e8b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1965,7 +1965,8 @@ static int do_setlink(const struct sk_buff *skb, struct sockaddr *sa; int len; - len = sizeof(sa_family_t) + dev->addr_len; + len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len, + sizeof(*sa)); sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; -- cgit v1.2.3 From 59e76ed17981aa9ee8da4496d13edf42ec29958c Mon Sep 17 00:00:00 2001 From: Thomas Jarosch Date: Sat, 22 Jul 2017 17:14:34 +0200 Subject: mcs7780: Fix initialization when CONFIG_VMAP_STACK is enabled [ Upstream commit 9476d393667968b4a02afbe9d35a3558482b943e ] DMA transfers are not allowed to buffers that are on the stack. Therefore allocate a buffer to store the result of usb_control_message(). Fixes these bugreports: https://bugzilla.kernel.org/show_bug.cgi?id=195217 https://bugzilla.redhat.com/show_bug.cgi?id=1421387 https://bugzilla.redhat.com/show_bug.cgi?id=1427398 Shortened kernel backtrace from 4.11.9-200.fc25.x86_64: kernel: ------------[ cut here ]------------ kernel: WARNING: CPU: 3 PID: 2957 at drivers/usb/core/hcd.c:1587 kernel: transfer buffer not dma capable kernel: Call Trace: kernel: dump_stack+0x63/0x86 kernel: __warn+0xcb/0xf0 kernel: warn_slowpath_fmt+0x5a/0x80 kernel: usb_hcd_map_urb_for_dma+0x37f/0x570 kernel: ? try_to_del_timer_sync+0x53/0x80 kernel: usb_hcd_submit_urb+0x34e/0xb90 kernel: ? schedule_timeout+0x17e/0x300 kernel: ? del_timer_sync+0x50/0x50 kernel: ? __slab_free+0xa9/0x300 kernel: usb_submit_urb+0x2f4/0x560 kernel: ? urb_destroy+0x24/0x30 kernel: usb_start_wait_urb+0x6e/0x170 kernel: usb_control_msg+0xdc/0x120 kernel: mcs_get_reg+0x36/0x40 [mcs7780] kernel: mcs_net_open+0xb5/0x5c0 [mcs7780] ... Regression goes back to 4.9, so it's a good candidate for -stable. Though it's the decision of the maintainer. Thanks to Dan Williams for adding the "transfer buffer not dma capable" warning in the first place. It instantly pointed me in the right direction. Patch has been tested with transferring data from a Polar watch. Signed-off-by: Thomas Jarosch Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/irda/mcs7780.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c index bca6a1e72d1d..e1bb802d4a4d 100644 --- a/drivers/net/irda/mcs7780.c +++ b/drivers/net/irda/mcs7780.c @@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mcs, __u16 reg, __u16 val) static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val) { struct usb_device *dev = mcs->usbdev; - int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, - MCS_RD_RTYPE, 0, reg, val, 2, - msecs_to_jiffies(MCS_CTRL_TIMEOUT)); + void *dmabuf; + int ret; + + dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL); + if (!dmabuf) + return -ENOMEM; + + ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, + MCS_RD_RTYPE, 0, reg, dmabuf, 2, + msecs_to_jiffies(MCS_CTRL_TIMEOUT)); + + memcpy(val, dmabuf, sizeof(__u16)); + kfree(dmabuf); return ret; } -- cgit v1.2.3 From d53ff3816d94c841f7a7f1bf68505a2abb06757a Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 23 Jul 2017 17:52:23 +0800 Subject: openvswitch: fix potential out of bound access in parse_ct [ Upstream commit 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c ] Before the 'type' is validated, we shouldn't use it to fetch the ovs_ct_attr_lens's minlen and maxlen, else, out of bound access may happen. Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Signed-off-by: Liping Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/conntrack.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 48386bff8b4e..b28e45b691de 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1088,8 +1088,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, nla_for_each_nested(a, attr, rem) { int type = nla_type(a); - int maxlen = ovs_ct_attr_lens[type].maxlen; - int minlen = ovs_ct_attr_lens[type].minlen; + int maxlen; + int minlen; if (type > OVS_CT_ATTR_MAX) { OVS_NLERR(log, @@ -1097,6 +1097,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, type, OVS_CT_ATTR_MAX); return -EINVAL; } + + maxlen = ovs_ct_attr_lens[type].maxlen; + minlen = ovs_ct_attr_lens[type].minlen; if (nla_len(a) < minlen || nla_len(a) > maxlen) { OVS_NLERR(log, "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", -- cgit v1.2.3 From 47f3bf6e31164a0a3b77746d30bf9b3ac9664b03 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 24 Jul 2017 10:07:32 -0700 Subject: packet: fix use-after-free in prb_retire_rx_blk_timer_expired() [ Upstream commit c800aaf8d869f2b9b47b10c5c312fe19f0a94042 ] There are multiple reports showing we have a use-after-free in the timer prb_retire_rx_blk_timer_expired(), where we use struct tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by free_pg_vec(). The interesting part is it is not freed via packet_release() but via packet_setsockopt(), which means we are not closing the socket. Looking into the big and fat function packet_set_ring(), this could happen if we satisfy the following conditions: 1. closing == 0, not on packet_release() path 2. req->tp_block_nr == 0, we don't allocate a new pg_vec 3. rx_ring->pg_vec is already set as V3, which means we already called packet_set_ring() wtih req->tp_block_nr > 0 previously 4. req->tp_frame_nr == 0, pass sanity check 5. po->mapped == 0, never called mmap() In this scenario we are clearing the old rx_ring->pg_vec, so we need to free this pg_vec, but we don't stop the timer on this path because of closing==0. The timer has to be stopped as long as we need to free pg_vec, therefore the check on closing!=0 is wrong, we should check pg_vec!=NULL instead. Thanks to liujian for testing different fixes. Reported-by: alexander.levin@verizon.com Reported-by: Dave Jones Reported-by: liujian (CE) Tested-by: liujian (CE) Cc: Ding Tianhong Cc: Willem de Bruijn Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6a563e6e24de..365c83fcee02 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4322,7 +4322,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, register_prot_hook(sk); } spin_unlock(&po->bind_lock); - if (closing && (po->tp_version > TPACKET_V2)) { + if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); -- cgit v1.2.3 From 23a91c8ab682b841a18a88cb1d5332f20f1ec219 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 24 Jul 2017 23:14:28 +0200 Subject: ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in ip6_fragment() [ Upstream commit afce615aaabfbaad02550e75c0bec106dafa1adf ] RFC 2465 defines ipv6IfStatsOutFragFails as: "The number of IPv6 datagrams that have been discarded because they needed to be fragmented at this output interface but could not be." The existing implementation, instead, would increase the counter twice in case we fail to allocate room for single fragments: once for the fragment, once for the datagram. This didn't look intentional though. In one of the two affected affected failure paths, the double increase was simply a result of a new 'goto fail' statement, introduced to avoid a skb leak. The other path appears to be affected since at least 2.6.12-rc2. Reported-by: Sabrina Dubroca Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5a4b8e7bcedd..efe811c6eccf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -662,8 +662,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -782,8 +780,6 @@ slow_path: frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + hroom + troom, GFP_ATOMIC); if (!frag) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } -- cgit v1.2.3 From c0c8688f0debfef57af1f3a48cee4027ef7f50ab Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 25 Jul 2017 14:35:03 +0200 Subject: net: ethernet: nb8800: Handle all 4 RGMII modes identically [ Upstream commit 4813497b537c6208c90d6cbecac5072d347de900 ] Before commit bf8f6952a233 ("Add blurb about RGMII") it was unclear whose responsibility it was to insert the required clock skew, and in hindsight, some PHY drivers got it wrong. The solution forward is to introduce a new property, explicitly requiring skew from the node to which it is attached. In the interim, this driver will handle all 4 RGMII modes identically (no skew). Fixes: 52dfc8301248 ("net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller") Signed-off-by: Marc Gonzalez Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/aurora/nb8800.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c index e078d8da978c..29d29af612d1 100644 --- a/drivers/net/ethernet/aurora/nb8800.c +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev) mac_mode |= HALF_DUPLEX; if (gigabit) { - if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_is_rgmii(dev->phydev)) mac_mode |= RGMII_MODE; mac_mode |= GMAC_MODE; @@ -1277,11 +1277,10 @@ static int nb8800_tangox_init(struct net_device *dev) break; case PHY_INTERFACE_MODE_RGMII: - pad_mode = PAD_MODE_RGMII; - break; - + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY; + pad_mode = PAD_MODE_RGMII; break; default: -- cgit v1.2.3 From 9ffa6727f5712b8774b265b0a3c020acb9da726e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 14:19:09 +0800 Subject: dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly [ Upstream commit 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 ] In dccp_v6_conn_request, after reqsk gets alloced and hashed into ehash table, reqsk's refcnt is set 3. one is for req->rsk_timer, one is for hlist, and the other one is for current using. The problem is when dccp_v6_conn_request returns and finishes using reqsk, it doesn't put reqsk. This will cause reqsk refcnt leaks and reqsk obj never gets freed. Jianlin found this issue when running dccp_memleak.c in a loop, the system memory would run out. dccp_memleak.c: int s1 = socket(PF_INET6, 6, IPPROTO_IP); bind(s1, &sa1, 0x20); listen(s1, 0x9); int s2 = socket(PF_INET6, 6, IPPROTO_IP); connect(s2, &sa1, 0x20); close(s1); close(s2); This patch is to put the reqsk before dccp_v6_conn_request returns, just as what tcp_conn_request does. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2ac9d2a1aaab..28e8252cc5ea 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: -- cgit v1.2.3 From 4d938b6fcb90c5d19ce7d23581d9bccc7ce70724 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 14:19:46 +0800 Subject: dccp: fix a memleak that dccp_ipv4 doesn't put reqsk properly [ Upstream commit b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 ] The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly" fixed reqsk refcnt leak for dccp_ipv6. The same issue exists on dccp_ipv4. This patch is to fix it for dccp_ipv4. Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 86b0933ecd45..8fc160098e11 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -637,6 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: -- cgit v1.2.3 From bb21d2e75378ddda86775315e39344be624f9987 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 14:20:15 +0800 Subject: dccp: fix a memleak for dccp_feat_init err process [ Upstream commit e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b ] In dccp_feat_init, when ccid_get_builtin_ccids failsto alloc memory for rx.val, it should free tx.val before returning an error. Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/feat.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 1704948e6a12..f227f002c73d 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk) * singleton values (which always leads to failure). * These settings can still (later) be overridden via sockopts. */ - if (ccid_get_builtin_ccids(&tx.val, &tx.len) || - ccid_get_builtin_ccids(&rx.val, &rx.len)) + if (ccid_get_builtin_ccids(&tx.val, &tx.len)) return -ENOBUFS; + if (ccid_get_builtin_ccids(&rx.val, &rx.len)) { + kfree(tx.val); + return -ENOBUFS; + } if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) -- cgit v1.2.3 From cc6f1486f2cb053a647db3a51cba5fbeabaa4a3f Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Jul 2017 18:32:45 +0200 Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}() [ Upstream commit b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a ] If the length field of the iterator (|pos.p| or |err|) is past the end of the chunk, we shouldn't access it. This bug has been detected by KMSAN. For the following pair of system calls: socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3 sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 1 the tool has reported a use of uninitialized memory: ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0 CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 __sctp_rcv_init_lookup net/sctp/input.c:1074 __sctp_rcv_lookup_harder net/sctp/input.c:1233 __sctp_rcv_lookup net/sctp/input.c:1255 sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170 sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq kernel/softirq.c:328 __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181 local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31 rcu_read_unlock_bh ./include/linux/rcupdate.h:931 ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124 ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149 NF_HOOK_COND ./include/linux/netfilter.h:246 ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163 dst_output ./include/net/dst.h:486 NF_HOOK ./include/linux/netfilter.h:257 ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261 sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225 sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 RIP: 0033:0x401133 RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133 RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003 RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211 slab_alloc_node mm/slub.c:2743 __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 __alloc_skb+0x26b/0x840 net/core/skbuff.c:231 alloc_skb ./include/linux/skbuff.h:933 sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/sctp.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 31acc3f4f132..02170bbb4490 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -460,6 +460,8 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) @@ -470,6 +472,8 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) -- cgit v1.2.3 From df32d08293ea7db4fe7e030bd47c71b7f63fc05f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 16:24:59 +0800 Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors [ Upstream commit 6b84202c946cd3da3a8daa92c682510e9ed80321 ] Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") tried to fix the issue that it may overstep the chunk end for _sctp_walk_{params, errors} with 'chunk_end > offset(length) + sizeof(length)'. But it introduced a side effect: When processing INIT, it verifies the chunks with 'param.v == chunk_end' after iterating all params by sctp_walk_params(). With the check 'chunk_end > offset(length) + sizeof(length)', it would return when the last param is not yet accessed. Because the last param usually is fwdtsn supported param whose size is 4 and 'chunk_end == offset(length) + sizeof(length)' This is a badly issue even causing sctp couldn't process 4-shakes. Client would always get abort when connecting to server, due to the failure of INIT chunk verification on server. The patch is to use 'chunk_end <= offset(length) + sizeof(length)' instead of 'chunk_end < offset(length) + sizeof(length)' for both _sctp_walk_params and _sctp_walk_errors. Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 02170bbb4490..61d9ce89d10d 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -460,7 +460,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ - (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ @@ -472,7 +472,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ -- cgit v1.2.3 From fca84d617f77ef8b5e52ef4db057971e96eee25a Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 2 Jul 2017 19:13:43 +0300 Subject: net/mlx5: Consider tx_enabled in all modes on remap [ Upstream commit dc798b4cc0f2a06e7ad7d522403de274b86a0a6f ] The tx_enabled lag event field is used to determine whether a slave is active. Current logic uses this value only if the mode is active-backup. However, LACP mode, although considered a load balancing mode, can mark a slave as inactive in certain situations (e.g., LACP timeout). This fix takes the tx_enabled value into account when remapping, with no respect to the LAG mode (this should not affect the behavior in XOR mode, since in this mode both slaves are marked as active). Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature) Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index b5d5519542e8..0ca4623bda6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -157,22 +157,17 @@ static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, u8 *port1, u8 *port2) { - if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { - if (tracker->netdev_state[0].tx_enabled) { - *port1 = 1; - *port2 = 1; - } else { - *port1 = 2; - *port2 = 2; - } - } else { - *port1 = 1; - *port2 = 2; - if (!tracker->netdev_state[0].link_up) - *port1 = 2; - else if (!tracker->netdev_state[1].link_up) - *port2 = 1; + *port1 = 1; + *port2 = 2; + if (!tracker->netdev_state[0].tx_enabled || + !tracker->netdev_state[0].link_up) { + *port1 = 2; + return; } + + if (!tracker->netdev_state[1].tx_enabled || + !tracker->netdev_state[1].link_up) + *port2 = 1; } static void mlx5_activate_lag(struct mlx5_lag *ldev, -- cgit v1.2.3 From d19d0ac684e505200aad8db44271acbd0ab35bd1 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 25 Jun 2017 18:45:32 +0300 Subject: net/mlx5: Fix command bad flow on command entry allocation failure [ Upstream commit 219c81f7d1d5a89656cb3b53d3b4e11e93608d80 ] When driver fail to allocate an entry to send command to FW, it must notify the calling function and release the memory allocated for this command. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Moshe Shemesh Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index cb45390c7623..f7fabecc104f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -770,6 +770,10 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg); + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -779,16 +783,27 @@ static void cmd_work_handler(struct work_struct *work) struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; + int alloc_ret; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - ent->idx = alloc_ent(cmd); - if (ent->idx < 0) { + alloc_ret = alloc_ent(cmd); + if (alloc_ret < 0) { + if (ent->callback) { + ent->callback(-EAGAIN, ent->context); + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + free_cmd(ent); + } else { + ent->ret = -EAGAIN; + complete(&ent->done); + } mlx5_core_err(dev, "failed to allocate command entry\n"); up(sem); return; } + ent->idx = alloc_ret; } else { ent->idx = cmd->max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); -- cgit v1.2.3 From d7049799668a206eb2a97024c91fc822e0a8a5c6 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Wed, 5 Jul 2017 10:17:04 +0300 Subject: net/mlx5e: Fix outer_header_zero() check size [ Upstream commit 0242f4a0bb03906010bbf80495512be00494a0ef ] outer_header_zero() routine checks if the outer_headers match of a flow-table entry are all zero. This function uses the size of whole fte_match_param, instead of just the outer_headers member, causing failure to detect all-zeros if any other members of the fte_match_param are non-zero. Use the correct size for zero check. Fixes: 6dc6071cfcde ("net/mlx5e: Add ethtool flow steering support") Signed-off-by: Ilan Tayari Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index e034dbc4913d..cf070fc0fb6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -276,7 +276,7 @@ static void add_rule_to_list(struct mlx5e_priv *priv, static bool outer_header_zero(u32 *match_criteria) { - int size = MLX5_ST_SZ_BYTES(fte_match_param); + int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers); char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers); -- cgit v1.2.3 From 862ade9a4383fc336d20036a085a13a196e686ce Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 12 Jul 2017 17:27:18 +0300 Subject: net/mlx5e: Fix wrong delay calculation for overflow check scheduling [ Upstream commit d439c84509a510e864fdc6166c760482cd03fc57 ] The overflow_period is calculated in seconds. In order to use it for delayed work scheduling translation to jiffies is needed. Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 13dc388667b6..fddcff015522 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -67,7 +67,8 @@ static void mlx5e_timestamp_overflow(struct work_struct *work) write_lock_irqsave(&tstamp->lock, flags); timecounter_read(&tstamp->clock); write_unlock_irqrestore(&tstamp->lock, flags); - schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period); + schedule_delayed_work(&tstamp->overflow_work, + msecs_to_jiffies(tstamp->overflow_period * 1000)); } int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) -- cgit v1.2.3 From 33e25b2168dda8b000bc2951762c86fcbc23229b Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 12 Jul 2017 17:44:07 +0300 Subject: net/mlx5e: Schedule overflow check work to mlx5e workqueue [ Upstream commit f08c39ed0bfb503c7b3e013cd40d036ce6a0941a ] This is done in order to ensure that work will not run after the cleanup. Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index fddcff015522..1612ec0d9103 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -62,13 +62,14 @@ static void mlx5e_timestamp_overflow(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, overflow_work); + struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); unsigned long flags; write_lock_irqsave(&tstamp->lock, flags); timecounter_read(&tstamp->clock); write_unlock_irqrestore(&tstamp->lock, flags); - schedule_delayed_work(&tstamp->overflow_work, - msecs_to_jiffies(tstamp->overflow_period * 1000)); + queue_delayed_work(priv->wq, &tstamp->overflow_work, + msecs_to_jiffies(tstamp->overflow_period * 1000)); } int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) @@ -264,7 +265,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); if (tstamp->overflow_period) - schedule_delayed_work(&tstamp->overflow_work, 0); + queue_delayed_work(priv->wq, &tstamp->overflow_work, 0); else mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n"); -- cgit v1.2.3 From 38e71eabb8f71db17ce1aa846fb31abc936dcefa Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Jul 2017 11:58:36 -0700 Subject: net: phy: Correctly process PHY_HALTED in phy_stop_machine() [ Upstream commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ] Marc reported that he was not getting the PHY library adjust_link() callback function to run when calling phy_stop() + phy_disconnect() which does not indeed happen because we set the state machine to PHY_HALTED but we don't get to run it to process this state past that point. Fix this with a synchronous call to phy_state_machine() in order to have the state machine actually act on PHY_HALTED, set the PHY device's link down, turn the network device's carrier off and finally call the adjust_link() function. Reported-by: Marc Gonzalez Fixes: a390d1f379cf ("phylib: convert state_queue work to delayed_work") Signed-off-by: Florian Fainelli Signed-off-by: Marc Gonzalez Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index edd30ebbf275..7103dc1d20f2 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -674,6 +674,9 @@ void phy_stop_machine(struct phy_device *phydev) if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); + + /* Now we can run the state machine synchronously */ + phy_state_machine(&phydev->state_queue.work); } /** -- cgit v1.2.3 From 816843a257c77828c389cf9cce2170ef057bfc1b Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 21 Jun 2017 10:21:22 +0100 Subject: xen-netback: correctly schedule rate-limited queues [ Upstream commit dfa523ae9f2542bee4cddaea37b3be3e157f6e6b ] Add a flag to indicate if a queue is rate-limited. Test the flag in NAPI poll handler and avoid rescheduling the queue if true, otherwise we risk locking up the host. The rescheduling will be done in the timer callback function. Reported-by: Jean-Louis Dupond Signed-off-by: Wei Liu Tested-by: Jean-Louis Dupond Reviewed-by: Paul Durrant Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/common.h | 1 + drivers/net/xen-netback/interface.c | 6 +++++- drivers/net/xen-netback/netback.c | 6 +++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 3ce1f7da8647..cb7365bdf6e0 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned long remaining_credit; struct timer_list credit_timeout; u64 credit_window_start; + bool rate_limited; /* Statistics */ struct xenvif_stats stats; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index b009d7966b46..5bfaf5578810 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -105,7 +105,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete(napi); - xenvif_napi_schedule_or_enable_events(queue); + /* If the queue is rate-limited, it shall be + * rescheduled in the timer callback. + */ + if (likely(!queue->rate_limited)) + xenvif_napi_schedule_or_enable_events(queue); } return work_done; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 47b481095d77..d9b5b73c35a0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -179,6 +179,7 @@ static void tx_add_credit(struct xenvif_queue *queue) max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ queue->remaining_credit = min(max_credit, max_burst); + queue->rate_limited = false; } void xenvif_tx_credit_callback(unsigned long data) @@ -685,8 +686,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) msecs_to_jiffies(queue->credit_usec / 1000); /* Timer could already be pending in rare cases. */ - if (timer_pending(&queue->credit_timeout)) + if (timer_pending(&queue->credit_timeout)) { + queue->rate_limited = true; return true; + } /* Passed the point where we can replenish credit? */ if (time_after_eq64(now, next_credit)) { @@ -701,6 +704,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) mod_timer(&queue->credit_timeout, next_credit); queue->credit_window_start = next_credit; + queue->rate_limited = true; return true; } -- cgit v1.2.3 From bfafa56e6c675adaa4942c4259baacc9650073ff Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Tue, 11 Jul 2017 12:00:54 -0600 Subject: sparc64: Measure receiver forward progress to avoid send mondo timeout [ Upstream commit 9d53caec84c7c5700e7c1ed744ea584fff55f9ac ] A large sun4v SPARC system may have moments of intensive xcall activities, usually caused by unmapping many pages on many CPUs concurrently. This can flood receivers with CPU mondo interrupts for an extended period, causing some unlucky senders to hit send-mondo timeout. This problem gets worse as cpu count increases because sometimes mappings must be invalidated on all CPUs, and sometimes all CPUs may gang up on a single CPU. But a busy system is not a broken system. In the above scenario, as long as the receiver is making forward progress processing mondo interrupts, the sender should continue to retry. This patch implements the receiver's forward progress meter by introducing a per cpu counter 'cpu_mondo_counter[cpu]' where 'cpu' is in the range of 0..NR_CPUS. The receiver increments its counter as soon as it receives a mondo and the sender tracks the receiver's counter. If the receiver has stopped making forward progress when the retry limit is reached, the sender declares send-mondo-timeout and panic; otherwise, the receiver is allowed to keep making forward progress. In addition, it's been observed that PCIe hotplug events generate Correctable Errors that are handled by hypervisor and then OS. Hypervisor 'borrows' a guest cpu strand briefly to provide the service. If the cpu strand is simultaneously the only cpu targeted by a mondo, it may not be available for the mondo in 20msec, causing SUN4V mondo timeout. It appears that 1 second is the agreed wait time between hypervisor and guest OS, this patch makes the adjustment. Orabug: 25476541 Orabug: 26417466 Signed-off-by: Jane Chu Reviewed-by: Steve Sistare Reviewed-by: Anthony Yznaga Reviewed-by: Rob Gardner Reviewed-by: Thomas Tai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/trap_block.h | 1 + arch/sparc/kernel/smp_64.c | 185 ++++++++++++++++++++++-------------- arch/sparc/kernel/sun4v_ivec.S | 15 +++ arch/sparc/kernel/traps_64.c | 1 + 4 files changed, 132 insertions(+), 70 deletions(-) diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ec9c04de3664..ff05992dae7a 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; void init_cur_cpu_trap(struct thread_info *); void setup_tba(void); extern int ncpus_probed; +extern u64 cpu_mondo_counter[NR_CPUS]; unsigned long real_hard_smp_processor_id(void); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d5807d24b98f..2deb89ef1d5f 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -621,22 +621,48 @@ retry: } } -/* Multi-cpu list version. */ +#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) +#define MONDO_USEC_WAIT_MIN 2 +#define MONDO_USEC_WAIT_MAX 100 +#define MONDO_RETRY_LIMIT 500000 + +/* Multi-cpu list version. + * + * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. + * Sometimes not all cpus receive the mondo, requiring us to re-send + * the mondo until all cpus have received, or cpus are truly stuck + * unable to receive mondo, and we timeout. + * Occasionally a target cpu strand is borrowed briefly by hypervisor to + * perform guest service, such as PCIe error handling. Consider the + * service time, 1 second overall wait is reasonable for 1 cpu. + * Here two in-between mondo check wait time are defined: 2 usec for + * single cpu quick turn around and up to 100usec for large cpu count. + * Deliver mondo to large number of cpus could take longer, we adjusts + * the retry count as long as target cpus are making forward progress. + */ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) { - int retries, this_cpu, prev_sent, i, saw_cpu_error; + int this_cpu, tot_cpus, prev_sent, i, rem; + int usec_wait, retries, tot_retries; + u16 first_cpu = 0xffff; + unsigned long xc_rcvd = 0; unsigned long status; + int ecpuerror_id = 0; + int enocpu_id = 0; u16 *cpu_list; + u16 cpu; this_cpu = smp_processor_id(); - cpu_list = __va(tb->cpu_list_pa); - - saw_cpu_error = 0; - retries = 0; + usec_wait = cnt * MONDO_USEC_WAIT_MIN; + if (usec_wait > MONDO_USEC_WAIT_MAX) + usec_wait = MONDO_USEC_WAIT_MAX; + retries = tot_retries = 0; + tot_cpus = cnt; prev_sent = 0; + do { - int forward_progress, n_sent; + int n_sent, mondo_delivered, target_cpu_busy; status = sun4v_cpu_mondo_send(cnt, tb->cpu_list_pa, @@ -644,94 +670,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) /* HV_EOK means all cpus received the xcall, we're done. */ if (likely(status == HV_EOK)) - break; + goto xcall_done; + + /* If not these non-fatal errors, panic */ + if (unlikely((status != HV_EWOULDBLOCK) && + (status != HV_ECPUERROR) && + (status != HV_ENOCPU))) + goto fatal_errors; /* First, see if we made any forward progress. + * + * Go through the cpu_list, count the target cpus that have + * received our mondo (n_sent), and those that did not (rem). + * Re-pack cpu_list with the cpus remain to be retried in the + * front - this simplifies tracking the truly stalled cpus. * * The hypervisor indicates successful sends by setting * cpu list entries to the value 0xffff. + * + * EWOULDBLOCK means some target cpus did not receive the + * mondo and retry usually helps. + * + * ECPUERROR means at least one target cpu is in error state, + * it's usually safe to skip the faulty cpu and retry. + * + * ENOCPU means one of the target cpu doesn't belong to the + * domain, perhaps offlined which is unexpected, but not + * fatal and it's okay to skip the offlined cpu. */ + rem = 0; n_sent = 0; for (i = 0; i < cnt; i++) { - if (likely(cpu_list[i] == 0xffff)) + cpu = cpu_list[i]; + if (likely(cpu == 0xffff)) { n_sent++; + } else if ((status == HV_ECPUERROR) && + (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { + ecpuerror_id = cpu + 1; + } else if (status == HV_ENOCPU && !cpu_online(cpu)) { + enocpu_id = cpu + 1; + } else { + cpu_list[rem++] = cpu; + } } - forward_progress = 0; - if (n_sent > prev_sent) - forward_progress = 1; + /* No cpu remained, we're done. */ + if (rem == 0) + break; - prev_sent = n_sent; + /* Otherwise, update the cpu count for retry. */ + cnt = rem; - /* If we get a HV_ECPUERROR, then one or more of the cpus - * in the list are in error state. Use the cpu_state() - * hypervisor call to find out which cpus are in error state. + /* Record the overall number of mondos received by the + * first of the remaining cpus. */ - if (unlikely(status == HV_ECPUERROR)) { - for (i = 0; i < cnt; i++) { - long err; - u16 cpu; + if (first_cpu != cpu_list[0]) { + first_cpu = cpu_list[0]; + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + } - cpu = cpu_list[i]; - if (cpu == 0xffff) - continue; + /* Was any mondo delivered successfully? */ + mondo_delivered = (n_sent > prev_sent); + prev_sent = n_sent; - err = sun4v_cpu_state(cpu); - if (err == HV_CPU_STATE_ERROR) { - saw_cpu_error = (cpu + 1); - cpu_list[i] = 0xffff; - } - } - } else if (unlikely(status != HV_EWOULDBLOCK)) - goto fatal_mondo_error; + /* or, was any target cpu busy processing other mondos? */ + target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); - /* Don't bother rewriting the CPU list, just leave the - * 0xffff and non-0xffff entries in there and the - * hypervisor will do the right thing. - * - * Only advance timeout state if we didn't make any - * forward progress. + /* Retry count is for no progress. If we're making progress, + * reset the retry count. */ - if (unlikely(!forward_progress)) { - if (unlikely(++retries > 10000)) - goto fatal_mondo_timeout; - - /* Delay a little bit to let other cpus catch up - * on their cpu mondo queue work. - */ - udelay(2 * cnt); + if (likely(mondo_delivered || target_cpu_busy)) { + tot_retries += retries; + retries = 0; + } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { + goto fatal_mondo_timeout; } - } while (1); - if (unlikely(saw_cpu_error)) - goto fatal_mondo_cpu_error; + /* Delay a little bit to let other cpus catch up on + * their cpu mondo queue work. + */ + if (!mondo_delivered) + udelay(usec_wait); - return; + retries++; + } while (1); -fatal_mondo_cpu_error: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " - "(including %d) were in error state\n", - this_cpu, saw_cpu_error - 1); +xcall_done: + if (unlikely(ecpuerror_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", + this_cpu, ecpuerror_id - 1); + } else if (unlikely(enocpu_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", + this_cpu, enocpu_id - 1); + } return; +fatal_errors: + /* fatal errors include bad alignment, etc */ + pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", + this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + panic("Unexpected SUN4V mondo error %lu\n", status); + fatal_mondo_timeout: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " - " progress after %d retries.\n", - this_cpu, retries); - goto dump_cpu_list_and_out; - -fatal_mondo_error: - printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", - this_cpu, status); - printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " - "mondo_block_pa(%lx)\n", - this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); - -dump_cpu_list_and_out: - printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); - for (i = 0; i < cnt; i++) - printk("%u ", cpu_list[i]); - printk("]\n"); + /* some cpus being non-responsive to the cpu mondo */ + pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", + this_cpu, first_cpu, (tot_retries + retries), tot_cpus); + panic("SUN4V mondo timeout panic\n"); } static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S index 559bc5e9c199..34631995859a 100644 --- a/arch/sparc/kernel/sun4v_ivec.S +++ b/arch/sparc/kernel/sun4v_ivec.S @@ -26,6 +26,21 @@ sun4v_cpu_mondo: ldxa [%g0] ASI_SCRATCHPAD, %g4 sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + /* Get smp_processor_id() into %g3 */ + sethi %hi(trap_block), %g5 + or %g5, %lo(trap_block), %g5 + sub %g4, %g5, %g3 + srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + + /* Increment cpu_mondo_counter[smp_processor_id()] */ + sethi %hi(cpu_mondo_counter), %g5 + or %g5, %lo(cpu_mondo_counter), %g5 + sllx %g3, 3, %g3 + add %g5, %g3, %g5 + ldx [%g5], %g3 + add %g3, 1, %g3 + stx %g3, [%g5] + /* Get CPU mondo queue base phys address into %g7. */ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d44fb806bbd7..32dafb920908 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2732,6 +2732,7 @@ void do_getpsr(struct pt_regs *regs) } } +u64 cpu_mondo_counter[NR_CPUS] = {0}; struct trap_per_cpu trap_block[NR_CPUS]; EXPORT_SYMBOL(trap_block); -- cgit v1.2.3 From b9d68cdce72d49c06c24fb2f5bee5673557b33ac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Aug 2017 09:47:52 -0700 Subject: sparc64: Fix exception handling in UltraSPARC-III memcpy. [ Upstream commit 0ede1c401332173ab0693121dc6cde04a4dbf131 ] Mikael Pettersson reported that some test programs in the strace-4.18 testsuite cause an OOPS. After some debugging it turns out that garbage values are returned when an exception occurs, causing the fixup memset() to be run with bogus arguments. The problem is that two of the exception handler stubs write the successfully copied length into the wrong register. Fixes: ee841d0aff64 ("sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.") Reported-by: Mikael Pettersson Tested-by: Mikael Pettersson Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/lib/U3memcpy.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 54f98706b03b..5a8cb37f0a3b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08) ENTRY(U3_retl_o2_and_7_plus_GS) and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS) ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) add GLOBAL_SPARE, 8, GLOBAL_SPARE and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) #endif -- cgit v1.2.3 From b87145215abe415dc3a31d0ff0e187117fe8e1b6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:35:25 +0100 Subject: wext: handle NULL extra data in iwe_stream_add_point better commit 93be2b74279c15c2844684b1a027fdc71dd5d9bf upstream. gcc-7 complains that wl3501_cs passes NULL into a function that then uses the argument as the input for memcpy: drivers/net/wireless/wl3501_cs.c: In function 'wl3501_get_scan': include/net/iw_handler.h:559:3: error: argument 2 null where non-null expected [-Werror=nonnull] memcpy(stream + point_len, extra, iwe->u.data.length); This works fine here because iwe->u.data.length is guaranteed to be 0 and the memcpy doesn't actually have an effect. Making the length check explicit avoids the warning and should have no other effect here. Also check the pointer itself, since otherwise we get warnings elsewhere in the code. Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/net/iw_handler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index e0f4109e64c6..c2aa73e5e6bb 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -556,7 +556,8 @@ iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, memcpy(stream + lcp_len, ((char *) &iwe->u) + IW_EV_POINT_OFF, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); - memcpy(stream + point_len, extra, iwe->u.data.length); + if (iwe->u.data.length && extra) + memcpy(stream + point_len, extra, iwe->u.data.length); stream += event_len; } return stream; -- cgit v1.2.3 From 2d4fef7f4102ec8c9884a2600fd16218ccce7c1a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 4 Jan 2017 22:18:24 +0300 Subject: sh_eth: fix EESIPR values for SH77{34|63} [ Upstream commit 978d3639fd13d987950e4ce85c8737ae92154b2c ] As the SH77{34|63} manuals are freely available, I've checked the EESIPR values written against the manuals, and they appeared to set the reserved bits 11-15 (which should be 0 on write). Fix those EESIPR values. Fixes: 380af9e390ec ("net: sh_eth: CPU dependency code collect to "struct sh_eth_cpu_data"") Fixes: f5d12767c8fd ("sh_eth: get SH77{34|63} support out of #ifdef") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 12be259394c6..be2520e977d7 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -802,7 +802,7 @@ static struct sh_eth_cpu_data sh7734_data = { .ecsr_value = ECSR_ICD | ECSR_MPD, .ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP, - .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff, + .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003f07ff, .tx_check = EESR_TC1 | EESR_FTC, .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | @@ -832,7 +832,7 @@ static struct sh_eth_cpu_data sh7763_data = { .ecsr_value = ECSR_ICD | ECSR_MPD, .ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP, - .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff, + .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003f07ff, .tx_check = EESR_TC1 | EESR_FTC, .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | -- cgit v1.2.3 From bfe384ed452af92f390813e0fb041a5a7631a3ab Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 5 Jan 2017 00:29:32 +0300 Subject: sh_eth: R8A7740 supports packet shecksumming [ Upstream commit 0f1f9cbc04dbb3cc310f70a11cba0cf1f2109d9c ] The R8A7740 GEther controller supports the packet checksum offloading but the 'hw_crc' (bad name, I'll fix it) flag isn't set in the R8A7740 data, thus CSMR isn't cleared... Fixes: 73a0d907301e ("net: sh_eth: add support R8A7740") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index be2520e977d7..2140dedab712 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -574,6 +574,7 @@ static struct sh_eth_cpu_data r8a7740_data = { .rpadir_value = 2 << 16, .no_trimd = 1, .no_ade = 1, + .hw_crc = 1, .tsu = 1, .select_mii = 1, .shift_rd0 = 1, -- cgit v1.2.3 From 5f7eeee7940e256d786b883aa5b37188a05a156b Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 5 Jan 2017 14:48:07 -0600 Subject: net: phy: dp83867: fix irq generation [ Upstream commit 5ca7d1ca77dc23934504b95a96d2660d345f83c2 ] For proper IRQ generation by DP83867 phy the INT/PWDN pin has to be programmed as an interrupt output instead of a Powerdown input in Configuration Register 3 (CFG3), Address 0x001E, bit 7 INT_OE = 1. The current driver doesn't do this and as result IRQs will not be generated by DP83867 phy even if they are properly configured in DT. Hence, fix IRQ generation by properly configuring CFG3.INT_OE bit and ensure that Link Status Change (LINK_STATUS_CHNG_INT) and Auto-Negotiation Complete (AUTONEG_COMP_INT) interrupt are enabled. After this the DP83867 driver will work properly in interrupt enabled mode. Signed-off-by: Grygorii Strashko Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83867.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 4cad95552cf1..01cf094bee18 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -29,6 +29,7 @@ #define MII_DP83867_MICR 0x12 #define MII_DP83867_ISR 0x13 #define DP83867_CTRL 0x1f +#define DP83867_CFG3 0x1e /* Extended Registers */ #define DP83867_RGMIICTL 0x0032 @@ -90,6 +91,8 @@ static int dp83867_config_intr(struct phy_device *phydev) micr_status |= (MII_DP83867_MICR_AN_ERR_INT_EN | MII_DP83867_MICR_SPEED_CHNG_INT_EN | + MII_DP83867_MICR_AUTONEG_COMP_INT_EN | + MII_DP83867_MICR_LINK_STS_CHNG_INT_EN | MII_DP83867_MICR_DUP_MODE_CHNG_INT_EN | MII_DP83867_MICR_SLEEP_MODE_CHNG_INT_EN); @@ -190,6 +193,13 @@ static int dp83867_config_init(struct phy_device *phydev) DP83867_DEVADDR, delay); } + /* Enable Interrupt output INT_OE in CFG3 register */ + if (phy_interrupt_is_valid(phydev)) { + val = phy_read(phydev, DP83867_CFG3); + val |= BIT(7); + phy_write(phydev, DP83867_CFG3, val); + } + return 0; } -- cgit v1.2.3 From 1042bd47ddfbe4cbb7b91156317752fec43f2d46 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 6 Jan 2017 16:18:53 -0500 Subject: tg3: Fix race condition in tg3_get_stats64(). [ Upstream commit f5992b72ebe0dde488fa8f706b887194020c66fc ] The driver's ndo_get_stats64() method is not always called under RTNL. So it can race with driver close or ethtool reconfigurations. Fix the race condition by taking tp->lock spinlock in tg3_free_consistent() when freeing the tp->hw_stats memory block. tg3_get_stats64() is already taking tp->lock. Reported-by: Wang Yufen Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a927a730da10..edae2dcc4927 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8720,11 +8720,14 @@ static void tg3_free_consistent(struct tg3 *tp) tg3_mem_rx_release(tp); tg3_mem_tx_release(tp); + /* Protect tg3_get_stats64() from reading freed tp->hw_stats. */ + tg3_full_lock(tp, 0); if (tp->hw_stats) { dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats), tp->hw_stats, tp->stats_mapping); tp->hw_stats = NULL; } + tg3_full_unlock(tp); } /* -- cgit v1.2.3 From f897e9ae07c428a04cf4fae0150a3d148ed1b09a Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 7 Jan 2017 10:38:31 +0100 Subject: x86/boot: Add missing declaration of string functions [ Upstream commit fac69d0efad08fc15e4dbfc116830782acc0dc9a ] Add the missing declarations of basic string functions to string.h to allow a clean build. Fixes: 5be865661516 ("String-handling functions for the new x86 setup code.") Signed-off-by: Nicholas Mc Guire Link: http://lkml.kernel.org/r/1483781911-21399-1-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/string.c | 1 + arch/x86/boot/string.h | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index cc3bd583dce1..9e240fcba784 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -14,6 +14,7 @@ #include #include "ctype.h" +#include "string.h" int memcmp(const void *s1, const void *s2, size_t len) { diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 725e820602b1..113588ddb43f 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len); #define memset(d,c,l) __builtin_memset(d,c,l) #define memcmp __builtin_memcmp +extern int strcmp(const char *str1, const char *str2); +extern int strncmp(const char *cs, const char *ct, size_t count); +extern size_t strlen(const char *s); +extern char *strstr(const char *s1, const char *s2); +extern size_t strnlen(const char *s, size_t maxlen); +extern unsigned int atou(const char *s); +extern unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base); + #endif /* BOOT_STRING_H */ -- cgit v1.2.3 From c77512d671027c613db117dd1b45bd983c3a9954 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 9 Jan 2017 01:02:47 +0100 Subject: spi: spi-axi: Free resources on error path [ Upstream commit 9620ca90115d4bd700f05862d3b210a266a66efe ] We should go to 'err_put_master' here instead of returning directly. Otherwise a call to 'spi_master_put' is missing. Signed-off-by: Christophe JAILLET Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-axi-spi-engine.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index 2b1456e5e221..c1eafbd7610a 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -494,7 +494,8 @@ static int spi_engine_probe(struct platform_device *pdev) SPI_ENGINE_VERSION_MAJOR(version), SPI_ENGINE_VERSION_MINOR(version), SPI_ENGINE_VERSION_PATCH(version)); - return -ENODEV; + ret = -ENODEV; + goto err_put_master; } spi_engine->clk = devm_clk_get(&pdev->dev, "s_axi_aclk"); -- cgit v1.2.3 From 2e2a390b7f6784d2409656f5ccf3433f4ec3e18d Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 27 Dec 2016 12:05:05 +0800 Subject: ASoC: rt5645: set sel_i2s_pre_div1 to 2 [ Upstream commit 02c5c03283c52157d336abf5e44ffcda10579fbf ] The i2s clock pre-divider 1 is used for both i2s1 and sysclk. The i2s1 is usually used for the main i2s and the pre-divider will be set in hw_params function. However, if i2s2 is used, the pre-divider is not set in the hw_params function and the default value of i2s clock pre-divider 1 is too high for sysclk and DMIC usage. Fix by overriding default divider value to 2. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=95681 Tested-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5645.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 10c2a564a715..1ac96ef9ee20 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3833,6 +3833,9 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, } } + regmap_update_bits(rt5645->regmap, RT5645_ADDA_CLK1, + RT5645_I2S_PD1_MASK, RT5645_I2S_PD1_2); + if (rt5645->pdata.jd_invert) { regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV); -- cgit v1.2.3 From 9acfb313849e44e956a393b91a63386f7ef1df97 Mon Sep 17 00:00:00 2001 From: Pau Espin Pedrol Date: Fri, 6 Jan 2017 20:33:27 +0100 Subject: netfilter: use fwmark_reflect in nf_send_reset [ Upstream commit cc31d43b4154ad5a7d8aa5543255a93b7e89edc2 ] Otherwise, RST packets generated by ipt_REJECT always have mark 0 when the routing is checked later in the same code path. Fixes: e110861f8609 ("net: add a sysctl to reflect the fwmark on replies") Cc: Lorenzo Colitti Signed-off-by: Pau Espin Pedrol Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/nf_reject_ipv4.c | 2 ++ net/ipv6/netfilter/nf_reject_ipv6.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index fd8220213afc..146d86105183 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); + nskb->mark = IP4_REPLY_MARK(net, oldskb->mark); + skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, ip4_dst_hoplimit(skb_dst(nskb))); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 10090400c72f..eedee5d108d9 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -157,6 +157,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); + fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { @@ -180,6 +181,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) skb_dst_set(nskb, dst); + nskb->mark = fl6.flowi6_mark; + skb_reserve(nskb, hh_len + dst->header_len); ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst)); -- cgit v1.2.3 From 0bbbbaef6aa0df22c4bafd6db231b173697483ee Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Fri, 6 Jan 2017 12:14:48 +0100 Subject: phy state machine: failsafe leave invalid RUNNING state [ Upstream commit 811a919135b980bac8009d042acdccf10dc1ef5e ] While in RUNNING state, phy_state_machine() checks for link changes by comparing phydev->link before and after calling phy_read_status(). This works as long as it is guaranteed that phydev->link is never changed outside the phy_state_machine(). If in some setups this happens, it causes the state machine to miss a link loss and remain RUNNING despite phydev->link being 0. This has been observed running a dsa setup with a process continuously polling the link states over ethtool each second (SNMPD RFC-1213 agent). Disconnecting the link on a phy followed by a ETHTOOL_GSET causes dsa_slave_get_settings() / dsa_slave_get_link_ksettings() to call phy_read_status() and with that modify the link status - and with that bricking the phy state machine. This patch adds a fail-safe check while in RUNNING, which causes to move to CHANGELINK when the link is gone and we are still RUNNING. Signed-off-by: Zefir Kurtisi Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 7103dc1d20f2..775a6e1fdef9 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1063,6 +1063,15 @@ void phy_state_machine(struct work_struct *work) if (old_link != phydev->link) phydev->state = PHY_CHANGELINK; } + /* + * Failsafe: check that nobody set phydev->link=0 between two + * poll cycles, otherwise we won't leave RUNNING state as long + * as link remains down. + */ + if (!phydev->link && phydev->state == PHY_RUNNING) { + phydev->state = PHY_CHANGELINK; + phydev_err(phydev, "no link in PHY_RUNNING\n"); + } break; case PHY_CHANGELINK: err = phy_read_status(phydev); -- cgit v1.2.3 From ee96797b78cacf08b0d3d5ca72f725bd4e371c20 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 9 Jan 2017 10:45:49 +0300 Subject: ipv4: make tcp_notsent_lowat sysctl knob behave as true unsigned int [ Upstream commit b007f09072ca8afa118ade333e717ba443e8d807 ] > cat /proc/sys/net/ipv4/tcp_notsent_lowat -1 > echo 4294967295 > /proc/sys/net/ipv4/tcp_notsent_lowat -bash: echo: write error: Invalid argument > echo -2147483648 > /proc/sys/net/ipv4/tcp_notsent_lowat > cat /proc/sys/net/ipv4/tcp_notsent_lowat -2147483648 but in documentation we have "tcp_notsent_lowat - UNSIGNED INTEGER" v2: simplify to just proc_douintvec Signed-off-by: Pavel Tikhomirov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..566cfc50f7cf 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -958,7 +958,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec, }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { -- cgit v1.2.3 From 99eb27d6182d0d90c7f0786a1fa0147d75c25684 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 22 Dec 2016 10:44:30 +0100 Subject: clk/samsung: exynos542x: mark some clocks as critical [ Upstream commit 318fa46cc60d37fec1e87dbf03a82aca0f5ce695 ] Some parent clocks of the Exynos542x clock blocks, which have separate power domains (like DISP, MFC, MSC, GSC, FSYS and G2D) must be always enabled to access any register related to power management unit or devices connected to it. For the time being, until a proper solution based on runtime PM is applied, mark those clocks as critical (instead of ignore unused or even no flags) to prevent disabling them. Signed-off-by: Marek Szyprowski Acked-by: Sylwester Nawrocki Reviewed-by: Chanwoo Choi Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas [Exynos5800 Peach Pi Chromebook] Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-exynos5420.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 8c8b495cbf0d..cdc092a1d9ef 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -586,7 +586,7 @@ static const struct samsung_gate_clock exynos5800_gate_clks[] __initconst = { GATE(CLK_ACLK550_CAM, "aclk550_cam", "mout_user_aclk550_cam", GATE_BUS_TOP, 24, 0, 0), GATE(CLK_ACLK432_SCALER, "aclk432_scaler", "mout_user_aclk432_scaler", - GATE_BUS_TOP, 27, 0, 0), + GATE_BUS_TOP, 27, CLK_IS_CRITICAL, 0), }; static const struct samsung_mux_clock exynos5420_mux_clks[] __initconst = { @@ -956,20 +956,20 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = { GATE(CLK_SMMU_G2D, "smmu_g2d", "aclk333_g2d", GATE_IP_G2D, 7, 0, 0), GATE(0, "aclk200_fsys", "mout_user_aclk200_fsys", - GATE_BUS_FSYS0, 9, CLK_IGNORE_UNUSED, 0), + GATE_BUS_FSYS0, 9, CLK_IS_CRITICAL, 0), GATE(0, "aclk200_fsys2", "mout_user_aclk200_fsys2", GATE_BUS_FSYS0, 10, CLK_IGNORE_UNUSED, 0), GATE(0, "aclk333_g2d", "mout_user_aclk333_g2d", GATE_BUS_TOP, 0, CLK_IGNORE_UNUSED, 0), GATE(0, "aclk266_g2d", "mout_user_aclk266_g2d", - GATE_BUS_TOP, 1, CLK_IGNORE_UNUSED, 0), + GATE_BUS_TOP, 1, CLK_IS_CRITICAL, 0), GATE(0, "aclk300_jpeg", "mout_user_aclk300_jpeg", GATE_BUS_TOP, 4, CLK_IGNORE_UNUSED, 0), GATE(0, "aclk333_432_isp0", "mout_user_aclk333_432_isp0", GATE_BUS_TOP, 5, 0, 0), GATE(0, "aclk300_gscl", "mout_user_aclk300_gscl", - GATE_BUS_TOP, 6, CLK_IGNORE_UNUSED, 0), + GATE_BUS_TOP, 6, CLK_IS_CRITICAL, 0), GATE(0, "aclk333_432_gscl", "mout_user_aclk333_432_gscl", GATE_BUS_TOP, 7, CLK_IGNORE_UNUSED, 0), GATE(0, "aclk333_432_isp", "mout_user_aclk333_432_isp", @@ -983,20 +983,20 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = { GATE(0, "aclk166", "mout_user_aclk166", GATE_BUS_TOP, 14, CLK_IGNORE_UNUSED, 0), GATE(CLK_ACLK333, "aclk333", "mout_user_aclk333", - GATE_BUS_TOP, 15, CLK_IGNORE_UNUSED, 0), + GATE_BUS_TOP, 15, CLK_IS_CRITICAL, 0), GATE(0, "aclk400_isp", "mout_user_aclk400_isp", GATE_BUS_TOP, 16, 0, 0), GATE(0, "aclk400_mscl", "mout_user_aclk400_mscl", GATE_BUS_TOP, 17, 0, 0), GATE(0, "aclk200_disp1", "mout_user_aclk200_disp1", - GATE_BUS_TOP, 18, 0, 0), + GATE_BUS_TOP, 18, CLK_IS_CRITICAL, 0), GATE(CLK_SCLK_MPHY_IXTAL24, "sclk_mphy_ixtal24", "mphy_refclk_ixtal24", GATE_BUS_TOP, 28, 0, 0), GATE(CLK_SCLK_HSIC_12M, "sclk_hsic_12m", "ff_hsic_12m", GATE_BUS_TOP, 29, 0, 0), GATE(0, "aclk300_disp1", "mout_user_aclk300_disp1", - SRC_MASK_TOP2, 24, 0, 0), + SRC_MASK_TOP2, 24, CLK_IS_CRITICAL, 0), GATE(CLK_MAU_EPLL, "mau_epll", "mout_mau_epll_clk", SRC_MASK_TOP7, 20, 0, 0), -- cgit v1.2.3 From 2a60965a0ef7e92325c4e72d8965c4b8c8fdc429 Mon Sep 17 00:00:00 2001 From: "Milan P. Gandhi" Date: Sat, 24 Dec 2016 22:02:46 +0530 Subject: scsi: qla2xxx: Get mutex lock before checking optrom_state [ Upstream commit c7702b8c22712a06080e10f1d2dee1a133ec8809 ] There is a race condition with qla2xxx optrom functions where one thread might modify optrom buffer, optrom_state while other thread is still reading from it. In couple of crashes, it was found that we had successfully passed the following 'if' check where we confirm optrom_state to be QLA_SREADING. But by the time we acquired mutex lock to proceed with memory_read_from_buffer function, some other thread/process had already modified that option rom buffer and optrom_state from QLA_SREADING to QLA_SWAITING. Then we got ha->optrom_buffer 0x0 and crashed the system: if (ha->optrom_state != QLA_SREADING) return 0; mutex_lock(&ha->optrom_mutex); rval = memory_read_from_buffer(buf, count, &off, ha->optrom_buffer, ha->optrom_region_size); mutex_unlock(&ha->optrom_mutex); With current optrom function we get following crash due to a race condition: [ 1479.466679] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1479.466707] IP: [] memcpy+0x6/0x110 [...] [ 1479.473673] Call Trace: [ 1479.474296] [] ? memory_read_from_buffer+0x3c/0x60 [ 1479.474941] [] qla2x00_sysfs_read_optrom+0x9c/0xc0 [qla2xxx] [ 1479.475571] [] read+0xdb/0x1f0 [ 1479.476206] [] vfs_read+0x9e/0x170 [ 1479.476839] [] SyS_read+0x7f/0xe0 [ 1479.477466] [] system_call_fastpath+0x16/0x1b Below patch modifies qla2x00_sysfs_read_optrom, qla2x00_sysfs_write_optrom functions to get the mutex_lock before checking ha->optrom_state to avoid similar crashes. The patch was applied and tested and same crashes were no longer observed again. Tested-by: Milan P. Gandhi Signed-off-by: Milan P. Gandhi Reviewed-by: Laurence Oberman Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index ad33238cef17..8c4641b518b5 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -243,12 +243,15 @@ qla2x00_sysfs_read_optrom(struct file *filp, struct kobject *kobj, struct qla_hw_data *ha = vha->hw; ssize_t rval = 0; + mutex_lock(&ha->optrom_mutex); + if (ha->optrom_state != QLA_SREADING) - return 0; + goto out; - mutex_lock(&ha->optrom_mutex); rval = memory_read_from_buffer(buf, count, &off, ha->optrom_buffer, ha->optrom_region_size); + +out: mutex_unlock(&ha->optrom_mutex); return rval; @@ -263,14 +266,19 @@ qla2x00_sysfs_write_optrom(struct file *filp, struct kobject *kobj, struct device, kobj))); struct qla_hw_data *ha = vha->hw; - if (ha->optrom_state != QLA_SWRITING) + mutex_lock(&ha->optrom_mutex); + + if (ha->optrom_state != QLA_SWRITING) { + mutex_unlock(&ha->optrom_mutex); return -EINVAL; - if (off > ha->optrom_region_size) + } + if (off > ha->optrom_region_size) { + mutex_unlock(&ha->optrom_mutex); return -ERANGE; + } if (off + count > ha->optrom_region_size) count = ha->optrom_region_size - off; - mutex_lock(&ha->optrom_mutex); memcpy(&ha->optrom_buffer[off], buf, count); mutex_unlock(&ha->optrom_mutex); -- cgit v1.2.3 From 61cf0afe12eb0e5417b0c39087001412ca470d32 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 28 Nov 2016 08:52:20 +0100 Subject: drm/virtio: fix framebuffer sparse warning [ Upstream commit 71d3f6ef7f5af38dea2975ec5715c88bae92e92d ] virtio uses normal ram as backing storage for the framebuffer, so we should assign the address to new screen_buffer (added by commit 17a7b0b4d9749f80d365d7baff5dec2f54b0e992) instead of screen_base. Reported-by: Michael S. Tsirkin Signed-off-by: Gerd Hoffmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/virtio/virtgpu_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c index 2242a80866a9..dc2976c2bed3 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fb.c +++ b/drivers/gpu/drm/virtio/virtgpu_fb.c @@ -337,7 +337,7 @@ static int virtio_gpufb_create(struct drm_fb_helper *helper, info->fbops = &virtio_gpufb_ops; info->pixmap.flags = FB_PIXMAP_SYSTEM; - info->screen_base = obj->vmap; + info->screen_buffer = obj->vmap; info->screen_size = obj->gem_base.size; drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth); drm_fb_helper_fill_var(info, &vfbdev->helper, -- cgit v1.2.3 From 1e9e71782f3462d5aecb0720d26298253bdbeca7 Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Tue, 13 Dec 2016 08:18:15 +0900 Subject: ARM: dts: sun8i: Support DTB build for NanoPi M1 [ Upstream commit 661ccdc1a95f18ab6c1373322fde09afd5b90a1f ] The commit 10efbf5f1633 ("ARM: dts: sun8i: Add dts file for NanoPi M1 SBC") introduced NanoPi M1 board but it's missing in Allwinner H3 DTB build. Signed-off-by: Milo Kim Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 7037201c5e3a..f3baa896ce84 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -820,6 +820,7 @@ dtb-$(CONFIG_MACH_SUN8I) += \ sun8i-a83t-allwinner-h8homlet-v2.dtb \ sun8i-a83t-cubietruck-plus.dtb \ sun8i-h3-bananapi-m2-plus.dtb \ + sun8i-h3-nanopi-m1.dtb \ sun8i-h3-nanopi-neo.dtb \ sun8i-h3-orangepi-2.dtb \ sun8i-h3-orangepi-lite.dtb \ -- cgit v1.2.3 From 984922f2c897f669e994ad851eb799d8348f92fd Mon Sep 17 00:00:00 2001 From: Emmanuel Vadot Date: Wed, 14 Dec 2016 15:57:24 +0100 Subject: ARM: dts: sunxi: Change node name for pwrseq pin on Olinuxino-lime2-emmc [ Upstream commit 3116d37651d77125bf50f81f859b1278e02ccce6 ] The node name for the power seq pin is mmc2@0 like the mmc2_pins_a one. This makes the original node (mmc2_pins_a) scrapped out of the dtb and result in a unusable eMMC if U-Boot didn't configured the pins to the correct functions. Signed-off-by: Emmanuel Vadot Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sun7i-a20-olinuxino-lime2-emmc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2-emmc.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2-emmc.dts index 5ea4915f6d75..10d307408f23 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2-emmc.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2-emmc.dts @@ -56,7 +56,7 @@ }; &pio { - mmc2_pins_nrst: mmc2@0 { + mmc2_pins_nrst: mmc2-rst-pin { allwinner,pins = "PC16"; allwinner,function = "gpio_out"; allwinner,drive = ; -- cgit v1.2.3 From 44d52834efb3d23e73a014afd8b22cfedb9a0eae Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 22 Dec 2016 07:40:37 -0800 Subject: iw_cxgb4: do not send RX_DATA_ACK CPLs after close/abort [ Upstream commit 3bcf96e0183f5c863657cb6ae9adad307a0f6071 ] Function rx_data(), which handles ingress CPL_RX_DATA messages, was always sending an RX_DATA_ACK with the goal of updating the credits. However, if the RDMA connection is moved out of FPDU mode abruptly, then it is possible for iw_cxgb4 to process queued RX_DATA CPLs after HW has aborted the connection. These CPLs should not trigger RX_DATA_ACKS. If they do, HW can see a READ after DELETE of the DB_LE hash entry for the tid and post a LE_DB HashTblMemCrcError. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/cm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index f1510cc76d2d..9398143d7c5e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1804,20 +1804,21 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) skb_trim(skb, dlen); mutex_lock(&ep->com.mutex); - /* update RX credits */ - update_rx_credits(ep, dlen); - switch (ep->com.state) { case MPA_REQ_SENT: + update_rx_credits(ep, dlen); ep->rcv_seq += dlen; disconnect = process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: + update_rx_credits(ep, dlen); ep->rcv_seq += dlen; disconnect = process_mpa_request(ep, skb); break; case FPDU_MODE: { struct c4iw_qp_attributes attrs; + + update_rx_credits(ep, dlen); BUG_ON(!ep->com.qp); if (status) pr_err("%s Unexpected streaming data." \ -- cgit v1.2.3 From 952d07ac1e6e9e59267d69341c2e820710eaa718 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 9 Jan 2017 15:20:31 -0500 Subject: nbd: blk_mq_init_queue returns an error code on failure, not NULL [ Upstream commit 25b4acfc7de0fc4da3bfea3a316f7282c6fbde81 ] Additionally, don't assign directly to disk->queue, otherwise blk_put_queue (called via put_disk) will choke (panic) on the errno stored there. Bug found by code inspection after Omar found a similar issue in virtio_blk. Compile-tested only. Signed-off-by: Jeff Moyer Reviewed-by: Omar Sandoval Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c9441f9d4585..98b767d3171e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -929,6 +929,7 @@ static int __init nbd_init(void) return -ENOMEM; for (i = 0; i < nbds_max; i++) { + struct request_queue *q; struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) goto out; @@ -954,12 +955,13 @@ static int __init nbd_init(void) * every gendisk to have its very own request_queue struct. * These structs are big so we dynamically allocate them. */ - disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set); - if (!disk->queue) { + q = blk_mq_init_queue(&nbd_dev[i].tag_set); + if (IS_ERR(q)) { blk_mq_free_tag_set(&nbd_dev[i].tag_set); put_disk(disk); goto out; } + disk->queue = q; /* * Tell the block layer that we are not a rotational device -- cgit v1.2.3 From e6d53f5f8c0311032bf518d253522b73c96416f3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 9 Jan 2017 11:44:12 -0800 Subject: virtio_blk: fix panic in initialization error path [ Upstream commit 6bf6b0aa3da84a3d9126919a94c49c0fb7ee2fb3 ] If blk_mq_init_queue() returns an error, it gets assigned to vblk->disk->queue. Then, when we call put_disk(), we end up calling blk_put_queue() with the ERR_PTR, causing a bad dereference. Fix it by only assigning to vblk->disk->queue on success. Signed-off-by: Omar Sandoval Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/virtio_blk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3c3b8f601469..10332c24f961 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -630,11 +630,12 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_put_disk; - q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set); + q = blk_mq_init_queue(&vblk->tag_set); if (IS_ERR(q)) { err = -ENOMEM; goto out_free_tags; } + vblk->disk->queue = q; q->queuedata = vblk; -- cgit v1.2.3 From bebbe845cb50ab4b499bff2d0bbe2fef09de86af Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 23 Nov 2016 13:02:32 +0100 Subject: ARM: 8632/1: ftrace: fix syscall name matching [ Upstream commit 270c8cf1cacc69cb8d99dea812f06067a45e4609 ] ARM has a few system calls (most notably mmap) for which the names of the functions which are referenced in the syscall table do not match the names of the syscall tracepoints. As a consequence of this, these tracepoints are not made available. Implement arch_syscall_match_sym_name to fix this and allow tracing even these system calls. Signed-off-by: Rabin Vincent Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/ftrace.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index bfe2a2f5a644..22b73112b75f 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -54,6 +54,24 @@ static inline void *return_address(unsigned int level) #define ftrace_return_address(n) return_address(n) +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME + +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + if (!strcmp(sym, "sys_mmap2")) + sym = "sys_mmap_pgoff"; + else if (!strcmp(sym, "sys_statfs64_wrapper")) + sym = "sys_statfs64"; + else if (!strcmp(sym, "sys_fstatfs64_wrapper")) + sym = "sys_fstatfs64"; + else if (!strcmp(sym, "sys_arm_fadvise64_64")) + sym = "sys_fadvise64_64"; + + /* Ignore case since sym may start with "SyS" instead of "sys" */ + return !strcasecmp(sym, name); +} + #endif /* ifndef __ASSEMBLY__ */ #endif /* _ASM_ARM_FTRACE */ -- cgit v1.2.3 From c736011052cf2fdc6e310195ce0fd0805ddbb0b6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 10 Jan 2017 16:57:27 -0800 Subject: mm, slab: make sure that KMALLOC_MAX_SIZE will fit into MAX_ORDER [ Upstream commit bb1107f7c6052c863692a41f78c000db792334bf ] Andrey Konovalov has reported the following warning triggered by the syzkaller fuzzer. WARNING: CPU: 1 PID: 9935 at mm/page_alloc.c:3511 __alloc_pages_nodemask+0x159c/0x1e20 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 9935 Comm: syz-executor0 Not tainted 4.9.0-rc7+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __alloc_pages_slowpath mm/page_alloc.c:3511 __alloc_pages_nodemask+0x159c/0x1e20 mm/page_alloc.c:3781 alloc_pages_current+0x1c7/0x6b0 mm/mempolicy.c:2072 alloc_pages include/linux/gfp.h:469 kmalloc_order+0x1f/0x70 mm/slab_common.c:1015 kmalloc_order_trace+0x1f/0x160 mm/slab_common.c:1026 kmalloc_large include/linux/slab.h:422 __kmalloc+0x210/0x2d0 mm/slub.c:3723 kmalloc include/linux/slab.h:495 ep_write_iter+0x167/0xb50 drivers/usb/gadget/legacy/inode.c:664 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x170/0x4e0 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 The issue is caused by a lack of size check for the request size in ep_write_iter which should be fixed. It, however, points to another problem, that SLUB defines KMALLOC_MAX_SIZE too large because the its KMALLOC_SHIFT_MAX is (MAX_ORDER + PAGE_SHIFT) which means that the resulting page allocator request might be MAX_ORDER which is too large (see __alloc_pages_slowpath). The same applies to the SLOB allocator which allows even larger sizes. Make sure that they are capped properly and never request more than MAX_ORDER order. Link: http://lkml.kernel.org/r/20161220130659.16461-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Andrey Konovalov Acked-by: Christoph Lameter Cc: Alexei Starovoitov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/slab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 084b12bad198..4c5363566815 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -226,7 +226,7 @@ static inline const char *__check_heap_object(const void *ptr, * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif @@ -239,7 +239,7 @@ static inline const char *__check_heap_object(const void *ptr, * be allocated from the same page. */ #define KMALLOC_SHIFT_HIGH PAGE_SHIFT -#define KMALLOC_SHIFT_MAX 30 +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif -- cgit v1.2.3 From d12824c897a2dea1b8b794e28b4b88f9cd197d28 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 10 Jan 2017 16:57:45 -0800 Subject: lib/Kconfig.debug: fix frv build failure [ Upstream commit da0510c47519fe0999cffe316e1d370e29f952be ] The build of frv allmodconfig was failing with the errors like: /tmp/cc0JSPc3.s: Assembler messages: /tmp/cc0JSPc3.s:1839: Error: symbol `.LSLT0' is already defined /tmp/cc0JSPc3.s:1842: Error: symbol `.LASLTP0' is already defined /tmp/cc0JSPc3.s:1969: Error: symbol `.LELTP0' is already defined /tmp/cc0JSPc3.s:1970: Error: symbol `.LELT0' is already defined Commit 866ced950bcd ("kbuild: Support split debug info v4") introduced splitting the debug info and keeping that in a separate file. Somehow, the frv-linux gcc did not like that and I am guessing that instead of splitting it started copying. The first report about this is at: https://lists.01.org/pipermail/kbuild-all/2015-July/010527.html. I will try and see if this can work with frv and if still fails I will open a bug report with gcc. But meanwhile this is the easiest option to solve build failure of frv. Fixes: 866ced950bcd ("kbuild: Support split debug info v4") Link: http://lkml.kernel.org/r/1482062348-5352-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Reported-by: Fengguang Wu Cc: Andi Kleen Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a6c8db1d62f6..f60e67217f18 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -145,7 +145,7 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" - depends on DEBUG_INFO + depends on DEBUG_INFO && !FRV help Generate debug info into separate .dwo files. This significantly reduces the build directory size for builds with DEBUG_INFO, -- cgit v1.2.3 From 916a05b90d8322f38ea133feb3a194617e507c3c Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 10 Jan 2017 16:57:54 -0800 Subject: signal: protect SIGNAL_UNKILLABLE from unintentional clearing. [ Upstream commit 2d39b3cd34e6d323720d4c61bd714f5ae202c022 ] Since commit 00cd5c37afd5 ("ptrace: permit ptracing of /sbin/init") we can now trace init processes. init is initially protected with SIGNAL_UNKILLABLE which will prevent fatal signals such as SIGSTOP, but there are a number of paths during tracing where SIGNAL_UNKILLABLE can be implicitly cleared. This can result in init becoming stoppable/killable after tracing. For example, running: while true; do kill -STOP 1; done & strace -p 1 and then stopping strace and the kill loop will result in init being left in state TASK_STOPPED. Sending SIGCONT to init will resume it, but init will now respond to future SIGSTOP signals rather than ignoring them. Make sure that when setting SIGNAL_STOP_CONTINUED/SIGNAL_STOP_STOPPED that we don't clear SIGNAL_UNKILLABLE. Link: http://lkml.kernel.org/r/20170104122017.25047-1-jamie.iles@oracle.com Signed-off-by: Jamie Iles Acked-by: Oleg Nesterov Cc: Alexander Viro Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/sched.h | 10 ++++++++++ kernel/signal.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f425eb3318ab..14f58cf06054 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -830,6 +830,16 @@ struct signal_struct { #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + /* If true, all threads except ->group_exit_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) { diff --git a/kernel/signal.c b/kernel/signal.c index deb04d5983ed..e48668c3c972 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task) * fresh group stop. Read comment in do_signal_stop() for details. */ if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { - sig->flags = SIGNAL_STOP_STOPPED; + signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED); return true; } return false; @@ -845,7 +845,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) * will take ->siglock, notice SIGNAL_CLD_MASK, and * notify its parent. See get_signal_to_deliver(). */ - signal->flags = why | SIGNAL_STOP_CONTINUED; + signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED); signal->group_stop_count = 0; signal->group_exit_code = 0; } -- cgit v1.2.3 From 22cccef1fc709cae4d78c47c7371acf24f328037 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 10 Jan 2017 16:58:00 -0800 Subject: mm: don't dereference struct page fields of invalid pages [ Upstream commit f073bdc51771f5a5c7a8d1191bfc3ae371d44de7 ] The VM_BUG_ON() check in move_freepages() checks whether the node id of a page matches the node id of its zone. However, it does this before having checked whether the struct page pointer refers to a valid struct page to begin with. This is guaranteed in most cases, but may not be the case if CONFIG_HOLES_IN_ZONE=y. So reorder the VM_BUG_ON() with the pfn_valid_within() check. Link: http://lkml.kernel.org/r/1481706707-6211-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Cc: Catalin Marinas Cc: Hanjun Guo Cc: Yisheng Xie Cc: Robert Richter Cc: James Morse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d4f860cc7163..77b797c2d094 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1875,14 +1875,14 @@ int move_freepages(struct zone *zone, #endif for (page = start_page; page <= end_page;) { - /* Make sure we are not inadvertently changing nodes */ - VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); - if (!pfn_valid_within(page_to_pfn(page))) { page++; continue; } + /* Make sure we are not inadvertently changing nodes */ + VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); + if (!PageBuddy(page)) { page++; continue; -- cgit v1.2.3 From a69f0d54fdc96914a041039c22e46da8e95c4d22 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 15 Jan 2017 19:05:38 +0200 Subject: net/mlx5: E-Switch, Re-enable RoCE on mode change only after FDB destroy [ Upstream commit 5bae8c031053c69b4aa74b7f1ba15d4ec8426208 ] We must re-enable RoCE on the e-switch management port (PF) only after destroying the FDB in its switchdev/offloaded mode. Otherwise, when encapsulation is supported, this re-enablement will fail. Also, it's more natural and symmetric to disable RoCE on the PF before we create the FDB under switchdev mode, so do that as well and revert if getting into error during the mode change later. Fixes: 9da34cd34e85 ('net/mlx5: Disable RoCE on the e-switch management [..]') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 29 ++++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 6ffd5d2a70aa..52a38106448e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -651,9 +651,14 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) int vport; int err; + /* disable PF RoCE so missed packets don't go through RoCE steering */ + mlx5_dev_list_lock(); + mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + err = esw_create_offloads_fdb_table(esw, nvports); if (err) - return err; + goto create_fdb_err; err = esw_create_offloads_table(esw); if (err) @@ -673,11 +678,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) goto err_reps; } - /* disable PF RoCE so missed packets don't go through RoCE steering */ - mlx5_dev_list_lock(); - mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - return 0; err_reps: @@ -694,6 +694,13 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_table(esw); + +create_fdb_err: + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return err; } @@ -701,11 +708,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - /* enable back PF RoCE */ - mlx5_dev_list_lock(); - mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { @@ -715,6 +717,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); } + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return err; } -- cgit v1.2.3 From f102bb7164c9020e12662998f0fd99c3be72d4f6 Mon Sep 17 00:00:00 2001 From: zheng li Date: Mon, 12 Dec 2016 09:56:05 +0800 Subject: ipv4: Should use consistent conditional judgement for ip fragment in __ip_append_data and ip_finish_output [ Upstream commit 0a28cfd51e17f4f0a056bcf66bfbe492c3b99f38 ] There is an inconsistent conditional judgement in __ip_append_data and ip_finish_output functions, the variable length in __ip_append_data just include the length of application's payload and udp header, don't include the length of ip header, but in ip_finish_output use (skb->len > ip_skb_dst_mtu(skb)) as judgement, and skb->len include the length of ip header. That causes some particular application's udp payload whose length is between (MTU - IP Header) and MTU were fragmented by ip_fragment even though the rst->dev support UFO feature. Add the length of ip header to length in __ip_append_data to keep consistent conditional judgement as ip_finish_output for ip fragment. Signed-off-by: Zheng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5c1dbef3626..e65bceae823b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -936,7 +936,7 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_is_gso(skb))) && + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { -- cgit v1.2.3 From ef09c9ff343122a0b245416066992d096416ff19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 19 Jun 2017 13:03:43 +0200 Subject: net: account for current skb length when deciding about UFO [ Upstream commit a5cb659bbc1c8644efa0c3138a757a1e432a4880 ] Our customer encountered stuck NFS writes for blocks starting at specific offsets w.r.t. page boundary caused by networking stack sending packets via UFO enabled device with wrong checksum. The problem can be reproduced by composing a long UDP datagram from multiple parts using MSG_MORE flag: sendto(sd, buff, 1000, MSG_MORE, ...); sendto(sd, buff, 1000, MSG_MORE, ...); sendto(sd, buff, 3000, 0, ...); Assume this packet is to be routed via a device with MTU 1500 and NETIF_F_UFO enabled. When second sendto() gets into __ip_append_data(), this condition is tested (among others) to decide whether to call ip_ufo_append_data(): ((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb)) At the moment, we already have skb with 1028 bytes of data which is not marked for GSO so that the test is false (fragheaderlen is usually 20). Thus we append second 1000 bytes to this skb without invoking UFO. Third sendto(), however, has sufficient length to trigger the UFO path so that we end up with non-UFO skb followed by a UFO one. Later on, udp_send_skb() uses udp_csum() to calculate the checksum but that assumes all fragments have correct checksum in skb->csum which is not true for UFO fragments. When checking against MTU, we need to add skb->len to length of new segment if we already have a partially filled skb and fragheaderlen only if there isn't one. In the IPv6 case, skb can only be null if this is the first segment so that we have to use headersize (length of the first IPv6 header) rather than fragheaderlen (length of IPv6 header of further fragments) for skb == NULL. Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Fixes: e4c5e13aa45c ("ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output") Signed-off-by: Michal Kubecek Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 3 ++- net/ipv6/ip6_output.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e65bceae823b..06215ba88b93 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -936,7 +936,8 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && + if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || + (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index efe811c6eccf..a5cdf2a23609 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1372,7 +1372,7 @@ emsgsize: */ cork->length += length; - if ((((length + fragheaderlen) > mtu) || + if ((((length + (skb ? skb->len : headersize)) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && -- cgit v1.2.3 From c3c94716855a1601b39c2ff587812906ea8b451c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 17 Feb 2017 16:07:33 -0800 Subject: net: phy: Fix PHY unbind crash commit 7b9a88a390dacb37b051a7b09b9a08f546edf5eb upstream. The PHY library does not deal very well with bind and unbind events. The first thing we would see is that we were not properly canceling the PHY state machine workqueue, so we would be crashing while dereferencing phydev->drv since there is no driver attached anymore. Suggested-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9e7b7836774f..bf02f8e4648a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1714,6 +1714,8 @@ static int phy_remove(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); + cancel_delayed_work_sync(&phydev->state_queue); + mutex_lock(&phydev->lock); phydev->state = PHY_DOWN; mutex_unlock(&phydev->lock); -- cgit v1.2.3 From f9636c9bdd5828f29cdfaf620e3a424a5f8cc221 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jul 2017 08:36:15 -0400 Subject: workqueue: implicit ordered attribute should be overridable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0a94efb5acbb6980d7c9ab604372d93cd507e4d8 upstream. 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") automatically enabled ordered attribute for unbound workqueues w/ max_active == 1. Because ordered workqueues reject max_active and some attribute changes, this implicit ordered mode broke cases where the user creates an unbound workqueue w/ max_active == 1 and later explicitly changes the related attributes. This patch distinguishes explicit and implicit ordered setting and overrides from attribute changes if implict. Signed-off-by: Tejun Heo Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") Cc: Holger Hoffstätte Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 4 +++- kernel/workqueue.c | 13 +++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fc6e22186405..733a21ef8da4 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -311,6 +311,7 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_ORDERED_EXPLICIT = 1 << 18, /* internal: alloc_ordered_workqueue() */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ @@ -409,7 +410,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ + __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e5335c27d72e..776dda02e751 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3730,8 +3730,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, return -EINVAL; /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + if (!list_empty(&wq->pwqs)) { + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) + return -EINVAL; + + wq->flags &= ~__WQ_ORDERED; + } ctx = apply_wqattrs_prepare(wq, attrs); if (!ctx) @@ -4113,13 +4117,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) struct pool_workqueue *pwq; /* disallow meddling with max_active for ordered workqueues */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); mutex_lock(&wq->mutex); + wq->flags &= ~__WQ_ORDERED; wq->saved_max_active = max_active; for_each_pwq(pwq, wq) @@ -5224,7 +5229,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) * attributes breaks ordering guarantee. Disallow exposing ordered * workqueues. */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return -EINVAL; wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); -- cgit v1.2.3 From 4c666b0d9070a095e945387bd674476820f79528 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Aug 2017 08:49:50 -0700 Subject: Linux 4.9.42 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 82eb3d1ee801..34d4d9f8a4b2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 41 +SUBLEVEL = 42 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From 3b25bfc11cf95c75a52e2ca3745b5bffb5fb02dc Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 17 Jul 2017 18:34:42 +0800 Subject: ppp: Fix false xmit recursion detect with two ppp devices [ Upstream commit e5dadc65f9e0177eb649bcd9d333f1ebf871223e ] The global percpu variable ppp_xmit_recursion is used to detect the ppp xmit recursion to avoid the deadlock, which is caused by one CPU tries to lock the xmit lock twice. But it would report false recursion when one CPU wants to send the skb from two different PPP devices, like one L2TP on the PPPoE. It is a normal case actually. Now use one percpu member of struct ppp instead of the gloable variable to detect the xmit recursion of one ppp device. Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit") Signed-off-by: Gao Feng Signed-off-by: Liu Jianying Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 5489c0ec1d9a..667f197ab66c 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -119,6 +119,7 @@ struct ppp { int n_channels; /* how many channels are attached 54 */ spinlock_t rlock; /* lock for receive side 58 */ spinlock_t wlock; /* lock for transmit side 5c */ + int *xmit_recursion __percpu; /* xmit recursion detect */ int mru; /* max receive unit 60 */ unsigned int flags; /* control bits 64 */ unsigned int xstate; /* transmit state bits 68 */ @@ -1024,6 +1025,7 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev, struct ppp *ppp = netdev_priv(dev); int indx; int err; + int cpu; ppp->dev = dev; ppp->ppp_net = src_net; @@ -1038,6 +1040,15 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev, INIT_LIST_HEAD(&ppp->channels); spin_lock_init(&ppp->rlock); spin_lock_init(&ppp->wlock); + + ppp->xmit_recursion = alloc_percpu(int); + if (!ppp->xmit_recursion) { + err = -ENOMEM; + goto err1; + } + for_each_possible_cpu(cpu) + (*per_cpu_ptr(ppp->xmit_recursion, cpu)) = 0; + #ifdef CONFIG_PPP_MULTILINK ppp->minseq = -1; skb_queue_head_init(&ppp->mrq); @@ -1049,11 +1060,15 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev, err = ppp_unit_register(ppp, conf->unit, conf->ifname_is_set); if (err < 0) - return err; + goto err2; conf->file->private_data = &ppp->file; return 0; +err2: + free_percpu(ppp->xmit_recursion); +err1: + return err; } static const struct nla_policy ppp_nl_policy[IFLA_PPP_MAX + 1] = { @@ -1399,18 +1414,16 @@ static void __ppp_xmit_process(struct ppp *ppp) ppp_xmit_unlock(ppp); } -static DEFINE_PER_CPU(int, ppp_xmit_recursion); - static void ppp_xmit_process(struct ppp *ppp) { local_bh_disable(); - if (unlikely(__this_cpu_read(ppp_xmit_recursion))) + if (unlikely(*this_cpu_ptr(ppp->xmit_recursion))) goto err; - __this_cpu_inc(ppp_xmit_recursion); + (*this_cpu_ptr(ppp->xmit_recursion))++; __ppp_xmit_process(ppp); - __this_cpu_dec(ppp_xmit_recursion); + (*this_cpu_ptr(ppp->xmit_recursion))--; local_bh_enable(); @@ -1904,7 +1917,7 @@ static void __ppp_channel_push(struct channel *pch) read_lock_bh(&pch->upl); ppp = pch->ppp; if (ppp) - __ppp_xmit_process(ppp); + ppp_xmit_process(ppp); read_unlock_bh(&pch->upl); } } @@ -1913,9 +1926,7 @@ static void ppp_channel_push(struct channel *pch) { local_bh_disable(); - __this_cpu_inc(ppp_xmit_recursion); __ppp_channel_push(pch); - __this_cpu_dec(ppp_xmit_recursion); local_bh_enable(); } @@ -3056,6 +3067,7 @@ static void ppp_destroy_interface(struct ppp *ppp) #endif /* CONFIG_PPP_FILTER */ kfree_skb(ppp->xmit_pending); + free_percpu(ppp->xmit_recursion); free_netdev(ppp->dev); } -- cgit v1.2.3 From 6ec6ec3bbb878bf2e4cea54028fc412c3ed6514b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 8 Aug 2017 11:43:24 +0200 Subject: ppp: fix xmit recursion detection on ppp channels [ Upstream commit 0a0e1a85c83775a648041be2b15de6d0a2f2b8eb ] Commit e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp devices") dropped the xmit_recursion counter incrementation in ppp_channel_push() and relied on ppp_xmit_process() for this task. But __ppp_channel_push() can also send packets directly (using the .start_xmit() channel callback), in which case the xmit_recursion counter isn't incremented anymore. If such packets get routed back to the parent ppp unit, ppp_xmit_process() won't notice the recursion and will call ppp_channel_push() on the same channel, effectively creating the deadlock situation that the xmit_recursion mechanism was supposed to prevent. This patch re-introduces the xmit_recursion counter incrementation in ppp_channel_push(). Since the xmit_recursion variable is now part of the parent ppp unit, incrementation is skipped if the channel doesn't have any. This is fine because only packets routed through the parent unit may enter the channel recursively. Finally, we have to ensure that pch->ppp is not going to be modified while executing ppp_channel_push(). Instead of taking this lock only while calling ppp_xmit_process(), we now have to hold it for the full ppp_channel_push() execution. This respects the ppp locks ordering which requires locking ->upl before ->downl. Fixes: e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp devices") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 667f197ab66c..96fa0e61d3af 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1914,21 +1914,23 @@ static void __ppp_channel_push(struct channel *pch) spin_unlock_bh(&pch->downl); /* see if there is anything from the attached unit to be sent */ if (skb_queue_empty(&pch->file.xq)) { - read_lock_bh(&pch->upl); ppp = pch->ppp; if (ppp) - ppp_xmit_process(ppp); - read_unlock_bh(&pch->upl); + __ppp_xmit_process(ppp); } } static void ppp_channel_push(struct channel *pch) { - local_bh_disable(); - - __ppp_channel_push(pch); - - local_bh_enable(); + read_lock_bh(&pch->upl); + if (pch->ppp) { + (*this_cpu_ptr(pch->ppp->xmit_recursion))++; + __ppp_channel_push(pch); + (*this_cpu_ptr(pch->ppp->xmit_recursion))--; + } else { + __ppp_channel_push(pch); + } + read_unlock_bh(&pch->upl); } /* -- cgit v1.2.3 From 3914a7eccbc742ab87c5be1acd3006e051e33710 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 1 Aug 2017 13:22:32 -0700 Subject: tcp: avoid setting cwnd to invalid ssthresh after cwnd reduction states [ Upstream commit ed254971edea92c3ac5c67c6a05247a92aa6075e ] If the sender switches the congestion control during ECN-triggered cwnd-reduction state (CA_CWR), upon exiting recovery cwnd is set to the ssthresh value calculated by the previous congestion control. If the previous congestion control is BBR that always keep ssthresh to TCP_INIFINITE_SSTHRESH, cwnd ends up being infinite. The safe step is to avoid assigning invalid ssthresh value when recovery ends. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 01336aa5f973..32c540145c17 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2560,8 +2560,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) return; /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } -- cgit v1.2.3 From 05046af36302083abb9ee501cd3ae3822c55701a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2017 23:10:46 -0700 Subject: net: fix keepalive code vs TCP_FASTOPEN_CONNECT [ Upstream commit 2dda640040876cd8ae646408b69eea40c24f9ae9 ] syzkaller was able to trigger a divide by 0 in TCP stack [1] Issue here is that keepalive timer needs to be updated to not attempt to send a probe if the connection setup was deferred using TCP_FASTOPEN_CONNECT socket option added in linux-4.11 [1] divide error: 0000 [#1] SMP CPU: 18 PID: 0 Comm: swapper/18 Not tainted task: ffff986f62f4b040 ti: ffff986f62fa2000 task.ti: ffff986f62fa2000 RIP: 0010:[] [] __tcp_select_window+0x8d/0x160 Call Trace: [] tcp_transmit_skb+0x11/0x20 [] tcp_xmit_probe_skb+0xc1/0xe0 [] tcp_write_wakeup+0x68/0x160 [] tcp_keepalive_timer+0x17b/0x230 [] call_timer_fn+0x39/0xf0 [] run_timer_softirq+0x1d7/0x280 [] __do_softirq+0xcb/0x257 [] irq_exit+0x9c/0xb0 [] smp_apic_timer_interrupt+0x6a/0x80 [] apic_timer_interrupt+0x7f/0x90 [] ? cpuidle_enter_state+0x13a/0x3b0 [] ? cpuidle_enter_state+0x11d/0x3b0 Tested: Following packetdrill no longer crashes the kernel `echo 0 >/proc/sys/net/ipv4/tcp_timestamps` // Cache warmup: send a Fast Open cookie request 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) +0 > S 0:0(0) +.01 < S. 123:123(0) ack 1 win 14600 +0 > . 1:1(0) ack 1 +0 close(3) = 0 +0 > F. 1:1(0) ack 1 +0 < F. 1:1(0) ack 2 win 92 +0 > . 2:2(0) ack 2 +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 +0 setsockopt(4, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0 +.01 connect(4, ..., ...) = 0 +0 setsockopt(4, SOL_TCP, TCP_KEEPIDLE, [5], 4) = 0 +10 close(4) = 0 `echo 1 >/proc/sys/net/ipv4/tcp_timestamps` Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Wei Wang Cc: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_timer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b1e65b3b4361..74db43b47917 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -654,7 +654,8 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) + if (!sock_flag(sk, SOCK_KEEPOPEN) || + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out; elapsed = keepalive_time_when(tp); -- cgit v1.2.3 From 1b582a2c366da54463e3dc97360751f21fa46453 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Aug 2017 14:20:54 +0200 Subject: bpf, s390: fix jit branch offset related to ldimm64 [ Upstream commit b0a0c2566f28e71e5e32121992ac8060cec75510 ] While testing some other work that required JIT modifications, I run into test_bpf causing a hang when JIT enabled on s390. The problematic test case was the one from ddc665a4bb4b (bpf, arm64: fix jit branch offset related to ldimm64), and turns out that we do have a similar issue on s390 as well. In bpf_jit_prog() we update next instruction address after returning from bpf_jit_insn() with an insn_count. bpf_jit_insn() returns either -1 in case of error (e.g. unsupported insn), 1 or 2. The latter is only the case for ldimm64 due to spanning 2 insns, however, next address is only set to i + 1 not taking actual insn_count into account, thus fix is to use insn_count instead of 1. bpf_jit_enable in mode 2 provides also disasm on s390: Before fix: 000003ff800349b6: a7f40003 brc 15,3ff800349bc ; target 000003ff800349ba: 0000 unknown 000003ff800349bc: e3b0f0700024 stg %r11,112(%r15) 000003ff800349c2: e3e0f0880024 stg %r14,136(%r15) 000003ff800349c8: 0db0 basr %r11,%r0 000003ff800349ca: c0ef00000000 llilf %r14,0 000003ff800349d0: e320b0360004 lg %r2,54(%r11) 000003ff800349d6: e330b03e0004 lg %r3,62(%r11) 000003ff800349dc: ec23ffeda065 clgrj %r2,%r3,10,3ff800349b6 ; jmp 000003ff800349e2: e3e0b0460004 lg %r14,70(%r11) 000003ff800349e8: e3e0b04e0004 lg %r14,78(%r11) 000003ff800349ee: b904002e lgr %r2,%r14 000003ff800349f2: e3b0f0700004 lg %r11,112(%r15) 000003ff800349f8: e3e0f0880004 lg %r14,136(%r15) 000003ff800349fe: 07fe bcr 15,%r14 After fix: 000003ff80ef3db4: a7f40003 brc 15,3ff80ef3dba 000003ff80ef3db8: 0000 unknown 000003ff80ef3dba: e3b0f0700024 stg %r11,112(%r15) 000003ff80ef3dc0: e3e0f0880024 stg %r14,136(%r15) 000003ff80ef3dc6: 0db0 basr %r11,%r0 000003ff80ef3dc8: c0ef00000000 llilf %r14,0 000003ff80ef3dce: e320b0360004 lg %r2,54(%r11) 000003ff80ef3dd4: e330b03e0004 lg %r3,62(%r11) 000003ff80ef3dda: ec230006a065 clgrj %r2,%r3,10,3ff80ef3de6 ; jmp 000003ff80ef3de0: e3e0b0460004 lg %r14,70(%r11) 000003ff80ef3de6: e3e0b04e0004 lg %r14,78(%r11) ; target 000003ff80ef3dec: b904002e lgr %r2,%r14 000003ff80ef3df0: e3b0f0700004 lg %r11,112(%r15) 000003ff80ef3df6: e3e0f0880004 lg %r14,136(%r15) 000003ff80ef3dfc: 07fe bcr 15,%r14 test_bpf.ko suite runs fine after the fix. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Daniel Borkmann Tested-by: Michael Holzheu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index bee281f3163d..e8dee623d545 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1252,7 +1252,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) return -1; - jit->addrs[i + 1] = jit->prg; /* Next instruction address */ + /* Next instruction address */ + jit->addrs[i + insn_count] = jit->prg; } bpf_jit_epilogue(jit); -- cgit v1.2.3 From 35d90144e2ceb19a8c649fd1422d507eac946893 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Aug 2017 22:54:48 +0200 Subject: net/mlx4_en: don't set CHECKSUM_COMPLETE on SCTP packets [ Upstream commit e718fe450e616227b74d27a233cdf37b4df0c82b ] if the NIC fails to validate the checksum on TCP/UDP, and validation of IP checksum is successful, the driver subtracts the pseudo-header checksum from the value obtained by the hardware and sets CHECKSUM_COMPLETE. Don't do that if protocol is IPPROTO_SCTP, otherwise CRC32c validation fails. V2: don't test MLX4_CQE_STATUS_IPV6 if MLX4_CQE_STATUS_IPV4 is set Reported-by: Shuang Li Fixes: f8c6455bb04b ("net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE") Signed-off-by: Davide Caratti Acked-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5d484581becd..bcbb80ff86a7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -724,16 +724,21 @@ static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, * header, the HW adds it. To address that, we are subtracting the pseudo * header checksum from the checksum value provided by the HW. */ -static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, - struct iphdr *iph) +static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, + struct iphdr *iph) { __u16 length_for_csum = 0; __wsum csum_pseudo_header = 0; + __u8 ipproto = iph->protocol; + + if (unlikely(ipproto == IPPROTO_SCTP)) + return -1; length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr, - length_for_csum, iph->protocol, 0); + length_for_csum, ipproto, 0); skb->csum = csum_sub(hw_checksum, csum_pseudo_header); + return 0; } #if IS_ENABLED(CONFIG_IPV6) @@ -744,17 +749,20 @@ static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, struct ipv6hdr *ipv6h) { + __u8 nexthdr = ipv6h->nexthdr; __wsum csum_pseudo_hdr = 0; - if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || - ipv6h->nexthdr == IPPROTO_HOPOPTS)) + if (unlikely(nexthdr == IPPROTO_FRAGMENT || + nexthdr == IPPROTO_HOPOPTS || + nexthdr == IPPROTO_SCTP)) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(nexthdr)); csum_pseudo_hdr = csum_partial(&ipv6h->saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len); - csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr)); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, + (__force __wsum)htons(nexthdr)); skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr); skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0)); @@ -777,11 +785,10 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, } if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4)) - get_fixed_ipv4_csum(hw_checksum, skb, hdr); + return get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) - else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) - return -1; + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) + return get_fixed_ipv6_csum(hw_checksum, skb, hdr); #endif return 0; } -- cgit v1.2.3 From e392e305af01c4512a7d78b6ca47d9cf57f68d0d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 9 Aug 2017 18:15:19 +0800 Subject: net: sched: set xt_tgchk_param par.nft_compat as 0 in ipt_init_target [ Upstream commit 96d9703050a0036a3360ec98bb41e107c90664fe ] Commit 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat") introduced a member nft_compat to xt_tgchk_param structure. But it didn't set it's value for ipt_init_target. With unexpected value in par.nft_compat, it may return unexpected result in some target's checkentry. This patch is to set all it's fields as 0 and only initialize the non-zero fields in ipt_init_target. v1->v2: As Wang Cong's suggestion, fix it by setting all it's fields as 0 and only initializing the non-zero fields. Fixes: 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat") Suggested-by: Cong Wang Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ipt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 378c1c976058..a1aec0a6c789 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -49,8 +49,8 @@ static int ipt_init_target(struct xt_entry_target *t, char *table, return PTR_ERR(target); t->u.kernel.target = target; + memset(&par, 0, sizeof(par)); par.table = table; - par.entryinfo = NULL; par.target = target; par.targinfo = t->data; par.hook_mask = hook; -- cgit v1.2.3 From 87fdcfe211956379d57092a6b7a4e669d318fc0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Aug 2017 01:41:58 -0700 Subject: tcp: fastopen: tcp_connect() must refresh the route [ Upstream commit 8ba60924710cde564a3905588b6219741d6356d0 ] With new TCP_FASTOPEN_CONNECT socket option, there is a possibility to call tcp_connect() while socket sk_dst_cache is either NULL or invalid. +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 +0 connect(4, ..., ...) = 0 << sk->sk_dst_cache becomes obsolete, or even set to NULL >> +1 sendto(4, ..., 1000, MSG_FASTOPEN, ..., ...) = 1000 We need to refresh the route otherwise bad things can happen, especially when syzkaller is running on the host :/ Fixes: 19f6d3f3c8422 ("net/tcp-fastopen: Add new API support") Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Cc: Wei Wang Cc: Yuchung Cheng Acked-by: Wei Wang Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dc4258fd15dc..5d836b037442 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3344,6 +3344,9 @@ int tcp_connect(struct sock *sk) struct sk_buff *buff; int err; + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + tcp_connect_init(sk); if (unlikely(tp->repair)) { -- cgit v1.2.3 From 69ffc9644ffec424cd3f74794b2d616405576e1e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 8 Aug 2017 14:22:55 -0400 Subject: net: avoid skb_warn_bad_offload false positives on UFO [ Upstream commit 8d63bee643f1fb53e472f0e135cae4eb99d62d19 ] skb_warn_bad_offload triggers a warning when an skb enters the GSO stack at __skb_gso_segment that does not have CHECKSUM_PARTIAL checksum offload set. Commit b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise") observed that SKB_GSO_DODGY producers can trigger the check and that passing those packets through the GSO handlers will fix it up. But, the software UFO handler will set ip_summed to CHECKSUM_NONE. When __skb_gso_segment is called from the receive path, this triggers the warning again. Make UFO set CHECKSUM_UNNECESSARY instead of CHECKSUM_NONE. On Tx these two are equivalent. On Rx, this better matches the skb state (checksum computed), as CHECKSUM_NONE here means no checksum computed. See also this thread for context: http://patchwork.ozlabs.org/patch/799015/ Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/udp_offload.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 0af019dfe846..1d0a7369d5a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2703,7 +2703,7 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) return skb->ip_summed != CHECKSUM_PARTIAL && - skb->ip_summed != CHECKSUM_NONE; + skb->ip_summed != CHECKSUM_UNNECESSARY; return skb->ip_summed == CHECKSUM_NONE; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b2be1d9757ef..6de016f80f17 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -232,7 +232,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a2267f80febb..e7d378c032cb 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -72,7 +72,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in -- cgit v1.2.3 From f628c9df047b132b2c3eba997bf06ab1dcbbeea6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 9 Aug 2017 14:38:04 +0300 Subject: igmp: Fix regression caused by igmp sysctl namespace code. [ Upstream commit 1714020e42b17135032c8606f7185b3fb2ba5d78 ] Commit dcd87999d415 ("igmp: net: Move igmp namespace init to correct file") moved the igmp sysctls initialization from tcp_sk_init to igmp_net_init. This function is only called as part of per-namespace initialization, only if CONFIG_IP_MULTICAST is defined, otherwise igmp_mc_init() call in ip_init is compiled out, casuing the igmp pernet ops to not be registerd and those sysctl being left initialized with 0. However, there are certain functions, such as ip_mc_join_group which are always compiled and make use of some of those sysctls. Let's do a partial revert of the aforementioned commit and move the sysctl initialization into inet_init_net, that way they will always have sane values. Fixes: dcd87999d415 ("igmp: net: Move igmp namespace init to correct file") Link: https://bugzilla.kernel.org/show_bug.cgi?id=196595 Reported-by: Gerardo Exequiel Pozzi Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/af_inet.c | 7 +++++++ net/ipv4/igmp.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f60fe82c2c1e..b5116ec31757 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1693,6 +1693,13 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; + /* Some igmp sysctl, whose values are always used */ + net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; + return 0; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 19930da56b0a..08575e3bd135 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net) goto out_sock; } - /* Sysctl initialization */ - net->ipv4.sysctl_igmp_max_memberships = 20; - net->ipv4.sysctl_igmp_max_msf = 10; - /* IGMP reports for link-local multicast groups are enabled by default */ - net->ipv4.sysctl_igmp_llm_reports = 1; - net->ipv4.sysctl_igmp_qrv = 2; return 0; out_sock: -- cgit v1.2.3 From e5841355061332f8b326e098949490345dba776b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:41:58 -0400 Subject: packet: fix tp_reserve race in packet_set_ring [ Upstream commit c27927e372f0785f3303e8fad94b85945e2c97b7 ] Updates to tp_reserve can race with reads of the field in packet_set_ring. Avoid this by holding the socket lock during updates in setsockopt PACKET_RESERVE. This bug was discovered by syzkaller. Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 365c83fcee02..ae7bfd26cd91 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3698,14 +3698,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; - po->tp_reserve = val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_reserve = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_LOSS: { -- cgit v1.2.3 From 53eed8aadc58a5c2b1563d8755d8546c88222f0c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Aug 2017 09:14:09 -0700 Subject: revert "net: account for current skb length when deciding about UFO" This reverts commit ef09c9ff343122a0b245416066992d096416ff19 which is commit a5cb659bbc1c8644efa0c3138a757a1e432a4880 upstream as it causes merge issues with later patches that are much more important... Cc: Michal Kubecek Cc: Vlad Yasevich Cc: David S. Miller Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 3 +-- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 06215ba88b93..e65bceae823b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -936,8 +936,7 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || - (skb && skb_is_gso(skb))) && + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a5cdf2a23609..efe811c6eccf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1372,7 +1372,7 @@ emsgsize: */ cork->length += length; - if ((((length + (skb ? skb->len : headersize)) > mtu) || + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && -- cgit v1.2.3 From 4688f042599f8eabe45713f7d5e88a4da5a89765 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Aug 2017 09:19:02 -0700 Subject: revert "ipv4: Should use consistent conditional judgement for ip fragment in __ip_append_data and ip_finish_output" This reverts commit f102bb7164c9020e12662998f0fd99c3be72d4f6 which is commit 0a28cfd51e17f4f0a056bcf66bfbe492c3b99f38 upstream as there is another patch that needs to be applied instead of this one. Cc: Zheng Li Cc: David S. Miller Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e65bceae823b..e5c1dbef3626 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -936,7 +936,7 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && + if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { -- cgit v1.2.3 From 33dc6a6a85f1d6ce71e7056d009b8a5fcbf10f70 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:29:19 -0400 Subject: udp: consistently apply ufo or fragmentation [ Upstream commit 85f1bd9a7b5a79d5baa8bf44af19658f7bf77bfa ] When iteratively building a UDP datagram with MSG_MORE and that datagram exceeds MTU, consistently choose UFO or fragmentation. Once skb_is_gso, always apply ufo. Conversely, once a datagram is split across multiple skbs, do not consider ufo. Sendpage already maintains the first invariant, only add the second. IPv6 does not have a sendpage implementation to modify. A gso skb must have a partial checksum, do not follow sk_no_check_tx in udp_send_skb. Found by syzkaller. Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 7 +++++-- net/ipv4/udp.c | 2 +- net/ipv6/ip6_output.c | 7 ++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5c1dbef3626..2c3c1a223df4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -936,10 +936,12 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + ((length > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1255,6 +1257,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EINVAL; if ((size + skb->len > mtu) && + (skb_queue_len(&sk->sk_write_queue) == 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { if (skb->ip_summed != CHECKSUM_PARTIAL) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5bab6c3f7a2f..4363b1e89bdf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -813,7 +813,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index efe811c6eccf..e0236e902ea7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1372,11 +1372,12 @@ emsgsize: */ cork->length += length; - if ((((length + fragheaderlen) > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + fragheaderlen) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); -- cgit v1.2.3 From 6309eb77d823393ff427312bfd53e2b7f1bdcab2 Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Mon, 17 Jul 2017 09:22:27 -0600 Subject: sparc64: Prevent perf from running during super critical sections commit fc290a114fc6034b0f6a5a46e2fb7d54976cf87a upstream. This fixes another cause of random segfaults and bus errors that may occur while running perf with the callgraph option. Critical sections beginning with spin_lock_irqsave() raise the interrupt level to PIL_NORMAL_MAX (14) and intentionally do not block performance counter interrupts, which arrive at PIL_NMI (15). But some sections of code are "super critical" with respect to perf because the perf_callchain_user() path accesses user space and may cause TLB activity as well as faults as it unwinds the user stack. One particular critical section occurs in switch_mm: spin_lock_irqsave(&mm->context.lock, flags); ... load_secondary_context(mm); tsb_context_switch(mm); ... spin_unlock_irqrestore(&mm->context.lock, flags); If a perf interrupt arrives in between load_secondary_context() and tsb_context_switch(), then perf_callchain_user() could execute with the context ID of one process, but with an active TSB for a different process. When the user stack is accessed, it is very likely to incur a TLB miss, since the h/w context ID has been changed. The TLB will then be reloaded with a translation from the TSB for one process, but using a context ID for another process. This exposes memory from one process to another, and since it is a mapping for stack memory, this usually causes the new process to crash quickly. This super critical section needs more protection than is provided by spin_lock_irqsave() since perf interrupts must not be allowed in. Since __tsb_context_switch already goes through the trouble of disabling interrupts completely, we fix this by moving the secondary context load down into this better protected region. Orabug: 25577560 Signed-off-by: Dave Aldridge Signed-off-by: Rob Gardner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/mmu_context_64.h | 14 +++++++++----- arch/sparc/kernel/tsb.S | 12 ++++++++++++ arch/sparc/power/hibernate.c | 3 +-- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 349dd23e2876..0cdeb2b483a0 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -25,9 +25,11 @@ void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, - unsigned long tsb_descr_pa); + unsigned long tsb_descr_pa, + unsigned long secondary_ctx); -static inline void tsb_context_switch(struct mm_struct *mm) +static inline void tsb_context_switch_ctx(struct mm_struct *mm, + unsigned long ctx) { __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[0], @@ -38,9 +40,12 @@ static inline void tsb_context_switch(struct mm_struct *mm) #else NULL #endif - , __pa(&mm->context.tsb_descr[0])); + , __pa(&mm->context.tsb_descr[0]), + ctx); } +#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) + void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); @@ -110,8 +115,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * cpu0 to update it's TSB because at that point the cpu_vm_mask * only had cpu1 set in it. */ - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); /* Any time a processor runs a context on an address space * for the first time, we must flush that context out of the diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 395ec1800530..7d961f6e3907 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -375,6 +375,7 @@ tsb_flush: * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none * %o3: Hypervisor TSB descriptor physical address + * %o4: Secondary context to load, if non-zero * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -387,6 +388,17 @@ __tsb_context_switch: rdpr %pstate, %g1 wrpr %g1, PSTATE_IE, %pstate + brz,pn %o4, 1f + mov SECONDARY_CONTEXT, %o5 + +661: stxa %o4, [%o5] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %o4, [%o5] ASI_MMU + .previous + flush %g6 + +1: TRAP_LOAD_TRAP_BLOCK(%g2, %g3) stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 17bd2e167e07..df707a8ad311 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -35,6 +35,5 @@ void restore_processor_state(void) { struct mm_struct *mm = current->active_mm; - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); } -- cgit v1.2.3 From 3f0075cdbdd8ec09cd04034c87d1d1fb9069571e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 5 Jul 2017 09:57:00 +0100 Subject: KVM: arm/arm64: Handle hva aging while destroying the vm commit 7e5a672289c9754d07e1c3b33649786d3d70f5e4 upstream. The mmu_notifier_release() callback of KVM triggers cleaning up the stage2 page table on kvm-arm. However there could be other notifier callbacks in parallel with the mmu_notifier_release(), which could cause the call backs ending up in an empty stage2 page table. Make sure we check it for all the notifier callbacks. Fixes: commit 293f29363 ("kvm-arm: Unmap shadow pagetables properly") Reported-by: Alex Graf Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 332ce3b5a34f..710511cadd50 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1664,12 +1664,16 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { + if (!kvm->arch.pgd) + return 0; trace_kvm_age_hva(start, end); return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { + if (!kvm->arch.pgd) + return 0; trace_kvm_test_age_hva(hva); return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); } -- cgit v1.2.3 From 1166e3e03342d179dff806457d4f76cf90490d18 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 12 Aug 2017 09:08:48 -0700 Subject: Revert "ARM: dts: sun8i: Support DTB build for NanoPi M1" This reverts commit 1e9e71782f3462d5aecb0720d26298253bdbeca7 which is commit 661ccdc1a95f18ab6c1373322fde09afd5b90a1f upstream. It's not needed in 4.9, and it breaks the build. Reported-by: Guenter Roeck Cc: Milo Kim Cc: Maxime Ripard Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index f3baa896ce84..7037201c5e3a 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -820,7 +820,6 @@ dtb-$(CONFIG_MACH_SUN8I) += \ sun8i-a83t-allwinner-h8homlet-v2.dtb \ sun8i-a83t-cubietruck-plus.dtb \ sun8i-h3-bananapi-m2-plus.dtb \ - sun8i-h3-nanopi-m1.dtb \ sun8i-h3-nanopi-neo.dtb \ sun8i-h3-orangepi-2.dtb \ sun8i-h3-orangepi-lite.dtb \ -- cgit v1.2.3 From 6da35f43acde8f718b53f6f05fc865bffa709fc5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 12 Aug 2017 19:31:45 -0700 Subject: Linux 4.9.43 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 34d4d9f8a4b2..77953bf3f40a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 42 +SUBLEVEL = 43 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From b56cd77c1205fae6c92a519bd11cc26e78292bfe Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Thu, 10 Aug 2017 15:23:35 -0700 Subject: mm: ratelimit PFNs busy info message commit 75dddef32514f7aa58930bde6a1263253bc3d4ba upstream. The RDMA subsystem can generate several thousand of these messages per second eventually leading to a kernel crash. Ratelimit these messages to prevent this crash. Doug said: "I've been carrying a version of this for several kernel versions. I don't remember when they started, but we have one (and only one) class of machines: Dell PE R730xd, that generate these errors. When it happens, without a rate limit, we get rcu timeouts and kernel oopses. With the rate limit, we just get a lot of annoying kernel messages but the machine continues on, recovers, and eventually the memory operations all succeed" And: "> Well... why are all these EBUSY's occurring? It sounds inefficient > (at least) but if it is expected, normal and unavoidable then > perhaps we should just remove that message altogether? I don't have an answer to that question. To be honest, I haven't looked real hard. We never had this at all, then it started out of the blue, but only on our Dell 730xd machines (and it hits all of them), but no other classes or brands of machines. And we have our 730xd machines loaded up with different brands and models of cards (for instance one dedicated to mlx4 hardware, one for qib, one for mlx5, an ocrdma/cxgb4 combo, etc), so the fact that it hit all of the machines meant it wasn't tied to any particular brand/model of RDMA hardware. To me, it always smelled of a hardware oddity specific to maybe the CPUs or mainboard chipsets in these machines, so given that I'm not an mm expert anyway, I never chased it down. A few other relevant details: it showed up somewhere around 4.8/4.9 or thereabouts. It never happened before, but the prinkt has been there since the 3.18 days, so possibly the test to trigger this message was changed, or something else in the allocator changed such that the situation started happening on these machines? And, like I said, it is specific to our 730xd machines (but they are all identical, so that could mean it's something like their specific ram configuration is causing the allocator to hit this on these machine but not on other machines in the cluster, I don't want to say it's necessarily the model of chipset or CPU, there are other bits of identicalness between these machines)" Link: http://lkml.kernel.org/r/499c0f6cc10d6eb829a67f2a4d75b4228a9b356e.1501695897.git.jtoppins@redhat.com Signed-off-by: Jonathan Toppins Reviewed-by: Doug Ledford Tested-by: Doug Ledford Cc: Michal Hocko Cc: Vlastimil Babka Cc: Mel Gorman Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 77b797c2d094..9419aa4e5441 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7335,7 +7335,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, false)) { - pr_info("%s: [%lx, %lx) PFNs busy\n", + pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n", __func__, outer_start, end); ret = -EBUSY; goto done; -- cgit v1.2.3 From e2286916ac078728949163d7cbd5d4875a57dbfb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 10 Aug 2017 15:24:24 -0700 Subject: mm: fix list corruptions on shmem shrinklist commit d041353dc98a6339182cd6f628b4c8f111278cb3 upstream. We saw many list corruption warnings on shmem shrinklist: WARNING: CPU: 18 PID: 177 at lib/list_debug.c:59 __list_del_entry+0x9e/0xc0 list_del corruption. prev->next should be ffff9ae5694b82d8, but was ffff9ae5699ba960 Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt CPU: 18 PID: 177 Comm: kswapd1 Not tainted 4.9.34-t3.el7.twitter.x86_64 #1 Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013 Call Trace: dump_stack+0x4d/0x66 __warn+0xcb/0xf0 warn_slowpath_fmt+0x4f/0x60 __list_del_entry+0x9e/0xc0 shmem_unused_huge_shrink+0xfa/0x2e0 shmem_unused_huge_scan+0x20/0x30 super_cache_scan+0x193/0x1a0 shrink_slab.part.41+0x1e3/0x3f0 shrink_slab+0x29/0x30 shrink_node+0xf9/0x2f0 kswapd+0x2d8/0x6c0 kthread+0xd7/0xf0 ret_from_fork+0x22/0x30 WARNING: CPU: 23 PID: 639 at lib/list_debug.c:33 __list_add+0x89/0xb0 list_add corruption. prev->next should be next (ffff9ae5699ba960), but was ffff9ae5694b82d8. (prev=ffff9ae5694b82d8). Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt CPU: 23 PID: 639 Comm: systemd-udevd Tainted: G W 4.9.34-t3.el7.twitter.x86_64 #1 Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013 Call Trace: dump_stack+0x4d/0x66 __warn+0xcb/0xf0 warn_slowpath_fmt+0x4f/0x60 __list_add+0x89/0xb0 shmem_setattr+0x204/0x230 notify_change+0x2ef/0x440 do_truncate+0x5d/0x90 path_openat+0x331/0x1190 do_filp_open+0x7e/0xe0 do_sys_open+0x123/0x200 SyS_open+0x1e/0x20 do_syscall_64+0x61/0x170 entry_SYSCALL64_slow_path+0x25/0x25 The problem is that shmem_unused_huge_shrink() moves entries from the global sbinfo->shrinklist to its local lists and then releases the spinlock. However, a parallel shmem_setattr() could access one of these entries directly and add it back to the global shrinklist if it is removed, with the spinlock held. The logic itself looks solid since an entry could be either in a local list or the global list, otherwise it is removed from one of them by list_del_init(). So probably the race condition is that, one CPU is in the middle of INIT_LIST_HEAD() but the other CPU calls list_empty() which returns true too early then the following list_add_tail() sees a corrupted entry. list_empty_careful() is designed to fix this situation. [akpm@linux-foundation.org: add comments] Link: http://lkml.kernel.org/r/20170803054630.18775-1-xiyou.wangcong@gmail.com Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure") Signed-off-by: Cong Wang Acked-by: Linus Torvalds Acked-by: Kirill A. Shutemov Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index d99cfb6eb03a..7ee5444ffb6d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1007,7 +1007,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) */ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { spin_lock(&sbinfo->shrinklist_lock); - if (list_empty(&info->shrinklist)) { + /* + * _careful to defend against unlocked access to + * ->shrink_list in shmem_unused_huge_shrink() + */ + if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; @@ -1774,7 +1778,11 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo, * to shrink under memory pressure. */ spin_lock(&sbinfo->shrinklist_lock); - if (list_empty(&info->shrinklist)) { + /* + * _careful to defend against unlocked access to + * ->shrink_list in shmem_unused_huge_shrink() + */ + if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; -- cgit v1.2.3 From 0041042de554cbcaa73163747d7179be715923fc Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 9 Aug 2017 08:27:11 +0100 Subject: futex: Remove unnecessary warning from get_futex_key commit 48fb6f4db940e92cfb16cd878cddd59ea6120d06 upstream. Commit 65d8fc777f6d ("futex: Remove requirement for lock_page() in get_futex_key()") removed an unnecessary lock_page() with the side-effect that page->mapping needed to be treated very carefully. Two defensive warnings were added in case any assumption was missed and the first warning assumed a correct application would not alter a mapping backing a futex key. Since merging, it has not triggered for any unexpected case but Mark Rutland reported the following bug triggering due to the first warning. kernel BUG at kernel/futex.c:679! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 3695 Comm: syz-executor1 Not tainted 4.13.0-rc3-00020-g307fec773ba3 #3 Hardware name: linux,dummy-virt (DT) task: ffff80001e271780 task.stack: ffff000010908000 PC is at get_futex_key+0x6a4/0xcf0 kernel/futex.c:679 LR is at get_futex_key+0x6a4/0xcf0 kernel/futex.c:679 pc : [] lr : [] pstate: 80000145 The fact that it's a bug instead of a warning was due to an unrelated arm64 problem, but the warning itself triggered because the underlying mapping changed. This is an application issue but from a kernel perspective it's a recoverable situation and the warning is unnecessary so this patch removes the warning. The warning may potentially be triggered with the following test program from Mark although it may be necessary to adjust NR_FUTEX_THREADS to be a value smaller than the number of CPUs in the system. #include #include #include #include #include #include #include #include #define NR_FUTEX_THREADS 16 pthread_t threads[NR_FUTEX_THREADS]; void *mem; #define MEM_PROT (PROT_READ | PROT_WRITE) #define MEM_SIZE 65536 static int futex_wrapper(int *uaddr, int op, int val, const struct timespec *timeout, int *uaddr2, int val3) { syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3); } void *poll_futex(void *unused) { for (;;) { futex_wrapper(mem, FUTEX_CMP_REQUEUE_PI, 1, NULL, mem + 4, 1); } } int main(int argc, char *argv[]) { int i; mem = mmap(NULL, MEM_SIZE, MEM_PROT, MAP_SHARED | MAP_ANONYMOUS, -1, 0); printf("Mapping @ %p\n", mem); printf("Creating futex threads...\n"); for (i = 0; i < NR_FUTEX_THREADS; i++) pthread_create(&threads[i], NULL, poll_futex, NULL); printf("Flipping mapping...\n"); for (;;) { mmap(mem, MEM_SIZE, MEM_PROT, MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0); } return 0; } Reported-and-tested-by: Mark Rutland Signed-off-by: Mel Gorman Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 4c6b6e697b73..88bad86180ac 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -668,13 +668,14 @@ again: * this reference was taken by ihold under the page lock * pinning the inode in place so i_lock was unnecessary. The * only way for this check to fail is if the inode was - * truncated in parallel so warn for now if this happens. + * truncated in parallel which is almost certainly an + * application bug. In such a case, just retry. * * We are not calling into get_futex_key_refs() in file-backed * cases, therefore a successful atomic_inc return below will * guarantee that get_futex_key() will still imply smp_mb(); (B). */ - if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) { + if (!atomic_inc_not_zero(&inode->i_count)) { rcu_read_unlock(); put_page(page); -- cgit v1.2.3 From 03973c57e1a27de080ed06a3961f1d6afec27539 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 28 Jul 2017 17:42:59 -0700 Subject: xtensa: fix cache aliasing handling code for WT cache commit 6d0f581d1768d3eaba15776e7dd1fdfec10cfe36 upstream. Currently building kernel for xtensa core with aliasing WT cache fails with the following messages: mm/memory.c:2152: undefined reference to `flush_dcache_page' mm/memory.c:2332: undefined reference to `local_flush_cache_page' mm/memory.c:1919: undefined reference to `local_flush_cache_range' mm/memory.c:4179: undefined reference to `copy_to_user_page' mm/memory.c:4183: undefined reference to `copy_from_user_page' This happens because implementation of these functions is only compiled when data cache is WB, which looks wrong: even when data cache doesn't need flushing it still needs invalidation. The functions like __flush_[invalidate_]dcache_* are correctly defined for both WB and WT caches (and even if they weren't that'd still be ok, just slower). Fix this by providing the same implementation of the above functions for both WB and WT cache. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/mm/cache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 1a804a2f9a5b..dbb1cdef3663 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -120,10 +120,6 @@ void copy_user_highpage(struct page *dst, struct page *src, preempt_enable(); } -#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ - -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK - /* * Any time the kernel writes to a user page cache page, or it is about to * read from a page cache page this routine is called. @@ -208,7 +204,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, __invalidate_icache_page_alias(virt, phys); } -#endif +#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ void update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) @@ -225,7 +221,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) flush_tlb_page(vma, addr); -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) { unsigned long phys = page_to_phys(page); @@ -256,7 +252,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) * flush_dcache_page() on the page. */ -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, -- cgit v1.2.3 From a3ab0f069f466acb7da4f960d8e1bc3cfdf1309e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 1 Aug 2017 11:15:15 -0700 Subject: xtensa: mm/cache: add missing EXPORT_SYMBOLs commit bc652eb6a0d5cffaea7dc8e8ad488aab2a1bf1ed upstream. Functions clear_user_highpage, copy_user_highpage, flush_dcache_page, local_flush_cache_range and local_flush_cache_page may be used from modules. Export them. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/mm/cache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index dbb1cdef3663..3c75c4e597da 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -103,6 +103,7 @@ void clear_user_highpage(struct page *page, unsigned long vaddr) clear_page_alias(kvaddr, paddr); preempt_enable(); } +EXPORT_SYMBOL(clear_user_highpage); void copy_user_highpage(struct page *dst, struct page *src, unsigned long vaddr, struct vm_area_struct *vma) @@ -119,6 +120,7 @@ void copy_user_highpage(struct page *dst, struct page *src, copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr); preempt_enable(); } +EXPORT_SYMBOL(copy_user_highpage); /* * Any time the kernel writes to a user page cache page, or it is about to @@ -172,7 +174,7 @@ void flush_dcache_page(struct page *page) /* There shouldn't be an entry in the cache for this page anymore. */ } - +EXPORT_SYMBOL(flush_dcache_page); /* * For now, flush the whole cache. FIXME?? @@ -184,6 +186,7 @@ void local_flush_cache_range(struct vm_area_struct *vma, __flush_invalidate_dcache_all(); __invalidate_icache_all(); } +EXPORT_SYMBOL(local_flush_cache_range); /* * Remove any entry in the cache for this page. @@ -203,6 +206,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); } +EXPORT_SYMBOL(local_flush_cache_page); #endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ -- cgit v1.2.3 From a311810903c700ff7411be16489df09ad6f3faee Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 1 Aug 2017 11:02:46 -0700 Subject: xtensa: don't limit csum_partial export by CONFIG_NET commit 7f81e55c737a8fa82c71f290945d729a4902f8d2 upstream. csum_partial and csum_partial_copy_generic are defined unconditionally and are available even when CONFIG_NET is disabled. They are used not only by the network drivers, but also by scsi and media. Don't limit these functions export by CONFIG_NET. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/xtensa_ksyms.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 4d2872fd9bb5..a71d2739fa82 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -94,13 +94,11 @@ unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v) } EXPORT_SYMBOL(__sync_fetch_and_or_4); -#ifdef CONFIG_NET /* * Networking support */ EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); -#endif /* CONFIG_NET */ /* * Architecture-specific symbols -- cgit v1.2.3 From ced271b814e441ac5ce079427c14860f9534b57a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 31 Jul 2017 10:29:56 +0200 Subject: mtd: nand: Fix timing setup for NANDs that do not support SET FEATURES commit a11bf5ed951f8900d244d09eb03a888b59c7fc82 upstream. Some ONFI NANDs do not support the SET/GET FEATURES commands, which, according to the spec, is perfectly valid. On these NANDs we can't set a specific timing mode using the "timing mode" feature, and we should assume the NAND does not require any setup to enter a specific timing mode. Signed-off-by: Boris Brezillon Fixes: d8e725dd8311 ("mtd: nand: automate NAND timings selection") Reported-by: Alexander Dahl Tested-by: Alexander Dahl Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index f222f8a7ba52..d1b253f73c56 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1081,7 +1081,9 @@ static int nand_setup_data_interface(struct nand_chip *chip) * Ensure the timing mode has been changed on the chip side * before changing timings on the controller side. */ - if (chip->onfi_version) { + if (chip->onfi_version && + (le16_to_cpu(chip->onfi_params.opt_cmd) & + ONFI_OPT_CMD_SET_GET_FEATURES)) { u8 tmode_param[ONFI_SUBFEATURE_PARAM_LEN] = { chip->onfi_timing_mode_default, }; -- cgit v1.2.3 From e6a0599b746427682df1e8d444eee9e0641b4c09 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sun, 23 Jul 2017 20:03:33 +0530 Subject: iscsi-target: fix memory leak in iscsit_setup_text_cmd() commit ea8dc5b4cd2195ee582cae28afa4164c6dea1738 upstream. On receiving text request iscsi-target allocates buffer for payload in iscsit_handle_text_cmd() and assigns buffer pointer to cmd->text_in_ptr, this buffer is currently freed in iscsit_release_cmd(), if iscsi-target sets 'C' bit in text response then it will receive another text request from the initiator with ttt != 0xffffffff in this case iscsi-target will find cmd using itt and call iscsit_setup_text_cmd() which will set cmd->text_in_ptr to NULL without freeing previously allocated buffer. This patch fixes this issue by calling kfree(cmd->text_in_ptr) in iscsit_setup_text_cmd() before assigning NULL to it. For the first text request cmd->text_in_ptr is NULL as cmd is memset to 0 in iscsit_allocate_cmd(). Signed-off-by: Varun Prakash Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 155fe0e0623f..2d93946cb6c2 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2177,6 +2177,7 @@ iscsit_setup_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, cmd->cmd_sn = be32_to_cpu(hdr->cmdsn); cmd->exp_stat_sn = be32_to_cpu(hdr->exp_statsn); cmd->data_direction = DMA_NONE; + kfree(cmd->text_in_ptr); cmd->text_in_ptr = NULL; return 0; -- cgit v1.2.3 From b51a71635576ddd2f30597c148ca4b0d62cec288 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 4 Aug 2017 23:59:31 -0700 Subject: iscsi-target: Fix iscsi_np reset hung task during parallel delete commit 978d13d60c34818a41fc35962602bdfa5c03f214 upstream. This patch fixes a bug associated with iscsit_reset_np_thread() that can occur during parallel configfs rmdir of a single iscsi_np used across multiple iscsi-target instances, that would result in hung task(s) similar to below where configfs rmdir process context was blocked indefinately waiting for iscsi_np->np_restart_comp to finish: [ 6726.112076] INFO: task dcp_proxy_node_:15550 blocked for more than 120 seconds. [ 6726.119440] Tainted: G W O 4.1.26-3321 #2 [ 6726.125045] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 6726.132927] dcp_proxy_node_ D ffff8803f202bc88 0 15550 1 0x00000000 [ 6726.140058] ffff8803f202bc88 ffff88085c64d960 ffff88083b3b1ad0 ffff88087fffeb08 [ 6726.147593] ffff8803f202c000 7fffffffffffffff ffff88083f459c28 ffff88083b3b1ad0 [ 6726.155132] ffff88035373c100 ffff8803f202bca8 ffffffff8168ced2 ffff8803f202bcb8 [ 6726.162667] Call Trace: [ 6726.165150] [] schedule+0x32/0x80 [ 6726.170156] [] schedule_timeout+0x214/0x290 [ 6726.176030] [] ? __send_signal+0x52/0x4a0 [ 6726.181728] [] wait_for_completion+0x96/0x100 [ 6726.187774] [] ? wake_up_state+0x10/0x10 [ 6726.193395] [] iscsit_reset_np_thread+0x62/0xe0 [iscsi_target_mod] [ 6726.201278] [] iscsit_tpg_disable_portal_group+0x96/0x190 [iscsi_target_mod] [ 6726.210033] [] lio_target_tpg_store_enable+0x4f/0xc0 [iscsi_target_mod] [ 6726.218351] [] configfs_write_file+0xaa/0x110 [ 6726.224392] [] vfs_write+0xa4/0x1b0 [ 6726.229576] [] SyS_write+0x41/0xb0 [ 6726.234659] [] system_call_fastpath+0x12/0x71 It would happen because each iscsit_reset_np_thread() sets state to ISCSI_NP_THREAD_RESET, sends SIGINT, and then blocks waiting for completion on iscsi_np->np_restart_comp. However, if iscsi_np was active processing a login request and more than a single iscsit_reset_np_thread() caller to the same iscsi_np was blocked on iscsi_np->np_restart_comp, iscsi_np kthread process context in __iscsi_target_login_thread() would flush pending signals and only perform a single completion of np->np_restart_comp before going back to sleep within transport specific iscsit_transport->iscsi_accept_np code. To address this bug, add a iscsi_np->np_reset_count and update __iscsi_target_login_thread() to keep completing np->np_restart_comp until ->np_reset_count has reached zero. Reported-by: Gary Guo Tested-by: Gary Guo Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_login.c | 7 +++++-- include/target/iscsi/iscsi_target_core.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 2d93946cb6c2..e49fcd5e61f7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -418,6 +418,7 @@ int iscsit_reset_np_thread( return 0; } np->np_thread_state = ISCSI_NP_THREAD_RESET; + atomic_inc(&np->np_reset_count); if (np->np_thread) { spin_unlock_bh(&np->np_thread_lock); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 6128e8e80170..9ccd5da8f204 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1233,9 +1233,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) flush_signals(current); spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); + return 1; } else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) { spin_unlock_bh(&np->np_thread_lock); goto exit; @@ -1268,7 +1270,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto exit; } else if (rc < 0) { spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); iscsit_put_transport(conn->conn_transport); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index c8132b419148..6021c3acb6c5 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -785,6 +785,7 @@ struct iscsi_np { int np_sock_type; enum np_thread_state_table np_thread_state; bool enabled; + atomic_t np_reset_count; enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; -- cgit v1.2.3 From 1da30c23b63b3ed426880ac11179dbc2d2bbf368 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Aug 2017 16:10:03 -0700 Subject: target: Fix node_acl demo-mode + uncached dynamic shutdown regression commit 6f48655facfd7f7ccfe6d252ac0fe319ab02e4dd upstream. This patch fixes a generate_node_acls = 1 + cache_dynamic_acls = 0 regression, that was introduced by commit 01d4d673558985d9a118e1e05026633c3e2ade9b Author: Nicholas Bellinger Date: Wed Dec 7 12:55:54 2016 -0800 which originally had the proper list_del_init() usage, but was dropped during list review as it was thought unnecessary by HCH. However, list_del_init() usage is required during the special generate_node_acls = 1 + cache_dynamic_acls = 0 case when transport_free_session() does a list_del(&se_nacl->acl_list), followed by target_complete_nacl() doing the same thing. This was manifesting as a general protection fault as reported by Justin: kernel: general protection fault: 0000 [#1] SMP kernel: Modules linked in: kernel: CPU: 0 PID: 11047 Comm: iscsi_ttx Not tainted 4.13.0-rc2.x86_64.1+ #20 kernel: Hardware name: Intel Corporation S5500BC/S5500BC, BIOS S5500.86B.01.00.0064.050520141428 05/05/2014 kernel: task: ffff88026939e800 task.stack: ffffc90007884000 kernel: RIP: 0010:target_put_nacl+0x49/0xb0 kernel: RSP: 0018:ffffc90007887d70 EFLAGS: 00010246 kernel: RAX: dead000000000200 RBX: ffff8802556ca000 RCX: 0000000000000000 kernel: RDX: dead000000000100 RSI: 0000000000000246 RDI: ffff8802556ce028 kernel: RBP: ffffc90007887d88 R08: 0000000000000001 R09: 0000000000000000 kernel: R10: ffffc90007887df8 R11: ffffea0009986900 R12: ffff8802556ce020 kernel: R13: ffff8802556ce028 R14: ffff8802556ce028 R15: ffffffff88d85540 kernel: FS: 0000000000000000(0000) GS:ffff88027fc00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007fffe36f5f94 CR3: 0000000009209000 CR4: 00000000003406f0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kernel: Call Trace: kernel: transport_free_session+0x67/0x140 kernel: transport_deregister_session+0x7a/0xc0 kernel: iscsit_close_session+0x92/0x210 kernel: iscsit_close_connection+0x5f9/0x840 kernel: iscsit_take_action_for_connection_exit+0xfe/0x110 kernel: iscsi_target_tx_thread+0x140/0x1e0 kernel: ? wait_woken+0x90/0x90 kernel: kthread+0x124/0x160 kernel: ? iscsit_thread_get_cpumask+0x90/0x90 kernel: ? kthread_create_on_node+0x40/0x40 kernel: ret_from_fork+0x22/0x30 kernel: Code: 00 48 89 fb 4c 8b a7 48 01 00 00 74 68 4d 8d 6c 24 08 4c 89 ef e8 e8 28 43 00 48 8b 93 20 04 00 00 48 8b 83 28 04 00 00 4c 89 ef <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 20 kernel: RIP: target_put_nacl+0x49/0xb0 RSP: ffffc90007887d70 kernel: ---[ end trace f12821adbfd46fed ]--- To address this, go ahead and use proper list_del_list() for all cases of se_nacl->acl_list deletion. Reported-by: Justin Maggard Tested-by: Justin Maggard Cc: Justin Maggard Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_tpg.c | 4 ++-- drivers/target/target_core_transport.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 1949f50725a5..0e2e71f6c198 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -364,7 +364,7 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) mutex_lock(&tpg->acl_node_mutex); if (acl->dynamic_node_acl) acl->dynamic_node_acl = 0; - list_del(&acl->acl_list); + list_del_init(&acl->acl_list); mutex_unlock(&tpg->acl_node_mutex); target_shutdown_sessions(acl); @@ -540,7 +540,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) * in transport_deregister_session(). */ list_for_each_entry_safe(nacl, nacl_tmp, &node_list, acl_list) { - list_del(&nacl->acl_list); + list_del_init(&nacl->acl_list); core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e8a1f5cadba5..bacfa8f81be8 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -465,7 +465,7 @@ static void target_complete_nacl(struct kref *kref) } mutex_lock(&se_tpg->acl_node_mutex); - list_del(&nacl->acl_list); + list_del_init(&nacl->acl_list); mutex_unlock(&se_tpg->acl_node_mutex); core_tpg_wait_for_nacl_pr_ref(nacl); @@ -537,7 +537,7 @@ void transport_free_session(struct se_session *se_sess) spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags); if (se_nacl->dynamic_stop) - list_del(&se_nacl->acl_list); + list_del_init(&se_nacl->acl_list); } mutex_unlock(&se_tpg->acl_node_mutex); -- cgit v1.2.3 From 227559e6233c9af382efcfd4c9ac87e090bb4853 Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 7 Jun 2017 12:26:49 +0200 Subject: fuse: initialize the flock flag in fuse_file on allocation commit 68227c03cba84a24faf8a7277d2b1a03c8959c2c upstream. Before the patch, the flock flag could remain uninitialized for the lifespan of the fuse_file allocation. Unless set to true in fuse_file_flock(), it would remain in an indeterminate state until read in an if statement in fuse_release_common(). This could consequently lead to taking an unexpected branch in the code. The bug was discovered by a runtime instrumentation designed to detect use of uninitialized memory in the kernel. Signed-off-by: Mateusz Jurczyk Fixes: 37fb3a30b462 ("fuse: fix flock") Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ec5870e423a..996aa23c409e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -46,7 +46,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; - ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); + ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL); if (unlikely(!ff)) return NULL; -- cgit v1.2.3 From 160c365b5879e6d4ea1c299b68c9702e8c783298 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 5 Jul 2017 08:51:09 +0200 Subject: nand: fix wrong default oob layout for small pages using soft ecc commit f7f8c1756e9a5f1258a7cc6b663f8451b724900f upstream. When using soft ecc, if no ooblayout is given, the core automatically uses one of the nand_ooblayout_{sp,lp}*() functions to determine the layout inside the out of band data. Until kernel version 4.6, struct nand_ecclayout was used for that purpose. During the migration from 4.6 to 4.7, an error shown up in the small page layout, in the case oob section is only 8 bytes long. The layout was using three bytes (0, 1, 2) for ecc, two bytes (3, 4) as free bytes, one byte (5) for bad block marker and finally two bytes (6, 7) as free bytes, as shown there: [linux-4.6] drivers/mtd/nand/nand_base.c:52 static struct nand_ecclayout nand_oob_8 = { .eccbytes = 3, .eccpos = {0, 1, 2}, .oobfree = { {.offset = 3, .length = 2}, {.offset = 6, .length = 2} } }; This fixes the current implementation which is incoherent. It references bit 3 at the same time as an ecc byte and a free byte. Furthermore, it is clear with the previous implementation that there is only one ecc section with 8 bytes oob sections. We shall return -ERANGE in the nand_ooblayout_ecc_sp() function when asked for the second section. Signed-off-by: Miquel Raynal Fixes: 41b207a70d3a ("mtd: nand: implement the default mtd_ooblayout_ops") Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index d1b253f73c56..31a6ee307d80 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -64,8 +64,14 @@ static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section, if (!section) { oobregion->offset = 0; - oobregion->length = 4; + if (mtd->oobsize == 16) + oobregion->length = 4; + else + oobregion->length = 3; } else { + if (mtd->oobsize == 8) + return -ERANGE; + oobregion->offset = 6; oobregion->length = ecc->total - 4; } -- cgit v1.2.3 From 0a205d8145c22d83e24ada019b599a33f7bf68ed Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 8 Aug 2017 18:54:01 +0800 Subject: mmc: mmc: correct the logic for setting HS400ES signal voltage commit 92ddd95919466de5d34f3cb43635da9a7f9ab814 upstream. Change the default err value to -EINVAL, make sure the card only has type EXT_CSD_CARD_TYPE_HS400_1_8V also do the signal voltage setting when select hs400es mode. Fixes: commit 1720d3545b77 ("mmc: core: switch to 1V8 or 1V2 for hs400es mode") Signed-off-by: Haibo Chen Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 323dba35bc9a..b2ca10c4d2a5 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1258,7 +1258,7 @@ out_err: static int mmc_select_hs400es(struct mmc_card *card) { struct mmc_host *host = card->host; - int err = 0; + int err = -EINVAL; u8 val; if (!(host->caps & MMC_CAP_8_BIT_DATA)) { -- cgit v1.2.3 From 00f3c2a253f752948c5cac9f3a554407ab43d9c2 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 1 Aug 2017 16:25:01 -0400 Subject: nfs/flexfiles: fix leak of nfs4_ff_ds_version arrays commit 1feb26162bee7b2f110facfec71b5c7bdbc7d14d upstream. The client was freeing the nfs4_ff_layout_ds, but not the contained nfs4_ff_ds_version array. Signed-off-by: Weston Andros Adamson Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index f7a3f6b05369..90099896b838 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -30,6 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { nfs4_print_deviceid(&mirror_ds->id_node.deviceid); nfs4_pnfs_ds_put(mirror_ds->ds); + kfree(mirror_ds->ds_versions); kfree_rcu(mirror_ds, id_node.rcu); } -- cgit v1.2.3 From 4381e2c30008fa5ceb28957cb186a25871b09bf1 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Tue, 25 Jul 2017 14:33:36 +0200 Subject: drm/etnaviv: Fix off-by-one error in reloc checking commit d6f756e09f01ea7a0efbbcef269a1e384a35d824 upstream. A relocation pointing to the last four bytes of a buffer can legitimately happen in the case of small vertex buffers. Signed-off-by: Wladimir J. van der Laan Reviewed-by: Philipp Zabel Reviewed-by: Christian Gmeiner Signed-off-by: Lucas Stach Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index afdd55ddf821..1ac9a95e1d78 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -264,8 +264,8 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream, if (ret) return ret; - if (r->reloc_offset >= bo->obj->base.size - sizeof(*ptr)) { - DRM_ERROR("relocation %u outside object", i); + if (r->reloc_offset > bo->obj->base.size - sizeof(*ptr)) { + DRM_ERROR("relocation %u outside object\n", i); return -EINVAL; } -- cgit v1.2.3 From 2b3bf207b2a2465115d2bded0eef8a3c4ee0c6c2 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 24 Jul 2017 11:14:31 +0200 Subject: drm/i915: Fix out-of-bounds array access in bdw_load_gamma_lut commit 5279fc7724ae3a82c9cfe5b09c1fb07ff0e41056 upstream. bdw_load_gamma_lut is writing beyond the array to the maximum value. The intend of the function is to clamp values > 1 to 1, so write the intended color to the max register. This fixes the following KASAN warning: [ 197.020857] [IGT] kms_pipe_color: executing [ 197.063434] [IGT] kms_pipe_color: starting subtest ctm-0-25-pipe0 [ 197.078989] ================================================================== [ 197.079127] BUG: KASAN: slab-out-of-bounds in bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915] [ 197.079188] Read of size 2 at addr ffff8800d38db150 by task kms_pipe_color/1839 [ 197.079208] CPU: 2 PID: 1839 Comm: kms_pipe_color Tainted: G U 4.13.0-rc1-patser+ #5211 [ 197.079215] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015 [ 197.079220] Call Trace: [ 197.079230] dump_stack+0x68/0x9e [ 197.079239] print_address_description+0x6f/0x250 [ 197.079251] kasan_report+0x216/0x370 [ 197.079374] ? bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915] [ 197.079451] ? gen8_write16+0x4e0/0x4e0 [i915] [ 197.079460] __asan_report_load2_noabort+0x14/0x20 [ 197.079535] bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915] [ 197.079612] broadwell_load_luts+0x1df/0x550 [i915] [ 197.079690] intel_color_load_luts+0x7b/0x80 [i915] [ 197.079764] intel_begin_crtc_commit+0x138/0x760 [i915] [ 197.079783] drm_atomic_helper_commit_planes_on_crtc+0x1a3/0x820 [drm_kms_helper] [ 197.079859] ? intel_pre_plane_update+0x571/0x580 [i915] [ 197.079937] intel_update_crtc+0x238/0x330 [i915] [ 197.080016] intel_update_crtcs+0x10f/0x210 [i915] [ 197.080092] intel_atomic_commit_tail+0x1552/0x3340 [i915] [ 197.080101] ? _raw_spin_unlock+0x3c/0x40 [ 197.080110] ? __queue_work+0xb40/0xbf0 [ 197.080188] ? skl_update_crtcs+0xc00/0xc00 [i915] [ 197.080195] ? trace_hardirqs_on+0xd/0x10 [ 197.080269] ? intel_atomic_commit_ready+0x128/0x13c [i915] [ 197.080329] ? __i915_sw_fence_complete+0x5b8/0x6d0 [i915] [ 197.080336] ? debug_object_activate+0x39e/0x580 [ 197.080397] ? i915_sw_fence_await+0x30/0x30 [i915] [ 197.080409] ? __might_sleep+0x15b/0x180 [ 197.080483] intel_atomic_commit+0x944/0xa70 [i915] [ 197.080490] ? refcount_dec_and_test+0x11/0x20 [ 197.080567] ? intel_atomic_commit_tail+0x3340/0x3340 [i915] [ 197.080597] ? drm_atomic_crtc_set_property+0x303/0x580 [drm] [ 197.080674] ? intel_atomic_commit_tail+0x3340/0x3340 [i915] [ 197.080704] drm_atomic_commit+0xd7/0xe0 [drm] [ 197.080722] drm_atomic_helper_crtc_set_property+0xec/0x130 [drm_kms_helper] [ 197.080749] drm_mode_crtc_set_obj_prop+0x7d/0xb0 [drm] [ 197.080775] drm_mode_obj_set_property_ioctl+0x50b/0x5d0 [drm] [ 197.080783] ? __might_fault+0x104/0x180 [ 197.080809] ? drm_mode_obj_find_prop_id+0x160/0x160 [drm] [ 197.080838] ? drm_mode_obj_find_prop_id+0x160/0x160 [drm] [ 197.080861] drm_ioctl_kernel+0x154/0x1a0 [drm] [ 197.080885] drm_ioctl+0x624/0x8f0 [drm] [ 197.080910] ? drm_mode_obj_find_prop_id+0x160/0x160 [drm] [ 197.080934] ? drm_getunique+0x210/0x210 [drm] [ 197.080943] ? __handle_mm_fault+0x1bd0/0x1ce0 [ 197.080949] ? lock_downgrade+0x610/0x610 [ 197.080957] ? __lru_cache_add+0x15a/0x180 [ 197.080967] do_vfs_ioctl+0xd92/0xe40 [ 197.080975] ? ioctl_preallocate+0x1b0/0x1b0 [ 197.080982] ? selinux_capable+0x20/0x20 [ 197.080991] ? __do_page_fault+0x7b7/0x9a0 [ 197.080997] ? lock_downgrade+0x5bb/0x610 [ 197.081007] ? security_file_ioctl+0x57/0x90 [ 197.081016] SyS_ioctl+0x4e/0x80 [ 197.081024] entry_SYSCALL_64_fastpath+0x18/0xad [ 197.081030] RIP: 0033:0x7f61f287a987 [ 197.081035] RSP: 002b:00007fff7d44d188 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 197.081043] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f61f287a987 [ 197.081048] RDX: 00007fff7d44d1c0 RSI: 00000000c01864ba RDI: 0000000000000003 [ 197.081053] RBP: 00007f61f2b3eb00 R08: 0000000000000059 R09: 0000000000000000 [ 197.081058] R10: 0000002ea5c4a290 R11: 0000000000000246 R12: 00007f61f2b3eb58 [ 197.081063] R13: 0000000000001010 R14: 00007f61f2b3eb58 R15: 0000000000002702 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101659 Signed-off-by: Maarten Lankhorst Reported-by: Martin Peres Cc: Martin Peres Fixes: 82cf435b3134 ("drm/i915: Implement color management on bdw/skl/bxt/kbl") Cc: Shashank Sharma Cc: Kiran S Kumar Cc: Kausal Malladi Cc: Lionel Landwerlin Cc: Matt Roper Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Link: https://patchwork.freedesktop.org/patch/msgid/20170724091431.24251-1-maarten.lankhorst@linux.intel.com Reviewed-by: Lionel Landwerlin (cherry picked from commit 09a92bc8773b4314e02b478e003fe5936ce85adb) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_color.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 95a72771eea6..89a77743ab29 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -394,6 +394,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) } /* Program the max register to clamp values > 1.0. */ + i = lut_size - 1; I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), drm_color_lut_extract(lut[i].red, 16)); I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), -- cgit v1.2.3 From e27f58cd130bae882e6bd66e094c036056a03278 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 2 Aug 2017 00:45:06 +0900 Subject: USB: serial: option: add D-Link DWM-222 device ID commit fd1b8668af59a11bb754a6c9b0051c6c5ce73b74 upstream. Add device id for D-Link DWM-222. Signed-off-by: Hector Martin Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ebe51f11105d..fe123153b1a5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2025,6 +2025,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ -- cgit v1.2.3 From 5665164015017a8abeb4b401656db678b43dafac Mon Sep 17 00:00:00 2001 From: Stefan Triller Date: Fri, 30 Jun 2017 14:44:03 +0200 Subject: USB: serial: cp210x: add support for Qivicon USB ZigBee dongle commit 9585e340db9f6cc1c0928d82c3a23cc4460f0a3f upstream. The German Telekom offers a ZigBee USB Stick under the brand name Qivicon for their SmartHome Home Base in its 1. Generation. The productId is not known by the according kernel module, this patch adds support for it. Signed-off-by: Stefan Triller Reviewed-by: Frans Klaver Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 84b444f69c9b..470b17b0c11b 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -136,6 +136,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ + { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ -- cgit v1.2.3 From 4fd8c366acac7a888b83a25d349476a111e49ac7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Aug 2017 11:54:12 -0700 Subject: USB: serial: pl2303: add new ATEN device id commit 3b6bcd3d093c698d32e93d4da57679b8fbc5e01e upstream. This adds a new ATEN device id for a new pl2303-based device. Reported-by: Peter Kuo Cc: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 1db4b61bdf7b..a51b28379850 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -49,6 +49,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC485) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 09d9be88209e..3b5a15d1dc0d 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -27,6 +27,7 @@ #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 +#define ATEN_PRODUCT_UC485 0x2021 #define ATEN_PRODUCT_ID2 0x2118 #define IODATA_VENDOR_ID 0x04bb -- cgit v1.2.3 From 821ccbe2937ece265dfc087c4fe272242b70350a Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 25 Jul 2017 09:31:34 -0500 Subject: usb: musb: fix tx fifo flush handling again commit 45d73860530a14c608f410b91c6c341777bfa85d upstream. commit 68fe05e2a451 ("usb: musb: fix tx fifo flush handling") drops the 1ms delay trying to solve the long disconnect time issue when application queued many tx urbs. However, the 1ms delay is needed for some use cases, for example, without the delay, reconnecting AR9271 WIFI dongle no longer works if the connection is dropped from the AP. So let's add back the 1ms delay in musb_h_tx_flush_fifo(), and solve the long disconnect time problem with a separate patch for usb_hcd_flush_endpoint(). Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 99beda9e241d..55c624f2a8c0 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -139,6 +139,7 @@ static void musb_h_tx_flush_fifo(struct musb_hw_ep *ep) "Could not flush host TX%d fifo: csr: %04x\n", ep->epnum, csr)) return; + mdelay(1); } } -- cgit v1.2.3 From 199a3f26e9d8c5712833084be362004c959ebb59 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 25 Jul 2017 23:58:50 +0200 Subject: USB: hcd: Mark secondary HCD as dead if the primary one died commit cd5a6a4fdaba150089af2afc220eae0fef74878a upstream. Make usb_hc_died() clear the HCD_FLAG_RH_RUNNING flag for the shared HCD and set HCD_FLAG_DEAD for it, in analogy with what is done for the primary one. Among other thigs, this prevents check_root_hub_suspended() from returning -EBUSY for dead HCDs which helps to work around system suspend issues in some situations. This actually fixes occasional suspend failures on one of my test machines. Suggested-by: Alan Stern Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 8a7c6bbaed7e..2d4fe5ae7e73 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2474,6 +2474,8 @@ void usb_hc_died (struct usb_hcd *hcd) } if (usb_hcd_is_primary_hcd(hcd) && hcd->shared_hcd) { hcd = hcd->shared_hcd; + clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); + set_bit(HCD_FLAG_DEAD, &hcd->flags); if (hcd->rh_registered) { clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); -- cgit v1.2.3 From c5347390e57a19b4f25fc58f3c77e2cc15d2e47a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 11:31:03 +0200 Subject: staging:iio:resolver:ad2s1210 fix negative IIO_ANGL_VEL read commit 105967ad68d2eb1a041bc041f9cf96af2a653b65 upstream. gcc-7 points out an older regression: drivers/staging/iio/resolver/ad2s1210.c: In function 'ad2s1210_read_raw': drivers/staging/iio/resolver/ad2s1210.c:515:42: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context] The original code had 'unsigned short' here, but incorrectly got converted to 'bool'. This reverts the regression and uses a normal type instead. Fixes: 29148543c521 ("staging:iio:resolver:ad2s1210 minimal chan spec conversion.") Signed-off-by: Arnd Bergmann Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/resolver/ad2s1210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/resolver/ad2s1210.c b/drivers/staging/iio/resolver/ad2s1210.c index 6b992634f009..598f0faa48c8 100644 --- a/drivers/staging/iio/resolver/ad2s1210.c +++ b/drivers/staging/iio/resolver/ad2s1210.c @@ -472,7 +472,7 @@ static int ad2s1210_read_raw(struct iio_dev *indio_dev, long m) { struct ad2s1210_state *st = iio_priv(indio_dev); - bool negative; + u16 negative; int ret = 0; u16 pos; s16 vel; -- cgit v1.2.3 From 1ca3869234d300b9a0543cf55dccce42c2fc77c1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 13 Jul 2017 15:13:41 +0200 Subject: iio: accel: bmc150: Always restore device to normal mode after suspend-resume commit e59e18989c68a8d7941005f81ad6abc4ca682de0 upstream. After probe we would put the device in normal mode, after a runtime suspend-resume we would put it back in normal mode. But for a regular suspend-resume we would only put it back in normal mode if triggers or events have been requested. This is not consistent and breaks reading raw values after a suspend-resume. This commit changes the regular resume path to also unconditionally put the device back in normal mode, fixing reading of raw values not working after a regular suspend-resume cycle. Signed-off-by: Hans de Goede Reviewed-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/bmc150-accel-core.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 59b380dbf27f..c3888822add1 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -193,7 +193,6 @@ struct bmc150_accel_data { struct regmap *regmap; int irq; struct bmc150_accel_interrupt interrupts[BMC150_ACCEL_INTERRUPTS]; - atomic_t active_intr; struct bmc150_accel_trigger triggers[BMC150_ACCEL_TRIGGERS]; struct mutex mutex; u8 fifo_mode, watermark; @@ -493,11 +492,6 @@ static int bmc150_accel_set_interrupt(struct bmc150_accel_data *data, int i, goto out_fix_power_state; } - if (state) - atomic_inc(&data->active_intr); - else - atomic_dec(&data->active_intr); - return 0; out_fix_power_state: @@ -1709,8 +1703,7 @@ static int bmc150_accel_resume(struct device *dev) struct bmc150_accel_data *data = iio_priv(indio_dev); mutex_lock(&data->mutex); - if (atomic_read(&data->active_intr)) - bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0); + bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0); bmc150_accel_fifo_set_mode(data); mutex_unlock(&data->mutex); -- cgit v1.2.3 From bbae08213e6e9811a2d3ece7ba95b1ac0e21f47c Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 21 Jun 2017 01:46:37 +0900 Subject: iio: light: tsl2563: use correct event code commit a3507e48d3f99a93a3056a34a5365f310434570f upstream. The TSL2563 driver provides three iio channels, two of which are raw ADC channels (channel 0 and channel 1) in the device and the remaining one is calculated by the two. The ADC channel 0 only supports programmable interrupt with threshold settings and this driver supports the event but the generated event code does not contain the corresponding iio channel type. This is going to change userspace ABI. Hopefully fixing this to be what it should always have been won't break any userspace code. Cc: Jonathan Cameron Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/tsl2563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c index 04598ae993d4..f0d3f749aa01 100644 --- a/drivers/iio/light/tsl2563.c +++ b/drivers/iio/light/tsl2563.c @@ -626,7 +626,7 @@ static irqreturn_t tsl2563_event_handler(int irq, void *private) struct tsl2563_chip *chip = iio_priv(dev_info); iio_push_event(dev_info, - IIO_UNMOD_EVENT_CODE(IIO_LIGHT, + IIO_UNMOD_EVENT_CODE(IIO_INTENSITY, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), -- cgit v1.2.3 From b189f8eb27153daaaa464d7bfec9c5a8ed2989a0 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 28 Jul 2017 16:22:31 +0100 Subject: staging: comedi: comedi_fops: do not call blocking ops when !TASK_RUNNING commit cef988642cdac44e910a27cb6e8166c96f86a0df upstream. Comedi's read and write file operation handlers (`comedi_read()` and `comedi_write()`) currently call `copy_to_user()` or `copy_from_user()` whilst in the `TASK_INTERRUPTIBLE` state, which falls foul of the `might_fault()` checks when enabled. Fix it by setting the current task state back to `TASK_RUNNING` a bit earlier before calling these functions. Reported-by: Piotr Gregor Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index ec9979070479..7458df4b6854 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2385,6 +2385,7 @@ static ssize_t comedi_write(struct file *file, const char __user *buf, continue; } + set_current_state(TASK_RUNNING); wp = async->buf_write_ptr; n1 = min(n, async->prealloc_bufsz - wp); n2 = n - n1; @@ -2517,6 +2518,8 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, } continue; } + + set_current_state(TASK_RUNNING); rp = async->buf_read_ptr; n1 = min(n, async->prealloc_bufsz - rp); n2 = n - n1; -- cgit v1.2.3 From a09ecc9345b6bf07812e55070328d7c0bdb2c498 Mon Sep 17 00:00:00 2001 From: Alan Swanson Date: Wed, 26 Jul 2017 12:03:33 +0100 Subject: uas: Add US_FL_IGNORE_RESIDUE for Initio Corporation INIC-3069 commit 89f23d51defcb94a5026d4b5da13faf4e1150a6f upstream. Similar to commit d595259fbb7a ("usb-storage: Add ignore-residue quirk for Initio INIC-3619") for INIC-3169 in unusual_devs.h but INIC-3069 already present in unusual_uas.h. Both in same controller IC family. Issue is that MakeMKV fails during key exchange with installed bluray drive with following error: 002004:0000 Error 'Scsi error - ILLEGAL REQUEST:COPY PROTECTION KEY EXCHANGE FAILURE - KEY NOT ESTABLISHED' occurred while issuing SCSI command AD010..080002400 to device 'SG:dev_11:0' Signed-off-by: Alan Swanson Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index cbea9f329e71..cde115359793 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -124,9 +124,9 @@ UNUSUAL_DEV(0x0bc2, 0xab2a, 0x0000, 0x9999, /* Reported-by: Benjamin Tissoires */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", - "", + "INIC-3069", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_ATA_1X), + US_FL_NO_ATA_1X | US_FL_IGNORE_RESIDUE), /* Reported-by: Tom Arild Naess */ UNUSUAL_DEV(0x152d, 0x0539, 0x0000, 0x9999, -- cgit v1.2.3 From 2db03a7fa0ddb671d2508325e1243bd46ab4c6dc Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 2 Aug 2017 21:06:35 +0900 Subject: usb: gadget: udc: renesas_usb3: Fix usb_gadget_giveback_request() calling commit aca5b9ebd096039657417c321a9252c696b359c2 upstream. According to the gadget.h, a "complete" function will always be called with interrupts disabled. However, sometimes usb3_request_done() function is called with interrupts enabled. So, this function should be held by spin_lock_irqsave() to disable interruption. Also, this driver has to call spin_unlock() to avoid spinlock recursion by this driver before calling usb_gadget_giveback_request(). Reported-by: Kazuya Mizuguchi Tested-by: Kazuya Mizuguchi Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index ba78e3f7aea8..d2cfefadca3c 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -685,21 +685,32 @@ static struct renesas_usb3_request *usb3_get_request(struct renesas_usb3_ep return usb3_req; } -static void usb3_request_done(struct renesas_usb3_ep *usb3_ep, - struct renesas_usb3_request *usb3_req, int status) +static void __usb3_request_done(struct renesas_usb3_ep *usb3_ep, + struct renesas_usb3_request *usb3_req, + int status) { struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep); - unsigned long flags; dev_dbg(usb3_to_dev(usb3), "giveback: ep%2d, %u, %u, %d\n", usb3_ep->num, usb3_req->req.length, usb3_req->req.actual, status); usb3_req->req.status = status; - spin_lock_irqsave(&usb3->lock, flags); usb3_ep->started = false; list_del_init(&usb3_req->queue); - spin_unlock_irqrestore(&usb3->lock, flags); + spin_unlock(&usb3->lock); usb_gadget_giveback_request(&usb3_ep->ep, &usb3_req->req); + spin_lock(&usb3->lock); +} + +static void usb3_request_done(struct renesas_usb3_ep *usb3_ep, + struct renesas_usb3_request *usb3_req, int status) +{ + struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep); + unsigned long flags; + + spin_lock_irqsave(&usb3->lock, flags); + __usb3_request_done(usb3_ep, usb3_req, status); + spin_unlock_irqrestore(&usb3->lock, flags); } static void usb3_irq_epc_pipe0_status_end(struct renesas_usb3 *usb3) -- cgit v1.2.3 From 7f737f10c1ee1749f32e77ac98d5bc3ffde876ce Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 2 Aug 2017 13:21:45 +0900 Subject: usb: renesas_usbhs: Fix UGCTRL2 value for R-Car Gen3 commit 2acecd58969897795cf015c9057ebd349a3fda8a upstream. The latest HW manual (Rev.0.55) shows us this UGCTRL2.VBUSSEL bit. If the bit sets to 1, the VBUS drive is controlled by phy related registers (called "UCOM Registers" on the manual). Since R-Car Gen3 environment will control VBUS by phy-rcar-gen3-usb2 driver, the UGCTRL2.VBUSSEL bit should be set to 1. So, this patch fixes the register's value. Otherwise, even if the ID pin indicates to peripheral, the R-Car will output USBn_PWEN to 1 when a host driver is running. Fixes: de18757e272d ("usb: renesas_usbhs: add R-Car Gen3 power control" Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/rcar3.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/rcar3.c b/drivers/usb/renesas_usbhs/rcar3.c index d544b331c9f2..02b67abfc2a1 100644 --- a/drivers/usb/renesas_usbhs/rcar3.c +++ b/drivers/usb/renesas_usbhs/rcar3.c @@ -20,9 +20,13 @@ /* Low Power Status register (LPSTS) */ #define LPSTS_SUSPM 0x4000 -/* USB General control register 2 (UGCTRL2), bit[31:6] should be 0 */ +/* + * USB General control register 2 (UGCTRL2) + * Remarks: bit[31:11] and bit[9:6] should be 0 + */ #define UGCTRL2_RESERVED_3 0x00000001 /* bit[3:0] should be B'0001 */ #define UGCTRL2_USB0SEL_OTG 0x00000030 +#define UGCTRL2_VBUSSEL 0x00000400 static void usbhs_write32(struct usbhs_priv *priv, u32 reg, u32 data) { @@ -34,7 +38,8 @@ static int usbhs_rcar3_power_ctrl(struct platform_device *pdev, { struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); - usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG); + usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG | + UGCTRL2_VBUSSEL); if (enable) { usbhs_bset(priv, LPSTS, LPSTS_SUSPM, LPSTS_SUSPM); -- cgit v1.2.3 From 7c2beb1c44326ecf61c49df4a87a7c75fc255b8a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 1 Aug 2017 10:41:56 -0400 Subject: USB: Check for dropped connection before switching to full speed commit 94c43b9897abf4ea366ed4dba027494e080c7050 upstream. Some buggy USB disk adapters disconnect and reconnect multiple times during the enumeration procedure. This may lead to a device connecting at full speed instead of high speed, because when the USB stack sees that a device isn't able to enumerate at high speed, it tries to hand the connection over to a full-speed companion controller. The logic for doing this is careful to check that the device is still connected. But this check is inadequate if the device disconnects and reconnects before the check is done. The symptom is that a device works, but much more slowly than it is capable of operating. The situation was made worse recently by commit 22547c4cc4fe ("usb: hub: Wait for connection to be reestablished after port reset"), which increases the delay following a reset before a disconnect is recognized, thus giving the device more time to reconnect. This patch makes the check more robust. If the device was disconnected at any time during enumeration, we will now skip the full-speed handover. Signed-off-by: Alan Stern Reported-and-tested-by: Zdenek Kabelac Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f953d6d647f2..80d4ef31ba8d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4728,7 +4728,8 @@ hub_power_remaining(struct usb_hub *hub) static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) { - int status, i; + int status = -ENODEV; + int i; unsigned unit_load; struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); @@ -4932,9 +4933,10 @@ loop: done: hub_port_disable(hub, port1, 1); - if (hcd->driver->relinquish_port && !hub->hdev->parent) - hcd->driver->relinquish_port(hcd, port1); - + if (hcd->driver->relinquish_port && !hub->hdev->parent) { + if (status != -ENOTCONN && status != -ENODEV) + hcd->driver->relinquish_port(hcd, port1); + } } /* Handle physical or logical connection change events. -- cgit v1.2.3 From 42d65cc89a2338c8d488cba8a8625951fac7b0a8 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 25 Jul 2017 09:31:33 -0500 Subject: usb: core: unlink urbs from the tail of the endpoint's urb_list commit 2eac13624364db5b5e1666ae0bb3a4d36bc56b6e upstream. While unlink an urb, if the urb has been programmed in the controller, the controller driver might do some hw related actions to tear down the urb. Currently usb_hcd_flush_endpoint() passes each urb from the head of the endpoint's urb_list to the controller driver, which could make the controller driver think each urb has been programmed and take the unnecessary actions for each urb. This patch changes the behavior in usb_hcd_flush_endpoint() to pass the urbs from the tail of the list, to avoid any unnecessary actions in an controller driver. Acked-by: Alan Stern Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 2d4fe5ae7e73..882fc4e08284 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1877,7 +1877,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev, /* No more submits can occur */ spin_lock_irq(&hcd_urb_list_lock); rescan: - list_for_each_entry (urb, &ep->urb_list, urb_list) { + list_for_each_entry_reverse(urb, &ep->urb_list, urb_list) { int is_in; if (urb->unlinked) -- cgit v1.2.3 From f4bbed570aef42ccd08852b05ace99d488ed3ddf Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 8 Aug 2017 17:51:27 +0800 Subject: usb: quirks: Add no-lpm quirk for Moshi USB to Ethernet Adapter commit 7496cfe5431f21da5d27a8388c326397e3f0a5db upstream. Moshi USB to Ethernet Adapter internally uses a Genesys Logic hub to connect to Realtek r8153. The Realtek r8153 ethernet does not work on the internal hub, no-lpm quirk can make it work. Since another r8153 dongle at my hand does not have the issue, so add the quirk to the Genesys Logic hub instead. Signed-off-by: Kai-Heng Feng Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 3116edfcdc18..65a87efc100e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -150,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* appletouch */ { USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ + { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, + /* Avision AV600U */ { USB_DEVICE(0x0638, 0x0a13), .driver_info = USB_QUIRK_STRING_FETCH_255 }, -- cgit v1.2.3 From 4cae4a23d9a4d2e0515d22590c23147809f7ea8e Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 4 Aug 2017 16:35:56 +0530 Subject: usb:xhci:Add quirk for Certain failing HP keyboard on reset after resume commit e788787ef4f9c24aafefc480a8da5f92b914e5e6 upstream. Certain HP keyboards would keep inputting a character automatically which is the wake-up key after S3 resume On some AMD platforms USB host fails to respond (by holding resume-K) to USB device (an HP keyboard) resume request within 1ms (TURSM) and ensures that resume is signaled for at least 20 ms (TDRSMDN), which is defined in USB 2.0 spec. The result is that the keyboard is out of function. In SNPS USB design, the host responds to the resume request only after system gets back to S0 and the host gets to functional after the internal HW restore operation that is more than 1 second after the initial resume request from the USB device. As a workaround for specific keyboard ID(HP Keyboards), applying port reset after resume when the keyboard is plugged in. Signed-off-by: Sandeep Singh Signed-off-by: Shyam Sundar S K cc: Nehal Shah Reviewed-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 1 + drivers/usb/host/pci-quirks.c | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 65a87efc100e..574da2b4529c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -252,6 +252,7 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { { USB_DEVICE(0x093a, 0x2500), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2510), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2521), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x03f0, 0x2b4a), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Optical Mouse M90/M100 */ { USB_DEVICE(0x046d, 0xc05a), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index c8989c62a262..5f4ca7890435 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -98,6 +98,7 @@ enum amd_chipset_gen { AMD_CHIPSET_HUDSON2, AMD_CHIPSET_BOLTON, AMD_CHIPSET_YANGTZE, + AMD_CHIPSET_TAISHAN, AMD_CHIPSET_UNKNOWN, }; @@ -141,6 +142,11 @@ static int amd_chipset_sb_type_init(struct amd_chipset_info *pinfo) pinfo->sb_type.gen = AMD_CHIPSET_SB700; else if (rev >= 0x40 && rev <= 0x4f) pinfo->sb_type.gen = AMD_CHIPSET_SB800; + } + pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, + 0x145c, NULL); + if (pinfo->smbus_dev) { + pinfo->sb_type.gen = AMD_CHIPSET_TAISHAN; } else { pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS, NULL); @@ -260,11 +266,12 @@ int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev) { /* Make sure amd chipset type has already been initialized */ usb_amd_find_chipset_info(); - if (amd_chipset.sb_type.gen != AMD_CHIPSET_YANGTZE) - return 0; - - dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n"); - return 1; + if (amd_chipset.sb_type.gen == AMD_CHIPSET_YANGTZE || + amd_chipset.sb_type.gen == AMD_CHIPSET_TAISHAN) { + dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n"); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(usb_hcd_amd_remote_wakeup_quirk); -- cgit v1.2.3 From eda1b3d42fad388bf285e8d7b45009b61b0e6f6e Mon Sep 17 00:00:00 2001 From: Stefan-Gabriel Mirea Date: Thu, 6 Jul 2017 10:06:41 +0100 Subject: iio: adc: vf610_adc: Fix VALT selection value for REFSEL bits commit d466d3c1217406b14b834335b5b4b33c0d45bd09 upstream. In order to select the alternate voltage reference pair (VALTH/VALTL), the right value for the REFSEL field in the ADCx_CFG register is "01", leading to 0x800 as register mask. See section 8.2.6.4 in the reference manual[1]. [1] http://www.nxp.com/docs/en/reference-manual/VFXXXRM.pdf Fixes: a775427632fd ("iio:adc:imx: add Freescale Vybrid vf610 adc driver") Signed-off-by: Stefan-Gabriel Mirea Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/vf610_adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index 228a003adeed..d1bde6d2721e 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -77,7 +77,7 @@ #define VF610_ADC_ADSTS_MASK 0x300 #define VF610_ADC_ADLPC_EN 0x80 #define VF610_ADC_ADHSC_EN 0x400 -#define VF610_ADC_REFSEL_VALT 0x100 +#define VF610_ADC_REFSEL_VALT 0x800 #define VF610_ADC_REFSEL_VBG 0x1000 #define VF610_ADC_ADTRG_HARD 0x2000 #define VF610_ADC_AVGS_8 0x4000 -- cgit v1.2.3 From a68978bb949a9be075b75a045250ce89d7550604 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Aug 2017 10:59:14 +0200 Subject: pnfs/blocklayout: require 64-bit sector_t commit 8a9d6e964d318533ba3d2901ce153ba317c99a89 upstream. The blocklayout code does not compile cleanly for a 32-bit sector_t, and also has no reliable checks for devices sizes, which makes it unsafe to use with a kernel that doesn't support large block devices. Signed-off-by: Christoph Hellwig Reported-by: Arnd Bergmann Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing") Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f31fd0dd92c6..b1daeafbea92 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -121,6 +121,7 @@ config PNFS_FILE_LAYOUT config PNFS_BLOCK tristate depends on NFS_V4_1 && BLK_DEV_DM + depends on 64BIT || LBDAF default NFS_V4 config PNFS_OBJLAYOUT -- cgit v1.2.3 From 7b6fff65ecf92b265f48249f041b8c06557960ed Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:50:53 +0800 Subject: pinctrl: sunxi: add a missing function of A10/A20 pinctrl driver commit d81ece747d8727bb8b1cfc9a20dbe62f09a4e35a upstream. The PH16 pin has a function with mux id 0x5, which is the DET pin of the "sim" (smart card reader) IP block. This function is missing in old versions of A10/A20 SoCs' datasheets and user manuals, so it's also missing in the old drivers. The newest A10 Datasheet V1.70 and A20 Datasheet V1.41 contain this pin function, and it's discovered during implementing R40 pinctrl driver. Add it to the driver. As we now merged A20 pinctrl driver to the A10 one, we need to only fix the A10 driver now. Fixes: f2821b1ca3a2 ("pinctrl: sunxi: Move Allwinner A10 pinctrl driver to a driver of its own") Signed-off-by: Icenowy Zheng Reviewed-by: Chen-Yu Tsai Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c index 862a096c5dba..be5c71df148d 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c @@ -811,6 +811,7 @@ static const struct sunxi_desc_pin sun4i_a10_pins[] = { SUNXI_FUNCTION(0x2, "lcd1"), /* D16 */ SUNXI_FUNCTION(0x3, "pata"), /* ATAD12 */ SUNXI_FUNCTION(0x4, "keypad"), /* IN6 */ + SUNXI_FUNCTION(0x5, "sim"), /* DET */ SUNXI_FUNCTION_IRQ(0x6, 16), /* EINT16 */ SUNXI_FUNCTION(0x7, "csi1")), /* D16 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 17), -- cgit v1.2.3 From 877fe62863d0d1dcac837bef9a7b0028f0cd8951 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 4 Aug 2017 19:26:34 +0300 Subject: pinctrl: intel: merrifield: Correct UART pin lists commit 5d996132d921c391af5f267123eca1a6a3148ecd upstream. UART pin lists consist GPIO numbers which is simply wrong. Replace it by pin numbers. Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-merrifield.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index 9931be6af0ca..04d6fd2be08c 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -343,9 +343,9 @@ static const struct pinctrl_pin_desc mrfld_pins[] = { static const unsigned int mrfld_sdio_pins[] = { 50, 51, 52, 53, 54, 55, 56 }; static const unsigned int mrfld_spi5_pins[] = { 90, 91, 92, 93, 94, 95, 96 }; -static const unsigned int mrfld_uart0_pins[] = { 124, 125, 126, 127 }; -static const unsigned int mrfld_uart1_pins[] = { 128, 129, 130, 131 }; -static const unsigned int mrfld_uart2_pins[] = { 132, 133, 134, 135 }; +static const unsigned int mrfld_uart0_pins[] = { 115, 116, 117, 118 }; +static const unsigned int mrfld_uart1_pins[] = { 119, 120, 121, 122 }; +static const unsigned int mrfld_uart2_pins[] = { 123, 124, 125, 126 }; static const unsigned int mrfld_pwm0_pins[] = { 144 }; static const unsigned int mrfld_pwm1_pins[] = { 145 }; static const unsigned int mrfld_pwm2_pins[] = { 132 }; -- cgit v1.2.3 From f642d29c23883f0d700f9f87107f1786dd2d7363 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 14 Jun 2017 13:49:29 +0900 Subject: pinctrl: uniphier: fix WARN_ON() of pingroups dump on LD11 commit 9592bc256d50481dfcdba93890e576a728fb373c upstream. The pingroups dump of debugfs hits WARN_ON() in pinctrl_groups_show(). Filling non-existing ports with '-1' turned out a bad idea. Fixes: 70f2f9c4cf25 ("pinctrl: uniphier: add UniPhier PH1-LD11 pinctrl driver") Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c | 364 ++++++++++++----------- 1 file changed, 192 insertions(+), 172 deletions(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c index 77a0236ee781..b190904c864a 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c @@ -508,57 +508,71 @@ static const unsigned usb1_pins[] = {48, 49}; static const int usb1_muxvals[] = {0, 0}; static const unsigned usb2_pins[] = {50, 51}; static const int usb2_muxvals[] = {0, 0}; -static const unsigned port_range_pins[] = { +static const unsigned port_range0_pins[] = { 159, 160, 161, 162, 163, 164, 165, 166, /* PORT0x */ 0, 1, 2, 3, 4, 5, 6, 7, /* PORT1x */ 8, 9, 10, 11, 12, 13, 14, 15, /* PORT2x */ - 16, 17, 18, -1, -1, -1, -1, -1, /* PORT3x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT4x */ - -1, -1, -1, 46, 47, 48, 49, 50, /* PORT5x */ - 51, -1, -1, 54, 55, 56, 57, 58, /* PORT6x */ + 16, 17, 18, /* PORT30-32 */ +}; +static const int port_range0_muxvals[] = { + 15, 15, 15, 15, 15, 15, 15, 15, /* PORT0x */ + 15, 15, 15, 15, 15, 15, 15, 15, /* PORT1x */ + 15, 15, 15, 15, 15, 15, 15, 15, /* PORT2x */ + 15, 15, 15, /* PORT30-32 */ +}; +static const unsigned port_range1_pins[] = { + 46, 47, 48, 49, 50, /* PORT53-57 */ + 51, /* PORT60 */ +}; +static const int port_range1_muxvals[] = { + 15, 15, 15, 15, 15, /* PORT53-57 */ + 15, /* PORT60 */ +}; +static const unsigned port_range2_pins[] = { + 54, 55, 56, 57, 58, /* PORT63-67 */ 59, 60, 69, 70, 71, 72, 73, 74, /* PORT7x */ 75, 76, 77, 78, 79, 80, 81, 82, /* PORT8x */ 83, 84, 85, 86, 87, 88, 89, 90, /* PORT9x */ 91, 92, 93, 94, 95, 96, 97, 98, /* PORT10x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT11x */ - 99, 100, 101, 102, 103, 104, 105, 106, /* PORT12x */ - 107, 108, 109, 110, 111, 112, 113, 114, /* PORT13x */ - 115, 116, 117, 118, 119, 120, 121, 122, /* PORT14x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT15x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT16x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT17x */ - 61, 62, 63, 64, 65, 66, 67, 68, /* PORT18x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT19x */ - 123, 124, 125, 126, 127, 128, 129, 130, /* PORT20x */ - 131, 132, 133, 134, 135, 136, 137, 138, /* PORT21x */ - 139, 140, 141, 142, -1, -1, -1, -1, /* PORT22x */ - 147, 148, 149, 150, 151, 152, 153, 154, /* PORT23x */ - 155, 156, 157, 143, 144, 145, 146, 158, /* PORT24x */ }; -static const int port_range_muxvals[] = { - 15, 15, 15, 15, 15, 15, 15, 15, /* PORT0x */ - 15, 15, 15, 15, 15, 15, 15, 15, /* PORT1x */ - 15, 15, 15, 15, 15, 15, 15, 15, /* PORT2x */ - 15, 15, 15, -1, -1, -1, -1, -1, /* PORT3x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT4x */ - -1, -1, -1, 15, 15, 15, 15, 15, /* PORT5x */ - 15, -1, -1, 15, 15, 15, 15, 15, /* PORT6x */ +static const int port_range2_muxvals[] = { + 15, 15, 15, 15, 15, /* PORT63-67 */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT7x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT8x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT9x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT10x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT11x */ +}; +static const unsigned port_range3_pins[] = { + 99, 100, 101, 102, 103, 104, 105, 106, /* PORT12x */ + 107, 108, 109, 110, 111, 112, 113, 114, /* PORT13x */ + 115, 116, 117, 118, 119, 120, 121, 122, /* PORT14x */ +}; +static const int port_range3_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT12x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT13x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT14x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT15x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT16x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT17x */ +}; +static const unsigned port_range4_pins[] = { + 61, 62, 63, 64, 65, 66, 67, 68, /* PORT18x */ +}; +static const int port_range4_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT18x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT19x */ +}; +static const unsigned port_range5_pins[] = { + 123, 124, 125, 126, 127, 128, 129, 130, /* PORT20x */ + 131, 132, 133, 134, 135, 136, 137, 138, /* PORT21x */ + 139, 140, 141, 142, /* PORT220-223 */ +}; +static const int port_range5_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT20x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT21x */ - 15, 15, 15, 15, -1, -1, -1, -1, /* PORT22x */ + 15, 15, 15, 15, /* PORT220-223 */ +}; +static const unsigned port_range6_pins[] = { + 147, 148, 149, 150, 151, 152, 153, 154, /* PORT23x */ + 155, 156, 157, 143, 144, 145, 146, 158, /* PORT24x */ +}; +static const int port_range6_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT23x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT24x */ }; @@ -607,147 +621,153 @@ static const struct uniphier_pinctrl_group uniphier_ld11_groups[] = { UNIPHIER_PINCTRL_GROUP(usb0), UNIPHIER_PINCTRL_GROUP(usb1), UNIPHIER_PINCTRL_GROUP(usb2), - UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range0), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range1), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range2), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range3), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range4), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range5), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range6), UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_IRQ(xirq), UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_IRQ(xirq_alternatives), - UNIPHIER_PINCTRL_GROUP_SINGLE(port00, port_range, 0), - UNIPHIER_PINCTRL_GROUP_SINGLE(port01, port_range, 1), - UNIPHIER_PINCTRL_GROUP_SINGLE(port02, port_range, 2), - UNIPHIER_PINCTRL_GROUP_SINGLE(port03, port_range, 3), - UNIPHIER_PINCTRL_GROUP_SINGLE(port04, port_range, 4), - UNIPHIER_PINCTRL_GROUP_SINGLE(port05, port_range, 5), - UNIPHIER_PINCTRL_GROUP_SINGLE(port06, port_range, 6), - UNIPHIER_PINCTRL_GROUP_SINGLE(port07, port_range, 7), - UNIPHIER_PINCTRL_GROUP_SINGLE(port10, port_range, 8), - UNIPHIER_PINCTRL_GROUP_SINGLE(port11, port_range, 9), - UNIPHIER_PINCTRL_GROUP_SINGLE(port12, port_range, 10), - UNIPHIER_PINCTRL_GROUP_SINGLE(port13, port_range, 11), - UNIPHIER_PINCTRL_GROUP_SINGLE(port14, port_range, 12), - UNIPHIER_PINCTRL_GROUP_SINGLE(port15, port_range, 13), - UNIPHIER_PINCTRL_GROUP_SINGLE(port16, port_range, 14), - UNIPHIER_PINCTRL_GROUP_SINGLE(port17, port_range, 15), - UNIPHIER_PINCTRL_GROUP_SINGLE(port20, port_range, 16), - UNIPHIER_PINCTRL_GROUP_SINGLE(port21, port_range, 17), - UNIPHIER_PINCTRL_GROUP_SINGLE(port22, port_range, 18), - UNIPHIER_PINCTRL_GROUP_SINGLE(port23, port_range, 19), - UNIPHIER_PINCTRL_GROUP_SINGLE(port24, port_range, 20), - UNIPHIER_PINCTRL_GROUP_SINGLE(port25, port_range, 21), - UNIPHIER_PINCTRL_GROUP_SINGLE(port26, port_range, 22), - UNIPHIER_PINCTRL_GROUP_SINGLE(port27, port_range, 23), - UNIPHIER_PINCTRL_GROUP_SINGLE(port30, port_range, 24), - UNIPHIER_PINCTRL_GROUP_SINGLE(port31, port_range, 25), - UNIPHIER_PINCTRL_GROUP_SINGLE(port32, port_range, 26), - UNIPHIER_PINCTRL_GROUP_SINGLE(port53, port_range, 43), - UNIPHIER_PINCTRL_GROUP_SINGLE(port54, port_range, 44), - UNIPHIER_PINCTRL_GROUP_SINGLE(port55, port_range, 45), - UNIPHIER_PINCTRL_GROUP_SINGLE(port56, port_range, 46), - UNIPHIER_PINCTRL_GROUP_SINGLE(port57, port_range, 47), - UNIPHIER_PINCTRL_GROUP_SINGLE(port60, port_range, 48), - UNIPHIER_PINCTRL_GROUP_SINGLE(port63, port_range, 51), - UNIPHIER_PINCTRL_GROUP_SINGLE(port64, port_range, 52), - UNIPHIER_PINCTRL_GROUP_SINGLE(port65, port_range, 53), - UNIPHIER_PINCTRL_GROUP_SINGLE(port66, port_range, 54), - UNIPHIER_PINCTRL_GROUP_SINGLE(port67, port_range, 55), - UNIPHIER_PINCTRL_GROUP_SINGLE(port70, port_range, 56), - UNIPHIER_PINCTRL_GROUP_SINGLE(port71, port_range, 57), - UNIPHIER_PINCTRL_GROUP_SINGLE(port72, port_range, 58), - UNIPHIER_PINCTRL_GROUP_SINGLE(port73, port_range, 59), - UNIPHIER_PINCTRL_GROUP_SINGLE(port74, port_range, 60), - UNIPHIER_PINCTRL_GROUP_SINGLE(port75, port_range, 61), - UNIPHIER_PINCTRL_GROUP_SINGLE(port76, port_range, 62), - UNIPHIER_PINCTRL_GROUP_SINGLE(port77, port_range, 63), - UNIPHIER_PINCTRL_GROUP_SINGLE(port80, port_range, 64), - UNIPHIER_PINCTRL_GROUP_SINGLE(port81, port_range, 65), - UNIPHIER_PINCTRL_GROUP_SINGLE(port82, port_range, 66), - UNIPHIER_PINCTRL_GROUP_SINGLE(port83, port_range, 67), - UNIPHIER_PINCTRL_GROUP_SINGLE(port84, port_range, 68), - UNIPHIER_PINCTRL_GROUP_SINGLE(port85, port_range, 69), - UNIPHIER_PINCTRL_GROUP_SINGLE(port86, port_range, 70), - UNIPHIER_PINCTRL_GROUP_SINGLE(port87, port_range, 71), - UNIPHIER_PINCTRL_GROUP_SINGLE(port90, port_range, 72), - UNIPHIER_PINCTRL_GROUP_SINGLE(port91, port_range, 73), - UNIPHIER_PINCTRL_GROUP_SINGLE(port92, port_range, 74), - UNIPHIER_PINCTRL_GROUP_SINGLE(port93, port_range, 75), - UNIPHIER_PINCTRL_GROUP_SINGLE(port94, port_range, 76), - UNIPHIER_PINCTRL_GROUP_SINGLE(port95, port_range, 77), - UNIPHIER_PINCTRL_GROUP_SINGLE(port96, port_range, 78), - UNIPHIER_PINCTRL_GROUP_SINGLE(port97, port_range, 79), - UNIPHIER_PINCTRL_GROUP_SINGLE(port100, port_range, 80), - UNIPHIER_PINCTRL_GROUP_SINGLE(port101, port_range, 81), - UNIPHIER_PINCTRL_GROUP_SINGLE(port102, port_range, 82), - UNIPHIER_PINCTRL_GROUP_SINGLE(port103, port_range, 83), - UNIPHIER_PINCTRL_GROUP_SINGLE(port104, port_range, 84), - UNIPHIER_PINCTRL_GROUP_SINGLE(port105, port_range, 85), - UNIPHIER_PINCTRL_GROUP_SINGLE(port106, port_range, 86), - UNIPHIER_PINCTRL_GROUP_SINGLE(port107, port_range, 87), - UNIPHIER_PINCTRL_GROUP_SINGLE(port120, port_range, 96), - UNIPHIER_PINCTRL_GROUP_SINGLE(port121, port_range, 97), - UNIPHIER_PINCTRL_GROUP_SINGLE(port122, port_range, 98), - UNIPHIER_PINCTRL_GROUP_SINGLE(port123, port_range, 99), - UNIPHIER_PINCTRL_GROUP_SINGLE(port124, port_range, 100), - UNIPHIER_PINCTRL_GROUP_SINGLE(port125, port_range, 101), - UNIPHIER_PINCTRL_GROUP_SINGLE(port126, port_range, 102), - UNIPHIER_PINCTRL_GROUP_SINGLE(port127, port_range, 103), - UNIPHIER_PINCTRL_GROUP_SINGLE(port130, port_range, 104), - UNIPHIER_PINCTRL_GROUP_SINGLE(port131, port_range, 105), - UNIPHIER_PINCTRL_GROUP_SINGLE(port132, port_range, 106), - UNIPHIER_PINCTRL_GROUP_SINGLE(port133, port_range, 107), - UNIPHIER_PINCTRL_GROUP_SINGLE(port134, port_range, 108), - UNIPHIER_PINCTRL_GROUP_SINGLE(port135, port_range, 109), - UNIPHIER_PINCTRL_GROUP_SINGLE(port136, port_range, 110), - UNIPHIER_PINCTRL_GROUP_SINGLE(port137, port_range, 111), - UNIPHIER_PINCTRL_GROUP_SINGLE(port140, port_range, 112), - UNIPHIER_PINCTRL_GROUP_SINGLE(port141, port_range, 113), - UNIPHIER_PINCTRL_GROUP_SINGLE(port142, port_range, 114), - UNIPHIER_PINCTRL_GROUP_SINGLE(port143, port_range, 115), - UNIPHIER_PINCTRL_GROUP_SINGLE(port144, port_range, 116), - UNIPHIER_PINCTRL_GROUP_SINGLE(port145, port_range, 117), - UNIPHIER_PINCTRL_GROUP_SINGLE(port146, port_range, 118), - UNIPHIER_PINCTRL_GROUP_SINGLE(port147, port_range, 119), - UNIPHIER_PINCTRL_GROUP_SINGLE(port180, port_range, 144), - UNIPHIER_PINCTRL_GROUP_SINGLE(port181, port_range, 145), - UNIPHIER_PINCTRL_GROUP_SINGLE(port182, port_range, 146), - UNIPHIER_PINCTRL_GROUP_SINGLE(port183, port_range, 147), - UNIPHIER_PINCTRL_GROUP_SINGLE(port184, port_range, 148), - UNIPHIER_PINCTRL_GROUP_SINGLE(port185, port_range, 149), - UNIPHIER_PINCTRL_GROUP_SINGLE(port186, port_range, 150), - UNIPHIER_PINCTRL_GROUP_SINGLE(port187, port_range, 151), - UNIPHIER_PINCTRL_GROUP_SINGLE(port200, port_range, 160), - UNIPHIER_PINCTRL_GROUP_SINGLE(port201, port_range, 161), - UNIPHIER_PINCTRL_GROUP_SINGLE(port202, port_range, 162), - UNIPHIER_PINCTRL_GROUP_SINGLE(port203, port_range, 163), - UNIPHIER_PINCTRL_GROUP_SINGLE(port204, port_range, 164), - UNIPHIER_PINCTRL_GROUP_SINGLE(port205, port_range, 165), - UNIPHIER_PINCTRL_GROUP_SINGLE(port206, port_range, 166), - UNIPHIER_PINCTRL_GROUP_SINGLE(port207, port_range, 167), - UNIPHIER_PINCTRL_GROUP_SINGLE(port210, port_range, 168), - UNIPHIER_PINCTRL_GROUP_SINGLE(port211, port_range, 169), - UNIPHIER_PINCTRL_GROUP_SINGLE(port212, port_range, 170), - UNIPHIER_PINCTRL_GROUP_SINGLE(port213, port_range, 171), - UNIPHIER_PINCTRL_GROUP_SINGLE(port214, port_range, 172), - UNIPHIER_PINCTRL_GROUP_SINGLE(port215, port_range, 173), - UNIPHIER_PINCTRL_GROUP_SINGLE(port216, port_range, 174), - UNIPHIER_PINCTRL_GROUP_SINGLE(port217, port_range, 175), - UNIPHIER_PINCTRL_GROUP_SINGLE(port220, port_range, 176), - UNIPHIER_PINCTRL_GROUP_SINGLE(port221, port_range, 177), - UNIPHIER_PINCTRL_GROUP_SINGLE(port222, port_range, 178), - UNIPHIER_PINCTRL_GROUP_SINGLE(port223, port_range, 179), - UNIPHIER_PINCTRL_GROUP_SINGLE(port230, port_range, 184), - UNIPHIER_PINCTRL_GROUP_SINGLE(port231, port_range, 185), - UNIPHIER_PINCTRL_GROUP_SINGLE(port232, port_range, 186), - UNIPHIER_PINCTRL_GROUP_SINGLE(port233, port_range, 187), - UNIPHIER_PINCTRL_GROUP_SINGLE(port234, port_range, 188), - UNIPHIER_PINCTRL_GROUP_SINGLE(port235, port_range, 189), - UNIPHIER_PINCTRL_GROUP_SINGLE(port236, port_range, 190), - UNIPHIER_PINCTRL_GROUP_SINGLE(port237, port_range, 191), - UNIPHIER_PINCTRL_GROUP_SINGLE(port240, port_range, 192), - UNIPHIER_PINCTRL_GROUP_SINGLE(port241, port_range, 193), - UNIPHIER_PINCTRL_GROUP_SINGLE(port242, port_range, 194), - UNIPHIER_PINCTRL_GROUP_SINGLE(port243, port_range, 195), - UNIPHIER_PINCTRL_GROUP_SINGLE(port244, port_range, 196), - UNIPHIER_PINCTRL_GROUP_SINGLE(port245, port_range, 197), - UNIPHIER_PINCTRL_GROUP_SINGLE(port246, port_range, 198), - UNIPHIER_PINCTRL_GROUP_SINGLE(port247, port_range, 199), + UNIPHIER_PINCTRL_GROUP_SINGLE(port00, port_range0, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port01, port_range0, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port02, port_range0, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port03, port_range0, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port04, port_range0, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port05, port_range0, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port06, port_range0, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port07, port_range0, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port10, port_range0, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port11, port_range0, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port12, port_range0, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port13, port_range0, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port14, port_range0, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port15, port_range0, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port16, port_range0, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port17, port_range0, 15), + UNIPHIER_PINCTRL_GROUP_SINGLE(port20, port_range0, 16), + UNIPHIER_PINCTRL_GROUP_SINGLE(port21, port_range0, 17), + UNIPHIER_PINCTRL_GROUP_SINGLE(port22, port_range0, 18), + UNIPHIER_PINCTRL_GROUP_SINGLE(port23, port_range0, 19), + UNIPHIER_PINCTRL_GROUP_SINGLE(port24, port_range0, 20), + UNIPHIER_PINCTRL_GROUP_SINGLE(port25, port_range0, 21), + UNIPHIER_PINCTRL_GROUP_SINGLE(port26, port_range0, 22), + UNIPHIER_PINCTRL_GROUP_SINGLE(port27, port_range0, 23), + UNIPHIER_PINCTRL_GROUP_SINGLE(port30, port_range0, 24), + UNIPHIER_PINCTRL_GROUP_SINGLE(port31, port_range0, 25), + UNIPHIER_PINCTRL_GROUP_SINGLE(port32, port_range0, 26), + UNIPHIER_PINCTRL_GROUP_SINGLE(port53, port_range1, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port54, port_range1, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port55, port_range1, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port56, port_range1, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port57, port_range1, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port60, port_range1, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port63, port_range2, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port64, port_range2, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port65, port_range2, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port66, port_range2, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port67, port_range2, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port70, port_range2, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port71, port_range2, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port72, port_range2, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port73, port_range2, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port74, port_range2, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port75, port_range2, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port76, port_range2, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port77, port_range2, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port80, port_range2, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port81, port_range2, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port82, port_range2, 15), + UNIPHIER_PINCTRL_GROUP_SINGLE(port83, port_range2, 16), + UNIPHIER_PINCTRL_GROUP_SINGLE(port84, port_range2, 17), + UNIPHIER_PINCTRL_GROUP_SINGLE(port85, port_range2, 18), + UNIPHIER_PINCTRL_GROUP_SINGLE(port86, port_range2, 19), + UNIPHIER_PINCTRL_GROUP_SINGLE(port87, port_range2, 20), + UNIPHIER_PINCTRL_GROUP_SINGLE(port90, port_range2, 21), + UNIPHIER_PINCTRL_GROUP_SINGLE(port91, port_range2, 22), + UNIPHIER_PINCTRL_GROUP_SINGLE(port92, port_range2, 23), + UNIPHIER_PINCTRL_GROUP_SINGLE(port93, port_range2, 24), + UNIPHIER_PINCTRL_GROUP_SINGLE(port94, port_range2, 25), + UNIPHIER_PINCTRL_GROUP_SINGLE(port95, port_range2, 26), + UNIPHIER_PINCTRL_GROUP_SINGLE(port96, port_range2, 27), + UNIPHIER_PINCTRL_GROUP_SINGLE(port97, port_range2, 28), + UNIPHIER_PINCTRL_GROUP_SINGLE(port100, port_range2, 29), + UNIPHIER_PINCTRL_GROUP_SINGLE(port101, port_range2, 30), + UNIPHIER_PINCTRL_GROUP_SINGLE(port102, port_range2, 31), + UNIPHIER_PINCTRL_GROUP_SINGLE(port103, port_range2, 32), + UNIPHIER_PINCTRL_GROUP_SINGLE(port104, port_range2, 33), + UNIPHIER_PINCTRL_GROUP_SINGLE(port105, port_range2, 34), + UNIPHIER_PINCTRL_GROUP_SINGLE(port106, port_range2, 35), + UNIPHIER_PINCTRL_GROUP_SINGLE(port107, port_range2, 36), + UNIPHIER_PINCTRL_GROUP_SINGLE(port120, port_range3, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port121, port_range3, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port122, port_range3, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port123, port_range3, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port124, port_range3, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port125, port_range3, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port126, port_range3, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port127, port_range3, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port130, port_range3, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port131, port_range3, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port132, port_range3, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port133, port_range3, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port134, port_range3, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port135, port_range3, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port136, port_range3, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port137, port_range3, 15), + UNIPHIER_PINCTRL_GROUP_SINGLE(port140, port_range3, 16), + UNIPHIER_PINCTRL_GROUP_SINGLE(port141, port_range3, 17), + UNIPHIER_PINCTRL_GROUP_SINGLE(port142, port_range3, 18), + UNIPHIER_PINCTRL_GROUP_SINGLE(port143, port_range3, 19), + UNIPHIER_PINCTRL_GROUP_SINGLE(port144, port_range3, 20), + UNIPHIER_PINCTRL_GROUP_SINGLE(port145, port_range3, 21), + UNIPHIER_PINCTRL_GROUP_SINGLE(port146, port_range3, 22), + UNIPHIER_PINCTRL_GROUP_SINGLE(port147, port_range3, 23), + UNIPHIER_PINCTRL_GROUP_SINGLE(port180, port_range4, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port181, port_range4, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port182, port_range4, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port183, port_range4, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port184, port_range4, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port185, port_range4, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port186, port_range4, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port187, port_range4, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port200, port_range5, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port201, port_range5, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port202, port_range5, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port203, port_range5, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port204, port_range5, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port205, port_range5, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port206, port_range5, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port207, port_range5, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port210, port_range5, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port211, port_range5, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port212, port_range5, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port213, port_range5, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port214, port_range5, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port215, port_range5, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port216, port_range5, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port217, port_range5, 15), + UNIPHIER_PINCTRL_GROUP_SINGLE(port220, port_range5, 16), + UNIPHIER_PINCTRL_GROUP_SINGLE(port221, port_range5, 17), + UNIPHIER_PINCTRL_GROUP_SINGLE(port222, port_range5, 18), + UNIPHIER_PINCTRL_GROUP_SINGLE(port223, port_range5, 19), + UNIPHIER_PINCTRL_GROUP_SINGLE(port230, port_range6, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port231, port_range6, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port232, port_range6, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port233, port_range6, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port234, port_range6, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port235, port_range6, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port236, port_range6, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port237, port_range6, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port240, port_range6, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port241, port_range6, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port242, port_range6, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port243, port_range6, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port244, port_range6, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port245, port_range6, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port246, port_range6, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port247, port_range6, 15), UNIPHIER_PINCTRL_GROUP_SINGLE(xirq0, xirq, 0), UNIPHIER_PINCTRL_GROUP_SINGLE(xirq1, xirq, 1), UNIPHIER_PINCTRL_GROUP_SINGLE(xirq2, xirq, 2), -- cgit v1.2.3 From 8495ab6ef94a92118ff50b27768bbcc10e1adc14 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 14 Jun 2017 13:49:30 +0900 Subject: pinctrl: uniphier: fix WARN_ON() of pingroups dump on LD20 commit 1bd303dc04c3f744474e77c153575087b657f7e1 upstream. The pingroups dump of debugfs hits WARN_ON() in pinctrl_groups_show(). Filling non-existing ports with '-1' turned out a bad idea. Fixes: 336306ee1f2d ("pinctrl: uniphier: add UniPhier PH1-LD20 pinctrl driver") Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c | 383 ++++++++++++----------- 1 file changed, 194 insertions(+), 189 deletions(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c index 96686336e3a3..73b828b22901 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c @@ -597,7 +597,7 @@ static const unsigned usb2_pins[] = {50, 51}; static const int usb2_muxvals[] = {0, 0}; static const unsigned usb3_pins[] = {52, 53}; static const int usb3_muxvals[] = {0, 0}; -static const unsigned port_range_pins[] = { +static const unsigned port_range0_pins[] = { 168, 169, 170, 171, 172, 173, 174, 175, /* PORT0x */ 0, 1, 2, 3, 4, 5, 6, 7, /* PORT1x */ 8, 9, 10, 11, 12, 13, 14, 15, /* PORT2x */ @@ -609,23 +609,8 @@ static const unsigned port_range_pins[] = { 75, 76, 77, 78, 79, 80, 81, 82, /* PORT8x */ 83, 84, 85, 86, 87, 88, 89, 90, /* PORT9x */ 91, 92, 93, 94, 95, 96, 97, 98, /* PORT10x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT11x */ - 99, 100, 101, 102, 103, 104, 105, 106, /* PORT12x */ - 107, 108, 109, 110, 111, 112, 113, 114, /* PORT13x */ - 115, 116, 117, 118, 119, 120, 121, 122, /* PORT14x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT15x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT16x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT17x */ - 61, 62, 63, 64, 65, 66, 67, 68, /* PORT18x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT19x */ - 123, 124, 125, 126, 127, 128, 129, 130, /* PORT20x */ - 131, 132, 133, 134, 135, 136, 137, 138, /* PORT21x */ - 139, 140, 141, 142, 143, 144, 145, 146, /* PORT22x */ - 147, 148, 149, 150, 151, 152, 153, 154, /* PORT23x */ - 155, 156, 157, 158, 159, 160, 161, 162, /* PORT24x */ - 163, 164, 165, 166, 167, /* PORT25x */ }; -static const int port_range_muxvals[] = { +static const int port_range0_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT0x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT1x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT2x */ @@ -637,21 +622,38 @@ static const int port_range_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT8x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT9x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT10x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT11x */ +}; +static const unsigned port_range1_pins[] = { + 99, 100, 101, 102, 103, 104, 105, 106, /* PORT12x */ + 107, 108, 109, 110, 111, 112, 113, 114, /* PORT13x */ + 115, 116, 117, 118, 119, 120, 121, 122, /* PORT14x */ +}; +static const int port_range1_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT12x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT13x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT14x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT15x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT16x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT17x */ +}; +static const unsigned port_range2_pins[] = { + 61, 62, 63, 64, 65, 66, 67, 68, /* PORT18x */ +}; +static const int port_range2_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT18x */ - -1, -1, -1, -1, -1, -1, -1, -1, /* PORT19x */ +}; +static const unsigned port_range3_pins[] = { + 123, 124, 125, 126, 127, 128, 129, 130, /* PORT20x */ + 131, 132, 133, 134, 135, 136, 137, 138, /* PORT21x */ + 139, 140, 141, 142, 143, 144, 145, 146, /* PORT22x */ + 147, 148, 149, 150, 151, 152, 153, 154, /* PORT23x */ + 155, 156, 157, 158, 159, 160, 161, 162, /* PORT24x */ + 163, 164, 165, 166, 167, /* PORT250-254 */ +}; +static const int port_range3_muxvals[] = { 15, 15, 15, 15, 15, 15, 15, 15, /* PORT20x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT21x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT22x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT23x */ 15, 15, 15, 15, 15, 15, 15, 15, /* PORT24x */ - 15, 15, 15, 15, 15, /* PORT25x */ + 15, 15, 15, 15, 15, /* PORT250-254 */ }; static const unsigned xirq_pins[] = { 149, 150, 151, 152, 153, 154, 155, 156, /* XIRQ0-7 */ @@ -695,174 +697,177 @@ static const struct uniphier_pinctrl_group uniphier_ld20_groups[] = { UNIPHIER_PINCTRL_GROUP(usb1), UNIPHIER_PINCTRL_GROUP(usb2), UNIPHIER_PINCTRL_GROUP(usb3), - UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range0), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range1), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range2), + UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_PORT(port_range3), UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_IRQ(xirq), UNIPHIER_PINCTRL_GROUP_GPIO_RANGE_IRQ(xirq_alternatives), - UNIPHIER_PINCTRL_GROUP_SINGLE(port00, port_range, 0), - UNIPHIER_PINCTRL_GROUP_SINGLE(port01, port_range, 1), - UNIPHIER_PINCTRL_GROUP_SINGLE(port02, port_range, 2), - UNIPHIER_PINCTRL_GROUP_SINGLE(port03, port_range, 3), - UNIPHIER_PINCTRL_GROUP_SINGLE(port04, port_range, 4), - UNIPHIER_PINCTRL_GROUP_SINGLE(port05, port_range, 5), - UNIPHIER_PINCTRL_GROUP_SINGLE(port06, port_range, 6), - UNIPHIER_PINCTRL_GROUP_SINGLE(port07, port_range, 7), - UNIPHIER_PINCTRL_GROUP_SINGLE(port10, port_range, 8), - UNIPHIER_PINCTRL_GROUP_SINGLE(port11, port_range, 9), - UNIPHIER_PINCTRL_GROUP_SINGLE(port12, port_range, 10), - UNIPHIER_PINCTRL_GROUP_SINGLE(port13, port_range, 11), - UNIPHIER_PINCTRL_GROUP_SINGLE(port14, port_range, 12), - UNIPHIER_PINCTRL_GROUP_SINGLE(port15, port_range, 13), - UNIPHIER_PINCTRL_GROUP_SINGLE(port16, port_range, 14), - UNIPHIER_PINCTRL_GROUP_SINGLE(port17, port_range, 15), - UNIPHIER_PINCTRL_GROUP_SINGLE(port20, port_range, 16), - UNIPHIER_PINCTRL_GROUP_SINGLE(port21, port_range, 17), - UNIPHIER_PINCTRL_GROUP_SINGLE(port22, port_range, 18), - UNIPHIER_PINCTRL_GROUP_SINGLE(port23, port_range, 19), - UNIPHIER_PINCTRL_GROUP_SINGLE(port24, port_range, 20), - UNIPHIER_PINCTRL_GROUP_SINGLE(port25, port_range, 21), - UNIPHIER_PINCTRL_GROUP_SINGLE(port26, port_range, 22), - UNIPHIER_PINCTRL_GROUP_SINGLE(port27, port_range, 23), - UNIPHIER_PINCTRL_GROUP_SINGLE(port30, port_range, 24), - UNIPHIER_PINCTRL_GROUP_SINGLE(port31, port_range, 25), - UNIPHIER_PINCTRL_GROUP_SINGLE(port32, port_range, 26), - UNIPHIER_PINCTRL_GROUP_SINGLE(port33, port_range, 27), - UNIPHIER_PINCTRL_GROUP_SINGLE(port34, port_range, 28), - UNIPHIER_PINCTRL_GROUP_SINGLE(port35, port_range, 29), - UNIPHIER_PINCTRL_GROUP_SINGLE(port36, port_range, 30), - UNIPHIER_PINCTRL_GROUP_SINGLE(port37, port_range, 31), - UNIPHIER_PINCTRL_GROUP_SINGLE(port40, port_range, 32), - UNIPHIER_PINCTRL_GROUP_SINGLE(port41, port_range, 33), - UNIPHIER_PINCTRL_GROUP_SINGLE(port42, port_range, 34), - UNIPHIER_PINCTRL_GROUP_SINGLE(port43, port_range, 35), - UNIPHIER_PINCTRL_GROUP_SINGLE(port44, port_range, 36), - UNIPHIER_PINCTRL_GROUP_SINGLE(port45, port_range, 37), - UNIPHIER_PINCTRL_GROUP_SINGLE(port46, port_range, 38), - UNIPHIER_PINCTRL_GROUP_SINGLE(port47, port_range, 39), - UNIPHIER_PINCTRL_GROUP_SINGLE(port50, port_range, 40), - UNIPHIER_PINCTRL_GROUP_SINGLE(port51, port_range, 41), - UNIPHIER_PINCTRL_GROUP_SINGLE(port52, port_range, 42), - UNIPHIER_PINCTRL_GROUP_SINGLE(port53, port_range, 43), - UNIPHIER_PINCTRL_GROUP_SINGLE(port54, port_range, 44), - UNIPHIER_PINCTRL_GROUP_SINGLE(port55, port_range, 45), - UNIPHIER_PINCTRL_GROUP_SINGLE(port56, port_range, 46), - UNIPHIER_PINCTRL_GROUP_SINGLE(port57, port_range, 47), - UNIPHIER_PINCTRL_GROUP_SINGLE(port60, port_range, 48), - UNIPHIER_PINCTRL_GROUP_SINGLE(port61, port_range, 49), - UNIPHIER_PINCTRL_GROUP_SINGLE(port62, port_range, 50), - UNIPHIER_PINCTRL_GROUP_SINGLE(port63, port_range, 51), - UNIPHIER_PINCTRL_GROUP_SINGLE(port64, port_range, 52), - UNIPHIER_PINCTRL_GROUP_SINGLE(port65, port_range, 53), - UNIPHIER_PINCTRL_GROUP_SINGLE(port66, port_range, 54), - UNIPHIER_PINCTRL_GROUP_SINGLE(port67, port_range, 55), - UNIPHIER_PINCTRL_GROUP_SINGLE(port70, port_range, 56), - UNIPHIER_PINCTRL_GROUP_SINGLE(port71, port_range, 57), - UNIPHIER_PINCTRL_GROUP_SINGLE(port72, port_range, 58), - UNIPHIER_PINCTRL_GROUP_SINGLE(port73, port_range, 59), - UNIPHIER_PINCTRL_GROUP_SINGLE(port74, port_range, 60), - UNIPHIER_PINCTRL_GROUP_SINGLE(port75, port_range, 61), - UNIPHIER_PINCTRL_GROUP_SINGLE(port76, port_range, 62), - UNIPHIER_PINCTRL_GROUP_SINGLE(port77, port_range, 63), - UNIPHIER_PINCTRL_GROUP_SINGLE(port80, port_range, 64), - UNIPHIER_PINCTRL_GROUP_SINGLE(port81, port_range, 65), - UNIPHIER_PINCTRL_GROUP_SINGLE(port82, port_range, 66), - UNIPHIER_PINCTRL_GROUP_SINGLE(port83, port_range, 67), - UNIPHIER_PINCTRL_GROUP_SINGLE(port84, port_range, 68), - UNIPHIER_PINCTRL_GROUP_SINGLE(port85, port_range, 69), - UNIPHIER_PINCTRL_GROUP_SINGLE(port86, port_range, 70), - UNIPHIER_PINCTRL_GROUP_SINGLE(port87, port_range, 71), - UNIPHIER_PINCTRL_GROUP_SINGLE(port90, port_range, 72), - UNIPHIER_PINCTRL_GROUP_SINGLE(port91, port_range, 73), - UNIPHIER_PINCTRL_GROUP_SINGLE(port92, port_range, 74), - UNIPHIER_PINCTRL_GROUP_SINGLE(port93, port_range, 75), - UNIPHIER_PINCTRL_GROUP_SINGLE(port94, port_range, 76), - UNIPHIER_PINCTRL_GROUP_SINGLE(port95, port_range, 77), - UNIPHIER_PINCTRL_GROUP_SINGLE(port96, port_range, 78), - UNIPHIER_PINCTRL_GROUP_SINGLE(port97, port_range, 79), - UNIPHIER_PINCTRL_GROUP_SINGLE(port100, port_range, 80), - UNIPHIER_PINCTRL_GROUP_SINGLE(port101, port_range, 81), - UNIPHIER_PINCTRL_GROUP_SINGLE(port102, port_range, 82), - UNIPHIER_PINCTRL_GROUP_SINGLE(port103, port_range, 83), - UNIPHIER_PINCTRL_GROUP_SINGLE(port104, port_range, 84), - UNIPHIER_PINCTRL_GROUP_SINGLE(port105, port_range, 85), - UNIPHIER_PINCTRL_GROUP_SINGLE(port106, port_range, 86), - UNIPHIER_PINCTRL_GROUP_SINGLE(port107, port_range, 87), - UNIPHIER_PINCTRL_GROUP_SINGLE(port120, port_range, 96), - UNIPHIER_PINCTRL_GROUP_SINGLE(port121, port_range, 97), - UNIPHIER_PINCTRL_GROUP_SINGLE(port122, port_range, 98), - UNIPHIER_PINCTRL_GROUP_SINGLE(port123, port_range, 99), - UNIPHIER_PINCTRL_GROUP_SINGLE(port124, port_range, 100), - UNIPHIER_PINCTRL_GROUP_SINGLE(port125, port_range, 101), - UNIPHIER_PINCTRL_GROUP_SINGLE(port126, port_range, 102), - UNIPHIER_PINCTRL_GROUP_SINGLE(port127, port_range, 103), - UNIPHIER_PINCTRL_GROUP_SINGLE(port130, port_range, 104), - UNIPHIER_PINCTRL_GROUP_SINGLE(port131, port_range, 105), - UNIPHIER_PINCTRL_GROUP_SINGLE(port132, port_range, 106), - UNIPHIER_PINCTRL_GROUP_SINGLE(port133, port_range, 107), - UNIPHIER_PINCTRL_GROUP_SINGLE(port134, port_range, 108), - UNIPHIER_PINCTRL_GROUP_SINGLE(port135, port_range, 109), - UNIPHIER_PINCTRL_GROUP_SINGLE(port136, port_range, 110), - UNIPHIER_PINCTRL_GROUP_SINGLE(port137, port_range, 111), - UNIPHIER_PINCTRL_GROUP_SINGLE(port140, port_range, 112), - UNIPHIER_PINCTRL_GROUP_SINGLE(port141, port_range, 113), - UNIPHIER_PINCTRL_GROUP_SINGLE(port142, port_range, 114), - UNIPHIER_PINCTRL_GROUP_SINGLE(port143, port_range, 115), - UNIPHIER_PINCTRL_GROUP_SINGLE(port144, port_range, 116), - UNIPHIER_PINCTRL_GROUP_SINGLE(port145, port_range, 117), - UNIPHIER_PINCTRL_GROUP_SINGLE(port146, port_range, 118), - UNIPHIER_PINCTRL_GROUP_SINGLE(port147, port_range, 119), - UNIPHIER_PINCTRL_GROUP_SINGLE(port180, port_range, 144), - UNIPHIER_PINCTRL_GROUP_SINGLE(port181, port_range, 145), - UNIPHIER_PINCTRL_GROUP_SINGLE(port182, port_range, 146), - UNIPHIER_PINCTRL_GROUP_SINGLE(port183, port_range, 147), - UNIPHIER_PINCTRL_GROUP_SINGLE(port184, port_range, 148), - UNIPHIER_PINCTRL_GROUP_SINGLE(port185, port_range, 149), - UNIPHIER_PINCTRL_GROUP_SINGLE(port186, port_range, 150), - UNIPHIER_PINCTRL_GROUP_SINGLE(port187, port_range, 151), - UNIPHIER_PINCTRL_GROUP_SINGLE(port200, port_range, 160), - UNIPHIER_PINCTRL_GROUP_SINGLE(port201, port_range, 161), - UNIPHIER_PINCTRL_GROUP_SINGLE(port202, port_range, 162), - UNIPHIER_PINCTRL_GROUP_SINGLE(port203, port_range, 163), - UNIPHIER_PINCTRL_GROUP_SINGLE(port204, port_range, 164), - UNIPHIER_PINCTRL_GROUP_SINGLE(port205, port_range, 165), - UNIPHIER_PINCTRL_GROUP_SINGLE(port206, port_range, 166), - UNIPHIER_PINCTRL_GROUP_SINGLE(port207, port_range, 167), - UNIPHIER_PINCTRL_GROUP_SINGLE(port210, port_range, 168), - UNIPHIER_PINCTRL_GROUP_SINGLE(port211, port_range, 169), - UNIPHIER_PINCTRL_GROUP_SINGLE(port212, port_range, 170), - UNIPHIER_PINCTRL_GROUP_SINGLE(port213, port_range, 171), - UNIPHIER_PINCTRL_GROUP_SINGLE(port214, port_range, 172), - UNIPHIER_PINCTRL_GROUP_SINGLE(port215, port_range, 173), - UNIPHIER_PINCTRL_GROUP_SINGLE(port216, port_range, 174), - UNIPHIER_PINCTRL_GROUP_SINGLE(port217, port_range, 175), - UNIPHIER_PINCTRL_GROUP_SINGLE(port220, port_range, 176), - UNIPHIER_PINCTRL_GROUP_SINGLE(port221, port_range, 177), - UNIPHIER_PINCTRL_GROUP_SINGLE(port222, port_range, 178), - UNIPHIER_PINCTRL_GROUP_SINGLE(port223, port_range, 179), - UNIPHIER_PINCTRL_GROUP_SINGLE(port224, port_range, 180), - UNIPHIER_PINCTRL_GROUP_SINGLE(port225, port_range, 181), - UNIPHIER_PINCTRL_GROUP_SINGLE(port226, port_range, 182), - UNIPHIER_PINCTRL_GROUP_SINGLE(port227, port_range, 183), - UNIPHIER_PINCTRL_GROUP_SINGLE(port230, port_range, 184), - UNIPHIER_PINCTRL_GROUP_SINGLE(port231, port_range, 185), - UNIPHIER_PINCTRL_GROUP_SINGLE(port232, port_range, 186), - UNIPHIER_PINCTRL_GROUP_SINGLE(port233, port_range, 187), - UNIPHIER_PINCTRL_GROUP_SINGLE(port234, port_range, 188), - UNIPHIER_PINCTRL_GROUP_SINGLE(port235, port_range, 189), - UNIPHIER_PINCTRL_GROUP_SINGLE(port236, port_range, 190), - UNIPHIER_PINCTRL_GROUP_SINGLE(port237, port_range, 191), - UNIPHIER_PINCTRL_GROUP_SINGLE(port240, port_range, 192), - UNIPHIER_PINCTRL_GROUP_SINGLE(port241, port_range, 193), - UNIPHIER_PINCTRL_GROUP_SINGLE(port242, port_range, 194), - UNIPHIER_PINCTRL_GROUP_SINGLE(port243, port_range, 195), - UNIPHIER_PINCTRL_GROUP_SINGLE(port244, port_range, 196), - UNIPHIER_PINCTRL_GROUP_SINGLE(port245, port_range, 197), - UNIPHIER_PINCTRL_GROUP_SINGLE(port246, port_range, 198), - UNIPHIER_PINCTRL_GROUP_SINGLE(port247, port_range, 199), - UNIPHIER_PINCTRL_GROUP_SINGLE(port250, port_range, 200), - UNIPHIER_PINCTRL_GROUP_SINGLE(port251, port_range, 201), - UNIPHIER_PINCTRL_GROUP_SINGLE(port252, port_range, 202), - UNIPHIER_PINCTRL_GROUP_SINGLE(port253, port_range, 203), - UNIPHIER_PINCTRL_GROUP_SINGLE(port254, port_range, 204), + UNIPHIER_PINCTRL_GROUP_SINGLE(port00, port_range0, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port01, port_range0, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port02, port_range0, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port03, port_range0, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port04, port_range0, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port05, port_range0, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port06, port_range0, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port07, port_range0, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port10, port_range0, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port11, port_range0, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port12, port_range0, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port13, port_range0, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port14, port_range0, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port15, port_range0, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port16, port_range0, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port17, port_range0, 15), + UNIPHIER_PINCTRL_GROUP_SINGLE(port20, port_range0, 16), + UNIPHIER_PINCTRL_GROUP_SINGLE(port21, port_range0, 17), + UNIPHIER_PINCTRL_GROUP_SINGLE(port22, port_range0, 18), + UNIPHIER_PINCTRL_GROUP_SINGLE(port23, port_range0, 19), + UNIPHIER_PINCTRL_GROUP_SINGLE(port24, port_range0, 20), + UNIPHIER_PINCTRL_GROUP_SINGLE(port25, port_range0, 21), + UNIPHIER_PINCTRL_GROUP_SINGLE(port26, port_range0, 22), + UNIPHIER_PINCTRL_GROUP_SINGLE(port27, port_range0, 23), + UNIPHIER_PINCTRL_GROUP_SINGLE(port30, port_range0, 24), + UNIPHIER_PINCTRL_GROUP_SINGLE(port31, port_range0, 25), + UNIPHIER_PINCTRL_GROUP_SINGLE(port32, port_range0, 26), + UNIPHIER_PINCTRL_GROUP_SINGLE(port33, port_range0, 27), + UNIPHIER_PINCTRL_GROUP_SINGLE(port34, port_range0, 28), + UNIPHIER_PINCTRL_GROUP_SINGLE(port35, port_range0, 29), + UNIPHIER_PINCTRL_GROUP_SINGLE(port36, port_range0, 30), + UNIPHIER_PINCTRL_GROUP_SINGLE(port37, port_range0, 31), + UNIPHIER_PINCTRL_GROUP_SINGLE(port40, port_range0, 32), + UNIPHIER_PINCTRL_GROUP_SINGLE(port41, port_range0, 33), + UNIPHIER_PINCTRL_GROUP_SINGLE(port42, port_range0, 34), + UNIPHIER_PINCTRL_GROUP_SINGLE(port43, port_range0, 35), + UNIPHIER_PINCTRL_GROUP_SINGLE(port44, port_range0, 36), + UNIPHIER_PINCTRL_GROUP_SINGLE(port45, port_range0, 37), + UNIPHIER_PINCTRL_GROUP_SINGLE(port46, port_range0, 38), + UNIPHIER_PINCTRL_GROUP_SINGLE(port47, port_range0, 39), + UNIPHIER_PINCTRL_GROUP_SINGLE(port50, port_range0, 40), + UNIPHIER_PINCTRL_GROUP_SINGLE(port51, port_range0, 41), + UNIPHIER_PINCTRL_GROUP_SINGLE(port52, port_range0, 42), + UNIPHIER_PINCTRL_GROUP_SINGLE(port53, port_range0, 43), + UNIPHIER_PINCTRL_GROUP_SINGLE(port54, port_range0, 44), + UNIPHIER_PINCTRL_GROUP_SINGLE(port55, port_range0, 45), + UNIPHIER_PINCTRL_GROUP_SINGLE(port56, port_range0, 46), + UNIPHIER_PINCTRL_GROUP_SINGLE(port57, port_range0, 47), + UNIPHIER_PINCTRL_GROUP_SINGLE(port60, port_range0, 48), + UNIPHIER_PINCTRL_GROUP_SINGLE(port61, port_range0, 49), + UNIPHIER_PINCTRL_GROUP_SINGLE(port62, port_range0, 50), + UNIPHIER_PINCTRL_GROUP_SINGLE(port63, port_range0, 51), + UNIPHIER_PINCTRL_GROUP_SINGLE(port64, port_range0, 52), + UNIPHIER_PINCTRL_GROUP_SINGLE(port65, port_range0, 53), + UNIPHIER_PINCTRL_GROUP_SINGLE(port66, port_range0, 54), + UNIPHIER_PINCTRL_GROUP_SINGLE(port67, port_range0, 55), + UNIPHIER_PINCTRL_GROUP_SINGLE(port70, port_range0, 56), + UNIPHIER_PINCTRL_GROUP_SINGLE(port71, port_range0, 57), + UNIPHIER_PINCTRL_GROUP_SINGLE(port72, port_range0, 58), + UNIPHIER_PINCTRL_GROUP_SINGLE(port73, port_range0, 59), + UNIPHIER_PINCTRL_GROUP_SINGLE(port74, port_range0, 60), + UNIPHIER_PINCTRL_GROUP_SINGLE(port75, port_range0, 61), + UNIPHIER_PINCTRL_GROUP_SINGLE(port76, port_range0, 62), + UNIPHIER_PINCTRL_GROUP_SINGLE(port77, port_range0, 63), + UNIPHIER_PINCTRL_GROUP_SINGLE(port80, port_range0, 64), + UNIPHIER_PINCTRL_GROUP_SINGLE(port81, port_range0, 65), + UNIPHIER_PINCTRL_GROUP_SINGLE(port82, port_range0, 66), + UNIPHIER_PINCTRL_GROUP_SINGLE(port83, port_range0, 67), + UNIPHIER_PINCTRL_GROUP_SINGLE(port84, port_range0, 68), + UNIPHIER_PINCTRL_GROUP_SINGLE(port85, port_range0, 69), + UNIPHIER_PINCTRL_GROUP_SINGLE(port86, port_range0, 70), + UNIPHIER_PINCTRL_GROUP_SINGLE(port87, port_range0, 71), + UNIPHIER_PINCTRL_GROUP_SINGLE(port90, port_range0, 72), + UNIPHIER_PINCTRL_GROUP_SINGLE(port91, port_range0, 73), + UNIPHIER_PINCTRL_GROUP_SINGLE(port92, port_range0, 74), + UNIPHIER_PINCTRL_GROUP_SINGLE(port93, port_range0, 75), + UNIPHIER_PINCTRL_GROUP_SINGLE(port94, port_range0, 76), + UNIPHIER_PINCTRL_GROUP_SINGLE(port95, port_range0, 77), + UNIPHIER_PINCTRL_GROUP_SINGLE(port96, port_range0, 78), + UNIPHIER_PINCTRL_GROUP_SINGLE(port97, port_range0, 79), + UNIPHIER_PINCTRL_GROUP_SINGLE(port100, port_range0, 80), + UNIPHIER_PINCTRL_GROUP_SINGLE(port101, port_range0, 81), + UNIPHIER_PINCTRL_GROUP_SINGLE(port102, port_range0, 82), + UNIPHIER_PINCTRL_GROUP_SINGLE(port103, port_range0, 83), + UNIPHIER_PINCTRL_GROUP_SINGLE(port104, port_range0, 84), + UNIPHIER_PINCTRL_GROUP_SINGLE(port105, port_range0, 85), + UNIPHIER_PINCTRL_GROUP_SINGLE(port106, port_range0, 86), + UNIPHIER_PINCTRL_GROUP_SINGLE(port107, port_range0, 87), + UNIPHIER_PINCTRL_GROUP_SINGLE(port120, port_range1, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port121, port_range1, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port122, port_range1, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port123, port_range1, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port124, port_range1, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port125, port_range1, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port126, port_range1, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port127, port_range1, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port130, port_range1, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port131, port_range1, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port132, port_range1, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port133, port_range1, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port134, port_range1, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port135, port_range1, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port136, port_range1, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port137, port_range1, 15), + UNIPHIER_PINCTRL_GROUP_SINGLE(port140, port_range1, 16), + UNIPHIER_PINCTRL_GROUP_SINGLE(port141, port_range1, 17), + UNIPHIER_PINCTRL_GROUP_SINGLE(port142, port_range1, 18), + UNIPHIER_PINCTRL_GROUP_SINGLE(port143, port_range1, 19), + UNIPHIER_PINCTRL_GROUP_SINGLE(port144, port_range1, 20), + UNIPHIER_PINCTRL_GROUP_SINGLE(port145, port_range1, 21), + UNIPHIER_PINCTRL_GROUP_SINGLE(port146, port_range1, 22), + UNIPHIER_PINCTRL_GROUP_SINGLE(port147, port_range1, 23), + UNIPHIER_PINCTRL_GROUP_SINGLE(port180, port_range2, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port181, port_range2, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port182, port_range2, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port183, port_range2, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port184, port_range2, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port185, port_range2, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port186, port_range2, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port187, port_range2, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port200, port_range3, 0), + UNIPHIER_PINCTRL_GROUP_SINGLE(port201, port_range3, 1), + UNIPHIER_PINCTRL_GROUP_SINGLE(port202, port_range3, 2), + UNIPHIER_PINCTRL_GROUP_SINGLE(port203, port_range3, 3), + UNIPHIER_PINCTRL_GROUP_SINGLE(port204, port_range3, 4), + UNIPHIER_PINCTRL_GROUP_SINGLE(port205, port_range3, 5), + UNIPHIER_PINCTRL_GROUP_SINGLE(port206, port_range3, 6), + UNIPHIER_PINCTRL_GROUP_SINGLE(port207, port_range3, 7), + UNIPHIER_PINCTRL_GROUP_SINGLE(port210, port_range3, 8), + UNIPHIER_PINCTRL_GROUP_SINGLE(port211, port_range3, 9), + UNIPHIER_PINCTRL_GROUP_SINGLE(port212, port_range3, 10), + UNIPHIER_PINCTRL_GROUP_SINGLE(port213, port_range3, 11), + UNIPHIER_PINCTRL_GROUP_SINGLE(port214, port_range3, 12), + UNIPHIER_PINCTRL_GROUP_SINGLE(port215, port_range3, 13), + UNIPHIER_PINCTRL_GROUP_SINGLE(port216, port_range3, 14), + UNIPHIER_PINCTRL_GROUP_SINGLE(port217, port_range3, 15), + UNIPHIER_PINCTRL_GROUP_SINGLE(port220, port_range3, 16), + UNIPHIER_PINCTRL_GROUP_SINGLE(port221, port_range3, 17), + UNIPHIER_PINCTRL_GROUP_SINGLE(port222, port_range3, 18), + UNIPHIER_PINCTRL_GROUP_SINGLE(port223, port_range3, 19), + UNIPHIER_PINCTRL_GROUP_SINGLE(port224, port_range3, 20), + UNIPHIER_PINCTRL_GROUP_SINGLE(port225, port_range3, 21), + UNIPHIER_PINCTRL_GROUP_SINGLE(port226, port_range3, 22), + UNIPHIER_PINCTRL_GROUP_SINGLE(port227, port_range3, 23), + UNIPHIER_PINCTRL_GROUP_SINGLE(port230, port_range3, 24), + UNIPHIER_PINCTRL_GROUP_SINGLE(port231, port_range3, 25), + UNIPHIER_PINCTRL_GROUP_SINGLE(port232, port_range3, 26), + UNIPHIER_PINCTRL_GROUP_SINGLE(port233, port_range3, 27), + UNIPHIER_PINCTRL_GROUP_SINGLE(port234, port_range3, 28), + UNIPHIER_PINCTRL_GROUP_SINGLE(port235, port_range3, 29), + UNIPHIER_PINCTRL_GROUP_SINGLE(port236, port_range3, 30), + UNIPHIER_PINCTRL_GROUP_SINGLE(port237, port_range3, 31), + UNIPHIER_PINCTRL_GROUP_SINGLE(port240, port_range3, 32), + UNIPHIER_PINCTRL_GROUP_SINGLE(port241, port_range3, 33), + UNIPHIER_PINCTRL_GROUP_SINGLE(port242, port_range3, 34), + UNIPHIER_PINCTRL_GROUP_SINGLE(port243, port_range3, 35), + UNIPHIER_PINCTRL_GROUP_SINGLE(port244, port_range3, 36), + UNIPHIER_PINCTRL_GROUP_SINGLE(port245, port_range3, 37), + UNIPHIER_PINCTRL_GROUP_SINGLE(port246, port_range3, 38), + UNIPHIER_PINCTRL_GROUP_SINGLE(port247, port_range3, 39), + UNIPHIER_PINCTRL_GROUP_SINGLE(port250, port_range3, 40), + UNIPHIER_PINCTRL_GROUP_SINGLE(port251, port_range3, 41), + UNIPHIER_PINCTRL_GROUP_SINGLE(port252, port_range3, 42), + UNIPHIER_PINCTRL_GROUP_SINGLE(port253, port_range3, 43), + UNIPHIER_PINCTRL_GROUP_SINGLE(port254, port_range3, 44), UNIPHIER_PINCTRL_GROUP_SINGLE(xirq0, xirq, 0), UNIPHIER_PINCTRL_GROUP_SINGLE(xirq1, xirq, 1), UNIPHIER_PINCTRL_GROUP_SINGLE(xirq2, xirq, 2), -- cgit v1.2.3 From 8cbc0b49ca8db1f83835f86d337195947632aa3f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jun 2017 23:33:35 +0200 Subject: pinctrl: samsung: Remove bogus irq_[un]mask from resource management commit 3fa53ec2ed885b0aec3f0472e3b4a8a6f1cd748c upstream. The irq chip callbacks irq_request/release_resources() have absolutely no business with masking and unmasking the irq. The core code unmasks the interrupt after complete setup and masks it before invoking irq_release_resources(). The unmask is actually harmful as it happens before the interrupt is completely initialized in __setup_irq(). Remove it. Fixes: f6a8249f9e55 ("pinctrl: exynos: Lock GPIOs as interrupts when used as EINTs") Signed-off-by: Thomas Gleixner Cc: Krzysztof Kozlowski Cc: Sylwester Nawrocki Cc: Linus Walleij Cc: Kukjin Kim Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-gpio@vger.kernel.org Acked-by: Tomasz Figa Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/samsung/pinctrl-exynos.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index d32fa2b5ff82..e8aee6d88a40 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -195,8 +195,6 @@ static int exynos_irq_request_resources(struct irq_data *irqd) spin_unlock_irqrestore(&bank->slock, flags); - exynos_irq_unmask(irqd); - return 0; } @@ -217,8 +215,6 @@ static void exynos_irq_release_resources(struct irq_data *irqd) shift = irqd->hwirq * bank_type->fld_width[PINCFG_TYPE_FUNC]; mask = (1 << bank_type->fld_width[PINCFG_TYPE_FUNC]) - 1; - exynos_irq_mask(irqd); - spin_lock_irqsave(&bank->slock, flags); con = readl(d->virt_base + reg_con); -- cgit v1.2.3 From 5e5a510455323634dc3df7a2d58c985b730c31ad Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 23 May 2017 16:09:19 +0200 Subject: pinctrl: meson-gxbb: Add missing GPIODV_18 pin entry commit 34e61801a3b9df74b69f0e359d64a197a77dd6ac upstream. GPIODV_18 entry was missing in the original driver push. Fixes: 468c234f9ed7 ("pinctrl: amlogic: Add support for Amlogic Meson GXBB SoC") Signed-off-by: Neil Armstrong Reviewed-by: Jerome Brunet Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index c3928aa3fefa..7511723c6b05 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -85,6 +85,7 @@ static const struct pinctrl_pin_desc meson_gxbb_periphs_pins[] = { MESON_PIN(GPIODV_15, EE_OFF), MESON_PIN(GPIODV_16, EE_OFF), MESON_PIN(GPIODV_17, EE_OFF), + MESON_PIN(GPIODV_18, EE_OFF), MESON_PIN(GPIODV_19, EE_OFF), MESON_PIN(GPIODV_20, EE_OFF), MESON_PIN(GPIODV_21, EE_OFF), -- cgit v1.2.3 From eea1ec08f8a5f8ab57b66f47d2673089c9ebea69 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 30 Jul 2017 21:28:15 +0100 Subject: MIPS: DEC: Fix an int-handler.S CPU_DADDI_WORKAROUNDS regression commit 68fe55680d0f3342969f49412fceabb90bdfadba upstream. Fix a commit 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") regression and remove assembly errors: arch/mips/dec/int-handler.S: Assembler messages: arch/mips/dec/int-handler.S:162: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:163: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:229: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:230: Error: Macro used $at after ".set noat" triggering with with the CPU_DADDI_WORKAROUNDS option set and the DADDIU instruction. This is because with that option in place the instruction becomes a macro, which expands to an LI/DADDU (or actually ADDIU/DADDU) sequence that uses $at as a temporary register. With CPU_DADDI_WORKAROUNDS we only support `-msym32' compilation though, and this is already enforced in arch/mips/Makefile, so choose the 32-bit expansion variant for the supported configurations and then replace the 64-bit variant with #error just in case. Fixes: 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16893/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/dec/int-handler.S | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 1910223a9c02..cea2bb1621e6 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -147,23 +147,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1, cpu_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, cpu_mask_nr_tbl lui t1, %hi(cpu_mask_nr_tbl) addiu t1, %lo(cpu_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, cpu_mask_nr_tbl - .set push - .set noat - lui t1, %highest(cpu_mask_nr_tbl) - lui AT, %hi(cpu_mask_nr_tbl) - daddiu t1, t1, %higher(cpu_mask_nr_tbl) - daddiu AT, AT, %lo(cpu_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 1: lw t2,(t1) nop @@ -214,23 +203,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1,asic_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, asic_mask_nr_tbl lui t1, %hi(asic_mask_nr_tbl) addiu t1, %lo(asic_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, asic_mask_nr_tbl - .set push - .set noat - lui t1, %highest(asic_mask_nr_tbl) - lui AT, %hi(asic_mask_nr_tbl) - daddiu t1, t1, %higher(asic_mask_nr_tbl) - daddiu AT, AT, %lo(asic_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 2: lw t2,(t1) nop -- cgit v1.2.3 From a97a16f18c4895e41951a44d27af8af0b8f1d897 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Aug 2017 13:44:13 -0700 Subject: Linux 4.9.44 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 77953bf3f40a..3e95dfdbe572 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 43 +SUBLEVEL = 44 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From 5170d210efe44e947abecd60fd0805e970add4fc Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 16:35:29 +0800 Subject: netfilter: nf_ct_ext: fix possible panic after nf_ct_extend_unregister commit 9c3f3794926a997b1cab6c42480ff300efa2d162 upstream. If one cpu is doing nf_ct_extend_unregister while another cpu is doing __nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover, there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to NULL, so it's possible that we may access invalid pointer. But actually, most of the ct extends are built-in, so the problem listed above will not happen. However, there are two exceptions: NF_CT_EXT_NAT and NF_CT_EXT_SYNPROXY. For _EXT_NAT, the panic will not happen, since adding the nat extend and unregistering the nat extend are located in the same file(nf_nat_core.c), this means that after the nat module is removed, we cannot add the nat extend too. For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while synproxy extend unregister will be done by synproxy_core_exit. So after nf_synproxy_core.ko is removed, we may still try to add the synproxy extend, then kernel panic may happen. I know it's very hard to reproduce this issue, but I can play a tricky game to make it happen very easily :) Step 1. Enable SYNPROXY for tcp dport 1234 at FORWARD hook: # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook. Also note, in the userspace we only add a 20s' delay, then reinject the syn packet to the kernel: # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1 Step 3. Using "nc 2.2.2.2 1234" to connect the server. Step 4. Now remove the nf_synproxy_core.ko quickly: # iptables -F FORWARD # rmmod ipt_SYNPROXY # rmmod nf_synproxy_core Step 5. After 20s' delay, the syn packet is reinjected to the kernel. Now you will see the panic like this: kernel BUG at net/netfilter/nf_conntrack_extend.c:91! Call Trace: ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack] init_conntrack+0x12b/0x600 [nf_conntrack] nf_conntrack_in+0x4cc/0x580 [nf_conntrack] ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4] nf_reinject+0x104/0x270 nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue] ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue] ? nla_parse+0xa0/0x100 nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink] [...] One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e. introduce nf_conntrack_synproxy.c and only do ct extend register and unregister in it, similar to nf_conntrack_timeout.c. But having such a obscure restriction of nf_ct_extend_unregister is not a good idea, so we should invoke synchronize_rcu after set nf_ct_ext_types to NULL, and check the NULL pointer when do __nf_ct_ext_add_length. Then it will be easier if we add new ct extend in the future. Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is unnecessary anymore, remove it too. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Cc: Stefan Bader Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_extend.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 02bcf00c2492..008299b7f78f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); -- cgit v1.2.3 From adcfbb2d9e386517db84ac56fa3e4abd56db75b3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Aug 2017 13:00:36 +0200 Subject: audit: Fix use after free in audit_remove_watch_rule() commit d76036ab47eafa6ce52b69482e91ca3ba337d6d6 upstream. audit_remove_watch_rule() drops watch's reference to parent but then continues to work with it. That is not safe as parent can get freed once we drop our reference. The following is a trivial reproducer: mount -o loop image /mnt touch /mnt/file auditctl -w /mnt/file -p wax umount /mnt auditctl -D Grab our own reference in audit_remove_watch_rule() earlier to make sure mark does not get freed under us. Reported-by: Tony Jones Signed-off-by: Jan Kara Tested-by: Tony Jones Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/audit_watch.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 0d302a87f21b..690e1e3c59f7 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule) list_del(&krule->rlist); if (list_empty(&watch->rules)) { + /* + * audit_remove_watch() drops our reference to 'parent' which + * can get freed. Grab our own reference to be safe. + */ + audit_get_parent(parent); audit_remove_watch(watch); - - if (list_empty(&parent->watches)) { - audit_get_parent(parent); + if (list_empty(&parent->watches)) fsnotify_destroy_mark(&parent->mark, audit_watch_group); - audit_put_parent(parent); - } + audit_put_parent(parent); } } -- cgit v1.2.3 From 93f5a0318aae53541a4c53eca444917755b15a40 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sat, 12 Aug 2017 23:36:47 +0200 Subject: parisc: pci memory bar assignment fails with 64bit kernels on dino/cujo commit 4098116039911e8870d84c975e2ec22dab65a909 upstream. For 64bit kernels the lmmio_space_offset of the host bridge window isn't set correctly on systems with dino/cujo PCI host bridges. This leads to not assigned memory bars and failing drivers, which need to use these bars. Signed-off-by: Thomas Bogendoerfer Acked-by: Helge Deller Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/dino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 5c63b920b471..ed92c1254cff 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -956,7 +956,7 @@ static int __init dino_probe(struct parisc_device *dev) dino_dev->hba.dev = dev; dino_dev->hba.base_addr = ioremap_nocache(hpa, 4096); - dino_dev->hba.lmmio_space_offset = 0; /* CPU addrs == bus addrs */ + dino_dev->hba.lmmio_space_offset = PCI_F_EXTEND; spin_lock_init(&dino_dev->dinosaur_pen); dino_dev->hba.iommu = ccio_get_iommu(dev); -- cgit v1.2.3 From 24e7f540245d555f6bcd930a677f8f9056028853 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 2 Aug 2017 16:40:47 +0800 Subject: crypto: ixp4xx - Fix error handling path in 'aead_perform()' commit 28389575a8cf933a5f3c378556b9f4d3cce0efd2 upstream. In commit 0f987e25cb8a, the source processing has been moved in front of the destination processing, but the error handling path has not been modified accordingly. Free resources in the correct order to avoid some leaks. Fixes: 0f987e25cb8a ("crypto: ixp4xx - Fix false lastlen uninitialised warning") Reported-by: Christophe JAILLET Signed-off-by: Herbert Xu Reviewed-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ixp4xx_crypto.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 7868765a70c5..b54af97a20bb 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -1074,7 +1074,7 @@ static int aead_perform(struct aead_request *req, int encrypt, req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags, &crypt->icv_rev_aes); if (unlikely(!req_ctx->hmac_virt)) - goto free_buf_src; + goto free_buf_dst; if (!encrypt) { scatterwalk_map_and_copy(req_ctx->hmac_virt, req->src, cryptlen, authsize, 0); @@ -1089,10 +1089,10 @@ static int aead_perform(struct aead_request *req, int encrypt, BUG_ON(qmgr_stat_overflow(SEND_QID)); return -EINPROGRESS; -free_buf_src: - free_buf_chain(dev, req_ctx->src, crypt->src_buf); free_buf_dst: free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); +free_buf_src: + free_buf_chain(dev, req_ctx->src, crypt->src_buf); crypt->ctl_flags = CTL_FLAG_UNUSED; return -ENOMEM; } -- cgit v1.2.3 From c3e8a12e701c587759e3f1d1e9fb8fc262eb5a26 Mon Sep 17 00:00:00 2001 From: "megha.dey@linux.intel.com" Date: Wed, 2 Aug 2017 13:49:09 -0700 Subject: crypto: x86/sha1 - Fix reads beyond the number of blocks passed commit 8861249c740fc4af9ddc5aee321eafefb960d7c6 upstream. It was reported that the sha1 AVX2 function(sha1_transform_avx2) is reading ahead beyond its intended data, and causing a crash if the next block is beyond page boundary: http://marc.info/?l=linux-crypto-vger&m=149373371023377 This patch makes sure that there is no overflow for any buffer length. It passes the tests written by Jan Stancek that revealed this problem: https://github.com/jstancek/sha1-avx2-crash I have re-enabled sha1-avx2 by reverting commit b82ce24426a4071da9529d726057e4e642948667 Fixes: b82ce24426a4 ("crypto: sha1-ssse3 - Disable avx2") Originally-by: Ilya Albrekht Tested-by: Jan Stancek Signed-off-by: Megha Dey Reported-by: Jan Stancek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 67 ++++++++++++++++++---------------- arch/x86/crypto/sha1_ssse3_glue.c | 2 +- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1cd792db15ef..1eab79c9ac48 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -117,11 +117,10 @@ .set T1, REG_T1 .endm -#define K_BASE %r8 #define HASH_PTR %r9 +#define BLOCKS_CTR %r8 #define BUFFER_PTR %r10 #define BUFFER_PTR2 %r13 -#define BUFFER_END %r11 #define PRECALC_BUF %r14 #define WK_BUF %r15 @@ -205,14 +204,14 @@ * blended AVX2 and ALU instruction scheduling * 1 vector iteration per 8 rounds */ - vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP + vmovdqu (i * 2)(BUFFER_PTR), W_TMP .elseif ((i & 7) == 1) - vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\ + vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\ WY_TMP, WY_TMP .elseif ((i & 7) == 2) vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY .elseif ((i & 7) == 4) - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP .elseif ((i & 7) == 7) vmovdqu WY_TMP, PRECALC_WK(i&~7) @@ -255,7 +254,7 @@ vpxor WY, WY_TMP, WY_TMP .elseif ((i & 7) == 7) vpxor WY_TMP2, WY_TMP, WY - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP vmovdqu WY_TMP, PRECALC_WK(i&~7) PRECALC_ROTATE_WY @@ -291,7 +290,7 @@ vpsrld $30, WY, WY vpor WY, WY_TMP, WY .elseif ((i & 7) == 7) - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP vmovdqu WY_TMP, PRECALC_WK(i&~7) PRECALC_ROTATE_WY @@ -446,6 +445,16 @@ .endm +/* Add constant only if (%2 > %3) condition met (uses RTA as temp) + * %1 + %2 >= %3 ? %4 : 0 + */ +.macro ADD_IF_GE a, b, c, d + mov \a, RTA + add $\d, RTA + cmp $\c, \b + cmovge RTA, \a +.endm + /* * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining */ @@ -463,13 +472,16 @@ lea (2*4*80+32)(%rsp), WK_BUF # Precalc WK for first 2 blocks - PRECALC_OFFSET = 0 + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64 .set i, 0 .rept 160 PRECALC i .set i, i + 1 .endr - PRECALC_OFFSET = 128 + + /* Go to next block if needed */ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128 + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 xchg WK_BUF, PRECALC_BUF .align 32 @@ -479,8 +491,8 @@ _loop: * we use K_BASE value as a signal of a last block, * it is set below by: cmovae BUFFER_PTR, K_BASE */ - cmp K_BASE, BUFFER_PTR - jne _begin + test BLOCKS_CTR, BLOCKS_CTR + jnz _begin .align 32 jmp _end .align 32 @@ -512,10 +524,10 @@ _loop0: .set j, j+2 .endr - add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */ - cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */ - cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ - + /* Update Counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128 /* * rounds * 60,62,64,66,68 @@ -532,8 +544,8 @@ _loop0: UPDATE_HASH 12(HASH_PTR), D UPDATE_HASH 16(HASH_PTR), E - cmp K_BASE, BUFFER_PTR /* is current block the last one? */ - je _loop + test BLOCKS_CTR, BLOCKS_CTR + jz _loop mov TB, B @@ -575,10 +587,10 @@ _loop2: .set j, j+2 .endr - add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */ - - cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */ - cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ + /* update counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 jmp _loop3 _loop3: @@ -641,19 +653,12 @@ _loop3: avx2_zeroupper - lea K_XMM_AR(%rip), K_BASE - + /* Setup initial values */ mov CTX, HASH_PTR mov BUF, BUFFER_PTR - lea 64(BUF), BUFFER_PTR2 - - shl $6, CNT /* mul by 64 */ - add BUF, CNT - add $64, CNT - mov CNT, BUFFER_END - cmp BUFFER_END, BUFFER_PTR2 - cmovae K_BASE, BUFFER_PTR2 + mov BUF, BUFFER_PTR2 + mov CNT, BLOCKS_CTR xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f960a043cdeb..fc61739150e7 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static bool avx2_usable(void) { - if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; -- cgit v1.2.3 From 5dd141e0e9855daca44b3bf8dcbf92494f9c7bf8 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 14 Aug 2017 20:11:26 -0700 Subject: Input: elan_i2c - add ELAN0608 to the ACPI table commit 1874064eed0502bd9bef7be8023757b0c4f26883 upstream. Similar to commit 722c5ac708b4f ("Input: elan_i2c - add ELAN0605 to the ACPI table"), ELAN0608 should be handled by elan_i2c. This touchpad can be found in Lenovo ideapad 320-14IKB. BugLink: https://bugs.launchpad.net/bugs/1708852 Signed-off-by: Kai-Heng Feng Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index da5458dfb1e3..d5309d0a0482 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1235,6 +1235,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0100", 0 }, { "ELAN0600", 0 }, { "ELAN0605", 0 }, + { "ELAN0608", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From 3f7292106d0bda25ef52a3342fb74b84b04f3585 Mon Sep 17 00:00:00 2001 From: KT Liao Date: Mon, 14 Aug 2017 20:11:59 -0700 Subject: Input: elan_i2c - Add antoher Lenovo ACPI ID for upcoming Lenovo NB commit 76988690402dde2880bfe06ecccf381d48ba8e1c upstream. Add 2 new IDs (ELAN0609 and ELAN060B) to the list of ACPI IDs that should be handled by the driver. Signed-off-by: KT Liao Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index d5309d0a0482..98d4e515587a 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1236,6 +1236,9 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0600", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, + { "ELAN0605", 0 }, + { "ELAN0609", 0 }, + { "ELAN060B", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From bafb25c5050caa96625df0430d2a80072183f4cf Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Mon, 14 Aug 2017 14:46:01 -0700 Subject: ALSA: seq: 2nd attempt at fixing race creating a queue commit 7e1d90f60a0d501c8503e636942ca704a454d910 upstream. commit 4842e98f26dd80be3623c4714a244ba52ea096a8 ("ALSA: seq: Fix race at creating a queue") attempted to fix a race reported by syzkaller. That fix has been described as follows: " When a sequencer queue is created in snd_seq_queue_alloc(),it adds the new queue element to the public list before referencing it. Thus the queue might be deleted before the call of snd_seq_queue_use(), and it results in the use-after-free error, as spotted by syzkaller. The fix is to reference the queue object at the right time. " Even with that fix in place, syzkaller reported a use-after-free error. It specifically pointed to the last instruction "return q->queue" in snd_seq_queue_alloc(). The pointer q is being used after kfree() has been called on it. It turned out that there is still a small window where a race can happen. The window opens at snd_seq_ioctl_create_queue()->snd_seq_queue_alloc()->queue_list_add() and closes at snd_seq_ioctl_create_queue()->queueptr()->snd_use_lock_use(). Between these two calls, a different thread could delete the queue and possibly re-create a different queue in the same location in queue_list. This change prevents this situation by calling snd_use_lock_use() from snd_seq_queue_alloc() prior to calling queue_list_add(). It is then the caller's responsibility to call snd_use_lock_free(&q->use_lock). Fixes: 4842e98f26dd ("ALSA: seq: Fix race at creating a queue") Reported-by: Dmitry Vyukov Signed-off-by: Daniel Mentz Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 13 ++++--------- sound/core/seq/seq_queue.c | 14 +++++++++----- sound/core/seq/seq_queue.h | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index f3b1d7f50b81..67c4c68ce041 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1502,16 +1502,11 @@ static int snd_seq_ioctl_unsubscribe_port(struct snd_seq_client *client, static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; - int result; struct snd_seq_queue *q; - result = snd_seq_queue_alloc(client->number, info->locked, info->flags); - if (result < 0) - return result; - - q = queueptr(result); - if (q == NULL) - return -EINVAL; + q = snd_seq_queue_alloc(client->number, info->locked, info->flags); + if (IS_ERR(q)) + return PTR_ERR(q); info->queue = q->queue; info->locked = q->locked; @@ -1521,7 +1516,7 @@ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) if (!info->name[0]) snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue); strlcpy(q->name, info->name, sizeof(q->name)); - queuefree(q); + snd_use_lock_free(&q->use_lock); return 0; } diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 450c5187eecb..79e0c5604ef8 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -184,22 +184,26 @@ void __exit snd_seq_queues_delete(void) static void queue_use(struct snd_seq_queue *queue, int client, int use); /* allocate a new queue - - * return queue index value or negative value for error + * return pointer to new queue or ERR_PTR(-errno) for error + * The new queue's use_lock is set to 1. It is the caller's responsibility to + * call snd_use_lock_free(&q->use_lock). */ -int snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) +struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) { struct snd_seq_queue *q; q = queue_new(client, locked); if (q == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); q->info_flags = info_flags; queue_use(q, client, 1); + snd_use_lock_use(&q->use_lock); if (queue_list_add(q) < 0) { + snd_use_lock_free(&q->use_lock); queue_delete(q); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } - return q->queue; + return q; } /* delete a queue - queue must be owned by the client */ diff --git a/sound/core/seq/seq_queue.h b/sound/core/seq/seq_queue.h index 30c8111477f6..719093489a2c 100644 --- a/sound/core/seq/seq_queue.h +++ b/sound/core/seq/seq_queue.h @@ -71,7 +71,7 @@ void snd_seq_queues_delete(void); /* create new queue (constructor) */ -int snd_seq_queue_alloc(int client, int locked, unsigned int flags); +struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int flags); /* delete queue (destructor) */ int snd_seq_queue_delete(int client, int queueid); -- cgit v1.2.3 From f39f086d541f8cef13977fe51308ab805e14b3c3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Aug 2017 14:35:50 +0200 Subject: ALSA: usb-audio: Apply sample rate quirk to Sennheiser headset commit a8e800fe0f68bc28ce309914f47e432742b865ed upstream. A Senheisser headset requires the typical sample-rate quirk for avoiding spurious errors from inquiring the current sample rate like: usb 1-1: 2:1: cannot get freq at ep 0x4 usb 1-1: 3:1: cannot get freq at ep 0x83 The USB ID 1395:740a has to be added to the entries in snd_usb_get_sample_rate_quirk(). Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1052580 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index eb4b9f7a571e..95c2749ac8a3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1142,6 +1142,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ -- cgit v1.2.3 From d3e6e5956687eacc1c462e8dd6f3c66a2b6e8c1b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 16 Aug 2017 14:18:37 +0200 Subject: ALSA: usb-audio: Add mute TLV for playback volumes on C-Media devices commit 0f174b3525a43bd51f9397394763925e0ebe7bc7 upstream. C-Media devices (at least some models) mute the playback stream when volumes are set to the minimum value. But this isn't informed via TLV and the user-space, typically PulseAudio, gets confused as if it's still played in a low volume. This patch adds the new flag, min_mute, to struct usb_mixer_elem_info for indicating that the mixer element is with the minimum-mute volume. This flag is set for known C-Media devices in snd_usb_mixer_fu_apply_quirk() in turn. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196669 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 2 ++ sound/usb/mixer.h | 1 + sound/usb/mixer_quirks.c | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 4703caea56b2..d09c28c1deaf 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -542,6 +542,8 @@ int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag, if (size < sizeof(scale)) return -ENOMEM; + if (cval->min_mute) + scale[0] = SNDRV_CTL_TLVT_DB_MINMAX_MUTE; scale[2] = cval->dBmin; scale[3] = cval->dBmax; if (copy_to_user(_tlv, scale, sizeof(scale))) diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 3417ef347e40..2b4b067646ab 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -64,6 +64,7 @@ struct usb_mixer_elem_info { int cached; int cache_val[MAX_CHANNELS]; u8 initialized; + u8 min_mute; void *private_data; }; diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 04991b009132..5d2fc5f58bfe 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1873,6 +1873,12 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, if (unitid == 7 && cval->control == UAC_FU_VOLUME) snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; + /* lowest playback value is muted on C-Media devices */ + case USB_ID(0x0d8c, 0x000c): + case USB_ID(0x0d8c, 0x0014): + if (strstr(kctl->id.name, "Playback")) + cval->min_mute = 1; + break; } } -- cgit v1.2.3 From 87395eeb28e58a60e89b24d067a5968e88096ead Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 18 Aug 2017 15:16:05 -0700 Subject: mm: discard memblock data later commit 3010f876500f9ba921afaeccec30c45ca6584dc8 upstream. There is existing use after free bug when deferred struct pages are enabled: The memblock_add() allocates memory for the memory array if more than 128 entries are needed. See comment in e820__memblock_setup(): * The bootstrap memblock region count maximum is 128 entries * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries * than that - so allow memblock resizing. This memblock memory is freed here: free_low_memory_core_early() We access the freed memblock.memory later in boot when deferred pages are initialized in this path: deferred_init_memmap() for_each_mem_pfn_range() __next_mem_pfn_range() type = &memblock.memory; One possible explanation for why this use-after-free hasn't been hit before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded at least on systems where deferred struct pages were enabled. Tested by reducing INIT_MEMBLOCK_REGIONS down to 4 from the current 128, and verifying in qemu that this code is getting excuted and that the freed pages are sane. Link: http://lkml.kernel.org/r/1502485554-318703-2-git-send-email-pasha.tatashin@oracle.com Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd") Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco Acked-by: Michal Hocko Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/memblock.h | 6 ++++-- mm/memblock.c | 38 +++++++++++++++++--------------------- mm/nobootmem.c | 16 ---------------- mm/page_alloc.c | 4 ++++ 4 files changed, 25 insertions(+), 39 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e8fba68e5d03..4024af00b137 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,6 +64,7 @@ extern bool movable_node_enabled; #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit #define __initdata_memblock __meminitdata +void memblock_discard(void); #else #define __init_memblock #define __initdata_memblock @@ -77,8 +78,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, int nid, ulong flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); -phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); -phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); @@ -112,6 +111,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, phys_addr_t *out_end); +void __memblock_free_early(phys_addr_t base, phys_addr_t size); +void __memblock_free_late(phys_addr_t base, phys_addr_t size); + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. diff --git a/mm/memblock.c b/mm/memblock.c index 68849d0ead09..ccec42c12ba8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -297,31 +297,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK - -phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( - phys_addr_t *addr) -{ - if (memblock.reserved.regions == memblock_reserved_init_regions) - return 0; - - *addr = __pa(memblock.reserved.regions); - - return PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.reserved.max); -} - -phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( - phys_addr_t *addr) +/** + * Discard memory and reserved arrays if they were allocated + */ +void __init memblock_discard(void) { - if (memblock.memory.regions == memblock_memory_init_regions) - return 0; + phys_addr_t addr, size; - *addr = __pa(memblock.memory.regions); + if (memblock.reserved.regions != memblock_reserved_init_regions) { + addr = __pa(memblock.reserved.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.reserved.max); + __memblock_free_late(addr, size); + } - return PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.memory.max); + if (memblock.memory.regions == memblock_memory_init_regions) { + addr = __pa(memblock.memory.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); + __memblock_free_late(addr, size); + } } - #endif /** diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 487dad610731..ab998125f04d 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void) NULL) count += __free_memory_core(start, end); -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK - { - phys_addr_t size; - - /* Free memblock.reserved array if it was allocated */ - size = get_allocated_memblock_reserved_regions_info(&start); - if (size) - count += __free_memory_core(start, start + size); - - /* Free memblock.memory array if it was allocated */ - size = get_allocated_memblock_memory_regions_info(&start); - if (size) - count += __free_memory_core(start, start + size); - } -#endif - return count; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9419aa4e5441..2abf8d5f0ad4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1587,6 +1587,10 @@ void __init page_alloc_init_late(void) /* Reinit limits that are based on free pages after the kernel is up */ files_maxfiles_init(); #endif +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + /* Discard memblock private memory */ + memblock_discard(); +#endif for_each_populated_zone(zone) set_zone_contiguous(zone); -- cgit v1.2.3 From 59ee25d09fd523c57a911d50760bcc162a5f585f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 18 Aug 2017 15:16:12 -0700 Subject: mm: fix double mmap_sem unlock on MMF_UNSTABLE enforced SIGBUS commit 5b53a6ea886700a128b697a6fe8375340dea2c30 upstream. Tetsuo Handa has noticed that MMF_UNSTABLE SIGBUS path in handle_mm_fault causes a lockdep splat Out of memory: Kill process 1056 (a.out) score 603 or sacrifice child Killed process 1056 (a.out) total-vm:4268108kB, anon-rss:2246048kB, file-rss:0kB, shmem-rss:0kB a.out (1169) used greatest stack depth: 11664 bytes left DEBUG_LOCKS_WARN_ON(depth <= 0) ------------[ cut here ]------------ WARNING: CPU: 6 PID: 1339 at kernel/locking/lockdep.c:3617 lock_release+0x172/0x1e0 CPU: 6 PID: 1339 Comm: a.out Not tainted 4.13.0-rc3-next-20170803+ #142 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 RIP: 0010:lock_release+0x172/0x1e0 Call Trace: up_read+0x1a/0x40 __do_page_fault+0x28e/0x4c0 do_page_fault+0x30/0x80 page_fault+0x28/0x30 The reason is that the page fault path might have dropped the mmap_sem and returned with VM_FAULT_RETRY. MMF_UNSTABLE check however rewrites the error path to VM_FAULT_SIGBUS and we always expect mmap_sem taken in that path. Fix this by taking mmap_sem when VM_FAULT_RETRY is held in the MMF_UNSTABLE path. We cannot simply add VM_FAULT_SIGBUS to the existing error code because all arch specific page fault handlers and g-u-p would have to learn a new error code combination. Link: http://lkml.kernel.org/r/20170807113839.16695-2-mhocko@kernel.org Fixes: 3f70dc38cec2 ("mm: make sure that kthreads will not refault oom reaped memory") Reported-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: David Rientjes Cc: Andrea Argangeli Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Wenwei Tao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 9bf3da0d0e14..d064caff9d7d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3635,8 +3635,18 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * further. */ if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR) - && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) + && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) { + + /* + * We are going to enforce SIGBUS but the PF path might have + * dropped the mmap_sem already so take it again so that + * we do not break expectations of all arch specific PF paths + * and g-u-p + */ + if (ret & VM_FAULT_RETRY) + down_read(&vma->vm_mm->mmap_sem); ret = VM_FAULT_SIGBUS; + } return ret; } -- cgit v1.2.3 From 91105f2c621ef0b3c40d6725475b6896eb06f954 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 18 Aug 2017 15:16:24 -0700 Subject: mm/mempolicy: fix use after free when calling get_mempolicy commit 73223e4e2e3867ebf033a5a8eb2e5df0158ccc99 upstream. I hit a use after free issue when executing trinity and repoduced it with KASAN enabled. The related call trace is as follows. BUG: KASan: use after free in SyS_get_mempolicy+0x3c8/0x960 at addr ffff8801f582d766 Read of size 2 by task syz-executor1/798 INFO: Allocated in mpol_new.part.2+0x74/0x160 age=3 cpu=1 pid=799 __slab_alloc+0x768/0x970 kmem_cache_alloc+0x2e7/0x450 mpol_new.part.2+0x74/0x160 mpol_new+0x66/0x80 SyS_mbind+0x267/0x9f0 system_call_fastpath+0x16/0x1b INFO: Freed in __mpol_put+0x2b/0x40 age=4 cpu=1 pid=799 __slab_free+0x495/0x8e0 kmem_cache_free+0x2f3/0x4c0 __mpol_put+0x2b/0x40 SyS_mbind+0x383/0x9f0 system_call_fastpath+0x16/0x1b INFO: Slab 0xffffea0009cb8dc0 objects=23 used=8 fp=0xffff8801f582de40 flags=0x200000000004080 INFO: Object 0xffff8801f582d760 @offset=5984 fp=0xffff8801f582d600 Bytes b4 ffff8801f582d750: ae 01 ff ff 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ Object ffff8801f582d760: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff8801f582d770: 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkk. Redzone ffff8801f582d778: bb bb bb bb bb bb bb bb ........ Padding ffff8801f582d8b8: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Memory state around the buggy address: ffff8801f582d600: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801f582d680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8801f582d700: fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb fc !shared memory policy is not protected against parallel removal by other thread which is normally protected by the mmap_sem. do_get_mempolicy, however, drops the lock midway while we can still access it later. Early premature up_read is a historical artifact from times when put_user was called in this path see https://lwn.net/Articles/124754/ but that is gone since 8bccd85ffbaf ("[PATCH] Implement sys_* do_* layering in the memory policy layer."). but when we have the the current mempolicy ref count model. The issue was introduced accordingly. Fix the issue by removing the premature release. Link: http://lkml.kernel.org/r/1502950924-27521-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Acked-by: Michal Hocko Cc: Minchan Kim Cc: Vlastimil Babka Cc: David Rientjes Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 23471526d424..a8ab5e73dc61 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -926,11 +926,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, *policy |= (pol->flags & MPOL_MODE_FLAGS); } - if (vma) { - up_read(¤t->mm->mmap_sem); - vma = NULL; - } - err = 0; if (nmask) { if (mpol_store_user_nodemask(pol)) { -- cgit v1.2.3 From 1f2347a095ced10b2054fece0319801097cd852f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Aug 2017 15:16:31 -0700 Subject: mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes commit c715b72c1ba406f133217b509044c38d8e714a37 upstream. Moving the x86_64 and arm64 PIE base from 0x555555554000 to 0x000100000000 broke AddressSanitizer. This is a partial revert of: eab09532d400 ("binfmt_elf: use ELF_ET_DYN_BASE only for PIE") 02445990a96e ("arm64: move ELF_ET_DYN_BASE to 4GB / 4MB") The AddressSanitizer tool has hard-coded expectations about where executable mappings are loaded. The motivation for changing the PIE base in the above commits was to avoid the Stack-Clash CVEs that allowed executable mappings to get too close to heap and stack. This was mainly a problem on 32-bit, but the 64-bit bases were moved too, in an effort to proactively protect those systems (proofs of concept do exist that show 64-bit collisions, but other recent changes to fix stack accounting and setuid behaviors will minimize the impact). The new 32-bit PIE base is fine for ASan (since it matches the ET_EXEC base), so only the 64-bit PIE base needs to be reverted to let x86 and arm64 ASan binaries run again. Future changes to the 64-bit PIE base on these architectures can be made optional once a more dynamic method for dealing with AddressSanitizer is found. (e.g. always loading PIE into the mmap region for marked binaries.) Link: http://lkml.kernel.org/r/20170807201542.GA21271@beast Fixes: eab09532d400 ("binfmt_elf: use ELF_ET_DYN_BASE only for PIE") Fixes: 02445990a96e ("arm64: move ELF_ET_DYN_BASE to 4GB / 4MB") Signed-off-by: Kees Cook Reported-by: Kostya Serebryany Acked-by: Will Deacon Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/elf.h | 4 ++-- arch/x86/include/asm/elf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index afa23b057def..1fb023076dfc 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -114,10 +114,10 @@ /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * 64-bit, this is above 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ -#define ELF_ET_DYN_BASE 0x100000000UL +#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index c152db2ab687..b31761ecce63 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -247,11 +247,11 @@ extern int force_personality32; /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * 64-bit, this is above 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - 0x100000000UL) + (TASK_SIZE / 3 * 2)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, -- cgit v1.2.3 From 6c7f3756d072050d612e5c5c04108f90f1985435 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 18 Jul 2017 15:01:00 +0100 Subject: xen: fix bio vec merging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 462cdace790ac2ed6aad1b19c9c0af0143b6aab0 upstream. The current test for bio vec merging is not fully accurate and can be tricked into merging bios when certain grant combinations are used. The result of these malicious bio merges is a bio that extends past the memory page used by any of the originating bios. Take into account the following scenario, where a guest creates two grant references that point to the same mfn, ie: grant 1 -> mfn A, grant 2 -> mfn A. These references are then used in a PV block request, and mapped by the backend domain, thus obtaining two different pfns that point to the same mfn, pfn B -> mfn A, pfn C -> mfn A. If those grants happen to be used in two consecutive sectors of a disk IO operation becoming two different bios in the backend domain, the checks in xen_biovec_phys_mergeable will succeed, because bfn1 == bfn2 (they both point to the same mfn). However due to the bio merging, the backend domain will end up with a bio that expands past mfn A into mfn A + 1. Fix this by making sure the check in xen_biovec_phys_mergeable takes into account the offset and the length of the bio, this basically replicates whats done in __BIOVEC_PHYS_MERGEABLE using mfns (bus addresses). While there also remove the usage of __BIOVEC_PHYS_MERGEABLE, since that's already checked by the callers of xen_biovec_phys_mergeable. Reported-by: "Jan H. Schönherr" Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/xen/biomerge.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 4da69dbf7dca..1bdd02a6d6ac 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -10,8 +10,7 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page)); - return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && - ((bfn1 == bfn2) || ((bfn1+1) == bfn2)); + return bfn1 + PFN_DOWN(vec1->bv_offset + vec1->bv_len) == bfn2; #else /* * XXX: Add support for merging bio_vec when using different page -- cgit v1.2.3 From d8c20af0085898f9c818dd599b0afb0d0c99dab2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Aug 2017 12:24:47 +0200 Subject: blk-mq-pci: add a fallback when pci_irq_get_affinity returns NULL commit c005390374957baacbc38eef96ea360559510aa7 upstream. While pci_irq_get_affinity should never fail for SMP kernel that implement the affinity mapping, it will always return NULL in the UP case, so provide a fallback mapping of all queues to CPU 0 in that case. Signed-off-by: Christoph Hellwig Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index 966c2169762e..ee9d3d958fbe 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -36,12 +36,18 @@ int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev) for (queue = 0; queue < set->nr_hw_queues; queue++) { mask = pci_irq_get_affinity(pdev, queue); if (!mask) - return -EINVAL; + goto fallback; for_each_cpu(cpu, mask) set->mq_map[cpu] = queue; } return 0; + +fallback: + WARN_ON_ONCE(set->nr_hw_queues > 1); + for_each_possible_cpu(cpu) + set->mq_map[cpu] = 0; + return 0; } EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues); -- cgit v1.2.3 From 7672f118604781747e65c9a3d8f7fa79ada80fe4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:14 +1000 Subject: powerpc: Fix VSX enabling/flushing to also test MSR_FP and MSR_VEC commit 5a69aec945d27e78abac9fd032533d3aaebf7c1e upstream. VSX uses a combination of the old vector registers, the old FP registers and new "second halves" of the FP registers. Thus when we need to see the VSX state in the thread struct (flush_vsx_to_thread()) or when we'll use the VSX in the kernel (enable_kernel_vsx()) we need to ensure they are all flushed into the thread struct if either of them is individually enabled. Unfortunately we only tested if the whole VSX was enabled, not if they were individually enabled. Fixes: 72cd7b44bc99 ("powerpc: Uncomment and make enable_kernel_vsx() routine available") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b249c2fb99c8..1c141d50fbc6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -359,7 +359,8 @@ void enable_kernel_vsx(void) cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); - if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { + if (current->thread.regs && + (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) { check_if_tm_restore_required(current); /* * If a thread has already been reclaimed then the @@ -383,7 +384,7 @@ void flush_vsx_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); - if (tsk->thread.regs->msr & MSR_VSX) { + if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) { BUG_ON(tsk != current); giveup_vsx(tsk); } -- cgit v1.2.3 From 1581d704e97d66a03fd7727bd1a9e561aff2acc5 Mon Sep 17 00:00:00 2001 From: Munehisa Kamata Date: Wed, 9 Aug 2017 15:31:40 -0700 Subject: xen-blkfront: use a right index when checking requests commit b15bd8cb37598afb2963f7eb9e2de468d2d60a2f upstream. Since commit d05d7f40791c ("Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block") and 3fc9d690936f ("Merge branch 'for-4.8/drivers' of git://git.kernel.dk/linux-block"), blkfront_resume() has been using an index for iterating ring_info to check request when iterating blk_shadow in an inner loop. This seems to have been accidentally introduced during the massive rewrite of the block layer macros in the commits. This may cause crash like this: [11798.057074] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [11798.058832] IP: [] blkfront_resume+0x10a/0x610 .... [11798.061063] Call Trace: [11798.061063] [] xenbus_dev_resume+0x53/0x140 [11798.061063] [] ? xenbus_dev_probe+0x150/0x150 [11798.061063] [] dpm_run_callback+0x3e/0x110 [11798.061063] [] device_resume+0x88/0x190 [11798.061063] [] dpm_resume+0x100/0x2d0 [11798.061063] [] dpm_resume_end+0x11/0x20 [11798.061063] [] do_suspend+0xe8/0x1a0 [11798.061063] [] shutdown_handler+0xfd/0x130 [11798.061063] [] ? split+0x110/0x110 [11798.061063] [] xenwatch_thread+0x86/0x120 [11798.061063] [] ? prepare_to_wait_event+0x110/0x110 [11798.061063] [] kthread+0xd7/0xf0 [11798.061063] [] ? kfree+0x121/0x170 [11798.061063] [] ? kthread_park+0x60/0x60 [11798.061063] [] ? call_usermodehelper_exec_work+0xb0/0xb0 [11798.061063] [] ? call_usermodehelper_exec_async+0x13a/0x140 [11798.061063] [] ret_from_fork+0x25/0x30 Use the right index in the inner loop. Fixes: d05d7f40791c ("Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block") Fixes: 3fc9d690936f ("Merge branch 'for-4.8/drivers' of git://git.kernel.dk/linux-block") Signed-off-by: Munehisa Kamata Reviewed-by: Thomas Friebel Reviewed-by: Eduardo Valentin Reviewed-by: Boris Ostrovsky Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Reviewed-by: Roger Pau Monne Cc: xen-devel@lists.xenproject.org Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9908597c5209..f11d62de2272 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2112,9 +2112,9 @@ static int blkfront_resume(struct xenbus_device *dev) /* * Get the bios in the request so we can re-queue them. */ - if (req_op(shadow[i].request) == REQ_OP_FLUSH || - req_op(shadow[i].request) == REQ_OP_DISCARD || - req_op(shadow[i].request) == REQ_OP_SECURE_ERASE || + if (req_op(shadow[j].request) == REQ_OP_FLUSH || + req_op(shadow[j].request) == REQ_OP_DISCARD || + req_op(shadow[j].request) == REQ_OP_SECURE_ERASE || shadow[j].request->cmd_flags & REQ_FUA) { /* * Flush operations don't contain bios, so -- cgit v1.2.3 From 10d5bf2f6928e3d333cbf82d970d4a057686f9f9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 7 Aug 2017 19:43:13 -0700 Subject: x86/asm/64: Clear AC on NMI entries commit e93c17301ac55321fc18e0f8316e924e58a83c8c upstream. This closes a hole in our SMAP implementation. This patch comes from grsecurity. Good catch! Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/314cc9f294e8f14ed85485727556ad4f15bb1659.1502159503.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ef766a358b37..e7b0e7ff4c58 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1215,6 +1215,8 @@ ENTRY(nmi) * other IST entries. */ + ASM_CLAC + /* Use %rdx as our temp variable throughout */ pushq %rdx -- cgit v1.2.3 From 665d2009a4afb647b16b2fcfc0ae86a8c7daa23a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:39 +0200 Subject: irqchip/atmel-aic: Fix unbalanced of_node_put() in aic_common_irq_fixup() commit 469bcef53c546bb792aa66303933272991b7831d upstream. aic_common_irq_fixup() is calling twice of_node_put() on the same node thus leading to an unbalanced refcount on the root node. Signed-off-by: Boris Brezillon Reported-by: Alexandre Belloni Fixes: b2f579b58e93 ("irqchip: atmel-aic: Add irq fixup infrastructure") Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-atmel-aic-common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 28b26c80f4cf..7c5a43488d27 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -196,7 +196,6 @@ static void __init aic_common_irq_fixup(const struct of_device_id *matches) return; match = of_match_node(matches, root); - of_node_put(root); if (match) { void (*fixup)(struct device_node *) = match->data; -- cgit v1.2.3 From 4691f1ca6fad3877a4887041d1ad5eb58117b4a2 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:40 +0200 Subject: irqchip/atmel-aic: Fix unbalanced refcount in aic_common_rtc_irq_fixup() commit 277867ade8262583f4280cadbe90e0031a3706a7 upstream. of_find_compatible_node() is calling of_node_put() on its first argument thus leading to an unbalanced of_node_get/put() issue if the node has not been retained before that. Instead of passing the root node, pass NULL, which does exactly the same: iterate over all DT nodes, starting from the root node. Signed-off-by: Boris Brezillon Reported-by: Alexandre Belloni Fixes: 3d61467f9bab ("irqchip: atmel-aic: Implement RTC irq fixup") Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-atmel-aic-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 7c5a43488d27..056507099725 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -142,9 +142,9 @@ void __init aic_common_rtc_irq_fixup(struct device_node *root) struct device_node *np; void __iomem *regs; - np = of_find_compatible_node(root, NULL, "atmel,at91rm9200-rtc"); + np = of_find_compatible_node(NULL, NULL, "atmel,at91rm9200-rtc"); if (!np) - np = of_find_compatible_node(root, NULL, + np = of_find_compatible_node(NULL, NULL, "atmel,at91sam9x5-rtc"); if (!np) -- cgit v1.2.3 From 762ac49ccecece82aad47bb4bd661791fafac2e5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Aug 2017 10:53:45 +0100 Subject: genirq: Restore trigger settings in irq_modify_status() commit e8f241893dfbbebe2813c01eac54f263e6a5e59c upstream. irq_modify_status starts by clearing the trigger settings from irq_data before applying the new settings, but doesn't restore them, leaving them to IRQ_TYPE_NONE. That's pretty confusing to the potential request_irq() that could follow. Instead, snapshot the settings before clearing them, and restore them if the irq_modify_status() invocation was not changing the trigger. Fixes: 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ") Reported-and-tested-by: jeffy Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jon Hunter Link: http://lkml.kernel.org/r/20170818095345.12378-1-marc.zyngier@arm.com Signed-off-by: Greg Kroah-Hartman --- kernel/irq/chip.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 077c87f40f4d..f30110e1b8c9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -895,13 +895,15 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) { - unsigned long flags; + unsigned long flags, trigger, tmp; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) return; irq_settings_clr_and_set(desc, clr, set); + trigger = irqd_get_trigger_type(&desc->irq_data); + irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); if (irq_settings_has_no_balance_set(desc)) @@ -913,7 +915,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) if (irq_settings_is_level(desc)) irqd_set(&desc->irq_data, IRQD_LEVEL); - irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); + tmp = irq_settings_get_trigger_mask(desc); + if (tmp != IRQ_TYPE_NONE) + trigger = tmp; + + irqd_set(&desc->irq_data, trigger); irq_put_desc_unlock(desc, flags); } -- cgit v1.2.3 From f9497d51259faf7f6ecfed393a2a75879926b77a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 19 Aug 2017 12:57:51 +0300 Subject: genirq/ipi: Fixup checks against nr_cpu_ids commit 8fbbe2d7cc478d1544f41f2271787c993c23a4f6 upstream. Valid CPU ids are [0, nr_cpu_ids-1] inclusive. Fixes: 3b8e29a82dd1 ("genirq: Implement ipi_send_mask/single()") Fixes: f9bce791ae2a ("genirq: Add a new function to get IPI reverse mapping") Signed-off-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170819095751.GB27864@avx2 Signed-off-by: Greg Kroah-Hartman --- kernel/irq/ipi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 1a9abc1c8ea0..259a22aa9934 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -165,7 +165,7 @@ irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu) struct irq_data *data = irq_get_irq_data(irq); struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; - if (!data || !ipimask || cpu > nr_cpu_ids) + if (!data || !ipimask || cpu >= nr_cpu_ids) return INVALID_HWIRQ; if (!cpumask_test_cpu(cpu, ipimask)) @@ -195,7 +195,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, if (!chip->ipi_send_single && !chip->ipi_send_mask) return -EINVAL; - if (cpu > nr_cpu_ids) + if (cpu >= nr_cpu_ids) return -EINVAL; if (dest) { -- cgit v1.2.3 From 61332dc598c3f223678b2d7192ccf3472c544799 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Aug 2017 13:26:27 -0700 Subject: Sanitize 'move_pages()' permission checks commit 197e7e521384a23b9e585178f3f11c9fa08274b9 upstream. The 'move_paghes()' system call was introduced long long ago with the same permission checks as for sending a signal (except using CAP_SYS_NICE instead of CAP_SYS_KILL for the overriding capability). That turns out to not be a great choice - while the system call really only moves physical page allocations around (and you need other capabilities to do a lot of it), you can check the return value to map out some the virtual address choices and defeat ASLR of a binary that still shares your uid. So change the access checks to the more common 'ptrace_may_access()' model instead. This tightens the access checks for the uid, and also effectively changes the CAP_SYS_NICE check to CAP_SYS_PTRACE, but it's unlikely that anybody really _uses_ this legacy system call any more (we hav ebetter NUMA placement models these days), so I expect nobody to notice. Famous last words. Reported-by: Otto Ebeling Acked-by: Eric W. Biederman Cc: Willy Tarreau Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 6850f62998cd..821623fc7091 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -1663,7 +1664,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, const int __user *, nodes, int __user *, status, int, flags) { - const struct cred *cred = current_cred(), *tcred; struct task_struct *task; struct mm_struct *mm; int err; @@ -1687,14 +1687,9 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, /* * Check if this process has the right to modify the specified - * process. The right exists if the process has administrative - * capabilities, superuser privileges or the same - * userid as the target process. + * process. Use the regular "ptrace_may_access()" checks. */ - tcred = __task_cred(task); - if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && - !capable(CAP_SYS_NICE)) { + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { rcu_read_unlock(); err = -EPERM; goto out; -- cgit v1.2.3 From 322cd32623653c0f860d2fc9789fa5b7ac7b09ae Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Aug 2017 17:35:02 +0200 Subject: pids: make task_tgid_nr_ns() safe commit dd1c1f2f2028a7b851f701fc6a8ebe39dcb95e7c upstream. This was reported many times, and this was even mentioned in commit 52ee2dfdd4f5 ("pids: refactor vnr/nr_ns helpers to make them safe") but somehow nobody bothered to fix the obvious problem: task_tgid_nr_ns() is not safe because task->group_leader points to nowhere after the exiting task passes exit_notify(), rcu_read_lock() can not help. We really need to change __unhash_process() to nullify group_leader, parent, and real_parent, but this needs some cleanups. Until then we can turn task_tgid_nr_ns() into another user of __task_pid_nr_ns() and fix the problem. Reported-by: Troy Kensinger Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/pid.h | 4 +++- include/linux/sched.h | 50 +++++++++++++++++++++++++++----------------------- kernel/pid.c | 11 ++++------- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/include/linux/pid.h b/include/linux/pid.h index 23705a53abba..97b745ddece5 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -8,7 +8,9 @@ enum pid_type PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID, - PIDTYPE_MAX + PIDTYPE_MAX, + /* only valid to __task_pid_nr_ns() */ + __PIDTYPE_TGID }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 14f58cf06054..a4d0afc009a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2132,31 +2132,8 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return pid_vnr(task_tgid(tsk)); -} - static inline int pid_alive(const struct task_struct *p); -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) @@ -2181,6 +2158,33 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns); +} + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL); +} + +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + /* obsolete, do not use */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { diff --git a/kernel/pid.c b/kernel/pid.c index f66162f2359b..693a64385d59 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, if (!ns) ns = task_active_pid_ns(current); if (likely(pid_alive(task))) { - if (type != PIDTYPE_PID) + if (type != PIDTYPE_PID) { + if (type == __PIDTYPE_TGID) + type = PIDTYPE_PID; task = task->group_leader; + } nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); } rcu_read_unlock(); @@ -536,12 +539,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, } EXPORT_SYMBOL(__task_pid_nr_ns); -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return pid_nr_ns(task_tgid(tsk), ns); -} -EXPORT_SYMBOL(task_tgid_nr_ns); - struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) { return ns_of_pid(task_pid(tsk)); -- cgit v1.2.3 From edfe57b2f44a6fa0d02a411fa61425d6b2e4f032 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 2 Jun 2017 16:36:26 +0300 Subject: usb: optimize acpi companion search for usb port devices commit ed18c5fa945768a9bec994e786edbbbc7695acf6 upstream. This optimization significantly reduces xhci driver load time. In ACPI tables the acpi companion port devices are children of the hub device. The port devices are identified by their port number returned by the ACPI _ADR method. _ADR 0 is reserved for the root hub device. The current implementation to find a acpi companion port device loops through all acpi port devices under that parent hub, evaluating their _ADR method each time a new port device is added. for a xHC controller with 25 ports under its roothub it will end up invoking ACPI bytecode 625 times before all ports are ready, making it really slow. The _ADR values are already read and cached earler. So instead of running the bytecode again we can check the cached _ADR value first, and then fall back to the old way. As one of the more significant changes, the xhci load time on Intel kabylake reduced by 70%, (28ms) from initcall xhci_pci_init+0x0/0x49 returned 0 after 39537 usecs to initcall xhci_pci_init+0x0/0x49 returned 0 after 11270 usecs Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb-acpi.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index 2776cfe64c09..ef9cf4a21afe 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -127,6 +127,22 @@ out: */ #define USB_ACPI_LOCATION_VALID (1 << 31) +static struct acpi_device *usb_acpi_find_port(struct acpi_device *parent, + int raw) +{ + struct acpi_device *adev; + + if (!parent) + return NULL; + + list_for_each_entry(adev, &parent->children, node) { + if (acpi_device_adr(adev) == raw) + return adev; + } + + return acpi_find_child_device(parent, raw, false); +} + static struct acpi_device *usb_acpi_find_companion(struct device *dev) { struct usb_device *udev; @@ -174,8 +190,10 @@ static struct acpi_device *usb_acpi_find_companion(struct device *dev) int raw; raw = usb_hcd_find_raw_port_number(hcd, port1); - adev = acpi_find_child_device(ACPI_COMPANION(&udev->dev), - raw, false); + + adev = usb_acpi_find_port(ACPI_COMPANION(&udev->dev), + raw); + if (!adev) return NULL; } else { @@ -186,7 +204,9 @@ static struct acpi_device *usb_acpi_find_companion(struct device *dev) return NULL; acpi_bus_get_device(parent_handle, &adev); - adev = acpi_find_child_device(adev, port1, false); + + adev = usb_acpi_find_port(adev, port1); + if (!adev) return NULL; } -- cgit v1.2.3 From 083d423b1f8abfc064a3aae14d46c28b7539656d Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 2 Aug 2017 00:45:44 +0900 Subject: usb: qmi_wwan: add D-Link DWM-222 device ID commit bed9ff165960921303a100228585f2d1691b42eb upstream. Signed-off-by: Hector Martin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 2f260c63c383..49a27dc46e5e 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -876,6 +876,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ + {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ -- cgit v1.2.3 From 4b904b22bc906d5867933b8132ae4d7f31d7645d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 24 Aug 2017 17:12:55 -0700 Subject: Linux 4.9.45 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3e95dfdbe572..ccd6d91f616e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 44 +SUBLEVEL = 45 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From d0526eef0bf7b2e6695d3e554433fe7a64d204d9 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Wed, 16 Aug 2017 11:09:10 -0700 Subject: sparc64: remove unnecessary log message [ Upstream commit 6170a506899aee3dd4934c928426505e47b1b466 ] There is no need to log message if ATU hvapi couldn't get register. Unlike PCI hvapi, ATU hvapi registration failure is not hard error. Even if ATU hvapi registration fails (on system with ATU or without ATU) system continues with legacy IOMMU. So only log message when ATU hvapi successfully get registered. Signed-off-by: Tushar Dave Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/pci_sun4v.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 06981cc716b6..d04111a5c615 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -1240,8 +1240,6 @@ static int pci_sun4v_probe(struct platform_device *op) * ATU group, but ATU hcalls won't be available. */ hv_atu = false; - pr_err(PFX "Could not register hvapi ATU err=%d\n", - err); } else { pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n", vatu_major, vatu_minor); -- cgit v1.2.3 From 2e3f17f81029bc35b0f1f8d934e46e6d86876a00 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 10:16:45 -0700 Subject: af_key: do not use GFP_KERNEL in atomic contexts [ Upstream commit 36f41f8fc6d8aa9f8c9072d66ff7cf9055f5e69b ] pfkey_broadcast() might be called from non process contexts, we can not use GFP_KERNEL in these cases [1]. This patch partially reverts commit ba51b6be38c1 ("net: Fix RCU splat in af_key"), only keeping the GFP_ATOMIC forcing under rcu_read_lock() section. [1] : syzkaller reported : in_atomic(): 1, irqs_disabled(): 0, pid: 2932, name: syzkaller183439 3 locks held by syzkaller183439/2932: #0: (&net->xfrm.xfrm_cfg_mutex){+.+.+.}, at: [] pfkey_sendmsg+0x4c8/0x9f0 net/key/af_key.c:3649 #1: (&pfk->dump_lock){+.+.+.}, at: [] pfkey_do_dump+0x76/0x3f0 net/key/af_key.c:293 #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] spin_lock_bh include/linux/spinlock.h:304 [inline] #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] xfrm_policy_walk+0x192/0xa30 net/xfrm/xfrm_policy.c:1028 CPU: 0 PID: 2932 Comm: syzkaller183439 Not tainted 4.13.0-rc4+ #24 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:5994 __might_sleep+0x95/0x190 kernel/sched/core.c:5947 slab_pre_alloc_hook mm/slab.h:416 [inline] slab_alloc mm/slab.c:3383 [inline] kmem_cache_alloc+0x24b/0x6e0 mm/slab.c:3559 skb_clone+0x1a0/0x400 net/core/skbuff.c:1037 pfkey_broadcast_one+0x4b2/0x6f0 net/key/af_key.c:207 pfkey_broadcast+0x4ba/0x770 net/key/af_key.c:281 dump_sp+0x3d6/0x500 net/key/af_key.c:2685 xfrm_policy_walk+0x2f1/0xa30 net/xfrm/xfrm_policy.c:1042 pfkey_dump_sp+0x42/0x50 net/key/af_key.c:2695 pfkey_do_dump+0xaa/0x3f0 net/key/af_key.c:299 pfkey_spddump+0x1a0/0x210 net/key/af_key.c:2722 pfkey_process+0x606/0x710 net/key/af_key.c:2814 pfkey_sendmsg+0x4d6/0x9f0 net/key/af_key.c:3650 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 ___sys_sendmsg+0x755/0x890 net/socket.c:2035 __sys_sendmsg+0xe5/0x210 net/socket.c:2069 SYSC_sendmsg net/socket.c:2080 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2076 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x445d79 RSP: 002b:00007f32447c1dc8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000445d79 RDX: 0000000000000000 RSI: 000000002023dfc8 RDI: 0000000000000008 RBP: 0000000000000086 R08: 00007f32447c2700 R09: 00007f32447c2700 R10: 00007f32447c2700 R11: 0000000000000202 R12: 0000000000000000 R13: 00007ffe33edec4f R14: 00007f32447c29c0 R15: 0000000000000000 Fixes: ba51b6be38c1 ("net: Fix RCU splat in af_key") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: David Ahern Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 2e1050ec2cf0..94bf810ad242 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, +static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk, struct net *net) { @@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buff *skb, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk); + err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); kfree_skb(skb2); kfree_skb(skb); @@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } @@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1396,7 +1396,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ xfrm_state_put(x); - pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net); + pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } @@ -1483,7 +1483,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x)); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } @@ -1596,7 +1596,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1701,8 +1701,8 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad return -ENOBUFS; } - pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk)); - + pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, + sock_net(sk)); return 0; } @@ -1720,7 +1720,8 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, + sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) @@ -1741,7 +1742,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -1798,7 +1799,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -1886,7 +1887,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb new_hdr->sadb_msg_errno = 0; } - pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -2219,7 +2220,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } @@ -2439,7 +2440,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: @@ -2695,7 +2696,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -2752,7 +2753,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -2814,7 +2815,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); @@ -3036,7 +3037,8 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; - pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); return 0; } @@ -3226,7 +3228,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_ctx->ctx_len); } - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, @@ -3424,7 +3427,8 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE @@ -3616,7 +3620,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; -- cgit v1.2.3 From b31cbe2c3916ebc4404b2bc1a231edf1391d465f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 14:10:25 -0700 Subject: dccp: purge write queue in dccp_destroy_sock() [ Upstream commit 7749d4ff88d31b0be17c8683143135adaaadc6a7 ] syzkaller reported that DCCP could have a non empty write queue at dismantle time. WARNING: CPU: 1 PID: 2953 at net/core/stream.c:199 sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 2953 Comm: syz-executor0 Not tainted 4.13.0-rc4+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x417 kernel/panic.c:180 __warn+0x1c4/0x1d9 kernel/panic.c:541 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190 do_trap_no_signal arch/x86/kernel/traps.c:224 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:273 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323 invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846 RIP: 0010:sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 RSP: 0018:ffff8801d182f108 EFLAGS: 00010297 RAX: ffff8801d1144140 RBX: ffff8801d13cb280 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff85137b00 RDI: ffff8801d13cb280 RBP: ffff8801d182f148 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d13cb4d0 R13: ffff8801d13cb3b8 R14: ffff8801d13cb300 R15: ffff8801d13cb3b8 inet_csk_destroy_sock+0x175/0x3f0 net/ipv4/inet_connection_sock.c:835 dccp_close+0x84d/0xc10 net/dccp/proto.c:1067 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 sock_release+0x8d/0x1e0 net/socket.c:597 sock_close+0x16/0x20 net/socket.c:1126 __fput+0x327/0x7e0 fs/file_table.c:210 ____fput+0x15/0x20 fs/file_table.c:246 task_work_run+0x18a/0x260 kernel/task_work.c:116 exit_task_work include/linux/task_work.h:21 [inline] do_exit+0xa32/0x1b10 kernel/exit.c:865 do_group_exit+0x149/0x400 kernel/exit.c:969 get_signal+0x7e8/0x17e0 kernel/signal.c:2330 do_signal+0x94/0x1ee0 arch/x86/kernel/signal.c:808 exit_to_usermode_loop+0x21c/0x2d0 arch/x86/entry/common.c:157 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath+0x3a7/0x450 arch/x86/entry/common.c:263 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/proto.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9fe25bf63296..86bc40ba6ba5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -201,10 +201,7 @@ void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - /* - * DCCP doesn't use sk_write_queue, just sk_send_head - * for retransmissions - */ + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; -- cgit v1.2.3 From 236b0d939b05e100a70ab20c402bf10e10269051 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 07:03:15 -0700 Subject: dccp: defer ccid_hc_tx_delete() at dismantle time [ Upstream commit 120e9dabaf551c6dc03d3a10a1f026376cb1811c ] syszkaller team reported another problem in DCCP [1] Problem here is that the structure holding RTO timer (ccid2_hc_tx_rto_expire() handler) is freed too soon. We can not use del_timer_sync() to cancel the timer since this timer wants to grab socket lock (that would risk a dead lock) Solution is to defer the freeing of memory when all references to the socket were released. Socket timers do own a reference, so this should fix the issue. [1] ================================================================== BUG: KASAN: use-after-free in ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144 Read of size 4 at addr ffff8801d2660540 by task kworker/u4:7/3365 CPU: 1 PID: 3365 Comm: kworker/u4:7 Not tainted 4.13.0-rc4+ #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events_unbound call_usermodehelper_exec_work Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144 call_timer_fn+0x233/0x830 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x7fd/0xb90 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x2f5/0xba3 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:638 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:1044 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:702 RIP: 0010:arch_local_irq_enable arch/x86/include/asm/paravirt.h:824 [inline] RIP: 0010:__raw_write_unlock_irq include/linux/rwlock_api_smp.h:267 [inline] RIP: 0010:_raw_write_unlock_irq+0x56/0x70 kernel/locking/spinlock.c:343 RSP: 0018:ffff8801cd50eaa8 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff10 RAX: dffffc0000000000 RBX: ffffffff85a090c0 RCX: 0000000000000006 RDX: 1ffffffff0b595f3 RSI: 1ffff1003962f989 RDI: ffffffff85acaf98 RBP: ffff8801cd50eab0 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801cc96ea60 R13: dffffc0000000000 R14: ffff8801cc96e4c0 R15: ffff8801cc96e4c0 release_task+0xe9e/0x1a40 kernel/exit.c:220 wait_task_zombie kernel/exit.c:1162 [inline] wait_consider_task+0x29b8/0x33c0 kernel/exit.c:1389 do_wait_thread kernel/exit.c:1452 [inline] do_wait+0x441/0xa90 kernel/exit.c:1523 kernel_wait4+0x1f5/0x370 kernel/exit.c:1665 SYSC_wait4+0x134/0x140 kernel/exit.c:1677 SyS_wait4+0x2c/0x40 kernel/exit.c:1673 call_usermodehelper_exec_sync kernel/kmod.c:286 [inline] call_usermodehelper_exec_work+0x1a0/0x2c0 kernel/kmod.c:323 process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2097 worker_thread+0x223/0x1860 kernel/workqueue.c:2231 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:425 Allocated by task 21267: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 kmem_cache_alloc+0x127/0x750 mm/slab.c:3561 ccid_new+0x20e/0x390 net/dccp/ccid.c:151 dccp_hdlr_ccid+0x27/0x140 net/dccp/feat.c:44 __dccp_feat_activate+0x142/0x2a0 net/dccp/feat.c:344 dccp_feat_activate_values+0x34e/0xa90 net/dccp/feat.c:1538 dccp_rcv_request_sent_state_process net/dccp/input.c:472 [inline] dccp_rcv_state_process+0xed1/0x1620 net/dccp/input.c:677 dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679 sk_backlog_rcv include/net/sock.h:911 [inline] __release_sock+0x124/0x360 net/core/sock.c:2269 release_sock+0xa4/0x2a0 net/core/sock.c:2784 inet_wait_for_connect net/ipv4/af_inet.c:557 [inline] __inet_stream_connect+0x671/0xf00 net/ipv4/af_inet.c:643 inet_stream_connect+0x58/0xa0 net/ipv4/af_inet.c:682 SYSC_connect+0x204/0x470 net/socket.c:1642 SyS_connect+0x24/0x30 net/socket.c:1623 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3049: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kmem_cache_free+0x77/0x280 mm/slab.c:3763 ccid_hc_tx_delete+0xc5/0x100 net/dccp/ccid.c:190 dccp_destroy_sock+0x1d1/0x2b0 net/dccp/proto.c:225 inet_csk_destroy_sock+0x166/0x3f0 net/ipv4/inet_connection_sock.c:833 dccp_done+0xb7/0xd0 net/dccp/proto.c:145 dccp_time_wait+0x13d/0x300 net/dccp/minisocks.c:72 dccp_rcv_reset+0x1d1/0x5b0 net/dccp/input.c:160 dccp_rcv_state_process+0x8fc/0x1620 net/dccp/input.c:663 dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679 sk_backlog_rcv include/net/sock.h:911 [inline] __sk_receive_skb+0x33e/0xc00 net/core/sock.c:521 dccp_v4_rcv+0xef1/0x1c00 net/dccp/ipv4.c:871 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:248 [inline] ip_local_deliver+0x1ce/0x6d0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:477 [inline] ip_rcv_finish+0x8db/0x19c0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:248 [inline] ip_rcv+0xc3f/0x17d0 net/ipv4/ip_input.c:488 __netif_receive_skb_core+0x19af/0x33d0 net/core/dev.c:4417 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4455 process_backlog+0x203/0x740 net/core/dev.c:5130 napi_poll net/core/dev.c:5527 [inline] net_rx_action+0x792/0x1910 net/core/dev.c:5593 __do_softirq+0x2f5/0xba3 kernel/softirq.c:284 The buggy address belongs to the object at ffff8801d2660100 which belongs to the cache ccid2_hc_tx_sock of size 1240 The buggy address is located 1088 bytes inside of 1240-byte region [ffff8801d2660100, ffff8801d26605d8) The buggy address belongs to the page: page:ffffea0007499800 count:1 mapcount:0 mapping:ffff8801d2660100 index:0x0 compound_mapcount: 0 flags: 0x200000000008100(slab|head) raw: 0200000000008100 ffff8801d2660100 0000000000000000 0000000100000005 raw: ffffea00075271a0 ffffea0007538820 ffff8801d3aef9c0 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801d2660400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801d2660480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801d2660500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801d2660580: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff8801d2660600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Gerrit Renker Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/proto.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 86bc40ba6ba5..b68168fcc06a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); +static void dccp_sk_destruct(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_tx_ccid = NULL; + inet_sock_destruct(sk); +} + int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); @@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; + sk->sk_destruct = dccp_sk_destruct; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_rate_last = jiffies; @@ -219,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ackvec = NULL; } ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + dp->dccps_hc_rx_ccid = NULL; /* clean up feature negotiation state */ dccp_feat_list_purge(&dp->dccps_featneg); -- cgit v1.2.3 From 71501d9b61902d8ddf0e782a2e6763afd2aa0c96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Aug 2017 05:26:17 -0700 Subject: ipv4: fix NULL dereference in free_fib_info_rcu() [ Upstream commit 187e5b3ac84d3421d2de3aca949b2791fbcad554 ] If fi->fib_metrics could not be allocated in fib_create_info() we attempt to dereference a NULL pointer in free_fib_info_rcu() : m = fi->fib_metrics; if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) kfree(m); Before my recent patch, we used to call kfree(NULL) and nothing wrong happened. Instead of using RCU to defer freeing while we are under memory stress, it seems better to take immediate action. This was reported by syzkaller team. Fixes: 3fb07daff8e9 ("ipv4: add reference counting to metrics") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7563831fa432..38c1c979ecb1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1044,15 +1044,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (!fi) goto failure; - fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); - if (!fi->fib_metrics) - goto failure; + if (unlikely(!fi->fib_metrics)) { + kfree(fi); + return ERR_PTR(err); + } atomic_set(&fi->fib_metrics->refcnt, 1); - } else + } else { fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; - + } + fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; -- cgit v1.2.3 From 38530f6e6dae4fdcd7ae5d48174b62a0f7858cbd Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:37:04 +0300 Subject: net_sched/sfq: update hierarchical backlog when drop packet [ Upstream commit 325d5dc3f7e7c2840b65e4a2988c082c2c0025c5 ] When sfq_enqueue() drops head packet or packet from another queue it have to update backlog at upper qdiscs too. Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: Konstantin Khlebnikov Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_sfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index bc5e99584e41..ea8a56f76b32 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -434,6 +434,7 @@ congestion_drop: qdisc_drop(head, sch, to_free); slot_queue_add(slot, skb); + qdisc_tree_reduce_backlog(sch, 0, delta); return NET_XMIT_CN; } @@ -465,8 +466,10 @@ enqueue: /* Return Congestion Notification only if we dropped a packet * from this flow. */ - if (qlen != slot->qlen) + if (qlen != slot->qlen) { + qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); return NET_XMIT_CN; + } /* As we dropped a packet, better let upper stack know this */ qdisc_tree_reduce_backlog(sch, 1, dropped); -- cgit v1.2.3 From 792c0707bd3fe4f7c25e4e748d20e1e54eccf547 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:39:05 +0300 Subject: net_sched: remove warning from qdisc_hash_add [ Upstream commit c90e95147c27b1780e76c6e8fea1b5c78d7d387f ] It was added in commit e57a784d8cae ("pkt_sched: set root qdisc before change() in attach_default_qdiscs()") to hide duplicates from "tc qdisc show" for incative deivices. After 59cc1f61f ("net: sched: convert qdisc linked list to hashtable") it triggered when classful qdisc is added to inactive device because default qdiscs are added before switching root qdisc. Anyway after commit ea3274695353 ("net: sched: avoid duplicates in qdisc dump") duplicates are filtered right in dumper. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ff27a85a71a9..195a3b2d9afc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -277,9 +277,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_hash_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - struct Qdisc *root = qdisc_dev(q)->qdisc; - - WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); } -- cgit v1.2.3 From d6a6b6b4c3bac13e83c8bd18711a86b09410c0ac Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 16 Aug 2017 01:45:33 +0200 Subject: bpf: fix bpf_trace_printk on 32 bit archs [ Upstream commit 88a5c690b66110ad255380d8f629c629cf6ca559 ] James reported that on MIPS32 bpf_trace_printk() is currently broken while MIPS64 works fine: bpf_trace_printk() uses conditional operators to attempt to pass different types to __trace_printk() depending on the format operators. This doesn't work as intended on 32-bit architectures where u32 and long are passed differently to u64, since the result of C conditional operators follows the "usual arithmetic conversions" rules, such that the values passed to __trace_printk() will always be u64 [causing issues later in the va_list handling for vscnprintf()]. For example the samples/bpf/tracex5 test printed lines like below on MIPS32, where the fd and buf have come from the u64 fd argument, and the size from the buf argument: [...] 1180.941542: 0x00000001: write(fd=1, buf= (null), size=6258688) Instead of this: [...] 1625.616026: 0x00000001: write(fd=1, buf=009e4000, size=512) One way to get it working is to expand various combinations of argument types into 8 different combinations for 32 bit and 64 bit kernels. Fix tested by James on MIPS32 and MIPS64 as well that it resolves the issue. Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()") Reported-by: James Hogan Tested-by: James Hogan Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5dcb99281259..41805fb3c661 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -203,10 +203,36 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, fmt_cnt++; } - return __trace_printk(1/* fake ip will not be printed */, fmt, - mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, - mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, - mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3); +/* Horrid workaround for getting va_list handling working with different + * argument type combinations generically for 32 and 64 bit archs. + */ +#define __BPF_TP_EMIT() __BPF_ARG3_TP() +#define __BPF_TP(...) \ + __trace_printk(1 /* Fake ip will not be printed. */, \ + fmt, ##__VA_ARGS__) + +#define __BPF_ARG1_TP(...) \ + ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_TP(arg1, ##__VA_ARGS__) \ + : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_TP((long)arg1, ##__VA_ARGS__) \ + : __BPF_TP((u32)arg1, ##__VA_ARGS__))) + +#define __BPF_ARG2_TP(...) \ + ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \ + : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \ + : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__))) + +#define __BPF_ARG3_TP(...) \ + ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \ + : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \ + : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__))) + + return __BPF_TP_EMIT(); } static const struct bpf_func_proto bpf_trace_printk_proto = { -- cgit v1.2.3 From 3c7af814def8daf25d827c8ed9e75ddc949e52d2 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 16 Aug 2017 13:30:07 +0800 Subject: openvswitch: fix skb_panic due to the incorrect actions attrlen [ Upstream commit 494bea39f3201776cdfddc232705f54a0bd210c4 ] For sw_flow_actions, the actions_len only represents the kernel part's size, and when we dump the actions to the userspace, we will do the convertions, so it's true size may become bigger than the actions_len. But unfortunately, for OVS_PACKET_ATTR_ACTIONS, we use the actions_len to alloc the skbuff, so the user_skb's size may become insufficient and oops will happen like this: skbuff: skb_over_panic: text:ffffffff8148fabf len:1749 put:157 head: ffff881300f39000 data:ffff881300f39000 tail:0x6d5 end:0x6c0 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:129! [...] Call Trace: [] skb_put+0x43/0x44 [] skb_zerocopy+0x6c/0x1f4 [] queue_userspace_packet+0x3a3/0x448 [openvswitch] [] ovs_dp_upcall+0x30/0x5c [openvswitch] [] output_userspace+0x132/0x158 [openvswitch] [] ? ip6_rcv_finish+0x74/0x77 [ipv6] [] do_execute_actions+0xcc1/0xdc8 [openvswitch] [] ovs_execute_actions+0x74/0x106 [openvswitch] [] ovs_dp_process_packet+0xe1/0xfd [openvswitch] [] ? key_extract+0x63c/0x8d5 [openvswitch] [] ovs_vport_receive+0xa1/0xc3 [openvswitch] [...] Also we can find that the actions_len is much little than the orig_len: crash> struct sw_flow_actions 0xffff8812f539d000 struct sw_flow_actions { rcu = { next = 0xffff8812f5398800, func = 0xffffe3b00035db32 }, orig_len = 1384, actions_len = 592, actions = 0xffff8812f539d01c } So as a quick fix, use the orig_len instead of the actions_len to alloc the user_skb. Last, this oops happened on our system running a relative old kernel, but the same risk still exists on the mainline, since we use the wrong actions_len from the beginning. Fixes: ccea74457bbd ("openvswitch: include datapath actions with sampled-packet upcall to userspace") Cc: Neil McKee Signed-off-by: Liping Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/actions.c | 1 + net/openvswitch/datapath.c | 7 ++++--- net/openvswitch/datapath.h | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4e03f64709bc..05d9f42fc309 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1240,6 +1240,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, goto out; } + OVS_CB(skb)->acts_origlen = acts->orig_len; err = do_execute_actions(dp, skb, key, acts->actions, acts->actions_len); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4d67ea856067..453f806afe6e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -383,7 +383,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, } static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, - unsigned int hdrlen) + unsigned int hdrlen, int actions_attrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ @@ -400,7 +400,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, /* OVS_PACKET_ATTR_ACTIONS */ if (upcall_info->actions_len) - size += nla_total_size(upcall_info->actions_len); + size += nla_total_size(actions_attrlen); /* OVS_PACKET_ATTR_MRU */ if (upcall_info->mru) @@ -467,7 +467,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, else hlen = skb->len; - len = upcall_msg_size(upcall_info, hlen - cutlen); + len = upcall_msg_size(upcall_info, hlen - cutlen, + OVS_CB(skb)->acts_origlen); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index ab85c1cae255..e19ace428e38 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -100,12 +100,14 @@ struct datapath { * @input_vport: The original vport packet came in on. This value is cached * when a packet is received by OVS. * @mru: The maximum received fragement size; 0 if the packet is not + * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. * fragmented. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; + u16 acts_origlen; u32 cutlen; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) -- cgit v1.2.3 From 59af5b87695de8e2c772827798938d8e35b49d05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 10:36:47 -0700 Subject: ptr_ring: use kmalloc_array() [ Upstream commit 81fbfe8adaf38d4f5a98c19bebfd41c5d6acaee8 ] As found by syzkaller, malicious users can set whatever tx_queue_len on a tun device and eventually crash the kernel. Lets remove the ALIGN(XXX, SMP_CACHE_BYTES) thing since a small ring buffer is not fast anyway. Fixes: 2e0ab8ca83c1 ("ptr_ring: array based FIFO for pointers") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Michael S. Tsirkin Cc: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ptr_ring.h | 9 +++++---- include/linux/skb_array.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6c70444da3b9..b83507c0640c 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -340,9 +340,9 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r) __PTR_RING_PEEK_CALL_v; \ }) -static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) +static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); + return kcalloc(size, sizeof(void *), gfp); } static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) @@ -417,7 +417,8 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ -static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, + unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { @@ -425,7 +426,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, void ***queues; int i; - queues = kmalloc(nrings * sizeof *queues, gfp); + queues = kmalloc_array(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index f4dfade428f0..be8b902b5845 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -162,7 +162,8 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) } static inline int skb_array_resize_multiple(struct skb_array **rings, - int nrings, int size, gfp_t gfp) + int nrings, unsigned int size, + gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); return ptr_ring_resize_multiple((struct ptr_ring **)rings, -- cgit v1.2.3 From f29c9f46af4ac2540713e43981ded8588e03ae3b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 11:09:12 -0700 Subject: ipv4: better IP_MAX_MTU enforcement [ Upstream commit c780a049f9bf442314335372c9abc4548bfe3e44 ] While working on yet another syzkaller report, I found that our IP_MAX_MTU enforcements were not properly done. gcc seems to reload dev->mtu for min(dev->mtu, IP_MAX_MTU), and final result can be bigger than IP_MAX_MTU :/ This is a problem because device mtu can be changed on other cpus or threads. While this patch does not fix the issue I am working on, it is probably worth addressing it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 4 ++-- net/ipv4/route.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index d3a107850a41..51c6b9786c46 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -339,7 +339,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, !forwarding) return dst_mtu(dst); - return min(dst->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, @@ -351,7 +351,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } - return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } u32 ip_idents_reserve(u32 hash, int segs); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6cd49fd17ac0..6a5b7783932e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1247,7 +1247,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (mtu) return mtu; - mtu = dst->dev->mtu; + mtu = READ_ONCE(dst->dev->mtu); if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { if (rt->rt_uses_gateway && mtu > 576) -- cgit v1.2.3 From 57406e7373220412437a025d6abc10031e9acafd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Aug 2017 12:11:50 +0100 Subject: nfp: fix infinite loop on umapping cleanup [ Upstream commit eac2c68d663effb077210218788952b5a0c1f60e ] The while loop that performs the dma page unmapping never decrements index counter f and hence loops forever. Fix this with a pre-decrement on f. Detected by CoverityScan, CID#1357309 ("Infinite loop") Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs") Signed-off-by: Colin Ian King Acked-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index aee3fd2b6538..4ca82bd8c4f0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -871,8 +871,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; err_unmap: - --f; - while (f >= 0) { + while (--f >= 0) { frag = &skb_shinfo(skb)->frags[f]; dma_unmap_page(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, -- cgit v1.2.3 From 0f5ecc79194dd7d6f0ed1e99e3832aa7578673c0 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 16 Aug 2017 20:16:40 +0200 Subject: sctp: fully initialize the IPv6 address in sctp_v6_to_addr() [ Upstream commit 15339e441ec46fbc3bf3486bb1ae4845b0f1bb8d ] KMSAN reported use of uninitialized sctp_addr->v4.sin_addr.s_addr and sctp_addr->v6.sin6_scope_id in sctp_v6_cmp_addr() (see below). Make sure all fields of an IPv6 address are initialized, which guarantees that the IPv4 fields are also initialized. ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x172/0x1c0 lib/dump_stack.c:42 is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg net/socket.c:643 [inline] SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x44b479 RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 origin description: ----dst_saddr@sctp_v6_get_dst local variable created at: sk_fullsock include/net/sock.h:2321 [inline] inet6_sk include/linux/ipv6.h:309 [inline] sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x172/0x1c0 lib/dump_stack.c:42 is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg net/socket.c:643 [inline] SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x44b479 RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 origin description: ----dst_saddr@sctp_v6_get_dst local variable created at: sk_fullsock include/net/sock.h:2321 [inline] inet6_sk include/linux/ipv6.h:309 [inline] sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 ================================================================== Signed-off-by: Alexander Potapenko Reviewed-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0c090600f377..ca4a63e3eadd 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; + addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = *saddr; + addr->v6.sin6_scope_id = 0; } /* Compare addresses exactly. -- cgit v1.2.3 From 7ad5fb9594ee3341ce4a01dc5dbe3a51932b8e8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 09:41:54 -0700 Subject: tipc: fix use-after-free [ Upstream commit 5bfd37b4de5c98e86b12bd13be5aa46c7484a125 ] syszkaller reported use-after-free in tipc [1] When msg->rep skb is freed, set the pointer to NULL, so that caller does not free it again. [1] ================================================================== BUG: KASAN: use-after-free in skb_push+0xd4/0xe0 net/core/skbuff.c:1466 Read of size 8 at addr ffff8801c6e71e90 by task syz-executor5/4115 CPU: 1 PID: 4115 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #32 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 skb_push+0xd4/0xe0 net/core/skbuff.c:1466 tipc_nl_compat_recv+0x833/0x18f0 net/tipc/netlink_compat.c:1209 genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x31a/0x5d0 net/socket.c:898 call_write_iter include/linux/fs.h:1743 [inline] new_sync_write fs/read_write.c:457 [inline] __vfs_write+0x684/0x970 fs/read_write.c:470 vfs_write+0x189/0x510 fs/read_write.c:518 SYSC_write fs/read_write.c:565 [inline] SyS_write+0xef/0x220 fs/read_write.c:557 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512e9 RSP: 002b:00007f3bc8184c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004512e9 RDX: 0000000000000020 RSI: 0000000020fdb000 RDI: 0000000000000006 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b5e76 R13: 00007f3bc8184b48 R14: 00000000004b5e86 R15: 0000000000000000 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 kmem_cache_alloc_node+0x13d/0x750 mm/slab.c:3651 __alloc_skb+0xf1/0x740 net/core/skbuff.c:219 alloc_skb include/linux/skbuff.h:903 [inline] tipc_tlv_alloc+0x26/0xb0 net/tipc/netlink_compat.c:148 tipc_nl_compat_dumpit+0xf2/0x3c0 net/tipc/netlink_compat.c:248 tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline] tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199 genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x31a/0x5d0 net/socket.c:898 call_write_iter include/linux/fs.h:1743 [inline] new_sync_write fs/read_write.c:457 [inline] __vfs_write+0x684/0x970 fs/read_write.c:470 vfs_write+0x189/0x510 fs/read_write.c:518 SYSC_write fs/read_write.c:565 [inline] SyS_write+0xef/0x220 fs/read_write.c:557 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kmem_cache_free+0x77/0x280 mm/slab.c:3763 kfree_skbmem+0x1a1/0x1d0 net/core/skbuff.c:622 __kfree_skb net/core/skbuff.c:682 [inline] kfree_skb+0x165/0x4c0 net/core/skbuff.c:699 tipc_nl_compat_dumpit+0x36a/0x3c0 net/tipc/netlink_compat.c:260 tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline] tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199 genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x31a/0x5d0 net/socket.c:898 call_write_iter include/linux/fs.h:1743 [inline] new_sync_write fs/read_write.c:457 [inline] __vfs_write+0x684/0x970 fs/read_write.c:470 vfs_write+0x189/0x510 fs/read_write.c:518 SYSC_write fs/read_write.c:565 [inline] SyS_write+0xef/0x220 fs/read_write.c:557 entry_SYSCALL_64_fastpath+0x1f/0xbe The buggy address belongs to the object at ffff8801c6e71dc0 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 208 bytes inside of 224-byte region [ffff8801c6e71dc0, ffff8801c6e71ea0) The buggy address belongs to the page: page:ffffea00071b9c40 count:1 mapcount:0 mapping:ffff8801c6e71000 index:0x0 flags: 0x200000000000100(slab) raw: 0200000000000100 ffff8801c6e71000 0000000000000000 000000010000000c raw: ffffea0007224a20 ffff8801d98caf48 ffff8801d9e79040 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801c6e71d80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801c6e71e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801c6e71e80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff8801c6e71f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801c6e71f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Jon Maloy Cc: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1fd464764765..aedc476fac02 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { kfree_skb(msg->rep); + msg->rep = NULL; return -ENOMEM; } err = __tipc_nl_compat_dumpit(cmd, msg, arg); - if (err) + if (err) { kfree_skb(msg->rep); - + msg->rep = NULL; + } kfree_skb(arg); return err; -- cgit v1.2.3 From 62e9a28baa283a510d31cecc157d9e4270ca3126 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 16 Aug 2017 11:18:09 -0700 Subject: ipv6: reset fn->rr_ptr when replacing route [ Upstream commit 383143f31d7d3525a1dbff733d52fff917f82f15 ] syzcaller reported the following use-after-free issue in rt6_select(): BUG: KASAN: use-after-free in rt6_select net/ipv6/route.c:755 [inline] at addr ffff8800bc6994e8 BUG: KASAN: use-after-free in ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084 at addr ffff8800bc6994e8 Read of size 4 by task syz-executor1/439628 CPU: 0 PID: 439628 Comm: syz-executor1 Not tainted 4.3.5+ #8 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 0000000000000000 ffff88018fe435b0 ffffffff81ca384d ffff8801d3588c00 ffff8800bc699380 ffff8800bc699500 dffffc0000000000 ffff8801d40a47c0 ffff88018fe435d8 ffffffff81735751 ffff88018fe43660 ffff8800bc699380 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0xc1/0x124 lib/dump_stack.c:51 sctp: [Deprecated]: syz-executor0 (pid 439615) Use of struct sctp_assoc_value in delayed_ack socket option. Use struct sctp_sack_info instead [] kasan_object_err+0x21/0x70 mm/kasan/report.c:158 [] print_address_description mm/kasan/report.c:196 [inline] [] kasan_report_error+0x1b4/0x4a0 mm/kasan/report.c:285 [] kasan_report mm/kasan/report.c:305 [inline] [] __asan_report_load4_noabort+0x43/0x50 mm/kasan/report.c:325 [] rt6_select net/ipv6/route.c:755 [inline] [] ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084 [] ip6_pol_route_output+0x81/0xb0 net/ipv6/route.c:1203 [] fib6_rule_action+0x1f0/0x680 net/ipv6/fib6_rules.c:95 [] fib_rules_lookup+0x2a6/0x7a0 net/core/fib_rules.c:223 [] fib6_rule_lookup+0xd0/0x250 net/ipv6/fib6_rules.c:41 [] ip6_route_output+0x1d6/0x2c0 net/ipv6/route.c:1224 [] ip6_dst_lookup_tail+0x4d2/0x890 net/ipv6/ip6_output.c:943 [] ip6_dst_lookup_flow+0x9a/0x250 net/ipv6/ip6_output.c:1079 [] ip6_datagram_dst_update+0x538/0xd40 net/ipv6/datagram.c:91 [] __ip6_datagram_connect net/ipv6/datagram.c:251 [inline] [] ip6_datagram_connect+0x518/0xe50 net/ipv6/datagram.c:272 [] ip6_datagram_connect_v6_only+0x63/0x90 net/ipv6/datagram.c:284 [] inet_dgram_connect+0x170/0x1f0 net/ipv4/af_inet.c:564 [] SYSC_connect+0x1a7/0x2f0 net/socket.c:1582 [] SyS_connect+0x29/0x30 net/socket.c:1563 [] entry_SYSCALL_64_fastpath+0x12/0x17 Object at ffff8800bc699380, in cache ip6_dst_cache size: 384 The root cause of it is that in fib6_add_rt2node(), when it replaces an existing route with the new one, it does not update fn->rr_ptr. This commit resets fn->rr_ptr to NULL when it points to a route which is replaced in fib6_add_rt2node(). Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4345ee39f180..1421a846ad36 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -897,6 +897,8 @@ add: } nsiblings = iter->rt6i_nsiblings; fib6_purge_rt(iter, fn, info->nl_net); + if (fn->rr_ptr == iter) + fn->rr_ptr = NULL; rt6_release(iter); if (nsiblings) { @@ -909,6 +911,8 @@ add: if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); + if (fn->rr_ptr == iter) + fn->rr_ptr = NULL; rt6_release(iter); nsiblings--; } else { -- cgit v1.2.3 From 1c18f93675202d2327932dbfc060e67796238d89 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 18 Aug 2017 17:14:49 -0700 Subject: ipv6: repair fib6 tree in failure case [ Upstream commit 348a4002729ccab8b888b38cbc099efa2f2a2036 ] In fib6_add(), it is possible that fib6_add_1() picks an intermediate node and sets the node's fn->leaf to NULL in order to add this new route. However, if fib6_add_rt2node() fails to add the new route for some reason, fn->leaf will be left as NULL and could potentially cause crash when fn->leaf is accessed in fib6_locate(). This patch makes sure fib6_repair_tree() is called to properly repair fn->leaf in the above failure case. Here is the syzkaller reported general protection fault in fib6_locate: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 0 PID: 40937 Comm: syz-executor3 Not tainted Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d7d64100 ti: ffff8801d01a0000 task.ti: ffff8801d01a0000 RIP: 0010:[] [] __ipv6_prefix_equal64_half include/net/ipv6.h:475 [inline] RIP: 0010:[] [] ipv6_prefix_equal include/net/ipv6.h:492 [inline] RIP: 0010:[] [] fib6_locate_1 net/ipv6/ip6_fib.c:1210 [inline] RIP: 0010:[] [] fib6_locate+0x281/0x3c0 net/ipv6/ip6_fib.c:1233 RSP: 0018:ffff8801d01a36a8 EFLAGS: 00010202 RAX: 0000000000000020 RBX: ffff8801bc790e00 RCX: ffffc90002983000 RDX: 0000000000001219 RSI: ffff8801d01a37a0 RDI: 0000000000000100 RBP: ffff8801d01a36f0 R08: 00000000000000ff R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000001 R13: dffffc0000000000 R14: ffff8801d01a37a0 R15: 0000000000000000 FS: 00007f6afd68c700(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004c6340 CR3: 00000000ba41f000 CR4: 00000000001426f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: ffff8801d01a37a8 ffff8801d01a3780 ffffed003a0346f5 0000000c82a23ea0 ffff8800b7bd7700 ffff8801d01a3780 ffff8800b6a1c940 ffffffff82a23ea0 ffff8801d01a3920 ffff8801d01a3748 ffffffff82a223d6 ffff8801d7d64988 Call Trace: [] ip6_route_del+0x106/0x570 net/ipv6/route.c:2109 [] inet6_rtm_delroute+0xfd/0x100 net/ipv6/route.c:3075 [] rtnetlink_rcv_msg+0x549/0x7a0 net/core/rtnetlink.c:3450 [] netlink_rcv_skb+0x141/0x370 net/netlink/af_netlink.c:2281 [] rtnetlink_rcv+0x2f/0x40 net/core/rtnetlink.c:3456 [] netlink_unicast_kernel net/netlink/af_netlink.c:1206 [inline] [] netlink_unicast+0x518/0x750 net/netlink/af_netlink.c:1232 [] netlink_sendmsg+0x8ce/0xc30 net/netlink/af_netlink.c:1778 [] sock_sendmsg_nosec net/socket.c:609 [inline] [] sock_sendmsg+0xcf/0x110 net/socket.c:619 [] sock_write_iter+0x222/0x3a0 net/socket.c:834 [] new_sync_write+0x1dd/0x2b0 fs/read_write.c:478 [] __vfs_write+0xe4/0x110 fs/read_write.c:491 [] vfs_write+0x178/0x4b0 fs/read_write.c:538 [] SYSC_write fs/read_write.c:585 [inline] [] SyS_write+0xd9/0x1b0 fs/read_write.c:577 [] entry_SYSCALL_64_fastpath+0x12/0x17 Note: there is no "Fixes" tag as this seems to be a bug introduced very early. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1421a846ad36..ff389591a340 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1001,7 +1001,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, /* Create subtree root node */ sfn = node_alloc(); if (!sfn) - goto st_failure; + goto failure; sfn->leaf = info->nl_net->ipv6.ip6_null_entry; atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); @@ -1017,12 +1017,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (IS_ERR(sn)) { /* If it is failed, discard just allocated - root, and then (in st_failure) stale node + root, and then (in failure) stale node in main tree. */ node_free(sfn); err = PTR_ERR(sn); - goto st_failure; + goto failure; } /* Now link new subtree to main tree */ @@ -1036,7 +1036,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (IS_ERR(sn)) { err = PTR_ERR(sn); - goto st_failure; + goto failure; } } @@ -1078,22 +1078,22 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - if (!(rt->dst.flags & DST_NOCACHE)) - dst_free(&rt->dst); + goto failure; } return err; -#ifdef CONFIG_IPV6_SUBTREES - /* Subtree creation failed, probably main tree node - is orphan. If it is, shoot it. +failure: + /* fn->leaf could be NULL if fn is an intermediate node and we + * failed to add the new route to it in both subtree creation + * failure and fib6_add_rt2node() failure case. + * In both cases, fib6_repair_tree() should be called to fix + * fn->leaf. */ -st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); if (!(rt->dst.flags & DST_NOCACHE)) dst_free(&rt->dst); return err; -#endif } /* -- cgit v1.2.3 From aadbe1fe91e785c95f12308b1e1589cfb3984984 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 16 Aug 2017 17:53:36 -0400 Subject: tcp: when rearming RTO, if RTO time is in past then fire RTO ASAP [ Upstream commit cdbeb633ca71a02b7b63bfeb94994bf4e1a0b894 ] In some situations tcp_send_loss_probe() can realize that it's unable to send a loss probe (TLP), and falls back to calling tcp_rearm_rto() to schedule an RTO timer. In such cases, sometimes tcp_rearm_rto() realizes that the RTO was eligible to fire immediately or at some point in the past (delta_us <= 0). Previously in such cases tcp_rearm_rto() was scheduling such "overdue" RTOs to happen at now + icsk_rto, which caused needless delays of hundreds of milliseconds (and non-linear behavior that made reproducible testing difficult). This commit changes the logic to schedule "overdue" RTOs ASAP, rather than at now + icsk_rto. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Suggested-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 32c540145c17..c03850771a4e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3036,8 +3036,7 @@ void tcp_rearm_rto(struct sock *sk) /* delta may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta > 0) - rto = delta; + rto = max(delta, 1); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); -- cgit v1.2.3 From 19f433a9ce9d37ff3817b67dd6d8cb7a504aef62 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 17 Aug 2017 18:29:52 +0300 Subject: net/mlx4_core: Enable 4K UAR if SRIOV module parameter is not enabled [ Upstream commit ca3d89a3ebe79367bd41b6b8ba37664478ae2dba ] enable_4k_uar module parameter was added in patch cited below to address the backward compatibility issue in SRIOV when the VM has system's PAGE_SIZE uar implementation and the Hypervisor has 4k uar implementation. The above compatibility issue does not exist in the non SRIOV case. In this patch, we always enable 4k uar implementation if SRIOV is not enabled on mlx4's supported cards. Fixes: 76e39ccf9c36 ("net/mlx4_core: Fix backward compatibility on VFs") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 551786f58e59..ba652d8a2b93 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -430,7 +430,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) /* Virtual PCI function needs to determine UAR page size from * firmware. Only master PCI function can set the uar page size */ - if (enable_4k_uar) + if (enable_4k_uar || !dev->persist->num_vfs) dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; else dev->uar_page_shift = PAGE_SHIFT; @@ -2269,7 +2269,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; - if (enable_4k_uar) { + if (enable_4k_uar || !dev->persist->num_vfs) { init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; -- cgit v1.2.3 From f3f5bf2755c75d43930aab28f1affd0602e6fd27 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Aug 2017 23:14:58 +0100 Subject: irda: do not leak initialized list.dev to userspace [ Upstream commit b024d949a3c24255a7ef1a470420eb478949aa4c ] list.dev has not been initialized and so the copy_to_user is copying data from the stack back to user space which is a potential information leak. Fix this ensuring all of list is initialized to zero. Detected by CoverityScan, CID#1357894 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/irda/af_irda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 391c3cbd2eed..101ed6c42808 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2223,7 +2223,7 @@ static int irda_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); - struct irda_device_list list; + struct irda_device_list list = { 0 }; struct irda_device_info *discoveries; struct irda_ias_set * ias_opt; /* IAS get/query params */ struct ias_object * ias_obj; /* Object in IAS */ -- cgit v1.2.3 From 3e00bf91fefced104ba91721674efe4502cb416a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Aug 2017 11:01:36 +0800 Subject: net: sched: fix NULL pointer dereference when action calls some targets [ Upstream commit 4f8a881acc9d1adaf1e552349a0b1df28933a04c ] As we know in some target's checkentry it may dereference par.entryinfo to check entry stuff inside. But when sched action calls xt_check_target, par.entryinfo is set with NULL. It would cause kernel panic when calling some targets. It can be reproduce with: # tc qd add dev eth1 ingress handle ffff: # tc filter add dev eth1 parent ffff: u32 match u32 0 0 action xt \ -j ECN --ecn-tcp-remove It could also crash kernel when using target CLUSTERIP or TPROXY. By now there's no proper value for par.entryinfo in ipt_init_target, but it can not be set with NULL. This patch is to void all these panics by setting it with an ipt_entry obj with all members = 0. Note that this issue has been there since the very beginning. Signed-off-by: Xin Long Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ipt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index a1aec0a6c789..50030519a89b 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -41,6 +41,7 @@ static int ipt_init_target(struct xt_entry_target *t, char *table, { struct xt_tgchk_param par; struct xt_target *target; + struct ipt_entry e = {}; int ret = 0; target = xt_request_find_target(AF_INET, t->u.user.name, @@ -51,6 +52,7 @@ static int ipt_init_target(struct xt_entry_target *t, char *table, t->u.kernel.target = target; memset(&par, 0, sizeof(par)); par.table = table; + par.entryinfo = &e; par.target = target; par.targinfo = t->data; par.hook_mask = hook; -- cgit v1.2.3 From 7fa2fdf9f19e3b4bc71051c115271741369b5e7f Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 19 Aug 2017 15:37:07 +0300 Subject: net_sched: fix order of queue length updates in qdisc_replace() [ Upstream commit 68a66d149a8c78ec6720f268597302883e48e9fa ] This important to call qdisc_tree_reduce_backlog() after changing queue length. Parent qdisc should deactivate class in ->qlen_notify() called from qdisc_tree_reduce_backlog() but this happens only if qdisc->q.qlen in zero. Missed class deactivations leads to crashes/warnings at picking packets from empty qdisc and corrupting state at reactivating this class in future. Signed-off-by: Konstantin Khlebnikov Fixes: 86a7996cc8a0 ("net_sched: introduce qdisc_replace() helper") Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e6aa0a249672..f18fc1a0321f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -768,8 +768,11 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) { - qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); + unsigned int qlen = old->q.qlen; + unsigned int backlog = old->qstats.backlog; + qdisc_reset(old); + qdisc_tree_reduce_backlog(old, qlen, backlog); } sch_tree_unlock(sch); -- cgit v1.2.3 From e37bdeee95a4a714dadc9743c86d8822c87038ac Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 2 Jul 2017 02:13:30 +0200 Subject: bpf, verifier: add additional patterns to evaluate_reg_imm_alu [ Upstream commit 43188702b3d98d2792969a3377a30957f05695e6 ] Currently the verifier does not track imm across alu operations when the source register is of unknown type. This adds additional pattern matching to catch this and track imm. We've seen LLVM generating this pattern while working on cilium. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8ce679d36c58..404b6ea7dd92 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1467,6 +1467,65 @@ static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + u8 opcode = BPF_OP(insn->code); + s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm); + + /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */ + if (src_reg->imm > 0 && dst_reg->imm) { + switch (opcode) { + case BPF_ADD: + /* dreg += sreg + * where both have zero upper bits. Adding them + * can only result making one more bit non-zero + * in the larger value. + * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47) + * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47) + */ + dst_reg->imm = min(src_reg->imm, 63 - imm_log2); + dst_reg->imm--; + break; + case BPF_AND: + /* dreg &= sreg + * AND can not extend zero bits only shrink + * Ex. 0x00..00ffffff + * & 0x0f..ffffffff + * ---------------- + * 0x00..00ffffff + */ + dst_reg->imm = max(src_reg->imm, 63 - imm_log2); + break; + case BPF_OR: + /* dreg |= sreg + * OR can only extend zero bits + * Ex. 0x00..00ffffff + * | 0x0f..ffffffff + * ---------------- + * 0x0f..00ffffff + */ + dst_reg->imm = min(src_reg->imm, 63 - imm_log2); + break; + case BPF_SUB: + case BPF_MUL: + case BPF_RSH: + case BPF_LSH: + /* These may be flushed out later */ + default: + mark_reg_unknown_value(regs, insn->dst_reg); + } + } else { + mark_reg_unknown_value(regs, insn->dst_reg); + } + + dst_reg->type = UNKNOWN_VALUE; + return 0; +} + static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1475,6 +1534,9 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, struct bpf_reg_state *src_reg = ®s[insn->src_reg]; u8 opcode = BPF_OP(insn->code); + if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE) + return evaluate_reg_imm_alu_unknown(env, insn); + /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. * Don't care about overflow or negative values, just add them */ -- cgit v1.2.3 From 577aa83b2896b9c92780c8594d4b7d2cec5f7abb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 May 2017 03:00:06 +0200 Subject: bpf: adjust verifier heuristics [ Upstream commit 3c2ce60bdd3d57051bf85615deec04a694473840 ] Current limits with regards to processing program paths do not really reflect today's needs anymore due to programs becoming more complex and verifier smarter, keeping track of more data such as const ALU operations, alignment tracking, spilling of PTR_TO_MAP_VALUE_ADJ registers, and other features allowing for smarter matching of what LLVM generates. This also comes with the side-effect that we result in fewer opportunities to prune search states and thus often need to do more work to prove safety than in the past due to different register states and stack layout where we mismatch. Generally, it's quite hard to determine what caused a sudden increase in complexity, it could be caused by something as trivial as a single branch somewhere at the beginning of the program where LLVM assigned a stack slot that is marked differently throughout other branches and thus causing a mismatch, where verifier then needs to prove safety for the whole rest of the program. Subsequently, programs with even less than half the insn size limit can get rejected. We noticed that while some programs load fine under pre 4.11, they get rejected due to hitting limits on more recent kernels. We saw that in the vast majority of cases (90+%) pruning failed due to register mismatches. In case of stack mismatches, majority of cases failed due to different stack slot types (invalid, spill, misc) rather than differences in spilled registers. This patch makes pruning more aggressive by also adding markers that sit at conditional jumps as well. Currently, we only mark jump targets for pruning. For example in direct packet access, these are usually error paths where we bail out. We found that adding these markers, it can reduce number of processed insns by up to 30%. Another option is to ignore reg->id in probing PTR_TO_MAP_VALUE_OR_NULL registers, which can help pruning slightly as well by up to 7% observed complexity reduction as stand-alone. Meaning, if a previous path with register type PTR_TO_MAP_VALUE_OR_NULL for map X was found to be safe, then in the current state a PTR_TO_MAP_VALUE_OR_NULL register for the same map X must be safe as well. Last but not least the patch also adds a scheduling point and bumps the current limit for instructions to be processed to a more adequate value. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 404b6ea7dd92..2ee2e7970df6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -139,7 +139,7 @@ struct bpf_verifier_stack_elem { struct bpf_verifier_stack_elem *next; }; -#define BPF_COMPLEXITY_LIMIT_INSNS 65536 +#define BPF_COMPLEXITY_LIMIT_INSNS 98304 #define BPF_COMPLEXITY_LIMIT_STACK 1024 struct bpf_call_arg_meta { @@ -2452,6 +2452,7 @@ peek_stack: env->explored_states[t + 1] = STATE_LIST_MARK; } else { /* conditional jump with two edges */ + env->explored_states[t] = STATE_LIST_MARK; ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret == 1) goto peek_stack; @@ -2610,6 +2611,12 @@ static bool states_equal(struct bpf_verifier_env *env, rcur->type != NOT_INIT)) continue; + /* Don't care about the reg->id in this case. */ + if (rold->type == PTR_TO_MAP_VALUE_OR_NULL && + rcur->type == PTR_TO_MAP_VALUE_OR_NULL && + rold->map_ptr == rcur->map_ptr) + continue; + if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && compare_ptrs_to_packet(rold, rcur)) continue; @@ -2744,6 +2751,9 @@ static int do_check(struct bpf_verifier_env *env) goto process_bpf_exit; } + if (need_resched()) + cond_resched(); + if (log_level && do_print_state) { verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); print_verifier_state(&env->cur_state); -- cgit v1.2.3 From 8d674bee8f66c4796e396fe69355669e164ab179 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:02 +0200 Subject: bpf, verifier: fix alu ops against map_value{, _adj} register types [ Upstream commit fce366a9dd0ddc47e7ce05611c266e8574a45116 ] While looking into map_value_adj, I noticed that alu operations directly on the map_value() resp. map_value_adj() register (any alu operation on a map_value() register will turn it into a map_value_adj() typed register) are not sufficiently protected against some of the operations. Two non-exhaustive examples are provided that the verifier needs to reject: i) BPF_AND on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xbf842a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (57) r0 &= 8 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=8 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns ii) BPF_ADD in 32 bit mode on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xc24eee00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (04) (u32) r0 += (u32) 0 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns Issue is, while min_value / max_value boundaries for the access are adjusted appropriately, we change the pointer value in a way that cannot be sufficiently tracked anymore from its origin. Operations like BPF_{AND,OR,DIV,MUL,etc} on a destination register that is PTR_TO_MAP_VALUE{,_ADJ} was probably unintended, in fact, all the test cases coming with 484611357c19 ("bpf: allow access into map value arrays") perform BPF_ADD only on the destination register that is PTR_TO_MAP_VALUE_ADJ. Only for UNKNOWN_VALUE register types such operations make sense, f.e. with unknown memory content fetched initially from a constant offset from the map value memory into a register. That register is then later tested against lower / upper bounds, so that the verifier can then do the tracking of min_value / max_value, and properly check once that UNKNOWN_VALUE register is added to the destination register with type PTR_TO_MAP_VALUE{,_ADJ}. This is also what the original use-case is solving. Note, tracking on what is being added is done through adjust_reg_min_max_vals() and later access to the map value enforced with these boundaries and the given offset from the insn through check_map_access_adj(). Tests will fail for non-root environment due to prohibited pointer arithmetic, in particular in check_alu_op(), we bail out on the is_pointer_value() check on the dst_reg (which is false in root case as we allow for pointer arithmetic via env->allow_ptr_leaks). Similarly to PTR_TO_PACKET, one way to fix it is to restrict the allowed operations on PTR_TO_MAP_VALUE{,_ADJ} registers to 64 bit mode BPF_ADD. The test_verifier suite runs fine after the patch and it also rejects mentioned test cases. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2ee2e7970df6..df0485bdaccd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1870,6 +1870,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * register as unknown. */ if (env->allow_ptr_leaks && + BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD && (dst_reg->type == PTR_TO_MAP_VALUE || dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) dst_reg->type = PTR_TO_MAP_VALUE_ADJ; -- cgit v1.2.3 From bf5b91b782e8975ec1021139c5e3bd6d3afeb980 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jul 2017 00:00:21 +0200 Subject: bpf: fix mixed signed/unsigned derived min/max value bounds [ Upstream commit 4cabc5b186b5427b9ee5a7495172542af105f02b ] Edward reported that there's an issue in min/max value bounds tracking when signed and unsigned compares both provide hints on limits when having unknown variables. E.g. a program such as the following should have been rejected: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff8a94cda93400 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 11: (65) if r1 s> 0x1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 14: (b7) r0 = 0 15: (95) exit What happens is that in the first part ... 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 ... r1 carries an unsigned value, and is compared as unsigned against a register carrying an immediate. Verifier deduces in reg_set_min_max() that since the compare is unsigned and operation is greater than (>), that in the fall-through/false case, r1's minimum bound must be 0 and maximum bound must be r2. Latter is larger than the bound and thus max value is reset back to being 'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now 'R1=inv,min_value=0'. The subsequent test ... 11: (65) if r1 s> 0x1 goto pc+2 ... is a signed compare of r1 with immediate value 1. Here, verifier deduces in reg_set_min_max() that since the compare is signed this time and operation is greater than (>), that in the fall-through/false case, we can deduce that r1's maximum bound must be 1, meaning with prior test, we result in r1 having the following state: R1=inv,min_value=0,max_value=1. Given that the actual value this holds is -8, the bounds are wrongly deduced. When this is being added to r0 which holds the map_value(_adj) type, then subsequent store access in above case will go through check_mem_access() which invokes check_map_access_adj(), that will then probe whether the map memory is in bounds based on the min_value and max_value as well as access size since the actual unknown value is min_value <= x <= max_value; commit fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{, _adj} register types") provides some more explanation on the semantics. It's worth to note in this context that in the current code, min_value and max_value tracking are used for two things, i) dynamic map value access via check_map_access_adj() and since commit 06c1c049721a ("bpf: allow helpers access to variable memory") ii) also enforced at check_helper_mem_access() when passing a memory address (pointer to packet, map value, stack) and length pair to a helper and the length in this case is an unknown value defining an access range through min_value/max_value in that case. The min_value/max_value tracking is /not/ used in the direct packet access case to track ranges. However, the issue also affects case ii), for example, the following crafted program based on the same principle must be rejected as well: 0: (b7) r2 = 0 1: (bf) r3 = r10 2: (07) r3 += -512 3: (7a) *(u64 *)(r10 -16) = -8 4: (79) r4 = *(u64 *)(r10 -16) 5: (b7) r6 = -1 6: (2d) if r4 > r6 goto pc+5 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 7: (65) if r4 s> 0x1 goto pc+4 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 8: (07) r4 += 1 9: (b7) r5 = 0 10: (6a) *(u16 *)(r10 -512) = 0 11: (85) call bpf_skb_load_bytes#26 12: (b7) r0 = 0 13: (95) exit Meaning, while we initialize the max_value stack slot that the verifier thinks we access in the [1,2] range, in reality we pass -7 as length which is interpreted as u32 in the helper. Thus, this issue is relevant also for the case of helper ranges. Resetting both bounds in check_reg_overflow() in case only one of them exceeds limits is also not enough as similar test can be created that uses values which are within range, thus also here learned min value in r1 is incorrect when mixed with later signed test to create a range: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff880ad081fa00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (65) if r1 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 14: (b7) r0 = 0 15: (95) exit This leaves us with two options for fixing this: i) to invalidate all prior learned information once we switch signed context, ii) to track min/max signed and unsigned boundaries separately as done in [0]. (Given latter introduces major changes throughout the whole verifier, it's rather net-next material, thus this patch follows option i), meaning we can derive bounds either from only signed tests or only unsigned tests.) There is still the case of adjust_reg_min_max_vals(), where we adjust bounds on ALU operations, meaning programs like the following where boundaries on the reg get mixed in context later on when bounds are merged on the dst reg must get rejected, too: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff89b2bf87ce00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+6 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (b7) r7 = 1 12: (65) if r7 s> 0x0 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp 13: (b7) r0 = 0 14: (95) exit from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp 15: (0f) r7 += r1 16: (65) if r7 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 17: (0f) r0 += r7 18: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 19: (b7) r0 = 0 20: (95) exit Meaning, in adjust_reg_min_max_vals() we must also reset range values on the dst when src/dst registers have mixed signed/ unsigned derived min/max value bounds with one unbounded value as otherwise they can be added together deducing false boundaries. Once both boundaries are established from either ALU ops or compare operations w/o mixing signed/unsigned insns, then they can safely be added to other regs also having both boundaries established. Adding regs with one unbounded side to a map value where the bounded side has been learned w/o mixing ops is possible, but the resulting map value won't recover from that, meaning such op is considered invalid on the time of actual access. Invalid bounds are set on the dst reg in case i) src reg, or ii) in case dst reg already had them. The only way to recover would be to perform i) ALU ops but only 'add' is allowed on map value types or ii) comparisons, but these are disallowed on pointers in case they span a range. This is fine as only BPF_JEQ and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers which potentially turn them into PTR_TO_MAP_VALUE type depending on the branch, so only here min/max value cannot be invalidated for them. In terms of state pruning, value_from_signed is considered as well in states_equal() when dealing with adjusted map values. With regards to breaking existing programs, there is a small risk, but use-cases are rather quite narrow where this could occur and mixing compares probably unlikely. Joint work with Josef and Edward. [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html Fixes: 484611357c19 ("bpf: allow access into map value arrays") Reported-by: Edward Cree Signed-off-by: Daniel Borkmann Signed-off-by: Edward Cree Signed-off-by: Josef Bacik Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 110 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 97 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a13b031dc6b8..3101141661a1 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -40,6 +40,7 @@ struct bpf_reg_state { */ s64 min_value; u64 max_value; + bool value_from_signed; }; enum bpf_stack_slot_type { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index df0485bdaccd..4f9fa4c24c30 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -682,12 +682,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, return -EACCES; } -static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +static bool __is_pointer_value(bool allow_ptr_leaks, + const struct bpf_reg_state *reg) { - if (env->allow_ptr_leaks) + if (allow_ptr_leaks) return false; - switch (env->cur_state.regs[regno].type) { + switch (reg->type) { case UNKNOWN_VALUE: case CONST_IMM: return false; @@ -696,6 +697,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } } +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +{ + return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); +} + static int check_ptr_alignment(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int off, int size) { @@ -1592,10 +1598,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, } /* We don't know anything about what was done to this register, mark it - * as unknown. + * as unknown. Also, if both derived bounds came from signed/unsigned + * mixed compares and one side is unbounded, we cannot really do anything + * with them as boundaries cannot be trusted. Thus, arithmetic of two + * regs of such kind will get invalidated bounds on the dst side. */ - if (min_val == BPF_REGISTER_MIN_RANGE && - max_val == BPF_REGISTER_MAX_RANGE) { + if ((min_val == BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (BPF_SRC(insn->code) == BPF_X && + ((min_val != BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (min_val == BPF_REGISTER_MIN_RANGE && + max_val != BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value != BPF_REGISTER_MIN_RANGE && + dst_reg->max_value == BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value == BPF_REGISTER_MIN_RANGE && + dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) && + regs[insn->dst_reg].value_from_signed != + regs[insn->src_reg].value_from_signed)) { reset_reg_range_values(regs, insn->dst_reg); return; } @@ -1939,38 +1959,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; + /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum val is val, * otherwise we know the min val is val+1. */ false_reg->max_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val + 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; + /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum value is val - 1, * otherwise we know the mimimum value is val. */ false_reg->max_value = val - 1; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -1978,6 +2023,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg @@ -1987,39 +2038,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; + /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* * If this is false, then the val is <= the register, if it is * true the register <= to the val. */ false_reg->min_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val - 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; + /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* If this is false then constant < register, if it is true then * the register < constant. */ false_reg->min_value = val + 1; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -2027,6 +2103,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, -- cgit v1.2.3 From 655da3da9bb3e35b1b4b57d7914d91fd33efde8b Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 21 Jul 2017 14:37:34 +0100 Subject: bpf/verifier: fix min/max handling in BPF_SUB [ Upstream commit 9305706c2e808ae59f1eb201867f82f1ddf6d7a6 ] We have to subtract the src max from the dst min, and vice-versa, since (e.g.) the smallest result comes from the largest subtrahend. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Edward Cree Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4f9fa4c24c30..779c871c5dcd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1624,10 +1624,12 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, * do our normal operations to the register, we need to set the values * to the min/max since they are undefined. */ - if (min_val == BPF_REGISTER_MIN_RANGE) - dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - if (max_val == BPF_REGISTER_MAX_RANGE) - dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + if (opcode != BPF_SUB) { + if (min_val == BPF_REGISTER_MIN_RANGE) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + if (max_val == BPF_REGISTER_MAX_RANGE) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + } switch (opcode) { case BPF_ADD: @@ -1637,10 +1639,17 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_reg->max_value += max_val; break; case BPF_SUB: + /* If one of our values was at the end of our ranges, then the + * _opposite_ value in the dst_reg goes to the end of our range. + */ + if (min_val == BPF_REGISTER_MIN_RANGE) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + if (max_val == BPF_REGISTER_MAX_RANGE) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) - dst_reg->min_value -= min_val; + dst_reg->min_value -= max_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) - dst_reg->max_value -= max_val; + dst_reg->max_value -= min_val; break; case BPF_MUL: if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) -- cgit v1.2.3 From a00a9cd7daf9cbb3359bb69ee8c4df426b5be738 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 18 Aug 2017 12:17:21 -0700 Subject: Input: trackpoint - add new trackpoint firmware ID commit ec667683c532c93fb41e100e5d61a518971060e2 upstream. Synaptics add new TP firmware ID: 0x2 and 0x3, for now both lower 2 bits are indicated as TP. Change the constant to bitwise values. This makes trackpoint to be recognized on Lenovo Carbon X1 Gen5 instead of it being identified as "PS/2 Generic Mouse". Signed-off-by: Aaron Ma Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/trackpoint.c | 3 ++- drivers/input/mouse/trackpoint.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 354d47ecd66a..ce6ff9b301bb 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -265,7 +265,8 @@ static int trackpoint_start_protocol(struct psmouse *psmouse, unsigned char *fir if (ps2_command(&psmouse->ps2dev, param, MAKE_PS2_CMD(0, 2, TP_READ_ID))) return -1; - if (param[0] != TP_MAGIC_IDENT) + /* add new TP ID. */ + if (!(param[0] & TP_MAGIC_IDENT)) return -1; if (firmware_id) diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h index 5617ed3a7d7a..88055755f82e 100644 --- a/drivers/input/mouse/trackpoint.h +++ b/drivers/input/mouse/trackpoint.h @@ -21,8 +21,9 @@ #define TP_COMMAND 0xE2 /* Commands start with this */ #define TP_READ_ID 0xE1 /* Sent for device identification */ -#define TP_MAGIC_IDENT 0x01 /* Sent after a TP_READ_ID followed */ +#define TP_MAGIC_IDENT 0x03 /* Sent after a TP_READ_ID followed */ /* by the firmware ID */ + /* Firmware ID includes 0x1, 0x2, 0x3 */ /* -- cgit v1.2.3 From 9ab605d2bb6fe43c57da8fd79120d7ef7b20b32f Mon Sep 17 00:00:00 2001 From: KT Liao Date: Fri, 18 Aug 2017 16:58:15 -0700 Subject: Input: elan_i2c - add ELAN0602 ACPI ID to support Lenovo Yoga310 commit 1d2226e45040ed4aee95b633cbd64702bf7fc2a1 upstream. Add ELAN0602 to the list of known ACPI IDs to enable support for ELAN touchpads found in Lenovo Yoga310. Signed-off-by: KT Liao Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 98d4e515587a..681dce15fbc8 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1234,6 +1234,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN0602", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, { "ELAN0605", 0 }, -- cgit v1.2.3 From 963068b1ff1c640ea54026c918d6616079e22bed Mon Sep 17 00:00:00 2001 From: Masaki Ota Date: Thu, 24 Aug 2017 15:44:36 -0700 Subject: Input: ALPS - fix two-finger scroll breakage in right side on ALPS touchpad commit 4a646580f793d19717f7e034c8d473b509c27d49 upstream. Fixed the issue that two finger scroll does not work correctly on V8 protocol. The cause is that V8 protocol X-coordinate decode is wrong at SS4 PLUS device. I added SS4 PLUS X decode definition. Mote notes: the problem manifests itself by the commit e7348396c6d5 ("Input: ALPS - fix V8+ protocol handling (73 03 28)"), where a fix for the V8+ protocol was applied. Although the culprit must have been present beforehand, the two-finger scroll worked casually even with the wrongly reported values by some reason. It got broken by the commit above just because it changed x_max value, and this made libinput correctly figuring the MT events. Since the X coord is reported as falsely doubled, the events on the right-half side go outside the boundary, thus they are no longer handled. This resulted as a broken two-finger scroll. One finger event is decoded differently, and it didn't suffer from this problem. The problem was only about MT events. --tiwai Fixes: e7348396c6d5 ("Input: ALPS - fix V8+ protocol handling (73 03 28)") Signed-off-by: Masaki Ota Tested-by: Takashi Iwai Tested-by: Paul Donohue Signed-off-by: Takashi Iwai Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/alps.c | 41 +++++++++++++++++++++++++++++++---------- drivers/input/mouse/alps.h | 8 ++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 518e8a7bd5f9..f26807c75be4 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1212,14 +1212,24 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_TWO: if (priv->flags & ALPS_BUTTONPAD) { - f->mt[0].x = SS4_BTL_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_BTL_MF_X_V2(p, 1); + } f->mt[0].y = SS4_BTL_MF_Y_V2(p, 0); - f->mt[1].x = SS4_BTL_MF_X_V2(p, 1); f->mt[1].y = SS4_BTL_MF_Y_V2(p, 1); } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + } f->mt[0].y = SS4_STD_MF_Y_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); f->mt[1].y = SS4_STD_MF_Y_V2(p, 1); } f->pressure = SS4_MF_Z_V2(p, 0) ? 0x30 : 0; @@ -1236,16 +1246,27 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { - f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + } else { + f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + } + f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); - f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); no_data_x = SS4_MFPACKET_NO_AX_BL; no_data_y = SS4_MFPACKET_NO_AY_BL; } else { - f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + } f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); - f->mt[3].x = SS4_STD_MF_X_V2(p, 1); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); no_data_x = SS4_MFPACKET_NO_AX; no_data_y = SS4_MFPACKET_NO_AY; @@ -2535,8 +2556,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse, memset(otp, 0, sizeof(otp)); - if (alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0]) || - alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0])) + if (alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0]) || + alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0])) return -1; alps_update_device_area_ss4_v2(otp, priv); diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index dbfd26073e1a..793123717145 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -91,6 +91,10 @@ enum SS4_PACKET_ID { ((_b[1 + _i * 3] << 5) & 0x1F00) \ ) +#define SS4_PLUS_STD_MF_X_V2(_b, _i) (((_b[0 + (_i) * 3] << 4) & 0x0070) | \ + ((_b[1 + (_i) * 3] << 4) & 0x0F80) \ + ) + #define SS4_STD_MF_Y_V2(_b, _i) (((_b[1 + (_i) * 3] << 3) & 0x0010) | \ ((_b[2 + (_i) * 3] << 5) & 0x01E0) | \ ((_b[2 + (_i) * 3] << 4) & 0x0E00) \ @@ -100,6 +104,10 @@ enum SS4_PACKET_ID { ((_b[0 + (_i) * 3] >> 3) & 0x0010) \ ) +#define SS4_PLUS_BTL_MF_X_V2(_b, _i) (SS4_PLUS_STD_MF_X_V2(_b, _i) | \ + ((_b[0 + (_i) * 3] >> 4) & 0x0008) \ + ) + #define SS4_BTL_MF_Y_V2(_b, _i) (SS4_STD_MF_Y_V2(_b, _i) | \ ((_b[0 + (_i) * 3] >> 3) & 0x0008) \ ) -- cgit v1.2.3 From a745333fa924b26c916e7b21d8e5177ca597b532 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Aug 2017 13:05:11 +0200 Subject: KVM: s390: sthyi: fix sthyi inline assembly commit 4a4eefcd0e49f9f339933324c1bde431186a0a7d upstream. The sthyi inline assembly misses register r3 within the clobber list. The sthyi instruction will always write a return code to register "R2+1", which in this case would be r3. Due to that we may have register corruption and see host crashes or data corruption depending on how gcc decided to allocate and use registers during compile time. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Reviewed-by: Janosch Frank Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/sthyi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 05c98bb853cf..c10ffd94f93c 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -394,7 +394,7 @@ static int sthyi(u64 vaddr) "srl %[cc],28\n" : [cc] "=d" (cc) : [code] "d" (code), [addr] "a" (addr) - : "memory", "cc"); + : "3", "memory", "cc"); return cc; } -- cgit v1.2.3 From cfb917a17d3df75243199ad8bad7e9505fd79468 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Aug 2017 14:27:30 +0200 Subject: KVM: s390: sthyi: fix specification exception detection commit 857b8de96795646c5891cf44ae6fb19b9ff74bf9 upstream. sthyi should only generate a specification exception if the function code is zero and the response buffer is not on a 4k boundary. The current code would also test for unknown function codes if the response buffer, that is currently only defined for function code 0, is not on a 4k boundary and incorrectly inject a specification exception instead of returning with condition code 3 and return code 4 (unsupported function code). Fix this by moving the boundary check. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Reviewed-by: Janosch Frank Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/sthyi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index c10ffd94f93c..2f04ad1ea01c 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -422,7 +422,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); trace_kvm_s390_handle_sthyi(vcpu, code, addr); - if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + if (reg1 == reg2 || reg1 & 1 || reg2 & 1) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); if (code & 0xffff) { @@ -430,6 +430,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu) goto out; } + if (addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* * If the page has not yet been faulted in, we want to do that * now and not after all the expensive calculations. -- cgit v1.2.3 From 275f3033ccf6114f3c7d4535665fcf7bdcc9f04e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 24 Aug 2017 11:59:31 +0200 Subject: KVM: x86: block guest protection keys unless the host has them enabled commit c469268cd523245cc58255f6696e0c295485cb0b upstream. If the host has protection keys disabled, we cannot read and write the guest PKRU---RDPKRU and WRPKRU fail with #GP(0) if CR4.PKE=0. Block the PKU cpuid bit in that case. This ensures that guest_CR4.PKE=1 implies host_CR4.PKE=1. Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 649d8f2c1e40..91af75e37306 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -456,7 +456,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_7_ECX); /* PKU is not yet implemented for shadow paging. */ - if (!tdp_enabled) + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); } else { entry->ebx = 0; -- cgit v1.2.3 From 9767a4567831aa897e71474014a6e903345b8d0a Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 22 Aug 2017 08:33:53 +0200 Subject: ALSA: usb-audio: Add delay quirk for H650e/Jabra 550a USB headsets commit 07b3b5e9ed807a0d2077319b8e43a42e941db818 upstream. These headsets reports a lot of: cannot set freq 44100 to ep 0x81 and need a small delay between sample rate settings, just like Zoom R16/24. Add both headsets to the Zoom R16/24 quirk for a 1 ms delay between control msgs. Signed-off-by: Joakim Tjernlund Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 95c2749ac8a3..286efc3a6116 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1309,10 +1309,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); - /* Zoom R16/24 needs a tiny delay here, otherwise requests like - * get/set frequency return as failed despite actually succeeding. + /* Zoom R16/24, Logitech H650e, Jabra 550a needs a tiny delay here, + * otherwise requests like get/set frequency return as failed despite + * actually succeeding. */ - if (chip->usb_id == USB_ID(0x1686, 0x00dd) && + if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || + chip->usb_id == USB_ID(0x046d, 0x0a46) || + chip->usb_id == USB_ID(0x0b0e, 0x0349)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(1); } -- cgit v1.2.3 From 8989c70d30b60749dccb549ed66f915b646ef038 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2017 08:15:13 +0200 Subject: ALSA: core: Fix unexpected error at replacing user TLV commit 88c54cdf61f508ebcf8da2d819f5dfc03e954d1d upstream. When user tries to replace the user-defined control TLV, the kernel checks the change of its content via memcmp(). The problem is that the kernel passes the return value from memcmp() as is. memcmp() gives a non-zero negative value depending on the comparison result, and this shall be recognized as an error code. The patch covers that corner-case, return 1 properly for the changed TLV. Fixes: 8aa9b586e420 ("[ALSA] Control API - more robust TLV implementation") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index fb096cb20a80..995cde48c1be 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1156,7 +1156,7 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, mutex_lock(&ue->card->user_ctl_lock); change = ue->tlv_data_size != size; if (!change) - change = memcmp(ue->tlv_data, new_data, size); + change = memcmp(ue->tlv_data, new_data, size) != 0; kfree(ue->tlv_data); ue->tlv_data = new_data; ue->tlv_data_size = size; -- cgit v1.2.3 From b52bce93db99794689cb5af7398aa47c57b343e8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2017 09:30:17 +0200 Subject: ALSA: hda - Add stereo mic quirk for Lenovo G50-70 (17aa:3978) commit bbba6f9d3da357bbabc6fda81e99ff5584500e76 upstream. Lenovo G50-70 (17aa:3978) with Conexant codec chip requires the similar workaround for the inverted stereo dmic like other Lenovo models. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1020657 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index c15c51bea26d..f2e4e99ce651 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -854,6 +854,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI), SND_PCI_QUIRK(0x1c06, 0x2011, "Lemote A1004", CXT_PINCFG_LEMOTE_A1004), -- cgit v1.2.3 From 6cba07468521d4ffbebd59363a7d0bc5962b1002 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 20 Aug 2017 15:54:26 +0900 Subject: ALSA: firewire: fix NULL pointer dereference when releasing uninitialized data of iso-resource commit 0c264af7be2013266c5b4c644f3f366399ee490a upstream. When calling 'iso_resource_free()' for uninitialized data, this function causes NULL pointer dereference due to its 'unit' member. This occurs when unplugging audio and music units on IEEE 1394 bus at failure of card registration. This commit fixes the bug. The bug exists since kernel v4.5. Fixes: 324540c4e05c ('ALSA: fireface: postpone sound card registration') at v4.12 Fixes: 8865a31e0fd8 ('ALSA: firewire-motu: postpone sound card registration') at v4.12 Fixes: b610386c8afb ('ALSA: firewire-tascam: deleyed registration of sound card') at v4.7 Fixes: 86c8dd7f4da3 ('ALSA: firewire-digi00x: delayed registration of sound card') at v4.7 Fixes: 6c29230e2a5f ('ALSA: oxfw: delayed registration of sound card') at v4.7 Fixes: 7d3c1d5901aa ('ALSA: fireworks: delayed registration of sound card') at v4.7 Fixes: 04a2c73c97eb ('ALSA: bebob: delayed registration of sound card') at v4.7 Fixes: b59fb1900b4f ('ALSA: dice: postpone card registration') at v4.5 Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/iso-resources.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c index f0e4d502d604..066b5df666f4 100644 --- a/sound/firewire/iso-resources.c +++ b/sound/firewire/iso-resources.c @@ -210,9 +210,14 @@ EXPORT_SYMBOL(fw_iso_resources_update); */ void fw_iso_resources_free(struct fw_iso_resources *r) { - struct fw_card *card = fw_parent_device(r->unit)->card; + struct fw_card *card; int bandwidth, channel; + /* Not initialized. */ + if (r->unit == NULL) + return; + card = fw_parent_device(r->unit)->card; + mutex_lock(&r->mutex); if (r->allocated) { -- cgit v1.2.3 From 8d362cb3bfbbaea8f3ea44d785408c719b5f6cef Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 1 Aug 2017 12:58:47 +0300 Subject: ARCv2: PAE40: Explicitly set MSB counterpart of SLC region ops addresses commit 7d79cee2c6540ea64dd917a14e2fd63d4ac3d3c0 upstream. It is necessary to explicitly set both SLC_AUX_RGN_START1 and SLC_AUX_RGN_END1 which hold MSB bits of the physical address correspondingly of region start and end otherwise SLC region operation is executed in unpredictable manner Without this patch, SLC flushes on HSDK (IOC disabled) were taking seconds. Reported-by: Vladimir Kondratiev Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta [vgupta: PAR40 regs only written if PAE40 exist] Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/cache.h | 2 ++ arch/arc/mm/cache.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index b3410ff6a62d..4fd6272e6c01 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -89,7 +89,9 @@ extern unsigned long perip_base, perip_end; #define ARC_REG_SLC_FLUSH 0x904 #define ARC_REG_SLC_INVALIDATE 0x905 #define ARC_REG_SLC_RGN_START 0x914 +#define ARC_REG_SLC_RGN_START1 0x915 #define ARC_REG_SLC_RGN_END 0x916 +#define ARC_REG_SLC_RGN_END1 0x917 /* Bit val in SLC_CONTROL */ #define SLC_CTRL_IM 0x040 diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 8147583c4434..bbdfeb31dee6 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -562,6 +562,7 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned int ctrl; + phys_addr_t end; spin_lock_irqsave(&lock, flags); @@ -591,8 +592,16 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) * END needs to be setup before START (latter triggers the operation) * END can't be same as START, so add (l2_line_sz - 1) to sz */ - write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); - write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + end = paddr + sz + l2_line_sz - 1; + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end)); + + write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end)); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr)); + + write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr)); while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); -- cgit v1.2.3 From 5d8b3cc24688efbca6062bf8f5712edcf05a2132 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 25 Aug 2017 15:55:33 -0700 Subject: mm, shmem: fix handling /sys/kernel/mm/transparent_hugepage/shmem_enabled commit 435c0b87d661da83771c30ed775f7c37eed193fb upstream. /sys/kernel/mm/transparent_hugepage/shmem_enabled controls if we want to allocate huge pages when allocate pages for private in-kernel shmem mount. Unfortunately, as Dan noticed, I've screwed it up and the only way to make kernel allocate huge page for the mount is to use "force" there. All other values will be effectively ignored. Link: http://lkml.kernel.org/r/20170822144254.66431-1-kirill.shutemov@linux.intel.com Fixes: 5a6e75f8110c ("shmem: prepare huge= mount option and sysfs knob") Signed-off-by: Kirill A. Shutemov Reported-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 7ee5444ffb6d..004e0f87e8a8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3810,7 +3810,7 @@ int __init shmem_init(void) } #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE - if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY) + if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; else shmem_huge = 0; /* just in case it was patched */ @@ -3871,7 +3871,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj, return -EINVAL; shmem_huge = huge; - if (shmem_huge < SHMEM_HUGE_DENY) + if (shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; return count; } -- cgit v1.2.3 From 036c100b58f742289dfab088a60e35717ec056f4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 9 Aug 2017 15:28:22 +0200 Subject: i2c: designware: Fix system suspend commit a23318feeff662c8d25d21623daebdd2e55ec221 upstream. The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming during system suspend"), may suggest to the PM core to try out the so called direct_complete path for system sleep. In this path, the PM core treats a runtime suspended device as it's already in a proper low power state for system sleep, which makes it skip calling the system sleep callbacks for the device, except for the ->prepare() and the ->complete() callbacks. However, the PM core may unset the direct_complete flag for a parent device, in case its child device are being system suspended before. In this scenario, the PM core invokes the system sleep callbacks, no matter if the device is runtime suspended or not. Particularly in cases of an existing i2c slave device, the above path is triggered, which breaks the assumption that the i2c device is always runtime resumed whenever the dw_i2c_plat_suspend() is being called. More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and clk_core_unprepare(), for an already disabled/unprepared clock, leading to a splat in the log about clocks calls being wrongly balanced and breaking system sleep. To still allow the direct_complete path in cases when it's possible, but also to keep the fix simple, let's runtime resume the i2c device in the ->suspend() callback, before continuing to put the device into low power state. Note, in cases when the i2c device is attached to the ACPI PM domain, this problem doesn't occur, because ACPI's ->suspend() callback, assigned to acpi_subsys_suspend(), already calls pm_runtime_resume() for the device. It should also be noted that this change does not fix commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming during system suspend"). Because for the non-ACPI case, the system sleep support was already broken prior that point. Signed-off-by: Ulf Hansson Acked-by: Rafael J. Wysocki Tested-by: John Stultz Tested-by: Jarkko Nikula Acked-by: Jarkko Nikula Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-designware-platdrv.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 0b42a12171f3..b42d95f09c68 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -319,7 +319,7 @@ static void dw_i2c_plat_complete(struct device *dev) #endif #ifdef CONFIG_PM -static int dw_i2c_plat_suspend(struct device *dev) +static int dw_i2c_plat_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev); @@ -343,11 +343,21 @@ static int dw_i2c_plat_resume(struct device *dev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int dw_i2c_plat_suspend(struct device *dev) +{ + pm_runtime_resume(dev); + return dw_i2c_plat_runtime_suspend(dev); +} +#endif + static const struct dev_pm_ops dw_i2c_dev_pm_ops = { .prepare = dw_i2c_plat_prepare, .complete = dw_i2c_plat_complete, SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume) - SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL) + SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, + dw_i2c_plat_resume, + NULL) }; #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops) -- cgit v1.2.3 From 0f49b0519f1190858f36e0bdafd4a139c9c2e34a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Aug 2017 15:55:39 -0700 Subject: mm/madvise.c: fix freeing of locked page with MADV_FREE commit 263630e8d176d87308481ebdcd78ef9426739c6b upstream. If madvise(..., MADV_FREE) split a transparent hugepage, it called put_page() before unlock_page(). This was wrong because put_page() can free the page, e.g. if a concurrent madvise(..., MADV_DONTNEED) has removed it from the memory mapping. put_page() then rightfully complained about freeing a locked page. Fix this by moving the unlock_page() before put_page(). This bug was found by syzkaller, which encountered the following splat: BUG: Bad page state in process syzkaller412798 pfn:1bd800 page:ffffea0006f60000 count:0 mapcount:0 mapping: (null) index:0x20a00 flags: 0x200000000040019(locked|uptodate|dirty|swapbacked) raw: 0200000000040019 0000000000000000 0000000000020a00 00000000ffffffff raw: ffffea0006f60020 ffffea0006f60020 0000000000000000 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: 0x1(locked) Modules linked in: CPU: 1 PID: 3037 Comm: syzkaller412798 Not tainted 4.13.0-rc5+ #35 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 bad_page+0x230/0x2b0 mm/page_alloc.c:565 free_pages_check_bad+0x1f0/0x2e0 mm/page_alloc.c:943 free_pages_check mm/page_alloc.c:952 [inline] free_pages_prepare mm/page_alloc.c:1043 [inline] free_pcp_prepare mm/page_alloc.c:1068 [inline] free_hot_cold_page+0x8cf/0x12b0 mm/page_alloc.c:2584 __put_single_page mm/swap.c:79 [inline] __put_page+0xfb/0x160 mm/swap.c:113 put_page include/linux/mm.h:814 [inline] madvise_free_pte_range+0x137a/0x1ec0 mm/madvise.c:371 walk_pmd_range mm/pagewalk.c:50 [inline] walk_pud_range mm/pagewalk.c:108 [inline] walk_p4d_range mm/pagewalk.c:134 [inline] walk_pgd_range mm/pagewalk.c:160 [inline] __walk_page_range+0xc3a/0x1450 mm/pagewalk.c:249 walk_page_range+0x200/0x470 mm/pagewalk.c:326 madvise_free_page_range.isra.9+0x17d/0x230 mm/madvise.c:444 madvise_free_single_vma+0x353/0x580 mm/madvise.c:471 madvise_dontneed_free mm/madvise.c:555 [inline] madvise_vma mm/madvise.c:664 [inline] SYSC_madvise mm/madvise.c:832 [inline] SyS_madvise+0x7d3/0x13c0 mm/madvise.c:760 entry_SYSCALL_64_fastpath+0x1f/0xbe Here is a C reproducer: #define _GNU_SOURCE #include #include #include #define MADV_FREE 8 #define PAGE_SIZE 4096 static void *mapping; static const size_t mapping_size = 0x1000000; static void *madvise_thrproc(void *arg) { madvise(mapping, mapping_size, (long)arg); } int main(void) { pthread_t t[2]; for (;;) { mapping = mmap(NULL, mapping_size, PROT_WRITE, MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); munmap(mapping + mapping_size / 2, PAGE_SIZE); pthread_create(&t[0], 0, madvise_thrproc, (void*)MADV_DONTNEED); pthread_create(&t[1], 0, madvise_thrproc, (void*)MADV_FREE); pthread_join(t[0], NULL); pthread_join(t[1], NULL); munmap(mapping, mapping_size); } } Note: to see the splat, CONFIG_TRANSPARENT_HUGEPAGE=y and CONFIG_DEBUG_VM=y are needed. Google Bug Id: 64696096 Link: http://lkml.kernel.org/r/20170823205235.132061-1-ebiggers3@gmail.com Fixes: 854e9ed09ded ("mm: support madvise(MADV_FREE)") Signed-off-by: Eric Biggers Acked-by: David Rientjes Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Dmitry Vyukov Cc: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 253b1533fba5..63a12162f4c6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -331,8 +331,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, pte_offset_map_lock(mm, pmd, addr, &ptl); goto out; } - put_page(page); unlock_page(page); + put_page(page); pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte--; addr -= PAGE_SIZE; -- cgit v1.2.3 From b65b6ac52e0f8694aa3a4402d5f766b2bb9e94ef Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Aug 2017 15:55:43 -0700 Subject: fork: fix incorrect fput of ->exe_file causing use-after-free commit 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a upstream. Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") made it possible to kill a forking task while it is waiting to acquire its ->mmap_sem for write, in dup_mmap(). However, it was overlooked that this introduced an new error path before a reference is taken on the mm_struct's ->exe_file. Since the ->exe_file of the new mm_struct was already set to the old ->exe_file by the memcpy() in dup_mm(), it was possible for the mmput() in the error path of dup_mm() to drop a reference to ->exe_file which was never taken. This caused the struct file to later be freed prematurely. Fix it by updating mm_init() to NULL out the ->exe_file, in the same place it clears other things like the list of mmaps. This bug was found by syzkaller. It can be reproduced using the following C program: #define _GNU_SOURCE #include #include #include #include #include #include static void *mmap_thread(void *_arg) { for (;;) { mmap(NULL, 0x1000000, PROT_READ, MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); } } static void *fork_thread(void *_arg) { usleep(rand() % 10000); fork(); } int main(void) { fork(); fork(); fork(); for (;;) { if (fork() == 0) { pthread_t t; pthread_create(&t, NULL, mmap_thread, NULL); pthread_create(&t, NULL, fork_thread, NULL); usleep(rand() % 10000); syscall(__NR_exit_group, 0); } wait(NULL); } } No special kernel config options are needed. It usually causes a NULL pointer dereference in __remove_shared_vm_struct() during exit, or in dup_mmap() (which is usually inlined into copy_process()) during fork. Both are due to a vm_area_struct's ->vm_file being used after it's already been freed. Google Bug Id: 64772007 Link: http://lkml.kernel.org/r/20170823211408.31198-1-ebiggers3@gmail.com Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") Signed-off-by: Eric Biggers Tested-by: Mark Rutland Acked-by: Michal Hocko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/fork.c b/kernel/fork.c index 59faac4de181..50bf262cc427 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -766,6 +766,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_mm_init(mm); clear_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS -- cgit v1.2.3 From 9d263321d7a2c1cb5c9b226316640d5066047a7a Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 25 Aug 2017 15:55:46 -0700 Subject: mm/memblock.c: reversed logic in memblock_discard() commit 91b540f98872a206ea1c49e4aa6ea8eed0886644 upstream. In recently introduced memblock_discard() there is a reversed logic bug. Memory is freed of static array instead of dynamically allocated one. Link: http://lkml.kernel.org/r/1503511441-95478-2-git-send-email-pasha.tatashin@oracle.com Fixes: 3010f876500f ("mm: discard memblock data later") Signed-off-by: Pavel Tatashin Reported-by: Woody Suwalski Tested-by: Woody Suwalski Acked-by: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index ccec42c12ba8..42b98af6a415 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -311,7 +311,7 @@ void __init memblock_discard(void) __memblock_free_late(addr, size); } - if (memblock.memory.regions == memblock_memory_init_regions) { + if (memblock.memory.regions != memblock_memory_init_regions) { addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); -- cgit v1.2.3 From ce66f629ad828168ed13250eaa7d3a60124d753e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Aug 2017 13:05:58 +0100 Subject: drm: Release driver tracking before making the object available again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fe4600a548f2763dec91b3b27a1245c370ceee2a upstream. This is the same bug as we fixed in commit f6cd7daecff5 ("drm: Release driver references to handle before making it available again"), but now the exposure is via the PRIME lookup tables. If we remove the object/handle from the PRIME lut, then a new request for the same object/fd will generate a new handle, thus for a short window that object is known to userspace by two different handles. Fix this by releasing the driver tracking before PRIME. Fixes: 0ff926c7d4f0 ("drm/prime: add exported buffers to current fprivs imported buffer list (v2)") Signed-off-by: Chris Wilson Cc: David Airlie Cc: Daniel Vetter Cc: Rob Clark Cc: Ville Syrjälä Cc: Thierry Reding Reviewed-by: Daniel Vetter Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170819120558.6465-1-chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 465bacd0a630..48e99ab525c3 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -255,13 +255,13 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_gem_object *obj = ptr; struct drm_device *dev = obj->dev; + if (dev->driver->gem_close_object) + dev->driver->gem_close_object(obj, file_priv); + if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_gem_remove_prime_handles(obj, file_priv); drm_vma_node_revoke(&obj->vma_node, file_priv); - if (dev->driver->gem_close_object) - dev->driver->gem_close_object(obj, file_priv); - drm_gem_object_handle_unreference_unlocked(obj); return 0; -- cgit v1.2.3 From eed96e7595339858ce8e17f8c2ce60fae0c23a7a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 15 Aug 2017 11:57:06 +0200 Subject: drm/atomic: If the atomic check fails, return its value first commit a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba upstream. The last part of drm_atomic_check_only is testing whether we need to fail with -EINVAL when modeset is not allowed, but forgets to return the value when atomic_check() fails first. This results in -EDEADLK being replaced by -EINVAL, and the sanity check in drm_modeset_drop_locks kicks in: [ 308.531734] ------------[ cut here ]------------ [ 308.531791] WARNING: CPU: 0 PID: 1886 at drivers/gpu/drm/drm_modeset_lock.c:217 drm_modeset_drop_locks+0x33/0xc0 [drm] [ 308.531828] Modules linked in: [ 308.532050] CPU: 0 PID: 1886 Comm: kms_atomic Tainted: G U W 4.13.0-rc5-patser+ #5225 [ 308.532082] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015 [ 308.532124] task: ffff8800cd9dae00 task.stack: ffff8800ca3b8000 [ 308.532168] RIP: 0010:drm_modeset_drop_locks+0x33/0xc0 [drm] [ 308.532189] RSP: 0018:ffff8800ca3bf980 EFLAGS: 00010282 [ 308.532211] RAX: dffffc0000000000 RBX: ffff8800ca3bfaf8 RCX: 0000000013a171e6 [ 308.532235] RDX: 1ffff10019477f69 RSI: ffffffffa8ba4fa0 RDI: ffff8800ca3bfb48 [ 308.532258] RBP: ffff8800ca3bf998 R08: 0000000000000000 R09: 0000000000000003 [ 308.532281] R10: 0000000079dbe066 R11: 00000000f760b34b R12: 0000000000000001 [ 308.532304] R13: dffffc0000000000 R14: 00000000ffffffea R15: ffff880096889680 [ 308.532328] FS: 00007ff00959cec0(0000) GS:ffff8800d4e00000(0000) knlGS:0000000000000000 [ 308.532359] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 308.532380] CR2: 0000000000000008 CR3: 00000000ca2e3000 CR4: 00000000003406f0 [ 308.532402] Call Trace: [ 308.532440] drm_mode_atomic_ioctl+0x19fa/0x1c00 [drm] [ 308.532488] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532565] ? avc_has_extended_perms+0xc39/0xff0 [ 308.532593] ? lock_downgrade+0x610/0x610 [ 308.532640] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532680] drm_ioctl_kernel+0x154/0x1a0 [drm] [ 308.532755] drm_ioctl+0x624/0x8f0 [drm] [ 308.532858] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532976] ? drm_getunique+0x210/0x210 [drm] [ 308.533061] do_vfs_ioctl+0xd92/0xe40 [ 308.533121] ? ioctl_preallocate+0x1b0/0x1b0 [ 308.533160] ? selinux_capable+0x20/0x20 [ 308.533191] ? do_fcntl+0x1b1/0xbf0 [ 308.533219] ? kasan_slab_free+0xa2/0xb0 [ 308.533249] ? f_getown+0x4b/0xa0 [ 308.533278] ? putname+0xcf/0xe0 [ 308.533309] ? security_file_ioctl+0x57/0x90 [ 308.533342] SyS_ioctl+0x4e/0x80 [ 308.533374] entry_SYSCALL_64_fastpath+0x18/0xad [ 308.533405] RIP: 0033:0x7ff00779e4d7 [ 308.533431] RSP: 002b:00007fff66a043d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 308.533481] RAX: ffffffffffffffda RBX: 000000e7c7ca5910 RCX: 00007ff00779e4d7 [ 308.533560] RDX: 00007fff66a04430 RSI: 00000000c03864bc RDI: 0000000000000003 [ 308.533608] RBP: 00007ff007a5fb00 R08: 000000e7c7ca4620 R09: 000000e7c7ca5e60 [ 308.533647] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000070 [ 308.533685] R13: 0000000000000000 R14: 0000000000000000 R15: 000000e7c7ca5930 [ 308.533770] Code: ff df 55 48 89 e5 41 55 41 54 53 48 89 fb 48 83 c7 50 48 89 fa 48 c1 ea 03 80 3c 02 00 74 05 e8 94 d4 16 e7 48 83 7b 50 00 74 02 <0f> ff 4c 8d 6b 58 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 [ 308.534086] ---[ end trace 77f11e53b1df44ad ]--- Solve this by adding the missing return. This is also a bugfix because we could end up rejecting updates with -EINVAL because of a early -EDEADLK, while if atomic_check ran to completion it might have downgraded the modeset to a fastset. Signed-off-by: Maarten Lankhorst Testcase: kms_atomic Link: https://patchwork.freedesktop.org/patch/msgid/20170815095706.23624-1-maarten.lankhorst@linux.intel.com Fixes: d34f20d6e2f2 ("drm: Atomic modeset ioctl") Reviewed-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 4e19bde4bbff..34adde169a78 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1386,6 +1386,9 @@ int drm_atomic_check_only(struct drm_atomic_state *state) if (config->funcs->atomic_check) ret = config->funcs->atomic_check(state->dev, state); + if (ret) + return ret; + if (!state->allow_modeset) { for_each_crtc_in_state(state, crtc, crtc_state, i) { if (drm_atomic_crtc_needs_modeset(crtc_state)) { @@ -1396,7 +1399,7 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - return ret; + return 0; } EXPORT_SYMBOL(drm_atomic_check_only); -- cgit v1.2.3 From 35fd2b840b6c02da9c0e8a31f52ebb89f51692e2 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 3 Oct 2016 20:03:22 +0300 Subject: drm: rcar-du: Fix crash in encoder failure error path commit 05ee29e94acf0d4b3998c3f93374952de8f90176 upstream. When an encoder fails to initialize the driver prints an error message to the kernel log. The message contains the name of the encoder's DT node, which is NULL for internal encoders. Use the of_node_full_name() macro to avoid dereferencing a NULL pointer, print the output number to add more context to the error, and make sure we still own a reference to the encoder's DT node by delaying the of_node_put() call. Signed-off-by: Laurent Pinchart Reviewed-by: Gustavo Padovan Signed-off-by: Thong Ho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_kms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index cfc302c65b0b..c58602b638e4 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -453,13 +453,13 @@ static int rcar_du_encoders_init_one(struct rcar_du_device *rcdu, } ret = rcar_du_encoder_init(rcdu, enc_type, output, encoder, connector); - of_node_put(encoder); - of_node_put(connector); - if (ret && ret != -EPROBE_DEFER) dev_warn(rcdu->dev, - "failed to initialize encoder %s (%d), skipping\n", - encoder->full_name, ret); + "failed to initialize encoder %s on output %u (%d), skipping\n", + of_node_full_name(encoder), output, ret); + + of_node_put(encoder); + of_node_put(connector); return ret; } -- cgit v1.2.3 From 1fb8ff8b92bd1335e036e3187797136348fe0029 Mon Sep 17 00:00:00 2001 From: Koji Matsuoka Date: Mon, 18 Apr 2016 16:31:30 +0900 Subject: drm: rcar-du: Fix display timing controller parameter commit 9cdced8a39c04cf798ddb2a27cb5952f7d39f633 upstream. There is a bug in the setting of the DES (Display Enable Signal) register. This current setting occurs 1 dot left shift. The DES register should be set minus one value about the specifying value with H/W specification. This patch corrects it. Signed-off-by: Koji Matsuoka Signed-off-by: Laurent Pinchart Signed-off-by: Thong Ho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 7316fc7fa0bd..aca26eed93b1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -172,7 +172,7 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc) mode->crtc_vsync_start - 1); rcar_du_crtc_write(rcrtc, VCR, mode->crtc_vtotal - 1); - rcar_du_crtc_write(rcrtc, DESR, mode->htotal - mode->hsync_start); + rcar_du_crtc_write(rcrtc, DESR, mode->htotal - mode->hsync_start - 1); rcar_du_crtc_write(rcrtc, DEWR, mode->hdisplay); } -- cgit v1.2.3 From 46cd0a3b93a5b4b124f898ca4851888926f00993 Mon Sep 17 00:00:00 2001 From: Koji Matsuoka Date: Mon, 16 May 2016 11:28:15 +0900 Subject: drm: rcar-du: Fix H/V sync signal polarity configuration commit fd1adef3bff0663c5ac31b45bc4a05fafd43d19b upstream. The VSL and HSL bits in the DSMR register set the corresponding horizontal and vertical sync signal polarity to active high. The code got it the wrong way around, fix it. Signed-off-by: Koji Matsuoka Signed-off-by: Laurent Pinchart Signed-off-by: Thong Ho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index aca26eed93b1..a2ec6d8796a0 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -149,8 +149,8 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc) rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? OTAR2 : OTAR, 0); /* Signal polarities */ - value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? 0 : DSMR_VSL) - | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? 0 : DSMR_HSL) + value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? DSMR_VSL : 0) + | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? DSMR_HSL : 0) | DSMR_DIPM_DISP | DSMR_CSPM; rcar_du_crtc_write(rcrtc, DSMR, value); -- cgit v1.2.3 From 3170d9abc5d0ba56bb1eb4a924fab5b41ee546eb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 1 Aug 2017 12:01:52 -0400 Subject: tracing: Call clear_boot_tracer() at lateinit_sync commit 4bb0f0e73c8c30917d169c4a0f1ac083690c545b upstream. The clear_boot_tracer function is used to reset the default_bootup_tracer string to prevent it from being accessed after boot, as it originally points to init data. But since clear_boot_tracer() is called via the init_lateinit() call, it races with the initcall for registering the hwlat tracer. If someone adds "ftrace=hwlat" to the kernel command line, depending on how the linker sets up the text, the saved command line may be cleared, and the hwlat tracer never is initialized. Simply have the clear_boot_tracer() be called by initcall_lateinit_sync() as that's for tasks to be called after lateinit. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196551 Fixes: e7c15cd8a ("tracing: Added hardware latency tracer") Reported-by: Zamir SUN Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 53c308068e39..7379f735a9f4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7767,4 +7767,4 @@ __init static int clear_boot_tracer(void) } fs_initcall(tracer_init_tracefs); -late_initcall(clear_boot_tracer); +late_initcall_sync(clear_boot_tracer); -- cgit v1.2.3 From 2818a7659f0ac7dd8f12908dcde01f024d311b20 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Mon, 14 Aug 2017 18:18:17 +0800 Subject: tracing: Fix kmemleak in tracing_map_array_free() commit 475bb3c69ab05df2a6ecef6acc2393703d134180 upstream. kmemleak reported the below leak when I was doing clear of the hist trigger. With this patch, the kmeamleak is gone. unreferenced object 0xffff94322b63d760 (size 32): comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) hex dump (first 32 bytes): 00 01 00 00 04 00 00 00 08 00 00 00 ff 00 00 00 ................ 10 00 00 00 00 00 00 00 80 a8 7a f2 31 94 ff ff ..........z.1... backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc_trace+0xca/0x1d0 [] tracing_map_array_alloc+0x26/0x140 [] kretprobe_trampoline+0x0/0x50 [] create_hist_data+0x535/0x750 [] event_hist_trigger_func+0x1f7/0x420 [] event_trigger_write+0xfd/0x1a0 [] __vfs_write+0x37/0x170 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x55/0xc0 [] do_syscall_64+0x67/0x150 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff unreferenced object 0xffff9431f27aa880 (size 128): comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) hex dump (first 32 bytes): 00 00 8c 2a 32 94 ff ff 00 f0 8b 2a 32 94 ff ff ...*2......*2... 00 e0 8b 2a 32 94 ff ff 00 d0 8b 2a 32 94 ff ff ...*2......*2... backtrace: [] kmemleak_alloc+0x4a/0xa0 [] __kmalloc+0xe8/0x220 [] tracing_map_array_alloc+0xb1/0x140 [] kretprobe_trampoline+0x0/0x50 [] create_hist_data+0x535/0x750 [] event_hist_trigger_func+0x1f7/0x420 [] event_trigger_write+0xfd/0x1a0 [] __vfs_write+0x37/0x170 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x55/0xc0 [] do_syscall_64+0x67/0x150 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff Link: http://lkml.kernel.org/r/1502705898-27571-1-git-send-email-chuhu@redhat.com Fixes: 08d43a5fa063 ("tracing: Add lock-free tracing_map") Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/tracing_map.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 0a689bbb78ef..305039b122fa 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -221,16 +221,19 @@ void tracing_map_array_free(struct tracing_map_array *a) if (!a) return; - if (!a->pages) { - kfree(a); - return; - } + if (!a->pages) + goto free; for (i = 0; i < a->n_pages; i++) { if (!a->pages[i]) break; free_page((unsigned long)a->pages[i]); } + + kfree(a->pages); + + free: + kfree(a); } struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, -- cgit v1.2.3 From 8838cd5c543f649ffd5ffd2e6da5aabbcc25ff65 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 23 Aug 2017 12:46:27 -0400 Subject: tracing: Fix freeing of filter in create_filter() when set_str is false commit 8b0db1a5bdfcee0dbfa89607672598ae203c9045 upstream. Performing the following task with kmemleak enabled: # cd /sys/kernel/tracing/events/irq/irq_handler_entry/ # echo 'enable_event:kmem:kmalloc:3 if irq >' > trigger # echo 'enable_event:kmem:kmalloc:3 if irq > 31' > trigger # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800b9290308 (size 32): comm "bash", pid 1114, jiffies 4294848451 (age 141.139s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc_trace+0x158/0x290 [] create_filter_start.constprop.28+0x99/0x940 [] create_filter+0xa9/0x160 [] create_event_filter+0xc/0x10 [] set_trigger_filter+0xe5/0x210 [] event_enable_trigger_func+0x324/0x490 [] event_trigger_write+0x1a2/0x260 [] __vfs_write+0xd7/0x380 [] vfs_write+0x101/0x260 [] SyS_write+0xab/0x130 [] entry_SYSCALL_64_fastpath+0x1f/0xbe [] 0xffffffffffffffff The function create_filter() is passed a 'filterp' pointer that gets allocated, and if "set_str" is true, it is up to the caller to free it, even on error. The problem is that the pointer is not freed by create_filter() when set_str is false. This is a bug, and it is not up to the caller to free the filter on error if it doesn't care about the string. Link: http://lkml.kernel.org/r/1502705898-27571-2-git-send-email-chuhu@redhat.com Fixes: 38b78eb85 ("tracing: Factorize filter creation") Reported-by: Chunyu Hu Tested-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_filter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9daa9b3bc6d9..0193f58c45f0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1926,6 +1926,10 @@ static int create_filter(struct trace_event_call *call, if (err && set_str) append_filter_err(ps, filter); } + if (err && !set_str) { + free_event_filter(filter); + filter = NULL; + } create_filter_finish(ps); *filterp = filter; -- cgit v1.2.3 From 1fdee09116db0de46c3a3077357f6f3531e10205 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Jul 2017 22:46:27 +1000 Subject: kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured commit cb87481ee89dbd6609e227afbf64900fb4e5c930 upstream. The .data and .bss sections were modified in the generic linker script to pull in sections named .data., which are generated by gcc with -ffunction-sections and -fdata-sections options. The problem with this pattern is it can also match section names that Linux defines explicitly, e.g., .data.unlikely. This can cause Linux sections to get moved into the wrong place. The way to avoid this is to use ".." separators for explicit section names (the dot character is valid in a section name but not a C identifier). However currently there are sections which don't follow this rule, so for now just disable the wild card by default. Example: http://marc.info/?l=linux-arm-kernel&m=150106824024221&w=2 Fixes: b67067f1176df ("kbuild: allow archs to select link dead code/data elimination") Signed-off-by: Nicholas Piggin Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 31e1d639abed..dc81e5287ebf 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -59,6 +59,22 @@ /* Align . to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) +/* + * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which + * generates .data.identifier sections, which need to be pulled in with + * .data. We don't want to pull in .data..other sections, which Linux + * has defined. Same for text and bss. + */ +#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#else +#define TEXT_MAIN .text +#define DATA_MAIN .data +#define BSS_MAIN .bss +#endif + /* * Align to a 32 byte boundary equal to the * alignment gcc 4.5 uses for a struct @@ -198,12 +214,9 @@ /* * .data section - * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections generates - * .data.identifier which needs to be pulled in with .data, but don't want to - * pull in .data..stuff which has its own requirements. Same for bss. */ #define DATA_DATA \ - *(.data .data.[0-9a-zA-Z_]*) \ + *(DATA_MAIN) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data) \ @@ -436,16 +449,17 @@ VMLINUX_SYMBOL(__security_initcall_end) = .; \ } -/* .text section. Map to function alignment to avoid address changes +/* + * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map - * LD_DEAD_CODE_DATA_ELIMINATION option enables -ffunction-sections generates - * .text.identifier which needs to be pulled in with .text , but some - * architectures define .text.foo which is not intended to be pulled in here. - * Those enabling LD_DEAD_CODE_DATA_ELIMINATION must ensure they don't have - * conflicting section names, and must pull in .text.[0-9a-zA-Z_]* */ + * + * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead + * code elimination is enabled, so these sections should be converted + * to use ".." first. + */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot .text .text.fixup .text.unlikely) \ + *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ *(.ref.text) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ @@ -613,7 +627,7 @@ BSS_FIRST_SECTIONS \ *(.bss..page_aligned) \ *(.dynbss) \ - *(.bss .bss.[0-9a-zA-Z_]*) \ + *(BSS_MAIN) \ *(COMMON) \ } -- cgit v1.2.3 From 8b053290ee20cce2a082c80fd2e654e3e533f802 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 3 Aug 2017 13:09:03 +0530 Subject: cifs: Fix df output for users with quota limits commit 42bec214d8bd432be6d32a1acb0a9079ecd4d142 upstream. The df for a SMB2 share triggers a GetInfo call for FS_FULL_SIZE_INFORMATION. The values returned are used to populate struct statfs. The problem is that none of the information returned by the call contains the total blocks available on the filesystem. Instead we use the blocks available to the user ie. quota limitation when filling out statfs.f_blocks. The information returned does contain Actual free units on the filesystem and is used to populate statfs.f_bfree. For users with quota enabled, it can lead to situations where the total free space reported is more than the total blocks on the system ending up with df reports like the following # df -h /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.5G -2.3G 2.5G - /mnt/a To fix this problem, we instead populate both statfs.f_bfree with the same value as statfs.f_bavail ie. CallerAvailableAllocationUnits. This is similar to what is done already in the code for cifs and df now reports the quota information for the user used to mount the share. # df --si /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.7G 101M 2.6G 4% /mnt/a Signed-off-by: Sachin Prabhu Signed-off-by: Pierguido Lambri Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7c1c6c39d582..0437e5fdba56 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2930,8 +2930,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); - kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); - kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + kst->f_bfree = kst->f_bavail = + le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); return; } -- cgit v1.2.3 From 5ed70f7db9afe5e4e5cc1fa2239676e3b2d2c5b2 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 23 Aug 2017 14:48:14 +1000 Subject: cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() commit d3edede29f74d335f81d95a4588f5f136a9f7dcf upstream. Add checking for the path component length and verify it is <= the maximum that the server advertizes via FileFsAttributeInformation. With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT when users to access an overlong path. To test this, try to cd into a (non-existing) directory on a CIFS share that has a too long name: cd /mnt/aaaaaaaaaaaaaaa... and it now should show a good error message from the shell: bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long rh bz 1153996 Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/dir.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 789ff1df2d8d..581712534c93 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -183,15 +183,20 @@ cifs_bp_rename_retry: } /* + * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix. */ static int -check_name(struct dentry *direntry) +check_name(struct dentry *direntry, struct cifs_tcon *tcon) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); int i; + if (unlikely(direntry->d_name.len > + tcon->fsAttrInfo.MaxPathNameComponentLength)) + return -ENAMETOOLONG; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { @@ -489,10 +494,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, return finish_no_open(file, res); } - rc = check_name(direntry); - if (rc) - return rc; - xid = get_xid(); cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", @@ -505,6 +506,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } tcon = tlink_tcon(tlink); + + rc = check_name(direntry, tcon); + if (rc) + goto out_free_xid; + server = tcon->ses->server; if (server->ops->new_lease_key) @@ -765,7 +771,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - rc = check_name(direntry); + rc = check_name(direntry, pTcon); if (rc) goto lookup_out; -- cgit v1.2.3 From fd8235e721ef8bb15ba390d9f2f806f794368536 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Aug 2017 11:12:19 -0400 Subject: nfsd: Limit end of page list when decoding NFSv4 WRITE commit fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 upstream. When processing an NFSv4 WRITE operation, argp->end should never point past the end of the data in the final page of the page list. Otherwise, nfsd4_decode_compound can walk into uninitialized memory. More critical, nfsd4_decode_write is failing to increment argp->pagelen when it increments argp->pagelist. This can cause later xdr decoders to assume more data is available than really is, which can cause server crashes on malformed requests. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4e7a56a0a9b6..2c4f7a22e128 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -129,7 +129,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); + argp->end = argp->p + XDR_QUADLEN(argp->pagelen); argp->pagelen = 0; } else { argp->end = argp->p + (PAGE_SIZE>>2); @@ -1246,9 +1246,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) argp->pagelen -= pages * PAGE_SIZE; len -= pages * PAGE_SIZE; - argp->p = (__be32 *)page_address(argp->pagelist[0]); - argp->pagelist++; - argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); + next_decode_page(argp); } argp->p += XDR_QUADLEN(len); -- cgit v1.2.3 From 741397d16a3d65e1629e9be75f5d64687d767664 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 17 Aug 2017 16:37:25 -0400 Subject: ftrace: Check for null ret_stack on profile function graph entry function commit a8f0f9e49956a74718874b800251455680085600 upstream. There's a small race when function graph shutsdown and the calling of the registered function graph entry callback. The callback must not reference the task's ret_stack without first checking that it is not NULL. Note, when a ret_stack is allocated for a task, it stays allocated until the task exits. The problem here, is that function_graph is shutdown, and a new task was created, which doesn't have its ret_stack allocated. But since some of the functions are still being traced, the callbacks can still be called. The normal function_graph code handles this, but starting with commit 8861dd303c ("ftrace: Access ret_stack->subtime only in the function profiler") the profiler code references the ret_stack on function entry, but doesn't check if it is NULL first. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196611 Fixes: 8861dd303c ("ftrace: Access ret_stack->subtime only in the function profiler") Reported-by: lilydjwg@gmail.com Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f7ea8446bb5..6e432ed7d0fe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -876,6 +876,10 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) function_profile_call(trace->func, 0, NULL, NULL); + /* If function graph is shutting down, ret_stack can be NULL */ + if (!current->ret_stack) + return 0; + if (index >= 0 && index < FTRACE_RETFUNC_DEPTH) current->ret_stack[index].subtime = 0; -- cgit v1.2.3 From bde6608dd6385a10c1f749d11458d96c54114a5c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Jun 2017 15:41:38 +0100 Subject: perf/core: Fix group {cpu,task} validation commit 64aee2a965cf2954a038b5522f11d2cd2f0f8f3e upstream. Regardless of which events form a group, it does not make sense for the events to target different tasks and/or CPUs, as this leaves the group inconsistent and impossible to schedule. The core perf code assumes that these are consistent across (successfully intialised) groups. Core perf code only verifies this when moving SW events into a HW context. Thus, we can violate this requirement for pure SW groups and pure HW groups, unless the relevant PMU driver happens to perform this verification itself. These mismatched groups subsequently wreak havoc elsewhere. For example, we handle watchpoints as SW events, and reserve watchpoint HW on a per-CPU basis at pmu::event_init() time to ensure that any event that is initialised is guaranteed to have a slot at pmu::add() time. However, the core code only checks the group leader's cpu filter (via event_filter_match()), and can thus install follower events onto CPUs violating thier (mismatched) CPU filters, potentially installing them into a CPU without sufficient reserved slots. This can be triggered with the below test case, resulting in warnings from arch backends. #define _GNU_SOURCE #include #include #include #include #include #include #include static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } char watched_char; struct perf_event_attr wp_attr = { .type = PERF_TYPE_BREAKPOINT, .bp_type = HW_BREAKPOINT_RW, .bp_addr = (unsigned long)&watched_char, .bp_len = 1, .size = sizeof(wp_attr), }; int main(int argc, char *argv[]) { int leader, ret; cpu_set_t cpus; /* * Force use of CPU0 to ensure our CPU0-bound events get scheduled. */ CPU_ZERO(&cpus); CPU_SET(0, &cpus); ret = sched_setaffinity(0, sizeof(cpus), &cpus); if (ret) { printf("Unable to set cpu affinity\n"); return 1; } /* open leader event, bound to this task, CPU0 only */ leader = perf_event_open(&wp_attr, 0, 0, -1, 0); if (leader < 0) { printf("Couldn't open leader: %d\n", leader); return 1; } /* * Open a follower event that is bound to the same task, but a * different CPU. This means that the group should never be possible to * schedule. */ ret = perf_event_open(&wp_attr, 0, 1, leader, 0); if (ret < 0) { printf("Couldn't open mismatched follower: %d\n", ret); return 1; } else { printf("Opened leader/follower with mismastched CPUs\n"); } /* * Open as many independent events as we can, all bound to the same * task, CPU0 only. */ do { ret = perf_event_open(&wp_attr, 0, 0, -1, 0); } while (ret >= 0); /* * Force enable/disble all events to trigger the erronoeous * installation of the follower event. */ printf("Opened all events. Toggling..\n"); for (;;) { prctl(PR_TASK_PERF_EVENTS_DISABLE, 0, 0, 0, 0); prctl(PR_TASK_PERF_EVENTS_ENABLE, 0, 0, 0, 0); } return 0; } Fix this by validating this requirement regardless of whether we're moving events. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Zhou Chengming Link: http://lkml.kernel.org/r/1498142498-15758-1-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f5a693589d66..c774773ac3a4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9786,28 +9786,27 @@ SYSCALL_DEFINE5(perf_event_open, goto err_context; /* - * Do not allow to attach to a group in a different - * task or CPU context: + * Make sure we're both events for the same CPU; + * grouping events for different CPUs is broken; since + * you can never concurrently schedule them anyhow. */ - if (move_group) { - /* - * Make sure we're both on the same task, or both - * per-cpu events. - */ - if (group_leader->ctx->task != ctx->task) - goto err_context; + if (group_leader->cpu != event->cpu) + goto err_context; - /* - * Make sure we're both events for the same CPU; - * grouping events for different CPUs is broken; since - * you can never concurrently schedule them anyhow. - */ - if (group_leader->cpu != event->cpu) - goto err_context; - } else { - if (group_leader->ctx != ctx) - goto err_context; - } + /* + * Make sure we're both on the same task, or both + * per-CPU events. + */ + if (group_leader->ctx->task != ctx->task) + goto err_context; + + /* + * Do not allow to attach to a group in a different task + * or CPU context. If we're moving SW events, we'll fix + * this up later, so allow that. + */ + if (!move_group && group_leader->ctx != ctx) + goto err_context; /* * Only a group leader can be exclusive or pinned -- cgit v1.2.3 From bac83e5ce85e64e02a3133ce275154779d97281c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 4 Jan 2017 12:29:05 +0900 Subject: perf probe: Fix --funcs to show correct symbols for offline module commit eebc509b20881b92d62e317b2c073e57c5f200f0 upstream. Fix --funcs (-F) option to show correct symbols for offline module. Since previous perf-probe uses machine__findnew_module_map() for offline module, even if user passes a module file (with full path) which is for other architecture, perf-probe always tries to load symbol map for current kernel module. This fix uses dso__new_map() to load the map from given binary as same as a map for user applications. Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148350053478.19001.15435255244512631545.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo Cc: Krister Johansen Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/probe-event.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6c50d9f8e210..6a6f44dd594b 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module) /* A file path -- this is an offline module */ if (module && strchr(module, '/')) - return machine__findnew_module_map(host_machine, 0, module); + return dso__new_map(module); if (!module) module = "kernel"; @@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module) if (strncmp(pos->dso->short_name + 1, module, pos->dso->short_name_len - 2) == 0 && module[pos->dso->short_name_len - 2] == '\0') { + map__get(pos); return pos; } } @@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user) return kernel_get_module_map(target); } -static void put_target_map(struct map *map, bool user) -{ - if (map && user) { - /* Only the user map needs to be released */ - map__put(map); - } -} - - static int convert_exec_to_group(const char *exec, char **result) { char *ptr1, *ptr2, *exec_copy; @@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, } out: - put_target_map(map, uprobes); + map__put(map); return ret; } @@ -2944,7 +2936,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } out: - put_target_map(map, pev->uprobes); + map__put(map); free(syms); return ret; @@ -3437,10 +3429,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, return ret; /* Get a symbol map */ - if (user) - map = dso__new_map(target); - else - map = kernel_get_module_map(target); + map = get_target_map(target, user); if (!map) { pr_err("Failed to get a map for %s\n", (target) ? : "kernel"); return -EINVAL; @@ -3472,9 +3461,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, } end: - if (user) { - map__put(map); - } + map__put(map); exit_probe_symbol_maps(); return ret; -- cgit v1.2.3 From 3df3b2efc065e71ba5edda9fa9b3cab1933d131c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:38 +0100 Subject: perf/x86/intel/rapl: Make package handling more robust commit dd86e373e09fb16b83e8adf5c48c421a4ca76468 upstream. The package management code in RAPL relies on package mapping being available before a CPU is started. This changed with: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") because the ACPI/BIOS information turned out to be unreliable, but that left RAPL in broken state. This was not noticed because on a regular boot all CPUs are online before RAPL is initialized. A possible fix would be to reintroduce the mess which allocates a package data structure in CPU prepare and when it turns out to already exist in starting throw it away later in the CPU online callback. But that's a horrible hack and not required at all because RAPL becomes functional for perf only in the CPU online callback. That's correct because user space is not yet informed about the CPU being onlined, so nothing caan rely on RAPL being available on that particular CPU. Move the allocation to the CPU online callback and simplify the hotplug handling. At this point the package mapping is established and correct. This also adds a missing check for available package data in the event_init() function. Reported-by: Yasuaki Ishimatsu Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de Signed-off-by: Ingo Molnar [ jwang: backport to 4.9 fix Null pointer deref during hotplug cpu.] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/rapl.c | 58 +++++++++++++++++++------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 970c1de3b86e..4c1b7ea18541 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -161,7 +161,13 @@ static u64 rapl_timer_ms; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { - return rapl_pmus->pmus[topology_logical_package_id(cpu)]; + unsigned int pkgid = topology_logical_package_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL; } static inline u64 rapl_read_counter(struct perf_event *event) @@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event) /* must be done before validate_group */ pmu = cpu_to_rapl_pmu(event->cpu); + if (!pmu) + return -EINVAL; event->cpu = pmu->cpu; event->pmu_private = pmu; event->hw.event_base = msr; @@ -585,6 +593,19 @@ static int rapl_cpu_online(unsigned int cpu) struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; + if (!pmu) { + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -ENOMEM; + + raw_spin_lock_init(&pmu->lock); + INIT_LIST_HEAD(&pmu->active_list); + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(pmu); + + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; + } /* * Check if there is an online cpu in the package which collects rapl * events already. @@ -598,27 +619,6 @@ static int rapl_cpu_online(unsigned int cpu) return 0; } -static int rapl_cpu_prepare(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - - if (pmu) - return 0; - - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - pmu->cpu = -1; - rapl_hrtimer_init(pmu); - rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; - return 0; -} - static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -804,28 +804,21 @@ static int __init rapl_pmu_init(void) * Install callbacks. Core will call them for each online cpu. */ - ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP", - rapl_cpu_prepare, NULL); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, "AP_PERF_X86_RAPL_ONLINE", rapl_cpu_online, rapl_cpu_offline); if (ret) - goto out1; + goto out; ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); if (ret) - goto out2; + goto out1; rapl_advertise(); return 0; -out2: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out1: - cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP); + cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); cleanup_rapl_pmus(); @@ -836,7 +829,6 @@ module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP); perf_pmu_unregister(&rapl_pmus->pmu); cleanup_rapl_pmus(); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index ba1cad7b97cf..965cc5693a46 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -10,7 +10,6 @@ enum cpuhp_state { CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_UNCORE_PREP, CPUHP_PERF_X86_AMD_UNCORE_PREP, - CPUHP_PERF_X86_RAPL_PREP, CPUHP_PERF_BFIN, CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, -- cgit v1.2.3 From 70b3fd5ce2ce1dfe8f563e93d31c124b84593af4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Aug 2017 18:43:48 +1000 Subject: timers: Fix excessive granularity of new timers after a nohz idle commit 2fe59f507a65dbd734b990a11ebc7488f6f87a24 upstream. When a timer base is idle, it is forwarded when a new timer is added to ensure that granularity does not become excessive. When not idle, the timer tick is expected to increment the base. However there are several problems: - If an existing timer is modified, the base is forwarded only after the index is calculated. - The base is not forwarded by add_timer_on. - There is a window after a timer is restarted from a nohz idle, after it is marked not-idle and before the timer tick on this CPU, where a timer may be added but the ancient base does not get forwarded. These result in excessive granularity (a 1 jiffy timeout can blow out to 100s of jiffies), which cause the rcu lockup detector to trigger, among other things. Fix this by keeping track of whether the timer base has been idle since it was last run or forwarded, and if so then forward it before adding a new timer. There is still a case where mod_timer optimises the case of a pending timer mod with the same expiry time, where the timer can see excessive granularity relative to the new, shorter interval. A comment is added, but it's not changed because it is an important fastpath for networking. This has been tested and found to fix the RCU softlockup messages. Testing was also done with tracing to measure requested versus achieved wakeup latencies for all non-deferrable timers in an idle system (with no lockup watchdogs running). Wakeup latency relative to absolute latency is calculated (note this suffers from round-up skew at low absolute times) and analysed: max avg std upstream 506.0 1.20 4.68 patched 2.0 1.08 0.15 The bug was noticed due to the lockup detector Kconfig changes dropping it out of people's .configs and resulting in larger base clk skew When the lockup detectors are enabled, no CPU can go idle for longer than 4 seconds, which limits the granularity errors. Sub-optimal timer behaviour is observable on a smaller scale in that case: max avg std upstream 9.0 1.05 0.19 patched 2.0 1.04 0.11 Fixes: Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") Signed-off-by: Nicholas Piggin Signed-off-by: Thomas Gleixner Tested-by: Jonathan Cameron Tested-by: David Miller Cc: dzickus@redhat.com Cc: sfr@canb.auug.org.au Cc: mpe@ellerman.id.au Cc: Stephen Boyd Cc: linuxarm@huawei.com Cc: abdhalee@linux.vnet.ibm.com Cc: John Stultz Cc: akpm@linux-foundation.org Cc: paulmck@linux.vnet.ibm.com Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 944ad64277a6..df445cde8a1e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -201,6 +201,7 @@ struct timer_base { bool migration_enabled; bool nohz_active; bool is_idle; + bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -891,13 +892,19 @@ get_target_base(struct timer_base *base, unsigned tflags) static inline void forward_timer_base(struct timer_base *base) { - unsigned long jnow = READ_ONCE(jiffies); + unsigned long jnow; /* - * We only forward the base when it's idle and we have a delta between - * base clock and jiffies. + * We only forward the base when we are idle or have just come out of + * idle (must_forward_clk logic), and have a delta between base clock + * and jiffies. In the common case, run_timers will take care of it. */ - if (!base->is_idle || (long) (jnow - base->clk) < 2) + if (likely(!base->must_forward_clk)) + return; + + jnow = READ_ONCE(jiffies); + base->must_forward_clk = base->is_idle; + if ((long)(jnow - base->clk) < 2) return; /* @@ -973,6 +980,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * same array bucket then just return: */ if (timer_pending(timer)) { + /* + * The downside of this optimization is that it can result in + * larger granularity than you would get from adding a new + * timer with this expiry. + */ if (timer->expires == expires) return 1; @@ -983,6 +995,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * dequeue/enqueue dance. */ base = lock_timer_base(timer, &flags); + forward_timer_base(base); clk = base->clk; idx = calc_wheel_index(expires, clk); @@ -999,6 +1012,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } else { base = lock_timer_base(timer, &flags); + forward_timer_base(base); } timer_stats_timer_set_start_info(timer); @@ -1028,12 +1042,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); + forward_timer_base(base); } } - /* Try to forward a stale timer base clock */ - forward_timer_base(base); - timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1150,6 +1162,7 @@ void add_timer_on(struct timer_list *timer, int cpu) WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } + forward_timer_base(base); debug_activate(timer, timer->expires); internal_add_timer(base, timer); @@ -1538,10 +1551,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (!is_max_delta) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* - * If we expect to sleep more than a tick, mark the base idle: + * If we expect to sleep more than a tick, mark the base idle. + * Also the tick is stopped so any added timer must forward + * the base clk itself to keep granularity small. This idle + * logic is only maintained for the BASE_STD base, deferrable + * timers may still see large granularity skew (by design). */ - if ((expires - basem) > TICK_NSEC) + if ((expires - basem) > TICK_NSEC) { + base->must_forward_clk = true; base->is_idle = true; + } } spin_unlock(&base->lock); @@ -1651,6 +1670,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + /* + * must_forward_clk must be cleared before running timers so that any + * timer functions that call mod_timer will not try to forward the + * base. idle trcking / clock forwarding logic is only used with + * BASE_STD timers. + * + * The deferrable base does not do idle tracking at all, so we do + * not forward it. This can result in very large variations in + * granularity for deferrable timers, but they can be deferred for + * long periods due to idle. + */ + base->must_forward_clk = false; + __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); -- cgit v1.2.3 From 3559de45c99a68c0b8c4956fc35367837df9161c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Aug 2017 10:50:29 -0700 Subject: x86/mm: Fix use-after-free of ldt_struct commit ccd5b3235180eef3cfec337df1c8554ab151b5cc upstream. The following commit: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") renamed init_new_context() to init_new_context_ldt() and added a new init_new_context() which calls init_new_context_ldt(). However, the error code of init_new_context_ldt() was ignored. Consequently, if a memory allocation in alloc_ldt_struct() failed during a fork(), the ->context.ldt of the new task remained the same as that of the old task (due to the memcpy() in dup_mm()). ldt_struct's are not intended to be shared, so a use-after-free occurred after one task exited. Fix the bug by making init_new_context() pass through the error code of init_new_context_ldt(). This bug was found by syzkaller, which encountered the following splat: BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710 CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] exec_mmap fs/exec.c:1061 [inline] flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291 load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855 search_binary_handler+0x142/0x6b0 fs/exec.c:1652 exec_binprm fs/exec.c:1694 [inline] do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816 do_execve+0x31/0x40 fs/exec.c:1860 call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Allocated by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627 kmalloc include/linux/slab.h:493 [inline] alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67 write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277 sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xca/0x250 mm/slab.c:3820 free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] __mmput kernel/fork.c:916 [inline] mmput+0x541/0x6e0 kernel/fork.c:927 copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931 copy_process kernel/fork.c:1546 [inline] _do_fork+0x1ef/0xfb0 kernel/fork.c:2025 SYSC_clone kernel/fork.c:2135 [inline] SyS_clone+0x37/0x50 kernel/fork.c:2129 do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287 return_from_SYSCALL_64+0x0/0x7a Here is a C reproducer: #include #include #include #include #include #include #include static void *fork_thread(void *_arg) { fork(); } int main(void) { struct user_desc desc = { .entry_number = 8191 }; syscall(__NR_modify_ldt, 1, &desc, sizeof(desc)); for (;;) { if (fork() == 0) { pthread_t t; srand(getpid()); pthread_create(&t, NULL, fork_thread, NULL); usleep(rand() % 10000); syscall(__NR_exit_group, 0); } wait(NULL); } } Note: the reproducer takes advantage of the fact that alloc_ldt_struct() may use vmalloc() to allocate a large ->entries array, and after commit: 5d17a73a2ebe ("vmalloc: back off when the current task is killed") it is possible for userspace to fail a task's vmalloc() by sending a fatal signal, e.g. via exit_group(). It would be more difficult to reproduce this bug on kernels without that commit. This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y. Signed-off-by: Eric Biggers Acked-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Hellwig Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Michal Hocko Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: linux-mm@kvack.org Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 8e0a9fe86de4..f9dd22469388 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -116,9 +116,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif - init_new_context_ldt(tsk, mm); - - return 0; + return init_new_context_ldt(tsk, mm); } static inline void destroy_context(struct mm_struct *mm) { -- cgit v1.2.3 From d4c5c26c5f0905f7582291f78e64a0a250cb0fc2 Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Mon, 21 Aug 2017 07:23:07 -0400 Subject: net: sunrpc: svcsock: fix NULL-pointer exception commit eebe53e87f97975ee58a21693e44797608bf679c upstream. While running nfs/connectathon tests kernel NULL-pointer exception has been observed due to races in svcsock.c. Race is appear when kernel accepts connection by kernel_accept (which creates new socket) and start queuing ingress packets to new socket. This happens in ksoftirq context which could run concurrently on a different core while new socket setup is not done yet. The fix is to re-order socket user data init sequence and add write/read barrier calls to be sure that we got proper values for callback pointers before actually calling them. Test results: nfs/connectathon reports '0' failed tests for about 200+ iterations. Crash log: ---<-snip->--- [ 6708.638984] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 6708.647093] pgd = ffff0000094e0000 [ 6708.650497] [00000000] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff80003, *pte=0000000000000000 [ 6708.660761] Internal error: Oops: 86000005 [#1] SMP [ 6708.665630] Modules linked in: nfsv3 nfnetlink_queue nfnetlink_log nfnetlink rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache overlay xt_CONNSECMARK xt_SECMARK xt_conntrack iptable_security ip_tables ah4 xfrm4_mode_transport sctp tun binfmt_misc ext4 jbd2 mbcache loop tcp_diag udp_diag inet_diag rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack vfat fat ghash_ce sha2_ce sha1_ce cavium_rng_vf i2c_thunderx sg thunderx_edac i2c_smbus edac_core cavium_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c nicvf nicpf ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops [ 6708.736446] ttm drm i2c_core thunder_bgx thunder_xcv mdio_thunder mdio_cavium dm_mirror dm_region_hash dm_log dm_mod [last unloaded: stap_3c300909c5b3f46dcacd49aab3334af_87021] [ 6708.752275] CPU: 84 PID: 0 Comm: swapper/84 Tainted: G W OE 4.11.0-4.el7.aarch64 #1 [ 6708.760787] Hardware name: www.cavium.com CRB-2S/CRB-2S, BIOS 0.3 Mar 13 2017 [ 6708.767910] task: ffff810006842e80 task.stack: ffff81000689c000 [ 6708.773822] PC is at 0x0 [ 6708.776739] LR is at svc_data_ready+0x38/0x88 [sunrpc] [ 6708.781866] pc : [<0000000000000000>] lr : [] pstate: 60000145 [ 6708.789248] sp : ffff810ffbad3900 [ 6708.792551] x29: ffff810ffbad3900 x28: ffff000008c73d58 [ 6708.797853] x27: 0000000000000000 x26: ffff81000bbe1e00 [ 6708.803156] x25: 0000000000000020 x24: ffff800f7410bf28 [ 6708.808458] x23: ffff000008c63000 x22: ffff000008c63000 [ 6708.813760] x21: ffff800f7410bf28 x20: ffff81000bbe1e00 [ 6708.819063] x19: ffff810012412400 x18: 00000000d82a9df2 [ 6708.824365] x17: 0000000000000000 x16: 0000000000000000 [ 6708.829667] x15: 0000000000000000 x14: 0000000000000001 [ 6708.834969] x13: 0000000000000000 x12: 722e736f622e676e [ 6708.840271] x11: 00000000f814dd99 x10: 0000000000000000 [ 6708.845573] x9 : 7374687225000000 x8 : 0000000000000000 [ 6708.850875] x7 : 0000000000000000 x6 : 0000000000000000 [ 6708.856177] x5 : 0000000000000028 x4 : 0000000000000000 [ 6708.861479] x3 : 0000000000000000 x2 : 00000000e5000000 [ 6708.866781] x1 : 0000000000000000 x0 : ffff81000bbe1e00 [ 6708.872084] [ 6708.873565] Process swapper/84 (pid: 0, stack limit = 0xffff81000689c000) [ 6708.880341] Stack: (0xffff810ffbad3900 to 0xffff8100068a0000) [ 6708.886075] Call trace: [ 6708.888513] Exception stack(0xffff810ffbad3710 to 0xffff810ffbad3840) [ 6708.894942] 3700: ffff810012412400 0001000000000000 [ 6708.902759] 3720: ffff810ffbad3900 0000000000000000 0000000060000145 ffff800f79300000 [ 6708.910577] 3740: ffff000009274d00 00000000000003ea 0000000000000015 ffff000008c63000 [ 6708.918395] 3760: ffff810ffbad3830 ffff800f79300000 000000000000004d 0000000000000000 [ 6708.926212] 3780: ffff810ffbad3890 ffff0000080f88dc ffff800f79300000 000000000000004d [ 6708.934030] 37a0: ffff800f7930093c ffff000008c63000 0000000000000000 0000000000000140 [ 6708.941848] 37c0: ffff000008c2c000 0000000000040b00 ffff81000bbe1e00 0000000000000000 [ 6708.949665] 37e0: 00000000e5000000 0000000000000000 0000000000000000 0000000000000028 [ 6708.957483] 3800: 0000000000000000 0000000000000000 0000000000000000 7374687225000000 [ 6708.965300] 3820: 0000000000000000 00000000f814dd99 722e736f622e676e 0000000000000000 [ 6708.973117] [< (null)>] (null) [ 6708.977824] [] tcp_data_queue+0x754/0xc5c [ 6708.983386] [] tcp_rcv_established+0x1a0/0x67c [ 6708.989384] [] tcp_v4_do_rcv+0x15c/0x22c [ 6708.994858] [] tcp_v4_rcv+0xaf0/0xb58 [ 6709.000077] [] ip_local_deliver_finish+0x10c/0x254 [ 6709.006419] [] ip_local_deliver+0xf0/0xfc [ 6709.011980] [] ip_rcv_finish+0x208/0x3a4 [ 6709.017454] [] ip_rcv+0x2dc/0x3c8 [ 6709.022328] [] __netif_receive_skb_core+0x2f8/0xa0c [ 6709.028758] [] __netif_receive_skb+0x38/0x84 [ 6709.034580] [] netif_receive_skb_internal+0x68/0xdc [ 6709.041010] [] napi_gro_receive+0xcc/0x1a8 [ 6709.046690] [] nicvf_cq_intr_handler+0x59c/0x730 [nicvf] [ 6709.053559] [] nicvf_poll+0x38/0xb8 [nicvf] [ 6709.059295] [] net_rx_action+0x2f8/0x464 [ 6709.064771] [] __do_softirq+0x11c/0x308 [ 6709.070164] [] irq_exit+0x12c/0x174 [ 6709.075206] [] __handle_domain_irq+0x78/0xc4 [ 6709.081027] [] gic_handle_irq+0x94/0x190 [ 6709.086501] Exception stack(0xffff81000689fdf0 to 0xffff81000689ff20) [ 6709.092929] fde0: 0000810ff2ec0000 ffff000008c10000 [ 6709.100747] fe00: ffff000008c70ef4 0000000000000001 0000000000000000 ffff810ffbad9b18 [ 6709.108565] fe20: ffff810ffbad9c70 ffff8100169d3800 ffff810006843ab0 ffff81000689fe80 [ 6709.116382] fe40: 0000000000000bd0 0000ffffdf979cd0 183f5913da192500 0000ffff8a254ce4 [ 6709.124200] fe60: 0000ffff8a254b78 0000aaab10339808 0000000000000000 0000ffff8a0c2a50 [ 6709.132018] fe80: 0000ffffdf979b10 ffff000008d6d450 ffff000008c10000 ffff000008d6d000 [ 6709.139836] fea0: 0000000000000054 ffff000008cd3dbc 0000000000000000 0000000000000000 [ 6709.147653] fec0: 0000000000000000 0000000000000000 0000000000000000 ffff81000689ff20 [ 6709.155471] fee0: ffff000008085240 ffff81000689ff20 ffff000008085244 0000000060000145 [ 6709.163289] ff00: ffff81000689ff10 ffff00000813f1e4 ffffffffffffffff ffff00000813f238 [ 6709.171107] [] el1_irq+0xb4/0x140 [ 6709.175976] [] arch_cpu_idle+0x44/0x11c [ 6709.181368] [] default_idle_call+0x20/0x30 [ 6709.187020] [] do_idle+0x158/0x1e4 [ 6709.191973] [] cpu_startup_entry+0x2c/0x30 [ 6709.197624] [] secondary_start_kernel+0x13c/0x160 [ 6709.203878] [<0000000001bc71c4>] 0x1bc71c4 [ 6709.207967] Code: bad PC value [ 6709.211061] SMP: stopping secondary CPUs [ 6709.218830] Starting crashdump kernel... [ 6709.222749] Bye! ---<-snip>--- Signed-off-by: Vadim Lomovtsev Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a4bc98265d88..266a30c8b88b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -408,6 +408,9 @@ static void svc_data_ready(struct sock *sk) dprintk("svc: socket %p(inet %p), busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); @@ -424,6 +427,9 @@ static void svc_write_space(struct sock *sk) if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -748,8 +754,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk) dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); - if (svsk) + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); + } + /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -782,6 +792,8 @@ static void svc_tcp_state_change(struct sock *sk) if (!svsk) printk("svc: socket %p: no user data\n", sk); else { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_ostate(sk); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1368,12 +1380,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return ERR_PTR(err); } - inet->sk_user_data = svsk; svsk->sk_sock = sock; svsk->sk_sk = inet; svsk->sk_ostate = inet->sk_state_change; svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; + /* + * This barrier is necessary in order to prevent race condition + * with svc_data_ready(), svc_listen_data_ready() and others + * when calling callbacks above. + */ + wmb(); + inet->sk_user_data = svsk; /* Initialize the socket */ if (sock->type == SOCK_DGRAM) -- cgit v1.2.3 From 090911a22c7b3bef0726737369532d0105e3f20d Mon Sep 17 00:00:00 2001 From: Zhang Bo Date: Tue, 13 Jun 2017 10:39:20 +0800 Subject: Revert "leds: handle suspend/resume in heartbeat trigger" commit 436c4c45b5b9562b59cedbb51b7343ab4a6dd8cc upstream. This reverts commit 5ab92a7cb82c66bf30685583a38a18538e3807db. System cannot enter suspend mode because of heartbeat led trigger. In autosleep_wq, try_to_suspend function will try to enter suspend mode in specific period. it will get wakeup_count then call pm_notifier chain callback function and freeze processes. Heartbeat_pm_notifier is called and it call led_trigger_unregister to change the trigger of led device to none. It will send uevent message and the wakeup source count changed. As wakeup_count changed, suspend will abort. Fixes: 5ab92a7cb82c ("leds: handle suspend/resume in heartbeat trigger") Signed-off-by: Zhang Bo Acked-by: Pavel Machek Reviewed-by: Linus Walleij Signed-off-by: Jacek Anaszewski Cc: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/leds/trigger/ledtrig-heartbeat.c | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c index c9f386213e9e..410c39c62dc7 100644 --- a/drivers/leds/trigger/ledtrig-heartbeat.c +++ b/drivers/leds/trigger/ledtrig-heartbeat.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "../leds.h" static int panic_heartbeats; @@ -155,30 +154,6 @@ static struct led_trigger heartbeat_led_trigger = { .deactivate = heartbeat_trig_deactivate, }; -static int heartbeat_pm_notifier(struct notifier_block *nb, - unsigned long pm_event, void *unused) -{ - int rc; - - switch (pm_event) { - case PM_SUSPEND_PREPARE: - case PM_HIBERNATION_PREPARE: - case PM_RESTORE_PREPARE: - led_trigger_unregister(&heartbeat_led_trigger); - break; - case PM_POST_SUSPEND: - case PM_POST_HIBERNATION: - case PM_POST_RESTORE: - rc = led_trigger_register(&heartbeat_led_trigger); - if (rc) - pr_err("could not re-register heartbeat trigger\n"); - break; - default: - break; - } - return NOTIFY_DONE; -} - static int heartbeat_reboot_notifier(struct notifier_block *nb, unsigned long code, void *unused) { @@ -193,10 +168,6 @@ static int heartbeat_panic_notifier(struct notifier_block *nb, return NOTIFY_DONE; } -static struct notifier_block heartbeat_pm_nb = { - .notifier_call = heartbeat_pm_notifier, -}; - static struct notifier_block heartbeat_reboot_nb = { .notifier_call = heartbeat_reboot_notifier, }; @@ -213,14 +184,12 @@ static int __init heartbeat_trig_init(void) atomic_notifier_chain_register(&panic_notifier_list, &heartbeat_panic_nb); register_reboot_notifier(&heartbeat_reboot_nb); - register_pm_notifier(&heartbeat_pm_nb); } return rc; } static void __exit heartbeat_trig_exit(void) { - unregister_pm_notifier(&heartbeat_pm_nb); unregister_reboot_notifier(&heartbeat_reboot_nb); atomic_notifier_chain_unregister(&panic_notifier_list, &heartbeat_panic_nb); -- cgit v1.2.3 From 5f81b1f51b9cfcbfbe7a1abea09962c91bf485e7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jul 2017 13:07:17 +0200 Subject: netfilter: nat: fix src map lookup commit 97772bcd56efa21d9d8976db6f205574ea602f51 upstream. When doing initial conversion to rhashtable I replaced the bucket walk with a single rhashtable_lookup_fast(). When moving to rhlist I failed to properly walk the list of identical tuples, but that is what is needed for this to work correctly. The table contains the original tuples, so the reply tuples are all distinct. We currently decide that mapping is (not) in range only based on the first entry, but in case its not we need to try the reply tuple of the next entry until we either find an in-range mapping or we checked all the entries. This bug makes nat core attempt collision resolution while it might be able to use the mapping as-is. Fixes: 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable") Reported-by: Jaco Kroon Tested-by: Jaco Kroon Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_nat_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5b9c884a452e..dde64c4565d2 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -225,20 +225,21 @@ find_appropriate_src(struct net *net, .tuple = tuple, .zone = zone }; - struct rhlist_head *hl; + struct rhlist_head *hl, *h; hl = rhltable_lookup(&nf_nat_bysource_table, &key, nf_nat_bysource_params); - if (!hl) - return 0; - ct = container_of(hl, typeof(*ct), nat_bysource); + rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) { + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; + if (in_range(l3proto, l4proto, result, range)) + return 1; + } - return in_range(l3proto, l4proto, result, range); + return 0; } /* For [FUTURE] fragmentation handling, we want the least-used -- cgit v1.2.3 From 1b5fcb3b25f794957acbeec78800a18d4d0903b3 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 27 Jun 2017 17:34:44 +0800 Subject: Bluetooth: hidp: fix possible might sleep error in hidp_session_thread commit 5da8e47d849d3d37b14129f038782a095b9ad049 upstream. It looks like hidp_session_thread has same pattern as the issue reported in old rfcomm: while (1) { set_current_state(TASK_INTERRUPTIBLE); if (condition) break; // may call might_sleep here schedule(); } __set_current_state(TASK_RUNNING); Which fixed at: dfb2fae Bluetooth: Fix nested sleeps So let's fix it at the same way, also follow the suggestion of: https://lwn.net/Articles/628628/ Signed-off-by: Jeffy Chen Tested-by: AL Yu-Chen Cho Tested-by: Rohit Vaswani Signed-off-by: Marcel Holtmann Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hidp/core.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 0bec4588c3c8..1fc076420d1e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -36,6 +36,7 @@ #define VERSION "1.2" static DECLARE_RWSEM(hidp_session_sem); +static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq); static LIST_HEAD(hidp_session_list); static unsigned char hidp_keycode[256] = { @@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session *session) * Wake up session thread and notify it to stop. This is asynchronous and * returns immediately. Call this whenever a runtime error occurs and you want * the session to stop. - * Note: wake_up_process() performs any necessary memory-barriers for us. + * Note: wake_up_interruptible() performs any necessary memory-barriers for us. */ static void hidp_session_terminate(struct hidp_session *session) { atomic_inc(&session->terminate); - wake_up_process(session->task); + wake_up_interruptible(&hidp_session_wq); } /* @@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session) struct sock *ctrl_sk = session->ctrl_sock->sk; struct sock *intr_sk = session->intr_sock->sk; struct sk_buff *skb; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&hidp_session_wq, &wait); for (;;) { /* * This thread can be woken up two ways: @@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session *session) * session->terminate flag and wakes this thread up. * - Via modifying the socket state of ctrl/intr_sock. This * thread is woken up by ->sk_state_changed(). - * - * Note: set_current_state() performs any necessary - * memory-barriers for us. */ - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&session->terminate)) break; @@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session *session) hidp_process_transmit(session, &session->ctrl_transmit, session->ctrl_sock); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } + remove_wait_queue(&hidp_session_wq, &wait); atomic_inc(&session->terminate); - set_current_state(TASK_RUNNING); + + /* Ensure session->terminate is updated */ + smp_mb__after_atomic(); +} + +static int hidp_session_wake_function(wait_queue_t *wait, + unsigned int mode, + int sync, void *key) +{ + wake_up_interruptible(&hidp_session_wq); + return false; } /* @@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session) static int hidp_session_thread(void *arg) { struct hidp_session *session = arg; - wait_queue_t ctrl_wait, intr_wait; + DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function); + DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function); BT_DBG("session %p", session); @@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg) set_user_nice(current, -15); hidp_set_timer(session); - init_waitqueue_entry(&ctrl_wait, current); - init_waitqueue_entry(&intr_wait, current); add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait); add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); /* This memory barrier is paired with wq_has_sleeper(). See -- cgit v1.2.3 From ffb7640acbc3f8a3366258ae0f281b1e98472ad4 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 27 Jun 2017 17:34:43 +0800 Subject: Bluetooth: cmtp: fix possible might sleep error in cmtp_session commit f06d977309d09253c744e54e75c5295ecc52b7b4 upstream. It looks like cmtp_session has same pattern as the issue reported in old rfcomm: while (1) { set_current_state(TASK_INTERRUPTIBLE); if (condition) break; // may call might_sleep here schedule(); } __set_current_state(TASK_RUNNING); Which fixed at: dfb2fae Bluetooth: Fix nested sleeps So let's fix it at the same way, also follow the suggestion of: https://lwn.net/Articles/628628/ Signed-off-by: Jeffy Chen Reviewed-by: Brian Norris Reviewed-by: AL Yu-Chen Cho Signed-off-by: Marcel Holtmann Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/cmtp/core.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 9e59b6654126..1152ce34dad4 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -280,16 +280,16 @@ static int cmtp_session(void *arg) struct cmtp_session *session = arg; struct sock *sk = session->sock->sk; struct sk_buff *skb; - wait_queue_t wait; + DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG("session %p", session); set_user_nice(current, -15); - init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&session->terminate)) break; @@ -306,9 +306,8 @@ static int cmtp_session(void *arg) cmtp_process_transmit(session); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); @@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) err = cmtp_attach_device(session); if (err < 0) { atomic_inc(&session->terminate); - wake_up_process(session->task); + wake_up_interruptible(sk_sleep(session->sock->sk)); up_write(&cmtp_session_sem); return err; } @@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) /* Stop session thread */ atomic_inc(&session->terminate); - wake_up_process(session->task); + + /* Ensure session->terminate is updated */ + smp_mb__after_atomic(); + + wake_up_interruptible(sk_sleep(session->sock->sk)); } else err = -ENOENT; -- cgit v1.2.3 From 242cea2d666f9e9a61513f8d96d549122589daca Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 27 Jun 2017 17:34:42 +0800 Subject: Bluetooth: bnep: fix possible might sleep error in bnep_session commit 25717382c1dd0ddced2059053e3ca5088665f7a5 upstream. It looks like bnep_session has same pattern as the issue reported in old rfcomm: while (1) { set_current_state(TASK_INTERRUPTIBLE); if (condition) break; // may call might_sleep here schedule(); } __set_current_state(TASK_RUNNING); Which fixed at: dfb2fae Bluetooth: Fix nested sleeps So let's fix it at the same way, also follow the suggestion of: https://lwn.net/Articles/628628/ Signed-off-by: Jeffy Chen Reviewed-by: Brian Norris Reviewed-by: AL Yu-Chen Cho Signed-off-by: Marcel Holtmann Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/bnep/core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index fbf251fef70f..4d6b94d7ce5f 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -484,16 +484,16 @@ static int bnep_session(void *arg) struct net_device *dev = s->dev; struct sock *sk = s->sock->sk; struct sk_buff *skb; - wait_queue_t wait; + DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG(""); set_user_nice(current, -15); - init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&s->terminate)) break; @@ -515,9 +515,8 @@ static int bnep_session(void *arg) break; netif_wake_queue(dev); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ @@ -666,7 +665,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) s = __bnep_get_session(req->dst); if (s) { atomic_inc(&s->terminate); - wake_up_process(s->task); + wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; -- cgit v1.2.3 From eda70a559187ebc33b8019a50bcc4077030f2ed3 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 5 Jul 2017 13:46:01 -0700 Subject: Revert "android: binder: Sanity check at binder ioctl" commit a2b18708ee14baec4ef9c0fba96070bba14d0081 upstream. This reverts commit a906d6931f3ccaf7de805643190765ddd7378e27. The patch introduced a race in the binder driver. An attempt to fix the race was submitted in "[PATCH v2] android: binder: fix dangling pointer comparison", however the conclusion in the discussion for that patch was that the original patch should be reverted. The reversion is being done as part of the fine-grained locking patchset since the patch would need to be refactored when proc->vmm_vm_mm is removed from struct binder_proc and added in the binder allocator. Signed-off-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3c71b982bf2a..9d19880576b2 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2760,10 +2760,6 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /*pr_info("binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ - if (unlikely(current->mm != proc->vma_vm_mm)) { - pr_err("current mm mismatch proc mm\n"); - return -EINVAL; - } trace_binder_ioctl(cmd, arg); ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); @@ -2978,7 +2974,6 @@ static int binder_open(struct inode *nodp, struct file *filp) return -ENOMEM; get_task_struct(current); proc->tsk = current; - proc->vma_vm_mm = current->mm; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); proc->default_priority = task_nice(current); -- cgit v1.2.3 From 51050750181c791410a7e8dc405cc909e264587b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 29 Jun 2017 12:01:36 -0700 Subject: binder: use group leader instead of open thread commit c4ea41ba195d01c9af66fb28711a16cc97caa9c5 upstream. The binder allocator assumes that the thread that called binder_open will never die for the lifetime of that proc. That thread is normally the group_leader, however it may not be. Use the group_leader instead of current. Signed-off-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9d19880576b2..abd10afa014f 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2972,8 +2972,8 @@ static int binder_open(struct inode *nodp, struct file *filp) proc = kzalloc(sizeof(*proc), GFP_KERNEL); if (proc == NULL) return -ENOMEM; - get_task_struct(current); - proc->tsk = current; + get_task_struct(current->group_leader); + proc->tsk = current->group_leader; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); proc->default_priority = task_nice(current); -- cgit v1.2.3 From 8fb0b0ce288d38d78685a9d6f6320339afadf709 Mon Sep 17 00:00:00 2001 From: Riley Andrews Date: Thu, 29 Jun 2017 12:01:37 -0700 Subject: binder: Use wake up hint for synchronous transactions. commit 00b40d613352c623aaae88a44e5ded7c912909d7 upstream. Use wake_up_interruptible_sync() to hint to the scheduler binder transactions are synchronous wakeups. Disable preemption while waking to avoid ping-ponging on the binder lock. Signed-off-by: Todd Kjos Signed-off-by: Omprakash Dhyade Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index abd10afa014f..5ce7ee98f3b5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1724,8 +1724,12 @@ static void binder_transaction(struct binder_proc *proc, list_add_tail(&t->work.entry, target_list); tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; list_add_tail(&tcomplete->entry, &thread->todo); - if (target_wait) - wake_up_interruptible(target_wait); + if (target_wait) { + if (reply || !(t->flags & TF_ONE_WAY)) + wake_up_interruptible_sync(target_wait); + else + wake_up_interruptible(target_wait); + } return; err_get_unused_fd_failed: -- cgit v1.2.3 From cbd854d993546a38d8d5ee46361715c2119cd613 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 28 Jul 2017 13:56:08 +0200 Subject: ANDROID: binder: fix proc->tsk check. commit b2a6d1b999a4c13e5997bb864694e77172d45250 upstream. Commit c4ea41ba195d ("binder: use group leader instead of open thread")' was incomplete and didn't update a check in binder_mmap(), causing all mmap() calls into the binder driver to fail. Signed-off-by: Martijn Coenen Tested-by: John Stultz Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5ce7ee98f3b5..15009b2b33c7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2875,7 +2875,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) const char *failure_string; struct binder_buffer *buffer; - if (proc->tsk != current) + if (proc->tsk != current->group_leader) return -EINVAL; if ((vma->vm_end - vma->vm_start) > SZ_4M) -- cgit v1.2.3 From b150ee066af7dac804f92a44452bb2b862a6bc15 Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Fri, 4 Aug 2017 01:37:27 +0300 Subject: iio: imu: adis16480: Fix acceleration scale factor for adis16480 commit fdd0d32eb95f135041236a6885d9006315aa9a1d upstream. According to the datasheet, the range of the acceleration is [-10 g, + 10 g], so the scale factor should be 10 instead of 5. Signed-off-by: Dragos Bogdan Acked-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16480.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 8cf84d3488b2..12898424d838 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -696,7 +696,7 @@ static const struct adis16480_chip_info adis16480_chip_info[] = { .gyro_max_val = IIO_RAD_TO_DEGREE(22500), .gyro_max_scale = 450, .accel_max_val = IIO_M_S_2_TO_G(12500), - .accel_max_scale = 5, + .accel_max_scale = 10, }, [ADIS16485] = { .channels = adis16485_channels, -- cgit v1.2.3 From 23caaf2f216c2120e72705d7b297ce95724c97ae Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sat, 12 Aug 2017 09:09:21 -0700 Subject: iio: hid-sensor-trigger: Fix the race with user space powering up sensors commit f1664eaacec31035450132c46ed2915fd2b2049a upstream. It has been reported for a while that with iio-sensor-proxy service the rotation only works after one suspend/resume cycle. This required a wait in the systemd unit file to avoid race. I found a Yoga 900 where I could reproduce this. The problem scenerio is: - During sensor driver init, enable run time PM and also set a auto-suspend for 3 seconds. This result in one runtime resume. But there is a check to avoid a powerup in this sequence, but rpm is active - User space iio-sensor-proxy tries to power up the sensor. Since rpm is active it will simply return. But sensors were not actually powered up in the prior sequence, so actaully the sensors will not work - After 3 seconds the auto suspend kicks If we add a wait in systemd service file to fire iio-sensor-proxy after 3 seconds, then now everything will work as the runtime resume will actually powerup the sensor as this is a user request. To avoid this: - Remove the check to match user requested state, this will cause a brief powerup, but if the iio-sensor-proxy starts immediately it will still work as the sensors are ON. - Also move the autosuspend delay to place when user requested turn off of sensors, like after user finished raw read or buffer disable Signed-off-by: Srinivas Pandruvada Tested-by: Bastien Nocera Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 60829340a82e..b60e5d87c257 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -36,8 +36,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) s32 poll_value = 0; if (state) { - if (!atomic_read(&st->user_requested_state)) - return 0; if (sensor_hub_device_open(st->hsdev)) return -EIO; @@ -86,6 +84,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) &report_val); } + pr_debug("HID_SENSOR %s set power_state %d report_state %d\n", + st->pdev->name, state_val, report_val); + sensor_hub_get_feature(st->hsdev, st->power_state.report_id, st->power_state.index, sizeof(state_val), &state_val); @@ -107,6 +108,7 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state) ret = pm_runtime_get_sync(&st->pdev->dev); else { pm_runtime_mark_last_busy(&st->pdev->dev); + pm_runtime_use_autosuspend(&st->pdev->dev); ret = pm_runtime_put_autosuspend(&st->pdev->dev); } if (ret < 0) { @@ -201,8 +203,6 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, /* Default to 3 seconds, but can be changed from sysfs */ pm_runtime_set_autosuspend_delay(&attrb->pdev->dev, 3000); - pm_runtime_use_autosuspend(&attrb->pdev->dev); - return ret; error_unreg_trigger: iio_trigger_unregister(trig); -- cgit v1.2.3 From ab4be3a64d06e790f1aeae671569bd910e7995e3 Mon Sep 17 00:00:00 2001 From: Charles Milette Date: Fri, 18 Aug 2017 16:30:34 -0400 Subject: staging: rtl8188eu: add RNX-N150NUB support commit f299aec6ebd747298e35934cff7709c6b119ca52 upstream. Add support for USB Device Rosewill RNX-N150NUB. VendorID: 0x0bda, ProductID: 0xffef Signed-off-by: Charles Milette Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index b432153a6c5a..0f63a36a519e 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -45,6 +45,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ + {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ }; -- cgit v1.2.3 From b8fce38266dbf3fdc1e100a0c2ffd555b336f03d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 12:12:25 -0700 Subject: Clarify (and fix) MAX_LFS_FILESIZE macros commit 0cc3b0ec23ce4c69e1e890ed2b8d2fa932b14aad upstream. We have a MAX_LFS_FILESIZE macro that is meant to be filled in by filesystems (and other IO targets) that know they are 64-bit clean and don't have any 32-bit limits in their IO path. It turns out that our 32-bit value for that limit was bogus. On 32-bit, the VM layer is limited by the page cache to only 32-bit index values, but our logic for that was confusing and actually wrong. We used to define that value to (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) which is actually odd in several ways: it limits the index to 31 bits, and then it limits files so that they can't have data in that last byte of a page that has the highest 31-bit index (ie page index 0x7fffffff). Neither of those limitations make sense. The index is actually the full 32 bit unsigned value, and we can use that whole full page. So the maximum size of the file would logically be "PAGE_SIZE << BITS_PER_LONG". However, we do wan tto avoid the maximum index, because we have code that iterates over the page indexes, and we don't want that code to overflow. So the maximum size of a file on a 32-bit host should actually be one page less than the full 32-bit index. So the actual limit is ULONG_MAX << PAGE_SHIFT. That means that we will not actually be using the page of that last index (ULONG_MAX), but we can grow a file up to that limit. The wrong value of MAX_LFS_FILESIZE actually caused problems for Doug Nazar, who was still using a 32-bit host, but with a 9.7TB 2 x RAID5 volume. It turns out that our old MAX_LFS_FILESIZE was 8TiB (well, one byte less), but the actual true VM limit is one page less than 16TiB. This was invisible until commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"), which started applying that MAX_LFS_FILESIZE limit to block devices too. NOTE! On 64-bit, the page index isn't a limiter at all, and the limit is actually just the offset type itself (loff_t), which is signed. But for clarity, on 64-bit, just use the maximum signed value, and don't make people have to count the number of 'f' characters in the hex constant. So just use LLONG_MAX for the 64-bit case. That was what the value had been before too, just written out as a hex constant. Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Reported-and-tested-by: Doug Nazar Cc: Andreas Dilger Cc: Mark Fasheh Cc: Joel Becker Cc: Dave Kleikamp Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2f63d44368bd..dd88ded27fc8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -941,9 +941,9 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 -#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) +#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif #define FL_POSIX 1 -- cgit v1.2.3 From 4743d1b37a38c36ce2b01741edd33bbaaea9a1b4 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 5 Jun 2017 14:00:52 -0600 Subject: ntb_transport: fix qp count bug commit cb827ee6ccc3e480f0d9c0e8e53eef55be5b0414 upstream. In cases where there are more mw's than spads/2-2, the mw count gets reduced to match the limitation. ntb_transport also tries to ensure that there are fewer qps than mws but uses the full mw count instead of the reduced one. When this happens, the math in 'ntb_transport_setup_qp_mw' will get confused and result in a kernel paging request bug. This patch fixes the bug by reducing qp_count to the reduced mw count instead of the full mw count. Signed-off-by: Logan Gunthorpe Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Acked-by: Allen Hubbe Signed-off-by: Jon Mason Signed-off-by: Greg Kroah-Hartman --- drivers/ntb/ntb_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index c234ee43b6ef..c32bda90c562 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1125,8 +1125,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) qp_count = ilog2(qp_bitmap); if (max_num_clients && max_num_clients < qp_count) qp_count = max_num_clients; - else if (mw_count < qp_count) - qp_count = mw_count; + else if (nt->mw_count < qp_count) + qp_count = nt->mw_count; qp_bitmap &= BIT_ULL(qp_count) - 1; -- cgit v1.2.3 From 471954c313c57e09839f1381bd4897283d8e6537 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 5 Jun 2017 14:00:53 -0600 Subject: ntb_transport: fix bug calculating num_qps_mw commit 8e8496e0e9564b66165f5219a4e8ed20b0d3fc6b upstream. A divide by zero error occurs if qp_count is less than mw_count because num_qps_mw is calculated to be zero. The calculation appears to be incorrect. The requirement is for num_qps_mw to be set to qp_count / mw_count with any remainder divided among the earlier mws. For example, if mw_count is 5 and qp_count is 12 then mws 0 and 1 will have 3 qps per window and mws 2 through 4 will have 2 qps per window. Thus, when mw_num < qp_count % mw_count, num_qps_mw is 1 higher than when mw_num >= qp_count. Signed-off-by: Logan Gunthorpe Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Acked-by: Allen Hubbe Signed-off-by: Jon Mason Signed-off-by: Greg Kroah-Hartman --- drivers/ntb/ntb_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index c32bda90c562..18414dc17cd0 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -625,7 +625,7 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, if (!mw->virt_addr) return -ENOMEM; - if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + if (mw_num < qp_count % mw_count) num_qps_mw = qp_count / mw_count + 1; else num_qps_mw = qp_count / mw_count; @@ -1002,7 +1002,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, qp->event_handler = NULL; ntb_qp_link_down_reset(qp); - if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + if (mw_num < qp_count % mw_count) num_qps_mw = qp_count / mw_count + 1; else num_qps_mw = qp_count / mw_count; -- cgit v1.2.3 From bff04a462da3b1e4ae588bcbbe2e318d7fbada28 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 5 Jun 2017 10:13:24 -0600 Subject: NTB: ntb_test: fix bug printing ntb_perf results commit 07b0b22b3e58824f70b9188d085d400069ca3240 upstream. The code mistakenly prints the local perf results for the remote test so the script reports identical results for both directions. Fix this by ensuring we print the remote result. Signed-off-by: Logan Gunthorpe Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem") Acked-by: Allen Hubbe Signed-off-by: Jon Mason Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/ntb/ntb_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index a676d3eefefb..13f5198ba0ee 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -305,7 +305,7 @@ function perf_test() echo "Running remote perf test $WITH DMA" write_file "" $REMOTE_PERF/run echo -n " " - read_file $LOCAL_PERF/run + read_file $REMOTE_PERF/run echo " Passed" _modprobe -r ntb_perf -- cgit v1.2.3 From 7592db555524b5d0dfe92a1d3069f450f31291fe Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Fri, 9 Jun 2017 18:06:36 -0400 Subject: ntb: no sleep in ntb_async_tx_submit commit 88931ec3dc11e7dbceb3b0df455693873b508fbe upstream. Do not sleep in ntb_async_tx_submit, which could deadlock. This reverts commit "8c874cc140d667f84ae4642bb5b5e0d6396d2ca4" Fixes: 8c874cc140d6 ("NTB: Address out of DMA descriptor issue with NTB") Reported-by: Jia-Ju Bai Signed-off-by: Allen Hubbe Acked-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Greg Kroah-Hartman --- drivers/ntb/ntb_transport.c | 50 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 18414dc17cd0..d7582a1dd163 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -176,14 +176,12 @@ struct ntb_transport_qp { u64 rx_err_ver; u64 rx_memcpy; u64 rx_async; - u64 dma_rx_prep_err; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; u64 tx_err_no_buf; u64 tx_memcpy; u64 tx_async; - u64 dma_tx_prep_err; }; struct ntb_transport_mw { @@ -256,8 +254,6 @@ enum { #define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 -#define DMA_RETRIES 20 -#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) static void ntb_transport_rxc_db(unsigned long data); static const struct ntb_ctx_ops ntb_transport_ops; @@ -518,12 +514,6 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, - "DMA tx prep err - \t%llu\n", - qp->dma_tx_prep_err); - out_offset += snprintf(buf + out_offset, out_count - out_offset, - "DMA rx prep err - \t%llu\n", - qp->dma_rx_prep_err); out_offset += snprintf(buf + out_offset, out_count - out_offset, "\n"); @@ -770,8 +760,6 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) qp->tx_err_no_buf = 0; qp->tx_memcpy = 0; qp->tx_async = 0; - qp->dma_tx_prep_err = 0; - qp->dma_rx_prep_err = 0; } static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) @@ -1314,7 +1302,6 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset) struct dmaengine_unmap_data *unmap; dma_cookie_t cookie; void *buf = entry->buf; - int retries = 0; len = entry->len; device = chan->device; @@ -1343,22 +1330,11 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset) unmap->from_cnt = 1; - for (retries = 0; retries < DMA_RETRIES; retries++) { - txd = device->device_prep_dma_memcpy(chan, - unmap->addr[1], - unmap->addr[0], len, - DMA_PREP_INTERRUPT); - if (txd) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(DMA_OUT_RESOURCE_TO); - } - - if (!txd) { - qp->dma_rx_prep_err++; + txd = device->device_prep_dma_memcpy(chan, unmap->addr[1], + unmap->addr[0], len, + DMA_PREP_INTERRUPT); + if (!txd) goto err_get_unmap; - } txd->callback_result = ntb_rx_copy_callback; txd->callback_param = entry; @@ -1603,7 +1579,6 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp, struct dmaengine_unmap_data *unmap; dma_addr_t dest; dma_cookie_t cookie; - int retries = 0; device = chan->device; dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index; @@ -1625,21 +1600,10 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp, unmap->to_cnt = 1; - for (retries = 0; retries < DMA_RETRIES; retries++) { - txd = device->device_prep_dma_memcpy(chan, dest, - unmap->addr[0], len, - DMA_PREP_INTERRUPT); - if (txd) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(DMA_OUT_RESOURCE_TO); - } - - if (!txd) { - qp->dma_tx_prep_err++; + txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len, + DMA_PREP_INTERRUPT); + if (!txd) goto err_get_unmap; - } txd->callback_result = ntb_tx_copy_callback; txd->callback_param = entry; -- cgit v1.2.3 From 4d4f35473d8f77f1df508f5bb87cc89513a843d9 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 25 Jul 2017 14:57:42 -0600 Subject: ntb: ntb_test: ensure the link is up before trying to configure the mws commit 0eb46345364d7318b11068c46e8a68d5dc10f65e upstream. After the link tests, there is a race on one side of the test for the link coming up. It's possible, in some cases, for the test script to write to the 'peer_trans' files before the link has come up. To fix this, we simply use the link event file to ensure both sides see the link as up before continuning. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem") Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/ntb/ntb_test.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 13f5198ba0ee..b3c48fc6ea4b 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -326,6 +326,10 @@ function ntb_tool_tests() link_test $LOCAL_TOOL $REMOTE_TOOL link_test $REMOTE_TOOL $LOCAL_TOOL + #Ensure the link is up on both sides before continuing + write_file Y $LOCAL_TOOL/link_event + write_file Y $REMOTE_TOOL/link_event + for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do PT=$(basename $PEER_TRANS) write_file $MW_SIZE $LOCAL_TOOL/$PT -- cgit v1.2.3 From c1628774f0c807944478684b97cf258da8f7c10a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 28 Jul 2017 15:10:48 -0700 Subject: ntb: transport shouldn't disable link due to bogus values in SPADs commit f3fd2afed8eee91620d05b69ab94c14793c849d7 upstream. It seems that under certain scenarios the SPAD can have bogus values caused by an agent (i.e. BIOS or other software) that is not the kernel driver, and that causes memory window setup failure. This should not cause the link to be disabled because if we do that, the driver will never recover again. We have verified in testing that this issue happens and prevents proper link recovery. Signed-off-by: Dave Jiang Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: 84f766855f61 ("ntb: stop link work when we do not have memory") Signed-off-by: Greg Kroah-Hartman --- drivers/ntb/ntb_transport.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index d7582a1dd163..24222a5d8df2 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -921,10 +921,8 @@ out1: ntb_free_mw(nt, i); /* if there's an actual failure, we should just bail */ - if (rc < 0) { - ntb_link_disable(ndev); + if (rc < 0) return; - } out: if (ntb_link_is_up(ndev, NULL, NULL) == 1) -- cgit v1.2.3 From 454cac5d0891494dd2050a4da4fa64a9173c38af Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 22 Mar 2017 18:33:23 +0100 Subject: ACPI: ioapic: Clear on-stack resource before using it commit e3d5092b6756b9e0b08f94bbeafcc7afe19f0996 upstream. The on-stack resource-window 'win' in setup_res() is not properly initialized. This causes the pointers in the embedded 'struct resource' to contain stale addresses. These pointers (in my case the ->child pointer) later get propagated to the global iomem_resources list, causing a #GP exception when the list is traversed in iomem_map_sanity_check(). Fixes: c183619b63ec (x86/irq, ACPI: Implement ACPI driver to support IOAPIC hotplug) Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c index 6d7ce6e12aaa..5e18ccf5ab57 100644 --- a/drivers/acpi/ioapic.c +++ b/drivers/acpi/ioapic.c @@ -45,6 +45,12 @@ static acpi_status setup_res(struct acpi_resource *acpi_res, void *data) struct resource *res = data; struct resource_win win; + /* + * We might assign this to 'res' later, make sure all pointers are + * cleared before the resource is added to the global list + */ + memset(&win, 0, sizeof(win)); + res->flags = 0; if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM)) return AE_OK; -- cgit v1.2.3 From 3bc8e4f96fe906c4a872f84c7d9cf053e5786bfe Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 16 Mar 2017 14:30:39 +0000 Subject: ACPI / APEI: Add missing synchronize_rcu() on NOTIFY_SCI removal commit 7d64f82cceb21e6d95db312d284f5f195e120154 upstream. When removing a GHES device notified by SCI, list_del_rcu() is used, ghes_remove() should call synchronize_rcu() before it goes on to call kfree(ghes), otherwise concurrent RCU readers may still hold this list entry after it has been freed. Signed-off-by: James Morse Reviewed-by: "Huang, Ying" Fixes: 81e88fdc432a (ACPI, APEI, Generic Hardware Error Source POLL/IRQ/NMI notification type support) Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/apei/ghes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e53bef6cf53c..0375c6024062 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1072,6 +1072,7 @@ static int ghes_remove(struct platform_device *ghes_dev) if (list_empty(&ghes_sci)) unregister_acpi_hed_notifier(&ghes_notifier_sci); mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); -- cgit v1.2.3 From 5906715b93da2923d7fced66a90f45145959ab33 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Aug 2017 15:29:49 +0800 Subject: ACPI: EC: Fix regression related to wrong ECDT initialization order commit 98529b9272e06a7767034fb8a32e43cdecda240a upstream. Commit 2a5708409e4e (ACPI / EC: Fix a gap that ECDT EC cannot handle EC events) introduced acpi_ec_ecdt_start(), but that function is invoked before acpi_ec_query_init(), which is too early. This causes the kernel to crash if an EC event occurs after boot, when ec_query_wq is not valid: BUG: unable to handle kernel NULL pointer dereference at 0000000000000102 ... Workqueue: events acpi_ec_event_handler task: ffff9f539790dac0 task.stack: ffffb437c0e10000 RIP: 0010:__queue_work+0x32/0x430 Normally, the DSDT EC should always be valid, so acpi_ec_ecdt_start() is actually a no-op in the majority of cases. However, commit c712bb58d827 (ACPI / EC: Add support to skip boot stage DSDT probe) caused the probing of the DSDT EC as the "boot EC" to be skipped when the ECDT EC is valid and uncovered the bug. Fix this issue by invoking acpi_ec_ecdt_start() after acpi_ec_query_init() in acpi_ec_init(). Link: https://jira01.devtools.intel.com/browse/LCK-4348 Fixes: 2a5708409e4e (ACPI / EC: Fix a gap that ECDT EC cannot handle EC events) Fixes: c712bb58d827 (ACPI / EC: Add support to skip boot stage DSDT probe) Reported-by: Wang Wendy Tested-by: Feng Chenzhou Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 17 +++++++---------- drivers/acpi/internal.h | 1 - drivers/acpi/scan.c | 1 - 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 79152dbc5528..51874695a730 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1728,7 +1728,7 @@ error: * functioning ECDT EC first in order to handle the events. * https://bugzilla.kernel.org/show_bug.cgi?id=115021 */ -int __init acpi_ec_ecdt_start(void) +static int __init acpi_ec_ecdt_start(void) { acpi_handle handle; @@ -1959,20 +1959,17 @@ static inline void acpi_ec_query_exit(void) int __init acpi_ec_init(void) { int result; + int ecdt_fail, dsdt_fail; /* register workqueue for _Qxx evaluations */ result = acpi_ec_query_init(); if (result) - goto err_exit; - /* Now register the driver for the EC */ - result = acpi_bus_register_driver(&acpi_ec_driver); - if (result) - goto err_exit; + return result; -err_exit: - if (result) - acpi_ec_query_exit(); - return result; + /* Drivers must be started after acpi_ec_query_init() */ + ecdt_fail = acpi_ec_ecdt_start(); + dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver); + return ecdt_fail && dsdt_fail ? -ENODEV : 0; } /* EC driver currently not unloadable */ diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 219b90bc0922..08b3ca0ead69 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -185,7 +185,6 @@ typedef int (*acpi_ec_query_func) (void *data); int acpi_ec_init(void); int acpi_ec_ecdt_probe(void); int acpi_ec_dsdt_probe(void); -int acpi_ec_ecdt_start(void); void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index dd3786acba89..cf725d581cae 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2051,7 +2051,6 @@ int __init acpi_scan_init(void) acpi_gpe_apply_masked_gpes(); acpi_update_all_gpes(); - acpi_ec_ecdt_start(); acpi_scan_initialized = true; -- cgit v1.2.3 From 5aa523a994d1e2850115dac11c3ea69e332a97ae Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:00 +1000 Subject: powerpc/mm: Ensure cpumask update is ordered commit 1a92a80ad386a1a6e3b36d576d52a1a456394b70 upstream. There is no guarantee that the various isync's involved with the context switch will order the update of the CPU mask with the first TLB entry for the new context being loaded by the HW. Be safe here and add a memory barrier to order any subsequent load/store which may bring entries into the TLB. The corresponding barrier on the other side already exists as pte updates use pte_xchg() which uses __cmpxchg_u64 which has a sync after the atomic operation. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Nicholas Piggin [mpe: Add comments in the code] [mpe: Backport to 4.12, minor context change] Signed-off-by: Michael Ellerman Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu_context.h | 20 +++++++++++++++++++- arch/powerpc/include/asm/pgtable-be-types.h | 1 + arch/powerpc/include/asm/pgtable-types.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 0012f0353fd6..fe208b70b8b1 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -75,9 +75,27 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct task_struct *tsk) { /* Mark this context has been used on the new CPU */ - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + /* + * This full barrier orders the store to the cpumask above vs + * a subsequent operation which allows this CPU to begin loading + * translations for next. + * + * When using the radix MMU that operation is the load of the + * MMU context id, which is then moved to SPRN_PID. + * + * For the hash MMU it is either the first load from slb_cache + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). + * + * On the read side the barrier is in pte_xchg(), which orders + * the store to the PTE vs the load of mm_cpumask. + */ + smp_mb(); + } + /* 32-bit keeps track of the current PGDIR in the thread struct */ #ifdef CONFIG_PPC32 tsk->thread.pgdir = next->pgd; diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index 49c0a5a80efa..68e087e807f8 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) unsigned long *p = (unsigned long *)ptep; __be64 prev; + /* See comment in switch_mm_irqs_off() */ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old), (__force unsigned long)pte_raw(new)); diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index e7f4f3e0fcde..41e9d0a6cbeb 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) { unsigned long *p = (unsigned long *)ptep; + /* See comment in switch_mm_irqs_off() */ return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new)); } #endif -- cgit v1.2.3 From 0eed54bdbd1b922004fe05dc8bf3815f2e5723d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Aug 2017 10:24:43 +0200 Subject: Linux 4.9.46 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ccd6d91f616e..846ef1b57a02 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 45 +SUBLEVEL = 46 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From f71996c3ce5d3b7ee0f581f6c2c37a19d50d72e8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 14 Oct 2016 11:23:09 +0200 Subject: p54: memset(0) whole array commit 6f17581788206444cbbcdbc107498f85e9765e3d upstream. gcc 7 complains: drivers/net/wireless/intersil/p54/fwio.c: In function 'p54_scan': drivers/net/wireless/intersil/p54/fwio.c:491:4: warning: 'memset' used with length equal to number of elements without multiplication by element size [-Wmemset-elt-size] Fix that by passing the correct size to memset. Signed-off-by: Jiri Slaby Cc: Christian Lamparter Cc: Kalle Valo Acked-by: Christian Lamparter Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intersil/p54/fwio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/p54/fwio.c b/drivers/net/wireless/intersil/p54/fwio.c index 257a9eadd595..4ac6764f4897 100644 --- a/drivers/net/wireless/intersil/p54/fwio.c +++ b/drivers/net/wireless/intersil/p54/fwio.c @@ -488,7 +488,7 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell) entry += sizeof(__le16); chan->pa_points_per_curve = 8; - memset(chan->curve_data, 0, sizeof(*chan->curve_data)); + memset(chan->curve_data, 0, sizeof(chan->curve_data)); memcpy(chan->curve_data, entry, sizeof(struct p54_pa_curve_data_sample) * min((u8)8, curve_data->points_per_channel)); -- cgit v1.2.3 From dd758f82a3bf2f11843a59188cd0d7922dbcc731 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 18 Nov 2016 17:14:01 +0100 Subject: scsi: isci: avoid array subscript warning commit 5cfa2a3c7342bd0b50716c8bb32ee491af43c785 upstream. I'm getting a new warning with gcc-7: isci/remote_node_context.c: In function 'sci_remote_node_context_destruct': isci/remote_node_context.c:69:16: error: array subscript is above array bounds [-Werror=array-bounds] This is odd, since we clearly cover all values for enum scis_sds_remote_node_context_states here. Anyway, checking for an array overflow can't harm and it makes the warning go away. Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/isci/remote_node_context.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/isci/remote_node_context.c b/drivers/scsi/isci/remote_node_context.c index 1910100638a2..00602abec0ea 100644 --- a/drivers/scsi/isci/remote_node_context.c +++ b/drivers/scsi/isci/remote_node_context.c @@ -66,6 +66,9 @@ const char *rnc_state_name(enum scis_sds_remote_node_context_states state) { static const char * const strings[] = RNC_STATES; + if (state >= ARRAY_SIZE(strings)) + return "UNKNOWN"; + return strings[state]; } #undef C -- cgit v1.2.3 From 47974403c9caadc9fdb762e399569689efb02386 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Nov 2016 16:07:10 +0100 Subject: staging: wilc1000: simplify vif[i]->ndev accesses commit 735bb39ca3bed8469b3b3a42d8cc57bdb9fc4dd7 upstream. With gcc-7, I got a new warning for this driver: wilc1000/linux_wlan.c: In function 'wilc_netdev_cleanup': wilc1000/linux_wlan.c:1224:15: error: 'vif[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized] wilc1000/linux_wlan.c:1224:15: error: 'vif[0]' may be used uninitialized in this function [-Werror=maybe-uninitialized] A closer look at the function reveals that it's more complex than it needs to be, given that based on how the device is created we always get netdev_priv(vif->ndev) == vif Based on this assumption, I found a few other places in the same file that can be simplified. That code appears to be a relic from times when the assumption above was not valid. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/linux_wlan.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c index 6370a5efe343..defffa75ae1c 100644 --- a/drivers/staging/wilc1000/linux_wlan.c +++ b/drivers/staging/wilc1000/linux_wlan.c @@ -269,23 +269,12 @@ static struct net_device *get_if_handler(struct wilc *wilc, u8 *mac_header) int wilc_wlan_set_bssid(struct net_device *wilc_netdev, u8 *bssid, u8 mode) { - int i = 0; - int ret = -1; - struct wilc_vif *vif; - struct wilc *wilc; - - vif = netdev_priv(wilc_netdev); - wilc = vif->wilc; + struct wilc_vif *vif = netdev_priv(wilc_netdev); - for (i = 0; i < wilc->vif_num; i++) - if (wilc->vif[i]->ndev == wilc_netdev) { - memcpy(wilc->vif[i]->bssid, bssid, 6); - wilc->vif[i]->mode = mode; - ret = 0; - break; - } + memcpy(vif->bssid, bssid, 6); + vif->mode = mode; - return ret; + return 0; } int wilc_wlan_get_num_conn_ifcs(struct wilc *wilc) @@ -1212,16 +1201,11 @@ void WILC_WFI_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size) void wilc_netdev_cleanup(struct wilc *wilc) { - int i = 0; - struct wilc_vif *vif[NUM_CONCURRENT_IFC]; + int i; - if (wilc && (wilc->vif[0]->ndev || wilc->vif[1]->ndev)) { + if (wilc && (wilc->vif[0]->ndev || wilc->vif[1]->ndev)) unregister_inetaddr_notifier(&g_dev_notifier); - for (i = 0; i < NUM_CONCURRENT_IFC; i++) - vif[i] = netdev_priv(wilc->vif[i]->ndev); - } - if (wilc && wilc->firmware) { release_firmware(wilc->firmware); wilc->firmware = NULL; @@ -1230,7 +1214,7 @@ void wilc_netdev_cleanup(struct wilc *wilc) if (wilc && (wilc->vif[0]->ndev || wilc->vif[1]->ndev)) { for (i = 0; i < NUM_CONCURRENT_IFC; i++) if (wilc->vif[i]->ndev) - if (vif[i]->mac_opened) + if (wilc->vif[i]->mac_opened) wilc_mac_close(wilc->vif[i]->ndev); for (i = 0; i < NUM_CONCURRENT_IFC; i++) { @@ -1278,9 +1262,9 @@ int wilc_netdev_init(struct wilc **wilc, struct device *dev, int io_type, vif->idx = wl->vif_num; vif->wilc = *wilc; + vif->ndev = ndev; wl->vif[i] = vif; - wl->vif[wl->vif_num]->ndev = ndev; - wl->vif_num++; + wl->vif_num = i; ndev->netdev_ops = &wilc_netdev_ops; { -- cgit v1.2.3 From b8a1532b16fd49596304e1cfd285cf4509b6fba7 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Fri, 12 May 2017 15:46:35 -0700 Subject: gcov: support GCC 7.1 commit 05384213436ab690c46d9dfec706b80ef8d671ab upstream. Starting from GCC 7.1, __gcov_exit is a new symbol expected to be implemented in a profiling runtime. [akpm@linux-foundation.org: coding-style fixes] [mliska@suse.cz: v2] Link: http://lkml.kernel.org/r/e63a3c59-0149-c97e-4084-20ca8f146b26@suse.cz Link: http://lkml.kernel.org/r/8c4084fa-3885-29fe-5fc4-0d4ca199c785@suse.cz Signed-off-by: Martin Liska Acked-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/gcov/base.c | 6 ++++++ kernel/gcov/gcc_4_7.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 2f9df37940a0..c51a49c9be70 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters) } EXPORT_SYMBOL(__gcov_merge_icall_topn); +void __gcov_exit(void) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_exit); + /** * gcov_enable_events - enable event reporting through gcov_event() * diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 6a5c239c7669..46a18e72bce6 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -18,7 +18,9 @@ #include #include "gcov.h" -#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) +#if (__GNUC__ >= 7) +#define GCOV_COUNTERS 9 +#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) #define GCOV_COUNTERS 10 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9 #define GCOV_COUNTERS 9 -- cgit v1.2.3 From 3e033635b2b7eab01855c5a3e426e364064fd12b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 3 May 2017 15:17:51 +0100 Subject: kvm: arm/arm64: Fix race in resetting stage2 PGD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c0d706b563af732adb094c5bf807437e8963e84 upstream. In kvm_free_stage2_pgd() we check the stage2 PGD before holding the lock and proceed to take the lock if it is valid. And we unmap the page tables, followed by releasing the lock. We reset the PGD only after dropping this lock, which could cause a race condition where another thread waiting on or even holding the lock, could potentially see that the PGD is still valid and proceed to perform a stage2 operation and later encounter a NULL PGD. [223090.242280] Unable to handle kernel NULL pointer dereference at virtual address 00000040 [223090.262330] PC is at unmap_stage2_range+0x8c/0x428 [223090.262332] LR is at kvm_unmap_hva_handler+0x2c/0x3c [223090.262531] Call trace: [223090.262533] [] unmap_stage2_range+0x8c/0x428 [223090.262535] [] kvm_unmap_hva_handler+0x2c/0x3c [223090.262537] [] handle_hva_to_gpa+0xb0/0x104 [223090.262539] [] kvm_unmap_hva+0x5c/0xbc [223090.262543] [] kvm_mmu_notifier_invalidate_page+0x50/0x8c [223090.262547] [] __mmu_notifier_invalidate_page+0x5c/0x84 [223090.262551] [] try_to_unmap_one+0x1d0/0x4a0 [223090.262553] [] rmap_walk+0x1cc/0x2e0 [223090.262555] [] try_to_unmap+0x74/0xa4 [223090.262557] [] migrate_pages+0x31c/0x5ac [223090.262561] [] compact_zone+0x3fc/0x7ac [223090.262563] [] compact_zone_order+0x94/0xb0 [223090.262564] [] try_to_compact_pages+0x108/0x290 [223090.262569] [] __alloc_pages_direct_compact+0x70/0x1ac [223090.262571] [] __alloc_pages_nodemask+0x434/0x9f4 [223090.262572] [] alloc_pages_vma+0x230/0x254 [223090.262574] [] do_huge_pmd_anonymous_page+0x114/0x538 [223090.262576] [] handle_mm_fault+0xd40/0x17a4 [223090.262577] [] __get_user_pages+0x12c/0x36c [223090.262578] [] get_user_pages_unlocked+0xa4/0x1b8 [223090.262579] [] __gfn_to_pfn_memslot+0x280/0x31c [223090.262580] [] gfn_to_pfn_prot+0x4c/0x5c [223090.262582] [] kvm_handle_guest_abort+0x240/0x774 [223090.262584] [] handle_exit+0x11c/0x1ac [223090.262586] [] kvm_arch_vcpu_ioctl_run+0x31c/0x648 [223090.262587] [] kvm_vcpu_ioctl+0x378/0x768 [223090.262590] [] do_vfs_ioctl+0x324/0x5a4 [223090.262591] [] SyS_ioctl+0x90/0xa4 [223090.262595] [] el0_svc_naked+0x38/0x3c This patch moves the stage2 PGD manipulation under the lock. Reported-by: Alexander Graf Cc: Mark Rutland Cc: Marc Zyngier Cc: Paolo Bonzini Cc: Radim Krčmář Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 710511cadd50..0c060c5e844a 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -829,22 +829,22 @@ void stage2_unmap_vm(struct kvm *kvm) * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all * underlying level-2 and level-3 tables before freeing the actual level-1 table * and setting the struct pointer to NULL. - * - * Note we don't need locking here as this is only called when the VM is - * destroyed, which can only be done once. */ void kvm_free_stage2_pgd(struct kvm *kvm) { - if (kvm->arch.pgd == NULL) - return; + void *pgd = NULL; spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + if (kvm->arch.pgd) { + unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + pgd = kvm->arch.pgd; + kvm->arch.pgd = NULL; + } spin_unlock(&kvm->mmu_lock); /* Free the HW pgd, one page at a time */ - free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); - kvm->arch.pgd = NULL; + if (pgd) + free_pages_exact(pgd, S2_PGD_SIZE); } static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, -- cgit v1.2.3 From 509d8b52bbe7e6f6022a086989e7ecf5180508cc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Jul 2017 15:19:22 +0100 Subject: arm64: mm: abort uaccess retries upon fatal signal commit 289d07a2dc6c6b6f3e4b8a62669320d99dbe6c3d upstream. When there's a fatal signal pending, arm64's do_page_fault() implementation returns 0. The intent is that we'll return to the faulting userspace instruction, delivering the signal on the way. However, if we take a fatal signal during fixing up a uaccess, this results in a return to the faulting kernel instruction, which will be instantly retried, resulting in the same fault being taken forever. As the task never reaches userspace, the signal is not delivered, and the task is left unkillable. While the task is stuck in this state, it can inhibit the forward progress of the system. To avoid this, we must ensure that when a fatal signal is pending, we apply any necessary fixup for a faulting kernel instruction. Thus we will return to an error path, and it is up to that code to make forward progress towards delivering the fatal signal. Cc: Catalin Marinas Cc: Laura Abbott Reviewed-by: Steve Capper Tested-by: Steve Capper Reviewed-by: James Morse Tested-by: James Morse Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 0e90c7e0279c..fec5b1ce97f8 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -373,8 +373,11 @@ retry: * signal first. We do not need to release the mmap_sem because it * would already be released in __lock_page_or_retry in mm/filemap.c. */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + if (!user_mode(regs)) + goto no_context; return 0; + } /* * Major/minor page fault accounting is only done on the initial -- cgit v1.2.3 From 43f776dab360931f3dd344c8f4fb28b52ea98ee9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jul 2017 14:53:02 +0200 Subject: x86/io: Add "memory" clobber to insb/insw/insl/outsb/outsw/outsl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7206f9bf108eb9513d170c73f151367a1bdf3dbf upstream. The x86 version of insb/insw/insl uses an inline assembly that does not have the target buffer listed as an output. This can confuse the compiler, leading it to think that a subsequent access of the buffer is uninitialized: drivers/net/wireless/wl3501_cs.c: In function ‘wl3501_mgmt_scan_confirm’: drivers/net/wireless/wl3501_cs.c:665:9: error: ‘sig.status’ is used uninitialized in this function [-Werror=uninitialized] drivers/net/wireless/wl3501_cs.c:668:12: error: ‘sig.cap_info’ may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/sb1000.c: In function 'sb1000_rx': drivers/net/sb1000.c:775:9: error: 'st[0]' is used uninitialized in this function [-Werror=uninitialized] drivers/net/sb1000.c:776:10: error: 'st[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/sb1000.c:784:11: error: 'st[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized] I tried to mark the exact input buffer as an output here, but couldn't figure it out. As suggested by Linus, marking all memory as clobbered however is good enough too. For the outs operations, I also add the memory clobber, to force the input to be written to local variables. This is probably already guaranteed by the "asm volatile", but it can't hurt to do this for symmetry. Suggested-by: Linus Torvalds Signed-off-by: Arnd Bergmann Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Link: http://lkml.kernel.org/r/20170719125310.2487451-5-arnd@arndb.de Link: https://lkml.org/lkml/2017/7/12/605 Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d34bd370074b..6c5020163db0 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -304,13 +304,13 @@ static inline unsigned type in##bwl##_p(int port) \ static inline void outs##bwl(int port, const void *addr, unsigned long count) \ { \ asm volatile("rep; outs" #bwl \ - : "+S"(addr), "+c"(count) : "d"(port)); \ + : "+S"(addr), "+c"(count) : "d"(port) : "memory"); \ } \ \ static inline void ins##bwl(int port, void *addr, unsigned long count) \ { \ asm volatile("rep; ins" #bwl \ - : "+D"(addr), "+c"(count) : "d"(port)); \ + : "+D"(addr), "+c"(count) : "d"(port) : "memory"); \ } BUILDIO(b, b, char) -- cgit v1.2.3 From 27e7506c33d0f8afc1b49566e8994028a2847072 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 18 Aug 2017 16:57:01 +0100 Subject: arm64: fpsimd: Prevent registers leaking across exec commit 096622104e14d8a1db4860bd557717067a0515d2 upstream. There are some tricky dependencies between the different stages of flushing the FPSIMD register state during exec, and these can race with context switch in ways that can cause the old task's regs to leak across. In particular, a context switch during the memset() can cause some of the task's old FPSIMD registers to reappear. Disabling preemption for this small window would be no big deal for performance: preemption is already disabled for similar scenarios like updating the FPSIMD registers in sigreturn. So, instead of rearranging things in ways that might swap existing subtle bugs for new ones, this patch just disables preemption around the FPSIMD state flushing so that races of this type can't occur here. This brings fpsimd_flush_thread() into line with other code paths. Fixes: 674c242c9323 ("arm64: flush FP/SIMD state correctly after execve()") Reviewed-by: Ard Biesheuvel Signed-off-by: Dave Martin Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 394c61db5566..1d5890f19ca3 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -157,9 +157,11 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { + preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); + preempt_enable(); } /* -- cgit v1.2.3 From c0c6dff9230398dd7ec9ca6c1c023c8bd44bb6cd Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 8 Feb 2017 14:46:48 -0500 Subject: locking/spinlock/debug: Remove spinlock lockup detection code commit bc88c10d7e6900916f5e1ba3829d66a9de92b633 upstream. The current spinlock lockup detection code can sometimes produce false positives because of the unfairness of the locking algorithm itself. So the lockup detection code is now removed. Instead, we are relying on the NMI watchdog to detect potential lockup. We won't have lockup detection if the watchdog isn't running. The commented-out read-write lock lockup detection code are also removed. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Sasha Levin Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1486583208-11038-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar Cc: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- kernel/locking/spinlock_debug.c | 86 +++-------------------------------------- 1 file changed, 5 insertions(+), 81 deletions(-) diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 0374a596cffa..9aa0fccd5d43 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -103,38 +103,14 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock) lock->owner_cpu = -1; } -static void __spin_lock_debug(raw_spinlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - - for (i = 0; i < loops; i++) { - if (arch_spin_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - spin_dump(lock, "lockup suspected"); -#ifdef CONFIG_SMP - trigger_all_cpu_backtrace(); -#endif - - /* - * The trylock above was causing a livelock. Give the lower level arch - * specific lock code a chance to acquire the lock. We have already - * printed a warning/backtrace at this point. The non-debug arch - * specific code might actually succeed in acquiring the lock. If it is - * not successful, the end-result is the same - there is no forward - * progress. - */ - arch_spin_lock(&lock->raw_lock); -} - +/* + * We are now relying on the NMI watchdog to detect lockup instead of doing + * the detection here with an unfair lock which can cause problem of its own. + */ void do_raw_spin_lock(raw_spinlock_t *lock) { debug_spin_lock_before(lock); - if (unlikely(!arch_spin_trylock(&lock->raw_lock))) - __spin_lock_debug(lock); + arch_spin_lock(&lock->raw_lock); debug_spin_lock_after(lock); } @@ -172,32 +148,6 @@ static void rwlock_bug(rwlock_t *lock, const char *msg) #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) -#if 0 /* __write_lock_debug() can lock up - maybe this can too? */ -static void __read_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_read_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - void do_raw_read_lock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); @@ -247,32 +197,6 @@ static inline void debug_write_unlock(rwlock_t *lock) lock->owner_cpu = -1; } -#if 0 /* This can cause lockups */ -static void __write_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_write_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - void do_raw_write_lock(rwlock_t *lock) { debug_write_lock_before(lock); -- cgit v1.2.3 From 4099ac93838537351099859f824b8c3f9451a264 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Apr 2017 09:34:14 +0200 Subject: scsi: sg: protect accesses to 'reserved' page array commit 1bc0eb0446158cc76562176b80623aa119afee5b upstream. The 'reserved' page array is used as a short-cut for mapping data, saving us to allocate pages per request. However, the 'reserved' array is only capable of holding one request, so this patch introduces a mutex for protect 'sg_fd' against concurrent accesses. Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman [toddpoynor@google.com: backport to 3.18-4.9, fixup for bad ioctl SG_SET_FORCE_LOW_DMA code removed in later versions and not modified by the original patch.] Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index f753df25ba34..cf7e5f096988 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -142,6 +142,7 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ struct sg_device *parentdp; /* owning device */ wait_queue_head_t read_wait; /* queue read until command done */ rwlock_t rq_list_lock; /* protect access to list in req_arr */ + struct mutex f_mutex; /* protect against changes in this fd */ int timeout; /* defaults to SG_DEFAULT_TIMEOUT */ int timeout_user; /* defaults to SG_DEFAULT_TIMEOUT_USER */ Sg_scatter_hold reserve; /* buffer held for this file descriptor */ @@ -155,6 +156,7 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */ char keep_orphan; /* 0 -> drop orphan (def), 1 -> keep for read() */ char mmap_called; /* 0 -> mmap() never called on this fd */ + char res_in_use; /* 1 -> 'reserve' array in use */ struct kref f_ref; struct execute_work ew; } Sg_fd; @@ -198,7 +200,6 @@ static void sg_remove_sfp(struct kref *); static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); -static int sg_res_in_use(Sg_fd * sfp); static Sg_device *sg_get_dev(int dev); static void sg_device_destroy(struct kref *kref); @@ -614,6 +615,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) } buf += SZ_SG_HEADER; __get_user(opcode, buf); + mutex_lock(&sfp->f_mutex); if (sfp->next_cmd_len > 0) { cmd_size = sfp->next_cmd_len; sfp->next_cmd_len = 0; /* reset so only this write() effected */ @@ -622,6 +624,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) if ((opcode >= 0xc0) && old_hdr.twelve_byte) cmd_size = 12; } + mutex_unlock(&sfp->f_mutex); SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, "sg_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) opcode, cmd_size)); /* Determine buffer size. */ @@ -721,7 +724,7 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, sg_remove_request(sfp, srp); return -EINVAL; /* either MMAP_IO or DIRECT_IO (not both) */ } - if (sg_res_in_use(sfp)) { + if (sfp->res_in_use) { sg_remove_request(sfp, srp); return -EBUSY; /* reserve buffer already being used */ } @@ -892,7 +895,7 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) return result; if (val) { sfp->low_dma = 1; - if ((0 == sfp->low_dma) && (0 == sg_res_in_use(sfp))) { + if ((0 == sfp->low_dma) && !sfp->res_in_use) { val = (int) sfp->reserve.bufflen; sg_remove_scat(sfp, &sfp->reserve); sg_build_reserve(sfp, val); @@ -967,12 +970,18 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) return -EINVAL; val = min_t(int, val, max_sectors_bytes(sdp->device->request_queue)); + mutex_lock(&sfp->f_mutex); if (val != sfp->reserve.bufflen) { - if (sg_res_in_use(sfp) || sfp->mmap_called) + if (sfp->mmap_called || + sfp->res_in_use) { + mutex_unlock(&sfp->f_mutex); return -EBUSY; + } + sg_remove_scat(sfp, &sfp->reserve); sg_build_reserve(sfp, val); } + mutex_unlock(&sfp->f_mutex); return 0; case SG_GET_RESERVED_SIZE: val = min_t(int, sfp->reserve.bufflen, @@ -1727,13 +1736,22 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) md = &map_data; if (md) { - if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen) + mutex_lock(&sfp->f_mutex); + if (dxfer_len <= rsv_schp->bufflen && + !sfp->res_in_use) { + sfp->res_in_use = 1; sg_link_reserve(sfp, srp, dxfer_len); - else { + } else if ((hp->flags & SG_FLAG_MMAP_IO) && sfp->res_in_use) { + mutex_unlock(&sfp->f_mutex); + return -EBUSY; + } else { res = sg_build_indirect(req_schp, sfp, dxfer_len); - if (res) + if (res) { + mutex_unlock(&sfp->f_mutex); return res; + } } + mutex_unlock(&sfp->f_mutex); md->pages = req_schp->pages; md->page_order = req_schp->page_order; @@ -2135,6 +2153,7 @@ sg_add_sfp(Sg_device * sdp) rwlock_init(&sfp->rq_list_lock); kref_init(&sfp->f_ref); + mutex_init(&sfp->f_mutex); sfp->timeout = SG_DEFAULT_TIMEOUT; sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; sfp->force_packid = SG_DEF_FORCE_PACK_ID; @@ -2210,20 +2229,6 @@ sg_remove_sfp(struct kref *kref) schedule_work(&sfp->ew.work); } -static int -sg_res_in_use(Sg_fd * sfp) -{ - const Sg_request *srp; - unsigned long iflags; - - read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (srp = sfp->headrp; srp; srp = srp->nextrp) - if (srp->res_used) - break; - read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return srp ? 1 : 0; -} - #ifdef CONFIG_SCSI_PROC_FS static int sg_idr_max_id(int id, void *p, void *data) -- cgit v1.2.3 From c47c52cde806f32c4da1e455874f6aa154c06aca Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Apr 2017 10:26:36 +0200 Subject: scsi: sg: reset 'res_in_use' after unlinking reserved array commit e791ce27c3f6a1d3c746fd6a8f8e36c9540ec6f9 upstream. Once the reserved page array is unused we can reset the 'res_in_use' state; here we can do a lazy update without holding the mutex as we only need to check against concurrent access, not concurrent release. [mkp: checkpatch] Fixes: 1bc0eb044615 ("scsi: sg: protect accesses to 'reserved' page array") Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Cc: Todd Poynor Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index cf7e5f096988..fed37aabf828 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2042,6 +2042,8 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) req_schp->sglist_len = 0; sfp->save_scat_len = 0; srp->res_used = 0; + /* Called without mutex lock to avoid deadlock */ + sfp->res_in_use = 0; } static Sg_request * -- cgit v1.2.3 From 529ada21ff9e37a14fd02ab1fb9d58d71d7a0d9e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Aug 2017 09:09:54 +0200 Subject: lz4: fix bogus gcc warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building lz4 under gcc-7 we get the following bogus warning: CC [M] lib/lz4/lz4hc_compress.o lib/lz4/lz4hc_compress.c: In function ‘lz4hc_compress’: lib/lz4/lz4hc_compress.c:179:42: warning: ‘delta’ may be used uninitialized in this function [-Wmaybe-uninitialized] chaintable[(size_t)(ptr) & MAXD_MASK] = delta; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~ lib/lz4/lz4hc_compress.c:134:6: note: ‘delta’ was declared here u16 delta; ^~~~~ This doesn't show up in the 4.4-stable tree due to us turning off warnings like this. It also doesn't show up in newer kernel versions as this code was totally rewritten. So for now, to get the 4.9-stable tree to build with 0 warnings on x86 allmodconfig, let's just shut the compiler up by initializing the variable to 0, despite it not really doing anything. To be far, this code is crazy complex, so the fact that gcc can't determine if the variable is really used or not isn't that bad, I'd blame the code here instead of the compiler. Signed-off-by: Greg Kroah-Hartman --- lib/lz4/lz4hc_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index f344f76b6559..6b2e046a9c61 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -131,7 +131,7 @@ static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, #endif int nbattempts = MAX_NB_ATTEMPTS; size_t repl = 0, ml = 0; - u16 delta; + u16 delta = 0; /* HC4 match finder */ lz4hc_insert(hc4, ip); -- cgit v1.2.3 From 458ca52f1564938c158d271f45bce0bc6ede2b3f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 2 Sep 2017 07:08:05 +0200 Subject: Linux 4.9.47 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 846ef1b57a02..a0abbfc15a49 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 46 +SUBLEVEL = 47 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From c13fc0c1106a1d4085087bb9fbf003ddd2e09b22 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Mon, 4 Sep 2017 15:24:21 +0800 Subject: ARM64: add lsk_defconfig This's a new defconfig for kernelci auto testing. It's copied from arch/arm64/configs/defconfig plus TEE, OPTEE config. The kexec/kdump config was enabled as default already. Signed-off-by: Alex Shi --- arch/arm64/configs/lsk_defconfig | 468 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 468 insertions(+) create mode 100644 arch/arm64/configs/lsk_defconfig diff --git a/arch/arm64/configs/lsk_defconfig b/arch/arm64/configs/lsk_defconfig new file mode 100644 index 000000000000..28089d2d651b --- /dev/null +++ b/arch/arm64/configs/lsk_defconfig @@ -0,0 +1,468 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_USER_NS=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_SUNXI=y +CONFIG_ARCH_ALPINE=y +CONFIG_ARCH_BCM_IPROC=y +CONFIG_ARCH_BERLIN=y +CONFIG_ARCH_EXYNOS=y +CONFIG_ARCH_LAYERSCAPE=y +CONFIG_ARCH_LG1K=y +CONFIG_ARCH_HISI=y +CONFIG_ARCH_MEDIATEK=y +CONFIG_ARCH_MESON=y +CONFIG_ARCH_MVEBU=y +CONFIG_ARCH_QCOM=y +CONFIG_ARCH_ROCKCHIP=y +CONFIG_ARCH_SEATTLE=y +CONFIG_ARCH_RENESAS=y +CONFIG_ARCH_R8A7795=y +CONFIG_ARCH_R8A7796=y +CONFIG_ARCH_STRATIX10=y +CONFIG_ARCH_TEGRA=y +CONFIG_ARCH_SPRD=y +CONFIG_ARCH_THUNDER=y +CONFIG_ARCH_UNIPHIER=y +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_VULCAN=y +CONFIG_ARCH_XGENE=y +CONFIG_ARCH_ZX=y +CONFIG_ARCH_ZYNQMP=y +CONFIG_PCI=y +CONFIG_PCI_MSI=y +CONFIG_PCI_IOV=y +CONFIG_PCI_AARDVARK=y +CONFIG_PCIE_RCAR=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI_XGENE=y +CONFIG_PCI_LAYERSCAPE=y +CONFIG_PCI_HISI=y +CONFIG_PCIE_QCOM=y +CONFIG_PCIE_ARMADA_8K=y +CONFIG_ARM64_VA_BITS_48=y +CONFIG_SCHED_MC=y +CONFIG_PREEMPT=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CMA=y +CONFIG_SECCOMP=y +CONFIG_XEN=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_CPU_IDLE=y +CONFIG_ARM_CPUIDLE=y +CONFIG_CPU_FREQ=y +CONFIG_CPUFREQ_DT=y +CONFIG_ARM_BIG_LITTLE_CPUFREQ=y +CONFIG_ARM_SCPI_CPUFREQ=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IPV6=m +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_BRIDGE=m +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_VLAN_8021Q_MVRP=y +CONFIG_BPF_JIT=y +CONFIG_BT=m +CONFIG_BT_HIDP=m +# CONFIG_BT_HS is not set +# CONFIG_BT_LE is not set +CONFIG_BT_LEDS=y +# CONFIG_BT_DEBUGFS is not set +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_LL=y +CONFIG_CFG80211=m +CONFIG_MAC80211=m +CONFIG_MAC80211_LEDS=y +CONFIG_RFKILL=m +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y +CONFIG_MTD=y +CONFIG_MTD_M25P80=y +CONFIG_MTD_SPI_NOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=m +CONFIG_VIRTIO_BLK=y +CONFIG_SRAM=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_SAS_ATA=y +CONFIG_SCSI_HISI_SAS=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_CEVA=y +CONFIG_AHCI_MVEBU=y +CONFIG_AHCI_XGENE=y +CONFIG_AHCI_QORIQ=y +CONFIG_SATA_RCAR=y +CONFIG_SATA_SIL24=y +CONFIG_PATA_PLATFORM=y +CONFIG_PATA_OF_PLATFORM=y +CONFIG_NETDEVICES=y +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_TUN=y +CONFIG_VETH=m +CONFIG_VIRTIO_NET=y +CONFIG_AMD_XGBE=y +CONFIG_NET_XGENE=y +CONFIG_MACB=y +CONFIG_HNS_DSAF=y +CONFIG_HNS_ENET=y +CONFIG_E1000E=y +CONFIG_IGB=y +CONFIG_IGBVF=y +CONFIG_SKY2=y +CONFIG_RAVB=y +CONFIG_SMC91X=y +CONFIG_SMSC911X=y +CONFIG_STMMAC_ETH=m +CONFIG_REALTEK_PHY=m +CONFIG_MICREL_PHY=y +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_RTL8152=m +CONFIG_USB_USBNET=m +CONFIG_USB_NET_DM9601=m +CONFIG_USB_NET_SR9800=m +CONFIG_USB_NET_SMSC75XX=m +CONFIG_USB_NET_SMSC95XX=m +CONFIG_USB_NET_PLUSB=m +CONFIG_USB_NET_MCS7830=m +CONFIG_WL18XX=m +CONFIG_WLCORE_SDIO=m +CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_GPIO=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_PM8941_PWRKEY=y +CONFIG_INPUT_HISI_POWERKEY=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_AMBAKMI=y +CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_8250_MT6577=y +CONFIG_SERIAL_8250_UNIPHIER=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_SAMSUNG=y +CONFIG_SERIAL_SAMSUNG_CONSOLE=y +CONFIG_SERIAL_TEGRA=y +CONFIG_SERIAL_SH_SCI=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=11 +CONFIG_SERIAL_SH_SCI_CONSOLE=y +CONFIG_SERIAL_MESON=y +CONFIG_SERIAL_MESON_CONSOLE=y +CONFIG_SERIAL_MSM=y +CONFIG_SERIAL_MSM_CONSOLE=y +CONFIG_SERIAL_XILINX_PS_UART=y +CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y +CONFIG_SERIAL_MVEBU_UART=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_MUX=y +CONFIG_I2C_MUX_PCA954x=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +CONFIG_I2C_IMX=y +CONFIG_I2C_MESON=y +CONFIG_I2C_MV64XXX=y +CONFIG_I2C_QUP=y +CONFIG_I2C_TEGRA=y +CONFIG_I2C_UNIPHIER_F=y +CONFIG_I2C_RCAR=y +CONFIG_I2C_CROS_EC_TUNNEL=y +CONFIG_SPI=y +CONFIG_SPI_MESON_SPIFC=m +CONFIG_SPI_ORION=y +CONFIG_SPI_PL022=y +CONFIG_SPI_QUP=y +CONFIG_SPI_SPIDEV=m +CONFIG_SPI_S3C64XX=y +CONFIG_SPMI=y +CONFIG_PINCTRL_SINGLE=y +CONFIG_PINCTRL_MAX77620=y +CONFIG_PINCTRL_MSM8916=y +CONFIG_PINCTRL_MSM8996=y +CONFIG_PINCTRL_QDF2XXX=y +CONFIG_PINCTRL_QCOM_SPMI_PMIC=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_DWAPB=y +CONFIG_GPIO_PL061=y +CONFIG_GPIO_RCAR=y +CONFIG_GPIO_XGENE=y +CONFIG_GPIO_PCA953X=y +CONFIG_GPIO_PCA953X_IRQ=y +CONFIG_GPIO_MAX77620=y +CONFIG_POWER_RESET_MSM=y +CONFIG_BATTERY_BQ27XXX=y +CONFIG_POWER_RESET_XGENE=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SENSORS_LM90=m +CONFIG_SENSORS_INA2XX=m +CONFIG_SENSORS_ARM_SCPI=y +CONFIG_THERMAL=y +CONFIG_THERMAL_EMULATION=y +CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y +CONFIG_CPU_THERMAL=y +CONFIG_EXYNOS_THERMAL=y +CONFIG_WATCHDOG=y +CONFIG_RENESAS_WDT=y +CONFIG_S3C2410_WATCHDOG=y +CONFIG_MESON_GXBB_WATCHDOG=m +CONFIG_MESON_WATCHDOG=m +CONFIG_MFD_MAX77620=y +CONFIG_MFD_SPMI_PMIC=y +CONFIG_MFD_SEC_CORE=y +CONFIG_MFD_HI655X_PMIC=y +CONFIG_REGULATOR=y +CONFIG_MFD_CROS_EC=y +CONFIG_MFD_CROS_EC_I2C=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_GPIO=y +CONFIG_REGULATOR_HI655X=y +CONFIG_REGULATOR_MAX77620=y +CONFIG_REGULATOR_PWM=y +CONFIG_REGULATOR_QCOM_SMD_RPM=y +CONFIG_REGULATOR_QCOM_SPMI=y +CONFIG_REGULATOR_S2MPS11=y +CONFIG_DRM=m +CONFIG_DRM_NOUVEAU=m +CONFIG_DRM_TEGRA=m +CONFIG_DRM_PANEL_SIMPLE=m +CONFIG_DRM_I2C_ADV7511=m +CONFIG_DRM_HISI_KIRIN=m +CONFIG_FB=y +CONFIG_FB_ARMCLCD=y +CONFIG_BACKLIGHT_GENERIC=m +CONFIG_BACKLIGHT_LP855X=m +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SOC=y +CONFIG_SND_SOC_RCAR=y +CONFIG_SND_SOC_SAMSUNG=y +CONFIG_SND_SOC_AK4613=y +CONFIG_USB=y +CONFIG_USB_OTG=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_PLATFORM=y +CONFIG_USB_XHCI_RCAR=y +CONFIG_USB_EHCI_EXYNOS=y +CONFIG_USB_XHCI_TEGRA=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_MSM=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_EXYNOS=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_RENESAS_USBHS=m +CONFIG_USB_STORAGE=y +CONFIG_USB_DWC2=y +CONFIG_USB_DWC3=y +CONFIG_USB_CHIPIDEA=y +CONFIG_USB_CHIPIDEA_UDC=y +CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_ISP1760=y +CONFIG_USB_HSIC_USB3503=y +CONFIG_USB_MSM_OTG=y +CONFIG_USB_ULPI=y +CONFIG_USB_GADGET=y +CONFIG_USB_RENESAS_USBHS_UDC=m +CONFIG_MMC=y +CONFIG_MMC_BLOCK_MINORS=32 +CONFIG_MMC_ARMMMCI=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_ACPI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_OF_ESDHC=y +CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_MMC_SDHCI_MSM=y +CONFIG_MMC_SPI=y +CONFIG_MMC_SDHI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_EXYNOS=y +CONFIG_MMC_DW_K3=y +CONFIG_MMC_SUNXI=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_SYSCON=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_CPU=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_MAX77686=y +CONFIG_RTC_DRV_S5M=y +CONFIG_RTC_DRV_DS3232=y +CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_PL031=y +CONFIG_RTC_DRV_SUN6I=y +CONFIG_RTC_DRV_TEGRA=y +CONFIG_RTC_DRV_XGENE=y +CONFIG_RTC_DRV_S3C=y +CONFIG_DMADEVICES=y +CONFIG_PL330_DMA=y +CONFIG_TEGRA20_APB_DMA=y +CONFIG_QCOM_BAM_DMA=y +CONFIG_QCOM_HIDMA_MGMT=y +CONFIG_QCOM_HIDMA=y +CONFIG_RCAR_DMAC=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_MMIO=y +CONFIG_XEN_GNTDEV=y +CONFIG_XEN_GRANT_DEV_ALLOC=y +CONFIG_COMMON_CLK_SCPI=y +CONFIG_COMMON_CLK_CS2000_CP=y +CONFIG_COMMON_CLK_S2MPS11=y +CONFIG_CLK_QORIQ=y +CONFIG_COMMON_CLK_QCOM=y +CONFIG_MSM_GCC_8916=y +CONFIG_MSM_MMCC_8996=y +CONFIG_HWSPINLOCK_QCOM=y +CONFIG_MAILBOX=y +CONFIG_ARM_MHU=y +CONFIG_HI6220_MBOX=y +CONFIG_ARM_SMMU=y +CONFIG_QCOM_SMEM=y +CONFIG_QCOM_SMD=y +CONFIG_QCOM_SMD_RPM=y +CONFIG_ARCH_TEGRA_132_SOC=y +CONFIG_ARCH_TEGRA_210_SOC=y +CONFIG_EXTCON_USB_GPIO=y +CONFIG_PWM=y +CONFIG_PWM_TEGRA=m +CONFIG_COMMON_RESET_HI6220=y +CONFIG_PHY_RCAR_GEN3_USB2=y +CONFIG_PHY_HI6220_USB=y +CONFIG_PHY_XGENE=y +CONFIG_PHY_TEGRA_XUSB=y +CONFIG_ARM_SCPI_PROTOCOL=y +CONFIG_ACPI=y +CONFIG_IIO=y +CONFIG_EXYNOS_ADC=y +CONFIG_PWM_SAMSUNG=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y +CONFIG_QUOTA=y +CONFIG_AUTOFS4_FS=y +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_OVERLAY_FS=m +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=y +CONFIG_EFIVAR_FS=y +CONFIG_SQUASHFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_LOCKUP_DETECTOR=y +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set +CONFIG_MEMTEST=y +CONFIG_SECURITY=y +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_CRYPTO_ANSI_CPRNG=y +CONFIG_ARM64_CRYPTO=y +CONFIG_CRYPTO_SHA1_ARM64_CE=y +CONFIG_CRYPTO_SHA2_ARM64_CE=y +CONFIG_CRYPTO_GHASH_ARM64_CE=y +CONFIG_CRYPTO_AES_ARM64_CE_CCM=y +CONFIG_CRYPTO_AES_ARM64_CE_BLK=y +# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set +CONFIG_CRYPTO_CRC32_ARM64=y +CONFIG_TEE=y +CONFIG_OPTEE=y -- cgit v1.2.3 From f9ded7bcde96b12ea7dc81c85c09a98a71160652 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Mon, 4 Sep 2017 15:47:54 +0800 Subject: ARM: add lsk_defconfig Add a arm default config named lsk_defconfig, which copied from arch/arm/configs/multi_v7_defconfig plus TEE, OPTEE config. The kexec/kdump config was enabled as default already. Signed-off-by: Alex Shi --- arch/arm/configs/lsk_defconfig | 932 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 932 insertions(+) create mode 100644 arch/arm/configs/lsk_defconfig diff --git a/arch/arm/configs/lsk_defconfig b/arch/arm/configs/lsk_defconfig new file mode 100644 index 000000000000..5dce9050d229 --- /dev/null +++ b/arch/arm/configs/lsk_defconfig @@ -0,0 +1,932 @@ +CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y +CONFIG_IRQ_DOMAIN_DEBUG=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_CGROUPS=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_CMDLINE_PARTITION=y +CONFIG_ARCH_MULTI_V7=y +# CONFIG_ARCH_MULTI_V5 is not set +# CONFIG_ARCH_MULTI_V4 is not set +CONFIG_ARCH_VIRT=y +CONFIG_ARCH_ALPINE=y +CONFIG_ARCH_ARTPEC=y +CONFIG_MACH_ARTPEC6=y +CONFIG_ARCH_MVEBU=y +CONFIG_MACH_ARMADA_370=y +CONFIG_MACH_ARMADA_375=y +CONFIG_MACH_ARMADA_38X=y +CONFIG_MACH_ARMADA_39X=y +CONFIG_MACH_ARMADA_XP=y +CONFIG_MACH_DOVE=y +CONFIG_ARCH_AT91=y +CONFIG_SOC_SAMA5D2=y +CONFIG_SOC_SAMA5D3=y +CONFIG_SOC_SAMA5D4=y +CONFIG_ARCH_BCM=y +CONFIG_ARCH_BCM_CYGNUS=y +CONFIG_ARCH_BCM_NSP=y +CONFIG_ARCH_BCM_21664=y +CONFIG_ARCH_BCM_281XX=y +CONFIG_ARCH_BCM_5301X=y +CONFIG_ARCH_BCM2835=y +CONFIG_ARCH_BCM_63XX=y +CONFIG_ARCH_BRCMSTB=y +CONFIG_ARCH_BERLIN=y +CONFIG_MACH_BERLIN_BG2=y +CONFIG_MACH_BERLIN_BG2CD=y +CONFIG_MACH_BERLIN_BG2Q=y +CONFIG_ARCH_DIGICOLOR=y +CONFIG_ARCH_HIGHBANK=y +CONFIG_ARCH_HISI=y +CONFIG_ARCH_HI3xxx=y +CONFIG_ARCH_HIX5HD2=y +CONFIG_ARCH_HIP01=y +CONFIG_ARCH_HIP04=y +CONFIG_ARCH_KEYSTONE=y +CONFIG_ARCH_MESON=y +CONFIG_ARCH_MXC=y +CONFIG_SOC_IMX50=y +CONFIG_SOC_IMX51=y +CONFIG_SOC_IMX53=y +CONFIG_SOC_IMX6Q=y +CONFIG_SOC_IMX6SL=y +CONFIG_SOC_IMX6SX=y +CONFIG_SOC_IMX6UL=y +CONFIG_SOC_IMX7D=y +CONFIG_SOC_VF610=y +CONFIG_SOC_LS1021A=y +CONFIG_ARCH_OMAP3=y +CONFIG_ARCH_OMAP4=y +CONFIG_SOC_OMAP5=y +CONFIG_SOC_AM33XX=y +CONFIG_SOC_AM43XX=y +CONFIG_SOC_DRA7XX=y +CONFIG_ARCH_QCOM=y +CONFIG_ARCH_MEDIATEK=y +CONFIG_ARCH_MSM8X60=y +CONFIG_ARCH_MSM8960=y +CONFIG_ARCH_MSM8974=y +CONFIG_ARCH_ROCKCHIP=y +CONFIG_ARCH_SOCFPGA=y +CONFIG_PLAT_SPEAR=y +CONFIG_ARCH_SPEAR13XX=y +CONFIG_MACH_SPEAR1310=y +CONFIG_MACH_SPEAR1340=y +CONFIG_ARCH_STI=y +CONFIG_ARCH_EXYNOS=y +CONFIG_EXYNOS5420_MCPM=y +CONFIG_ARCH_RENESAS=y +CONFIG_ARCH_EMEV2=y +CONFIG_ARCH_R7S72100=y +CONFIG_ARCH_R8A73A4=y +CONFIG_ARCH_R8A7740=y +CONFIG_ARCH_R8A7778=y +CONFIG_ARCH_R8A7779=y +CONFIG_ARCH_R8A7790=y +CONFIG_ARCH_R8A7791=y +CONFIG_ARCH_R8A7792=y +CONFIG_ARCH_R8A7793=y +CONFIG_ARCH_R8A7794=y +CONFIG_ARCH_SH73A0=y +CONFIG_ARCH_SUNXI=y +CONFIG_ARCH_SIRF=y +CONFIG_ARCH_TEGRA=y +CONFIG_ARCH_TEGRA_2x_SOC=y +CONFIG_ARCH_TEGRA_3x_SOC=y +CONFIG_ARCH_TEGRA_114_SOC=y +CONFIG_ARCH_TEGRA_124_SOC=y +CONFIG_TEGRA_EMC_SCALING_ENABLE=y +CONFIG_ARCH_UNIPHIER=y +CONFIG_ARCH_U8500=y +CONFIG_MACH_HREFV60=y +CONFIG_MACH_SNOWBALL=y +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_VEXPRESS_CA9X4=y +CONFIG_ARCH_VEXPRESS_TC2_PM=y +CONFIG_ARCH_WM8850=y +CONFIG_ARCH_ZYNQ=y +CONFIG_TRUSTED_FOUNDATIONS=y +CONFIG_PCI=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI_KEYSTONE=y +CONFIG_PCI_MSI=y +CONFIG_PCI_MVEBU=y +CONFIG_PCI_TEGRA=y +CONFIG_PCI_RCAR_GEN2=y +CONFIG_PCIE_RCAR=y +CONFIG_PCIEPORTBUS=y +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_HIGHPTE=y +CONFIG_CMA=y +CONFIG_SECCOMP=y +CONFIG_ARM_APPENDED_DTB=y +CONFIG_ARM_ATAG_DTB_COMPAT=y +CONFIG_KEXEC=y +CONFIG_EFI=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT_DETAILS=y +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=m +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y +CONFIG_ARM_IMX6Q_CPUFREQ=y +CONFIG_QORIQ_CPUFREQ=y +CONFIG_CPU_IDLE=y +CONFIG_ARM_CPUIDLE=y +CONFIG_NEON=y +CONFIG_KERNEL_MODE_NEON=y +CONFIG_ARM_ZYNQ_CPUIDLE=y +CONFIG_ARM_EXYNOS_CPUIDLE=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_DSA=m +CONFIG_NET_SWITCHDEV=y +CONFIG_CAN=y +CONFIG_CAN_RAW=y +CONFIG_CAN_BCM=y +CONFIG_CAN_DEV=y +CONFIG_CAN_AT91=m +CONFIG_CAN_RCAR=m +CONFIG_CAN_XILINXCAN=y +CONFIG_CAN_MCP251X=y +CONFIG_NET_DSA_BCM_SF2=m +CONFIG_CAN_SUN4I=y +CONFIG_BT=m +CONFIG_BT_MRVL=m +CONFIG_BT_MRVL_SDIO=m +CONFIG_CFG80211=m +CONFIG_MAC80211=m +CONFIG_RFKILL=y +CONFIG_RFKILL_INPUT=y +CONFIG_RFKILL_GPIO=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=64 +CONFIG_OMAP_OCP2SCP=y +CONFIG_SIMPLE_PM_BUS=y +CONFIG_SUNXI_RSB=m +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_M25P80=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_DENALI_DT=y +CONFIG_MTD_NAND_ATMEL=y +CONFIG_MTD_NAND_BRCMNAND=y +CONFIG_MTD_NAND_VF610_NFC=y +CONFIG_MTD_NAND_DAVINCI=y +CONFIG_MTD_SPI_NOR=y +CONFIG_SPI_FSL_QUADSPI=m +CONFIG_MTD_UBI=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=65536 +CONFIG_VIRTIO_BLK=y +CONFIG_AD525X_DPOT=y +CONFIG_AD525X_DPOT_I2C=y +CONFIG_ATMEL_TCLIB=y +CONFIG_ICS932S401=y +CONFIG_ATMEL_SSC=m +CONFIG_QCOM_COINCELL=m +CONFIG_APDS9802ALS=y +CONFIG_ISL29003=y +CONFIG_EEPROM_AT24=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_BRCM=y +CONFIG_AHCI_ST=y +CONFIG_AHCI_IMX=y +CONFIG_AHCI_SUNXI=y +CONFIG_AHCI_TEGRA=y +CONFIG_SATA_HIGHBANK=y +CONFIG_SATA_MV=y +CONFIG_SATA_RCAR=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y +CONFIG_HIX5HD2_GMAC=y +CONFIG_SUN4I_EMAC=y +CONFIG_MACB=y +CONFIG_BCMGENET=m +CONFIG_SYSTEMPORT=m +CONFIG_NET_CALXEDA_XGMAC=y +CONFIG_GIANFAR=y +CONFIG_IGB=y +CONFIG_MV643XX_ETH=y +CONFIG_MVNETA=y +CONFIG_PXA168_ETH=m +CONFIG_KS8851=y +CONFIG_R8169=y +CONFIG_SH_ETH=y +CONFIG_SMSC911X=y +CONFIG_STMMAC_ETH=y +CONFIG_SYNOPSYS_DWC_ETH_QOS=y +CONFIG_TI_CPSW=y +CONFIG_XILINX_EMACLITE=y +CONFIG_AT803X_PHY=y +CONFIG_MARVELL_PHY=y +CONFIG_SMSC_PHY=y +CONFIG_BROADCOM_PHY=y +CONFIG_ICPLUS_PHY=y +CONFIG_REALTEK_PHY=y +CONFIG_MICREL_PHY=y +CONFIG_FIXED_PHY=y +CONFIG_USB_PEGASUS=y +CONFIG_USB_RTL8152=m +CONFIG_USB_USBNET=y +CONFIG_USB_NET_SMSC75XX=y +CONFIG_USB_NET_SMSC95XX=y +CONFIG_BRCMFMAC=m +CONFIG_RT2X00=m +CONFIG_RT2800USB=m +CONFIG_MWIFIEX=m +CONFIG_MWIFIEX_SDIO=m +CONFIG_INPUT_JOYDEV=y +CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_QT1070=m +CONFIG_KEYBOARD_GPIO=y +CONFIG_KEYBOARD_TEGRA=y +CONFIG_KEYBOARD_SPEAR=y +CONFIG_KEYBOARD_ST_KEYSCAN=y +CONFIG_KEYBOARD_CROS_EC=m +CONFIG_KEYBOARD_SAMSUNG=m +CONFIG_MOUSE_PS2_ELANTECH=y +CONFIG_MOUSE_CYAPA=m +CONFIG_MOUSE_ELAN_I2C=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_ATMEL_MXT=m +CONFIG_TOUCHSCREEN_MMS114=m +CONFIG_TOUCHSCREEN_ST1232=m +CONFIG_TOUCHSCREEN_STMPE=y +CONFIG_TOUCHSCREEN_SUN4I=y +CONFIG_TOUCHSCREEN_WM97XX=m +CONFIG_INPUT_MISC=y +CONFIG_INPUT_MAX77693_HAPTIC=m +CONFIG_INPUT_MAX8997_HAPTIC=m +CONFIG_INPUT_MPU3050=y +CONFIG_INPUT_AXP20X_PEK=m +CONFIG_INPUT_ADXL34X=m +CONFIG_SERIO_AMBAKMI=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_8250_EM=y +CONFIG_SERIAL_8250_MT6577=y +CONFIG_SERIAL_8250_UNIPHIER=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_ATMEL=y +CONFIG_SERIAL_ATMEL_CONSOLE=y +CONFIG_SERIAL_ATMEL_TTYAT=y +CONFIG_SERIAL_BCM63XX=y +CONFIG_SERIAL_BCM63XX_CONSOLE=y +CONFIG_SERIAL_MESON=y +CONFIG_SERIAL_MESON_CONSOLE=y +CONFIG_SERIAL_SAMSUNG=y +CONFIG_SERIAL_SAMSUNG_CONSOLE=y +CONFIG_SERIAL_SIRFSOC=y +CONFIG_SERIAL_SIRFSOC_CONSOLE=y +CONFIG_SERIAL_TEGRA=y +CONFIG_SERIAL_IMX=y +CONFIG_SERIAL_IMX_CONSOLE=y +CONFIG_SERIAL_SH_SCI=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=20 +CONFIG_SERIAL_SH_SCI_CONSOLE=y +CONFIG_SERIAL_MSM=y +CONFIG_SERIAL_MSM_CONSOLE=y +CONFIG_SERIAL_VT8500=y +CONFIG_SERIAL_VT8500_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_OMAP=y +CONFIG_SERIAL_OMAP_CONSOLE=y +CONFIG_SERIAL_XILINX_PS_UART=y +CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y +CONFIG_SERIAL_FSL_LPUART=y +CONFIG_SERIAL_FSL_LPUART_CONSOLE=y +CONFIG_SERIAL_CONEXANT_DIGICOLOR=y +CONFIG_SERIAL_CONEXANT_DIGICOLOR_CONSOLE=y +CONFIG_SERIAL_ST_ASC=y +CONFIG_SERIAL_ST_ASC_CONSOLE=y +CONFIG_HVC_DRIVER=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_DAVINCI=y +CONFIG_I2C_MUX=y +CONFIG_I2C_ARB_GPIO_CHALLENGE=m +CONFIG_I2C_MUX_PCA954x=y +CONFIG_I2C_MUX_PINCTRL=y +CONFIG_I2C_DEMUX_PINCTRL=y +CONFIG_I2C_AT91=m +CONFIG_I2C_BCM2835=y +CONFIG_I2C_CADENCE=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +CONFIG_I2C_DIGICOLOR=m +CONFIG_I2C_EMEV2=m +CONFIG_I2C_GPIO=m +CONFIG_I2C_EXYNOS5=y +CONFIG_I2C_IMX=m +CONFIG_I2C_MV64XXX=y +CONFIG_I2C_RIIC=y +CONFIG_I2C_RK3X=y +CONFIG_I2C_S3C2410=y +CONFIG_I2C_SH_MOBILE=y +CONFIG_I2C_SIRF=y +CONFIG_I2C_ST=y +CONFIG_I2C_SUN6I_P2WI=y +CONFIG_I2C_TEGRA=y +CONFIG_I2C_UNIPHIER=y +CONFIG_I2C_UNIPHIER_F=y +CONFIG_I2C_XILINX=y +CONFIG_I2C_RCAR=y +CONFIG_I2C_CROS_EC_TUNNEL=m +CONFIG_I2C_SLAVE_EEPROM=y +CONFIG_SPI=y +CONFIG_SPI_ATMEL=m +CONFIG_SPI_BCM2835=y +CONFIG_SPI_BCM2835AUX=y +CONFIG_SPI_CADENCE=y +CONFIG_SPI_DAVINCI=y +CONFIG_SPI_GPIO=m +CONFIG_SPI_FSL_DSPI=m +CONFIG_SPI_OMAP24XX=y +CONFIG_SPI_ORION=y +CONFIG_SPI_PL022=y +CONFIG_SPI_ROCKCHIP=m +CONFIG_SPI_RSPI=y +CONFIG_SPI_S3C64XX=m +CONFIG_SPI_SH_MSIOF=m +CONFIG_SPI_SH_HSPI=y +CONFIG_SPI_SIRF=y +CONFIG_SPI_SUN4I=y +CONFIG_SPI_SUN6I=y +CONFIG_SPI_TEGRA114=y +CONFIG_SPI_TEGRA20_SFLASH=y +CONFIG_SPI_TEGRA20_SLINK=y +CONFIG_SPI_XILINX=y +CONFIG_SPI_SPIDEV=y +CONFIG_SPMI=y +CONFIG_PINCTRL_AS3722=y +CONFIG_PINCTRL_PALMAS=y +CONFIG_PINCTRL_BCM2835=y +CONFIG_PINCTRL_APQ8064=y +CONFIG_PINCTRL_APQ8084=y +CONFIG_PINCTRL_IPQ8064=y +CONFIG_PINCTRL_MSM8660=y +CONFIG_PINCTRL_MSM8960=y +CONFIG_PINCTRL_MSM8X74=y +CONFIG_PINCTRL_MSM8916=y +CONFIG_PINCTRL_QCOM_SPMI_PMIC=y +CONFIG_PINCTRL_QCOM_SSBI_PMIC=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_GENERIC_PLATFORM=y +CONFIG_GPIO_DAVINCI=y +CONFIG_GPIO_DWAPB=y +CONFIG_GPIO_EM=y +CONFIG_GPIO_RCAR=y +CONFIG_GPIO_XILINX=y +CONFIG_GPIO_ZYNQ=y +CONFIG_GPIO_PCA953X=y +CONFIG_GPIO_PCA953X_IRQ=y +CONFIG_GPIO_PCF857X=y +CONFIG_GPIO_TWL4030=y +CONFIG_GPIO_PALMAS=y +CONFIG_GPIO_SYSCON=y +CONFIG_GPIO_TPS6586X=y +CONFIG_GPIO_TPS65910=y +CONFIG_BATTERY_ACT8945A=y +CONFIG_BATTERY_SBS=y +CONFIG_BATTERY_MAX17040=m +CONFIG_BATTERY_MAX17042=m +CONFIG_CHARGER_MAX14577=m +CONFIG_CHARGER_MAX77693=m +CONFIG_CHARGER_MAX8997=m +CONFIG_CHARGER_MAX8998=m +CONFIG_CHARGER_TPS65090=y +CONFIG_AXP20X_POWER=m +CONFIG_POWER_RESET_AS3722=y +CONFIG_POWER_RESET_GPIO=y +CONFIG_POWER_RESET_GPIO_RESTART=y +CONFIG_POWER_RESET_KEYSTONE=y +CONFIG_POWER_RESET_RMOBILE=y +CONFIG_POWER_RESET_ST=y +CONFIG_POWER_AVS=y +CONFIG_ROCKCHIP_IODOMAIN=y +CONFIG_SENSORS_IIO_HWMON=y +CONFIG_SENSORS_LM90=y +CONFIG_SENSORS_LM95245=y +CONFIG_SENSORS_NTC_THERMISTOR=m +CONFIG_SENSORS_PWM_FAN=m +CONFIG_SENSORS_INA2XX=m +CONFIG_CPU_THERMAL=y +CONFIG_ROCKCHIP_THERMAL=y +CONFIG_RCAR_THERMAL=y +CONFIG_ARMADA_THERMAL=y +CONFIG_DAVINCI_WATCHDOG=m +CONFIG_EXYNOS_THERMAL=m +CONFIG_ST_THERMAL_SYSCFG=y +CONFIG_ST_THERMAL_MEMMAP=y +CONFIG_WATCHDOG=y +CONFIG_DA9063_WATCHDOG=m +CONFIG_XILINX_WATCHDOG=y +CONFIG_ARM_SP805_WATCHDOG=y +CONFIG_AT91SAM9X_WATCHDOG=y +CONFIG_SAMA5D4_WATCHDOG=y +CONFIG_ORION_WATCHDOG=y +CONFIG_ST_LPC_WATCHDOG=y +CONFIG_SUNXI_WATCHDOG=y +CONFIG_IMX2_WDT=y +CONFIG_TEGRA_WATCHDOG=m +CONFIG_MESON_WATCHDOG=y +CONFIG_DW_WATCHDOG=y +CONFIG_DIGICOLOR_WATCHDOG=y +CONFIG_BCM2835_WDT=y +CONFIG_BCM7038_WDT=m +CONFIG_BCM_KONA_WDT=y +CONFIG_MFD_ACT8945A=y +CONFIG_MFD_AS3711=y +CONFIG_MFD_AS3722=y +CONFIG_MFD_ATMEL_FLEXCOM=y +CONFIG_MFD_ATMEL_HLCDC=m +CONFIG_MFD_BCM590XX=y +CONFIG_MFD_AXP20X=y +CONFIG_MFD_AXP20X_I2C=m +CONFIG_MFD_AXP20X_RSB=m +CONFIG_MFD_CROS_EC=m +CONFIG_MFD_CROS_EC_I2C=m +CONFIG_MFD_CROS_EC_SPI=m +CONFIG_MFD_DA9063=m +CONFIG_MFD_MAX14577=y +CONFIG_MFD_MAX77686=y +CONFIG_MFD_MAX77693=m +CONFIG_MFD_MAX8907=y +CONFIG_MFD_MAX8997=y +CONFIG_MFD_MAX8998=y +CONFIG_MFD_RK808=y +CONFIG_MFD_PM8921_CORE=y +CONFIG_MFD_QCOM_RPM=y +CONFIG_MFD_SPMI_PMIC=y +CONFIG_MFD_SEC_CORE=y +CONFIG_MFD_STMPE=y +CONFIG_MFD_PALMAS=y +CONFIG_MFD_TPS65090=y +CONFIG_MFD_TPS65217=y +CONFIG_MFD_TPS65218=y +CONFIG_MFD_TPS6586X=y +CONFIG_MFD_TPS65910=y +CONFIG_REGULATOR_ACT8945A=y +CONFIG_REGULATOR_AB8500=y +CONFIG_REGULATOR_ACT8865=y +CONFIG_REGULATOR_ANATOP=y +CONFIG_REGULATOR_AS3711=y +CONFIG_REGULATOR_AS3722=y +CONFIG_REGULATOR_AXP20X=m +CONFIG_REGULATOR_BCM590XX=y +CONFIG_REGULATOR_DA9210=y +CONFIG_REGULATOR_FAN53555=y +CONFIG_REGULATOR_RK808=y +CONFIG_REGULATOR_GPIO=y +CONFIG_MFD_SYSCON=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_REGULATOR_LP872X=y +CONFIG_REGULATOR_MAX14577=m +CONFIG_REGULATOR_MAX8907=y +CONFIG_REGULATOR_MAX8973=y +CONFIG_REGULATOR_MAX8997=m +CONFIG_REGULATOR_MAX8998=m +CONFIG_REGULATOR_MAX77686=y +CONFIG_REGULATOR_MAX77693=m +CONFIG_REGULATOR_MAX77802=m +CONFIG_REGULATOR_PALMAS=y +CONFIG_REGULATOR_PBIAS=y +CONFIG_REGULATOR_PWM=y +CONFIG_REGULATOR_QCOM_RPM=y +CONFIG_REGULATOR_QCOM_SMD_RPM=y +CONFIG_REGULATOR_S2MPS11=y +CONFIG_REGULATOR_S5M8767=y +CONFIG_REGULATOR_TI_ABB=y +CONFIG_REGULATOR_TPS51632=y +CONFIG_REGULATOR_TPS62360=y +CONFIG_REGULATOR_TPS65090=y +CONFIG_REGULATOR_TPS65217=y +CONFIG_REGULATOR_TPS65218=y +CONFIG_REGULATOR_TPS6586X=y +CONFIG_REGULATOR_TPS65910=y +CONFIG_REGULATOR_TWL4030=y +CONFIG_REGULATOR_VEXPRESS=y +CONFIG_REGULATOR_WM8994=m +CONFIG_MEDIA_SUPPORT=m +CONFIG_MEDIA_CAMERA_SUPPORT=y +CONFIG_MEDIA_CONTROLLER=y +CONFIG_VIDEO_V4L2_SUBDEV_API=y +CONFIG_MEDIA_USB_SUPPORT=y +CONFIG_USB_VIDEO_CLASS=y +CONFIG_USB_GSPCA=y +CONFIG_V4L_PLATFORM_DRIVERS=y +CONFIG_SOC_CAMERA=m +CONFIG_SOC_CAMERA_PLATFORM=m +CONFIG_VIDEO_RCAR_VIN=m +CONFIG_VIDEO_ATMEL_ISI=m +CONFIG_VIDEO_SAMSUNG_EXYNOS4_IS=m +CONFIG_VIDEO_S5P_FIMC=m +CONFIG_VIDEO_S5P_MIPI_CSIS=m +CONFIG_VIDEO_EXYNOS_FIMC_LITE=m +CONFIG_VIDEO_EXYNOS4_FIMC_IS=m +CONFIG_V4L_MEM2MEM_DRIVERS=y +CONFIG_VIDEO_SAMSUNG_S5P_JPEG=m +CONFIG_VIDEO_SAMSUNG_S5P_MFC=m +CONFIG_VIDEO_STI_BDISP=m +CONFIG_VIDEO_RENESAS_JPU=m +CONFIG_VIDEO_RENESAS_VSP1=m +CONFIG_V4L_TEST_DRIVERS=y +# CONFIG_MEDIA_SUBDRV_AUTOSELECT is not set +CONFIG_VIDEO_ADV7180=m +CONFIG_VIDEO_ML86V7667=m +CONFIG_DRM=y +CONFIG_DRM_I2C_ADV7511=m +# CONFIG_DRM_I2C_CH7006 is not set +# CONFIG_DRM_I2C_SIL164 is not set +CONFIG_DRM_NXP_PTN3460=m +CONFIG_DRM_PARADE_PS8622=m +CONFIG_DRM_NOUVEAU=m +CONFIG_DRM_EXYNOS=m +CONFIG_DRM_EXYNOS_FIMD=y +CONFIG_DRM_EXYNOS_MIXER=y +CONFIG_DRM_EXYNOS_DPI=y +CONFIG_DRM_EXYNOS_DSI=y +CONFIG_DRM_EXYNOS_HDMI=y +CONFIG_DRM_ROCKCHIP=m +CONFIG_ROCKCHIP_ANALOGIX_DP=m +CONFIG_ROCKCHIP_DW_HDMI=m +CONFIG_ROCKCHIP_DW_MIPI_DSI=m +CONFIG_ROCKCHIP_INNO_HDMI=m +CONFIG_DRM_ATMEL_HLCDC=m +CONFIG_DRM_RCAR_DU=m +CONFIG_DRM_RCAR_HDMI=y +CONFIG_DRM_RCAR_LVDS=y +CONFIG_DRM_SUN4I=m +CONFIG_DRM_TEGRA=y +CONFIG_DRM_PANEL_SAMSUNG_LD9040=m +CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m +CONFIG_DRM_PANEL_SIMPLE=y +CONFIG_DRM_STI=m +CONFIG_DRM_VC4=y +CONFIG_FB_ARMCLCD=y +CONFIG_FB_EFI=y +CONFIG_FB_WM8505=y +CONFIG_FB_SH_MOBILE_LCDC=y +CONFIG_FB_SIMPLE=y +CONFIG_FB_SH_MOBILE_MERAM=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_LCD_PLATFORM=m +CONFIG_BACKLIGHT_PWM=y +CONFIG_BACKLIGHT_AS3711=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_HDA_TEGRA=m +CONFIG_SND_HDA_INPUT_BEEP=y +CONFIG_SND_HDA_PATCH_LOADER=y +CONFIG_SND_HDA_CODEC_REALTEK=m +CONFIG_SND_HDA_CODEC_HDMI=m +CONFIG_SND_USB_AUDIO=m +CONFIG_SND_SOC=m +CONFIG_SND_ATMEL_SOC=m +CONFIG_SND_ATMEL_SOC_WM8904=m +CONFIG_SND_ATMEL_SOC_PDMIC=m +CONFIG_SND_BCM2835_SOC_I2S=m +CONFIG_SND_SOC_FSL_SAI=m +CONFIG_SND_SOC_ROCKCHIP=m +CONFIG_SND_SOC_ROCKCHIP_SPDIF=m +CONFIG_SND_SOC_ROCKCHIP_MAX98090=m +CONFIG_SND_SOC_ROCKCHIP_RT5645=m +CONFIG_SND_SOC_SAMSUNG=m +CONFIG_SND_SOC_SAMSUNG_SMDK_WM8994=m +CONFIG_SND_SOC_SMDK_WM8994_PCM=m +CONFIG_SND_SOC_SNOW=m +CONFIG_SND_SOC_SH4_FSI=m +CONFIG_SND_SOC_RCAR=m +CONFIG_SND_SOC_RSRC_CARD=m +CONFIG_SND_SUN4I_CODEC=m +CONFIG_SND_SOC_TEGRA=m +CONFIG_SND_SOC_TEGRA_RT5640=m +CONFIG_SND_SOC_TEGRA_WM8753=m +CONFIG_SND_SOC_TEGRA_WM8903=m +CONFIG_SND_SOC_TEGRA_WM9712=m +CONFIG_SND_SOC_TEGRA_TRIMSLICE=m +CONFIG_SND_SOC_TEGRA_ALC5632=m +CONFIG_SND_SOC_TEGRA_MAX98090=m +CONFIG_SND_SOC_AK4642=m +CONFIG_SND_SOC_SGTL5000=m +CONFIG_SND_SOC_SPDIF=m +CONFIG_SND_SOC_WM8978=m +CONFIG_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_MVEBU=y +CONFIG_USB_XHCI_RCAR=m +CONFIG_USB_XHCI_TEGRA=m +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_MSM=m +CONFIG_USB_EHCI_EXYNOS=y +CONFIG_USB_EHCI_TEGRA=y +CONFIG_USB_EHCI_HCD_STI=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_ISP1760=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_STI=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_EXYNOS=m +CONFIG_USB_R8A66597_HCD=m +CONFIG_USB_RENESAS_USBHS=m +CONFIG_USB_STORAGE=y +CONFIG_USB_MUSB_HDRC=m +CONFIG_USB_MUSB_SUNXI=m +CONFIG_USB_DWC3=y +CONFIG_USB_DWC2=y +CONFIG_USB_CHIPIDEA=y +CONFIG_USB_CHIPIDEA_UDC=y +CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_AB8500_USB=y +CONFIG_KEYSTONE_USB_PHY=y +CONFIG_OMAP_USB3=y +CONFIG_USB_GPIO_VBUS=y +CONFIG_USB_ISP1301=y +CONFIG_USB_MSM_OTG=m +CONFIG_USB_MXS_PHY=y +CONFIG_USB_GADGET=y +CONFIG_USB_FSL_USB2=y +CONFIG_USB_RENESAS_USBHS_UDC=m +CONFIG_USB_ETH=m +CONFIG_MMC=y +CONFIG_MMC_BLOCK_MINORS=16 +CONFIG_MMC_ARMMMCI=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_OF_ARASAN=y +CONFIG_MMC_SDHCI_OF_AT91=y +CONFIG_MMC_SDHCI_OF_ESDHC=m +CONFIG_MMC_SDHCI_ESDHC_IMX=y +CONFIG_MMC_SDHCI_DOVE=y +CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_MMC_SDHCI_PXAV3=y +CONFIG_MMC_SDHCI_SPEAR=y +CONFIG_MMC_SDHCI_S3C=y +CONFIG_MMC_SDHCI_S3C_DMA=y +CONFIG_MMC_SDHCI_BCM_KONA=y +CONFIG_MMC_SDHCI_ST=y +CONFIG_MMC_OMAP=y +CONFIG_MMC_OMAP_HS=y +CONFIG_MMC_ATMELMCI=y +CONFIG_MMC_SDHCI_MSM=y +CONFIG_MMC_MVSDIO=y +CONFIG_MMC_SDHI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_IDMAC=y +CONFIG_MMC_DW_PLTFM=y +CONFIG_MMC_DW_EXYNOS=y +CONFIG_MMC_DW_ROCKCHIP=y +CONFIG_MMC_SH_MMCIF=y +CONFIG_MMC_SUNXI=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_CLASS_FLASH=m +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_PWM=y +CONFIG_LEDS_MAX77693=m +CONFIG_LEDS_MAX8997=m +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_ONESHOT=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_BACKLIGHT=y +CONFIG_LEDS_TRIGGER_CPU=y +CONFIG_LEDS_TRIGGER_GPIO=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_LEDS_TRIGGER_TRANSIENT=y +CONFIG_LEDS_TRIGGER_CAMERA=y +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y +CONFIG_EDAC_HIGHBANK_MC=y +CONFIG_EDAC_HIGHBANK_L2=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_AS3722=y +CONFIG_RTC_DRV_DS1307=y +CONFIG_RTC_DRV_HYM8563=m +CONFIG_RTC_DRV_MAX8907=y +CONFIG_RTC_DRV_MAX8998=m +CONFIG_RTC_DRV_MAX8997=m +CONFIG_RTC_DRV_MAX77686=y +CONFIG_RTC_DRV_RK808=m +CONFIG_RTC_DRV_RS5C372=m +CONFIG_RTC_DRV_PALMAS=y +CONFIG_RTC_DRV_ST_LPC=y +CONFIG_RTC_DRV_TWL4030=y +CONFIG_RTC_DRV_TPS6586X=y +CONFIG_RTC_DRV_TPS65910=y +CONFIG_RTC_DRV_S35390A=m +CONFIG_RTC_DRV_RX8581=m +CONFIG_RTC_DRV_EM3027=y +CONFIG_RTC_DRV_DA9063=m +CONFIG_RTC_DRV_EFI=m +CONFIG_RTC_DRV_DIGICOLOR=m +CONFIG_RTC_DRV_S5M=m +CONFIG_RTC_DRV_S3C=m +CONFIG_RTC_DRV_PL031=y +CONFIG_RTC_DRV_AT91RM9200=m +CONFIG_RTC_DRV_AT91SAM9=m +CONFIG_RTC_DRV_VT8500=y +CONFIG_RTC_DRV_SUN6I=y +CONFIG_RTC_DRV_SUNXI=y +CONFIG_RTC_DRV_MV=y +CONFIG_RTC_DRV_TEGRA=y +CONFIG_DMADEVICES=y +CONFIG_DW_DMAC=y +CONFIG_AT_HDMAC=y +CONFIG_AT_XDMAC=y +CONFIG_FSL_EDMA=y +CONFIG_MV_XOR=y +CONFIG_TEGRA20_APB_DMA=y +CONFIG_SH_DMAE=y +CONFIG_RCAR_DMAC=y +CONFIG_RENESAS_USB_DMAC=m +CONFIG_STE_DMA40=y +CONFIG_SIRF_DMA=y +CONFIG_TI_EDMA=y +CONFIG_PL330_DMA=y +CONFIG_IMX_SDMA=y +CONFIG_IMX_DMA=y +CONFIG_MXS_DMA=y +CONFIG_DMA_BCM2835=y +CONFIG_DMA_OMAP=y +CONFIG_QCOM_BAM_DMA=y +CONFIG_XILINX_DMA=y +CONFIG_DMA_SUN6I=y +CONFIG_STAGING=y +CONFIG_SENSORS_ISL29018=y +CONFIG_SENSORS_ISL29028=y +CONFIG_MFD_NVEC=y +CONFIG_KEYBOARD_NVEC=y +CONFIG_SERIO_NVEC_PS2=y +CONFIG_NVEC_POWER=y +CONFIG_NVEC_PAZ00=y +CONFIG_QCOM_GSBI=y +CONFIG_QCOM_PM=y +CONFIG_QCOM_SMEM=y +CONFIG_QCOM_SMD=y +CONFIG_QCOM_SMD_RPM=y +CONFIG_QCOM_SMP2P=y +CONFIG_QCOM_SMSM=y +CONFIG_QCOM_WCNSS_CTRL=m +CONFIG_ROCKCHIP_PM_DOMAINS=y +CONFIG_COMMON_CLK_QCOM=y +CONFIG_CHROME_PLATFORMS=y +CONFIG_STAGING_BOARD=y +CONFIG_CROS_EC_CHARDEV=m +CONFIG_COMMON_CLK_MAX77686=y +CONFIG_COMMON_CLK_MAX77802=m +CONFIG_COMMON_CLK_RK808=m +CONFIG_COMMON_CLK_S2MPS11=m +CONFIG_APQ_MMCC_8084=y +CONFIG_MSM_GCC_8660=y +CONFIG_MSM_MMCC_8960=y +CONFIG_MSM_MMCC_8974=y +CONFIG_HWSPINLOCK_QCOM=y +CONFIG_ROCKCHIP_IOMMU=y +CONFIG_TEGRA_IOMMU_GART=y +CONFIG_TEGRA_IOMMU_SMMU=y +CONFIG_PM_DEVFREQ=y +CONFIG_ARM_TEGRA_DEVFREQ=m +CONFIG_MEMORY=y +CONFIG_EXTCON=y +CONFIG_TI_AEMIF=y +CONFIG_IIO=y +CONFIG_AT91_ADC=m +CONFIG_AT91_SAMA5D2_ADC=m +CONFIG_BERLIN2_ADC=m +CONFIG_EXYNOS_ADC=m +CONFIG_VF610_ADC=m +CONFIG_XILINX_XADC=y +CONFIG_CM36651=m +CONFIG_AK8975=y +CONFIG_RASPBERRYPI_POWER=y +CONFIG_PWM=y +CONFIG_PWM_ATMEL=m +CONFIG_PWM_ATMEL_HLCDC_PWM=m +CONFIG_PWM_ATMEL_TCB=m +CONFIG_PWM_FSL_FTM=m +CONFIG_PWM_RENESAS_TPU=y +CONFIG_PWM_ROCKCHIP=m +CONFIG_PWM_SAMSUNG=m +CONFIG_PWM_SUN4I=y +CONFIG_PWM_TEGRA=y +CONFIG_PWM_VT8500=y +CONFIG_PHY_HIX5HD2_SATA=y +CONFIG_E1000E=y +CONFIG_PWM_STI=y +CONFIG_PWM_BCM2835=y +CONFIG_PWM_BRCMSTB=m +CONFIG_OMAP_USB2=y +CONFIG_TI_PIPE3=y +CONFIG_PHY_BERLIN_USB=y +CONFIG_PHY_BERLIN_SATA=y +CONFIG_PHY_ROCKCHIP_DP=m +CONFIG_PHY_ROCKCHIP_USB=m +CONFIG_PHY_QCOM_APQ8064_SATA=m +CONFIG_PHY_MIPHY28LP=y +CONFIG_PHY_MIPHY365X=y +CONFIG_PHY_RCAR_GEN2=m +CONFIG_PHY_STIH41X_USB=y +CONFIG_PHY_STIH407_USB=y +CONFIG_PHY_SUN4I_USB=y +CONFIG_PHY_SUN9I_USB=y +CONFIG_PHY_SAMSUNG_USB2=m +CONFIG_PHY_TEGRA_XUSB=y +CONFIG_PHY_BRCM_SATA=y +CONFIG_NVMEM=y +CONFIG_NVMEM_SUNXI_SID=y +CONFIG_BCM2835_MBOX=y +CONFIG_RASPBERRYPI_FIRMWARE=y +CONFIG_EFI_VARS=m +CONFIG_EFI_CAPSULE_LOADER=m +CONFIG_EXT4_FS=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_UBIFS_FS=y +CONFIG_TMPFS=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_CRYPTO_DEV_TEGRA_AES=y +CONFIG_CPUFREQ_DT=y +CONFIG_KEYSTONE_IRQ=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_ST=y +CONFIG_CRYPTO_DEV_MARVELL_CESA=m +CONFIG_CRYPTO_DEV_S5P=m +CONFIG_CRYPTO_DEV_SUN4I_SS=m +CONFIG_CRYPTO_DEV_ROCKCHIP=m +CONFIG_ARM_CRYPTO=y +CONFIG_CRYPTO_SHA1_ARM=m +CONFIG_CRYPTO_SHA1_ARM_NEON=m +CONFIG_CRYPTO_SHA1_ARM_CE=m +CONFIG_CRYPTO_SHA2_ARM_CE=m +CONFIG_CRYPTO_SHA256_ARM=m +CONFIG_CRYPTO_SHA512_ARM=m +CONFIG_CRYPTO_AES_ARM=m +CONFIG_CRYPTO_AES_ARM_BS=m +CONFIG_CRYPTO_AES_ARM_CE=m +CONFIG_CRYPTO_GHASH_ARM_CE=m +CONFIG_CRYPTO_DEV_ATMEL_AES=m +CONFIG_CRYPTO_DEV_ATMEL_TDES=m +CONFIG_CRYPTO_DEV_ATMEL_SHA=m +CONFIG_VIDEO_VIVID=m +CONFIG_VIRTIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_PCI_LEGACY=y +CONFIG_VIRTIO_MMIO=y +CONFIG_TEE=y +CONFIG_OPTEE=y + -- cgit v1.2.3 From 31562136c8d4ed60f2b6bc1c6c349c176432470f Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sat, 12 Aug 2017 21:36:09 -0700 Subject: irqchip: mips-gic: SYNC after enabling GIC region commit 2c0e8382386f618c85d20cb05e7cf7df8cdd382c upstream. A SYNC is required between enabling the GIC region and actually trying to use it, even if the first access is a read, otherwise its possible depending on the timing (and in my case depending on the precise alignment of certain kernel code) to hit CM bus errors on that first access. Add the SYNC straight after setting the GIC base. [paul.burton@imgtec.com: Changes later in this series increase our likelihood of hitting this by reducing the amount of code that runs between enabling the GIC & accessing it.] Fixes: a7057270c280 ("irqchip: mips-gic: Add device-tree support") Signed-off-by: James Hogan Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Thomas Gleixner Cc: Jason Cooper Cc: James Hogan Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17019/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-mips-gic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index c0178a122940..d74374f25392 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -1115,8 +1115,11 @@ static int __init gic_of_init(struct device_node *node, gic_len = resource_size(&res); } - if (mips_cm_present()) + if (mips_cm_present()) { write_gcr_gic_base(gic_base | CM_GCR_GIC_BASE_GICEN_MSK); + /* Ensure GIC region is enabled before trying to access it */ + __sync(); + } gic_present = true; __gic_init(gic_base, gic_len, cpu_vec, 0, node); -- cgit v1.2.3 From d22f6da47355b6547427838d67054c51bca0c011 Mon Sep 17 00:00:00 2001 From: Stephen Douthit Date: Mon, 7 Aug 2017 17:10:59 -0400 Subject: i2c: ismt: Don't duplicate the receive length for block reads commit b6c159a9cb69c2cf0bf59d4e12c3a2da77e4d994 upstream. According to Table 15-14 of the C2000 EDS (Intel doc #510524) the rx data pointed to by the descriptor dptr contains the byte count. desc->rxbytes reports all bytes read on the wire, including the "byte count" byte. So if a device sends 4 bytes in response to a block read, on the wire and in the DMA buffer we see: count data1 data2 data3 data4 0x04 0xde 0xad 0xbe 0xef That's what we want to return in data->block to the next level. Instead we were actually prefixing that with desc->rxbytes: bad count count data1 data2 data3 data4 0x05 0x04 0xde 0xad 0xbe 0xef This was discovered while developing a BMC solution relying on the ipmi_ssif.c driver which was trying to interpret the bogus length field as part of the IPMI response. Signed-off-by: Stephen Douthit Tested-by: Dan Priamo Acked-by: Neil Horman Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-ismt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index f573448d2132..8075a9368681 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -341,8 +341,8 @@ static int ismt_process_desc(const struct ismt_desc *desc, break; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_I2C_BLOCK_DATA: - memcpy(&data->block[1], dma_buffer, desc->rxbytes); - data->block[0] = desc->rxbytes; + memcpy(data->block, dma_buffer, desc->rxbytes); + data->block[0] = desc->rxbytes - 1; break; } return 0; -- cgit v1.2.3 From 91a0e1edb80a3cdeaa7b8810b65cc7abf6b85af8 Mon Sep 17 00:00:00 2001 From: Stephen Douthit Date: Mon, 7 Aug 2017 17:11:00 -0400 Subject: i2c: ismt: Return EMSGSIZE for block reads with bogus length commit ba201c4f5ebe13d7819081756378777d8153f23e upstream. Compare the number of bytes actually seen on the wire to the byte count field returned by the slave device. Previously we just overwrote the byte count returned by the slave with the real byte count and let the caller figure out if the message was sane. Signed-off-by: Stephen Douthit Tested-by: Dan Priamo Acked-by: Neil Horman Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-ismt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 8075a9368681..8477292e92c8 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -341,8 +341,10 @@ static int ismt_process_desc(const struct ismt_desc *desc, break; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_I2C_BLOCK_DATA: + if (desc->rxbytes != dma_buffer[0] + 1) + return -EMSGSIZE; + memcpy(data->block, dma_buffer, desc->rxbytes); - data->block[0] = desc->rxbytes - 1; break; } return 0; -- cgit v1.2.3 From 9e0a64330ce5ef4be20391fdf7c95a1d7ac67538 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 16 Aug 2017 11:56:24 +0200 Subject: crypto: algif_skcipher - only call put_page on referenced and used pages commit 445a582738de6802669aeed9c33ca406c23c3b1f upstream. For asynchronous operation, SGs are allocated without a page mapped to them or with a page that is not used (ref-counted). If the SGL is freed, the code must only call put_page for an SG if there was a page assigned and ref-counted in the first place. This fixes a kernel crash when using io_submit with more than one iocb using the sendmsg and sendpage (vmsplice/splice) interface. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algif_skcipher.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 28556fce4267..45af0fe00f33 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -86,8 +86,13 @@ static void skcipher_free_async_sgls(struct skcipher_async_req *sreq) } sgl = sreq->tsg; n = sg_nents(sgl); - for_each_sg(sgl, sg, n, i) - put_page(sg_page(sg)); + for_each_sg(sgl, sg, n, i) { + struct page *page = sg_page(sg); + + /* some SGs may not have a page mapped */ + if (page && page_ref_count(page)) + put_page(page); + } kfree(sreq->tsg); } -- cgit v1.2.3 From 17c564f629f436fb357b88398e1834f442c28ced Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Aug 2017 16:15:26 -0700 Subject: mm, uprobes: fix multiple free of ->uprobes_state.xol_area commit 355627f518978b5167256d27492fe0b343aaf2f2 upstream. Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") made it possible to kill a forking task while it is waiting to acquire its ->mmap_sem for write, in dup_mmap(). However, it was overlooked that this introduced an new error path before the new mm_struct's ->uprobes_state.xol_area has been set to NULL after being copied from the old mm_struct by the memcpy in dup_mm(). For a task that has previously hit a uprobe tracepoint, this resulted in the 'struct xol_area' being freed multiple times if the task was killed at just the right time while forking. Fix it by setting ->uprobes_state.xol_area to NULL in mm_init() rather than in uprobe_dup_mmap(). With CONFIG_UPROBE_EVENTS=y, the bug can be reproduced by the same C program given by commit 2b7e8665b4ff ("fork: fix incorrect fput of ->exe_file causing use-after-free"), provided that a uprobe tracepoint has been set on the fork_thread() function. For example: $ gcc reproducer.c -o reproducer -lpthread $ nm reproducer | grep fork_thread 0000000000400719 t fork_thread $ echo "p $PWD/reproducer:0x719" > /sys/kernel/debug/tracing/uprobe_events $ echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable $ ./reproducer Here is the use-after-free reported by KASAN: BUG: KASAN: use-after-free in uprobe_clear_state+0x1c4/0x200 Read of size 8 at addr ffff8800320a8b88 by task reproducer/198 CPU: 1 PID: 198 Comm: reproducer Not tainted 4.13.0-rc7-00015-g36fde05f3fb5 #255 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014 Call Trace: dump_stack+0xdb/0x185 print_address_description+0x7e/0x290 kasan_report+0x23b/0x350 __asan_report_load8_noabort+0x19/0x20 uprobe_clear_state+0x1c4/0x200 mmput+0xd6/0x360 do_exit+0x740/0x1670 do_group_exit+0x13f/0x380 get_signal+0x597/0x17d0 do_signal+0x99/0x1df0 exit_to_usermode_loop+0x166/0x1e0 syscall_return_slowpath+0x258/0x2c0 entry_SYSCALL_64_fastpath+0xbc/0xbe ... Allocated by task 199: save_stack_trace+0x1b/0x20 kasan_kmalloc+0xfc/0x180 kmem_cache_alloc_trace+0xf3/0x330 __create_xol_area+0x10f/0x780 uprobe_notify_resume+0x1674/0x2210 exit_to_usermode_loop+0x150/0x1e0 prepare_exit_to_usermode+0x14b/0x180 retint_user+0x8/0x20 Freed by task 199: save_stack_trace+0x1b/0x20 kasan_slab_free+0xa8/0x1a0 kfree+0xba/0x210 uprobe_clear_state+0x151/0x200 mmput+0xd6/0x360 copy_process.part.8+0x605f/0x65d0 _do_fork+0x1a5/0xbd0 SyS_clone+0x19/0x20 do_syscall_64+0x22f/0x660 return_from_SYSCALL_64+0x0/0x7a Note: without KASAN, you may instead see a "Bad page state" message, or simply a general protection fault. Link: http://lkml.kernel.org/r/20170830033303.17927-1-ebiggers3@gmail.com Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") Signed-off-by: Eric Biggers Reported-by: Oleg Nesterov Acked-by: Oleg Nesterov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Mark Rutland Cc: Michal Hocko Cc: Peter Zijlstra Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/events/uprobes.c | 2 -- kernel/fork.c | 8 ++++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f9ec9add2164..a1de021dccba 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1254,8 +1254,6 @@ void uprobe_end_dup_mmap(void) void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { - newmm->uprobes_state.xol_area = NULL; - if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) { set_bit(MMF_HAS_UPROBES, &newmm->flags); /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */ diff --git a/kernel/fork.c b/kernel/fork.c index 50bf262cc427..9321b1ad3335 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -745,6 +745,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } +static void mm_init_uprobes_state(struct mm_struct *mm) +{ +#ifdef CONFIG_UPROBES + mm->uprobes_state.xol_area = NULL; +#endif +} + static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, struct user_namespace *user_ns) { @@ -772,6 +779,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif + mm_init_uprobes_state(mm); if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; -- cgit v1.2.3 From 8cc3acff5f1a99b153146643c6dd322edef11e21 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 31 Aug 2017 16:15:30 -0700 Subject: mm, madvise: ensure poisoned pages are removed from per-cpu lists commit c461ad6a63b37ba74632e90c063d14823c884247 upstream. Wendy Wang reported off-list that a RAS HWPOISON-SOFT test case failed and bisected it to the commit 479f854a207c ("mm, page_alloc: defer debugging checks of pages allocated from the PCP"). The problem is that a page that was poisoned with madvise() is reused. The commit removed a check that would trigger if DEBUG_VM was enabled but re-enabling the check only fixes the problem as a side-effect by printing a bad_page warning and recovering. The root of the problem is that an madvise() can leave a poisoned page on the per-cpu list. This patch drains all per-cpu lists after pages are poisoned so that they will not be reused. Wendy reports that the test case in question passes with this patch applied. While this could be done in a targeted fashion, it is over-complicated for such a rare operation. Link: http://lkml.kernel.org/r/20170828133414.7qro57jbepdcyz5x@techsingularity.net Fixes: 479f854a207c ("mm, page_alloc: defer debugging checks of pages allocated from the PCP") Signed-off-by: Mel Gorman Reported-by: Wang, Wendy Tested-by: Wang, Wendy Acked-by: David Rientjes Acked-by: Vlastimil Babka Cc: "Hansen, Dave" Cc: "Luck, Tony" Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/madvise.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/madvise.c b/mm/madvise.c index 63a12162f4c6..55f30ec32e5b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -533,6 +533,8 @@ static long madvise_remove(struct vm_area_struct *vma, static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) { struct page *p; + struct zone *zone; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; for (; start < end; start += PAGE_SIZE << @@ -561,6 +563,11 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) if (ret) return ret; } + + /* Ensure that all poisoned pages are removed from per-cpu lists */ + for_each_populated_zone(zone) + drain_all_pages(zone); + return 0; } #endif -- cgit v1.2.3 From 715849268b3418a94ee1330be2c4ca966df83d90 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 4 Aug 2017 11:22:31 +0800 Subject: ceph: fix readpage from fscache commit dd2bc473482eedc60c29cf00ad12568ce40ce511 upstream. ceph_readpage() unlocks page prematurely prematurely in the case that page is reading from fscache. Caller of readpage expects that page is uptodate when it get unlocked. So page shoule get locked by completion callback of fscache_read_or_alloc_pages() Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 24 +++++++++++++++--------- fs/ceph/cache.c | 12 +++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 900ffafb85ca..7b79a54a2789 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -188,7 +188,7 @@ static int ceph_releasepage(struct page *page, gfp_t g) /* * read a single page, without unlocking it. */ -static int readpage_nounlock(struct file *filp, struct page *page) +static int ceph_do_readpage(struct file *filp, struct page *page) { struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); @@ -218,7 +218,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) err = ceph_readpage_from_fscache(inode, page); if (err == 0) - goto out; + return -EINPROGRESS; dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); @@ -248,8 +248,11 @@ out: static int ceph_readpage(struct file *filp, struct page *page) { - int r = readpage_nounlock(filp, page); - unlock_page(page); + int r = ceph_do_readpage(filp, page); + if (r != -EINPROGRESS) + unlock_page(page); + else + r = 0; return r; } @@ -1235,7 +1238,7 @@ retry_locked: goto retry_locked; r = writepage_nounlock(page, NULL); if (r < 0) - goto fail_nosnap; + goto fail_unlock; goto retry_locked; } @@ -1263,11 +1266,14 @@ retry_locked: } /* we need to read it. */ - r = readpage_nounlock(file, page); - if (r < 0) - goto fail_nosnap; + r = ceph_do_readpage(file, page); + if (r < 0) { + if (r == -EINPROGRESS) + return -EAGAIN; + goto fail_unlock; + } goto retry_locked; -fail_nosnap: +fail_unlock: unlock_page(page); return r; } diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 5bc5d37b1217..a2d7997afd94 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -240,13 +240,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) } } -static void ceph_vfs_readpage_complete(struct page *page, void *data, int error) -{ - if (!error) - SetPageUptodate(page); -} - -static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error) +static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error) { if (!error) SetPageUptodate(page); @@ -274,7 +268,7 @@ int ceph_readpage_from_fscache(struct inode *inode, struct page *page) return -ENOBUFS; ret = fscache_read_or_alloc_page(ci->fscache, page, - ceph_vfs_readpage_complete, NULL, + ceph_readpage_from_fscache_complete, NULL, GFP_KERNEL); switch (ret) { @@ -303,7 +297,7 @@ int ceph_readpages_from_fscache(struct inode *inode, return -ENOBUFS; ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages, - ceph_vfs_readpage_complete_unlock, + ceph_readpage_from_fscache_complete, NULL, mapping_gfp_mask(mapping)); switch (ret) { -- cgit v1.2.3 From da16ed52c36aa200e60230de54271a8556dc8674 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 28 Aug 2017 14:51:27 -0700 Subject: cpumask: fix spurious cpumask_of_node() on non-NUMA multi-node configs commit b339752d054fb32863418452dff350a1086885b1 upstream. When !NUMA, cpumask_of_node(@node) equals cpu_online_mask regardless of @node. The assumption seems that if !NUMA, there shouldn't be more than one node and thus reporting cpu_online_mask regardless of @node is correct. However, that assumption was broken years ago to support DISCONTIGMEM and whether a system has multiple nodes or not is separately controlled by NEED_MULTIPLE_NODES. This means that, on a system with !NUMA && NEED_MULTIPLE_NODES, cpumask_of_node() will report cpu_online_mask for all possible nodes, indicating that the CPUs are associated with multiple nodes which is an impossible configuration. This bug has been around forever but doesn't look like it has caused any noticeable symptoms. However, it triggers a WARN recently added to workqueue to verify NUMA affinity configuration. Fix it by reporting empty cpumask on non-zero nodes if !NUMA. Signed-off-by: Tejun Heo Reported-and-tested-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/topology.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index fc824e2828f3..5d2add1a6c96 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -48,7 +48,11 @@ #define parent_node(node) ((void)(node),0) #endif #ifndef cpumask_of_node -#define cpumask_of_node(node) ((void)node, cpu_online_mask) + #ifdef CONFIG_NEED_MULTIPLE_NODES + #define cpumask_of_node(node) ((node) == 0 ? cpu_online_mask : cpu_none_mask) + #else + #define cpumask_of_node(node) ((void)node, cpu_online_mask) + #endif #endif #ifndef pcibus_to_node #define pcibus_to_node(bus) ((void)(bus), -1) -- cgit v1.2.3 From 309e4dbfaf3da435644b6d9c96a43c50ce9598be Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 24 Aug 2017 12:04:29 -0400 Subject: cpuset: Fix incorrect memory_pressure control file mapping commit 1c08c22c874ac88799cab1f78c40f46110274915 upstream. The memory_pressure control file was incorrectly set up without a private value (0, by default). As a result, this control file was treated like memory_migrate on read. By adding back the FILE_MEMORY_PRESSURE private value, the correct memory pressure value will be returned. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo Fixes: 7dbdb199d3bf ("cgroup: replace cftype->mode with CFTYPE_WORLD_WRITABLE") Signed-off-by: Greg Kroah-Hartman --- kernel/cpuset.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 247afb108343..03a3a6e94eb9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1905,6 +1905,7 @@ static struct cftype files[] = { { .name = "memory_pressure", .read_u64 = cpuset_read_u64, + .private = FILE_MEMORY_PRESSURE, }, { -- cgit v1.2.3 From d4e7dfda905ec08abd303068f22afd714dbda8d6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 1 Oct 2015 01:35:55 +0100 Subject: alpha: uapi: Add support for __SANE_USERSPACE_TYPES__ commit cec80d82142ab25c71eee24b529cfeaf17c43062 upstream. This fixes compiler errors in perf such as: tests/attr.c: In function 'store_event': tests/attr.c:66:27: error: format '%llu' expects argument of type 'long long unsigned int', but argument 6 has type '__u64 {aka long unsigned int}' [-Werror=format=] snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, ^ Signed-off-by: Ben Hutchings Tested-by: Michael Cree Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/types.h | 2 +- arch/alpha/include/uapi/asm/types.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index 4cb4b6d3452c..0bc66e1d3a7e 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -1,6 +1,6 @@ #ifndef _ALPHA_TYPES_H #define _ALPHA_TYPES_H -#include +#include #endif /* _ALPHA_TYPES_H */ diff --git a/arch/alpha/include/uapi/asm/types.h b/arch/alpha/include/uapi/asm/types.h index 9fd3cd459777..8d1024d7be05 100644 --- a/arch/alpha/include/uapi/asm/types.h +++ b/arch/alpha/include/uapi/asm/types.h @@ -9,8 +9,18 @@ * need to be careful to avoid a name clashes. */ -#ifndef __KERNEL__ +/* + * This is here because we used to use l64 for alpha + * and we don't want to impact user mode with our change to ll64 + * in the kernel. + * + * However, some user programs are fine with this. They can + * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here. + */ +#if !defined(__SANE_USERSPACE_TYPES__) && !defined(__KERNEL__) #include +#else +#include #endif #endif /* _UAPI_ALPHA_TYPES_H */ -- cgit v1.2.3 From c5e76654a9e5e63781e722c3de835a8496e4e6e5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 24 Aug 2017 15:16:40 -0700 Subject: CIFS: Fix maximum SMB2 header size commit 9e37b1784f2be9397a903307574ee565bbadfd75 upstream. Currently the maximum size of SMB2/3 header is set incorrectly which leads to hanging of directory listing operations on encrypted SMB3 connections. Fix this by setting the maximum size to 170 bytes that is calculated as RFC1002 length field size (4) + transform header size (52) + SMB2 header size (64) + create response size (56). Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Acked-by: Sachin Prabhu Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index dc0d141f33e2..1e1449ad00e8 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,8 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* BB FIXME - analyze following length BB */ -#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */ +/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ +#define MAX_SMB2_HDR_SIZE 0x00b0 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) -- cgit v1.2.3 From e2ae90bb85f83918c667942d9fff587c73cc1a8c Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 27 Aug 2017 16:56:08 -0500 Subject: CIFS: remove endian related sparse warning commit 6e3c1529c39e92ed64ca41d53abadabbaa1d5393 upstream. Recent patch had an endian warning ie cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() Signed-off-by: Steve French CC: Ronnie Sahlberg Acked-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 581712534c93..dd3e236d7a2b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -194,7 +194,7 @@ check_name(struct dentry *direntry, struct cifs_tcon *tcon) int i; if (unlikely(direntry->d_name.len > - tcon->fsAttrInfo.MaxPathNameComponentLength)) + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength))) return -ENAMETOOLONG; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { -- cgit v1.2.3 From 9e2788ce8f17485ddd506b639eeea38b729baf2e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 31 Aug 2017 16:47:43 +0200 Subject: wl1251: add a missing spin_lock_init() commit f581a0dd744fe32b0a8805e279c59ec1ac676d60 upstream. wl1251: add a missing spin_lock_init() This fixes the following kernel warning: [ 5668.771453] BUG: spinlock bad magic on CPU#0, kworker/u2:3/9745 [ 5668.771850] lock: 0xce63ef20, .magic: 00000000, .owner: /-1, .owner_cpu: 0 [ 5668.772277] CPU: 0 PID: 9745 Comm: kworker/u2:3 Tainted: G W 4.12.0-03002-gec979a4-dirty #40 [ 5668.772796] Hardware name: Nokia RX-51 board [ 5668.773071] Workqueue: phy1 wl1251_irq_work [ 5668.773345] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 5668.773803] [] (show_stack) from [] (do_raw_spin_lock+0x6c/0xa0) [ 5668.774230] [] (do_raw_spin_lock) from [] (_raw_spin_lock_irqsave+0x10/0x18) [ 5668.774658] [] (_raw_spin_lock_irqsave) from [] (wl1251_op_tx+0x38/0x5c) [ 5668.775115] [] (wl1251_op_tx) from [] (ieee80211_tx_frags+0x188/0x1c0) [ 5668.775543] [] (ieee80211_tx_frags) from [] (__ieee80211_tx+0x6c/0x130) [ 5668.775970] [] (__ieee80211_tx) from [] (ieee80211_tx+0xdc/0x104) [ 5668.776367] [] (ieee80211_tx) from [] (__ieee80211_subif_start_xmit+0x454/0x8c8) [ 5668.776824] [] (__ieee80211_subif_start_xmit) from [] (ieee80211_subif_start_xmit+0x30/0x2fc) [ 5668.777343] [] (ieee80211_subif_start_xmit) from [] (dev_hard_start_xmit+0x80/0x118) ... by adding the missing spin_lock_init(). Reported-by: Pavel Machek Cc: Kalle Valo Signed-off-by: Cong Wang Acked-by: Pavel Machek Signed-off-by: Kalle Valo Signed-off-by: Pavel Machek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wl1251/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index bbf7604889b7..1c539c83e8cf 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -1571,6 +1571,7 @@ struct ieee80211_hw *wl1251_alloc_hw(void) wl->state = WL1251_STATE_OFF; mutex_init(&wl->mutex); + spin_lock_init(&wl->wl_lock); wl->tx_mgmt_frm_rate = DEFAULT_HW_GEN_TX_RATE; wl->tx_mgmt_frm_mod = DEFAULT_HW_GEN_MODULATION_TYPE; -- cgit v1.2.3 From 31decdcd83695b6a0e84794ecc470c454b4088f7 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 10 Aug 2017 08:06:18 +0200 Subject: lib/mpi: kunmap after finishing accessing buffer commit dea3eb8b452e36cf2dd572b0a797915ccf452ae6 upstream. Using sg_miter_start and sg_miter_next, the buffer of an SG is kmap'ed to *buff. The current code calls sg_miter_stop (and thus kunmap) on the SG entry before the last access of *buff. The patch moves the sg_miter_stop call after the last access to *buff to ensure that the memory pointed to by *buff is still mapped. Fixes: 4816c9406430 ("lib/mpi: Fix SG miter leak") Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- lib/mpi/mpicoder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 5a0f75a3bf01..eead4b339466 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -364,11 +364,11 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) } miter.consumed = lzeros; - sg_miter_stop(&miter); nbytes -= lzeros; nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { + sg_miter_stop(&miter); pr_info("MPI: mpi too large (%u bits)\n", nbits); return NULL; } @@ -376,6 +376,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) if (nbytes > 0) nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8); + sg_miter_stop(&miter); + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) -- cgit v1.2.3 From 12a70ccaa6868163487f9a5ad97e1a91a397b453 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 2 Aug 2017 19:50:14 +0200 Subject: xfrm: policy: check policy direction value commit 7bab09631c2a303f87a7eb7e3d69e888673b9b7e upstream. The 'dir' parameter in xfrm_migrate() is a user-controlled byte which is used as an array index. This can lead to an out-of-bound access, kernel lockup and DoS. Add a check for the 'dir' value. This fixes CVE-2017-11600. References: https://bugzilla.redhat.com/show_bug.cgi?id=1474928 Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)") Reported-by: "bo Zhang" Signed-off-by: Vladis Dronov Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e26b515f7794..8ce5711ea21b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3308,9 +3308,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_state *x_new[XFRM_MAX_DEPTH]; struct xfrm_migrate *mp; + /* Stage 0 - sanity checks */ if ((err = xfrm_migrate_check(m, num_migrate)) < 0) goto out; + if (dir >= XFRM_POLICY_MAX) { + err = -EINVAL; + goto out; + } + /* Stage 1 - find policy */ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; -- cgit v1.2.3 From 70df301a083c73353b2951e74f9ff6d682f72d63 Mon Sep 17 00:00:00 2001 From: "Xiangliang.Yu" Date: Wed, 16 Aug 2017 14:25:51 +0800 Subject: drm/ttm: Fix accounting error when fail to get pages for pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9afae2719273fa1d406829bf3498f82dbdba71c7 upstream. When fail to get needed page for pool, need to put allocated pages into pool. But current code has a miscalculation of allocated pages, correct it. Signed-off-by: Xiangliang.Yu Reviewed-by: Christian König Reviewed-by: Monk Liu Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index a37de5db5731..ddd6badd0eee 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -612,7 +612,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, } else { pr_err("Failed to fill pool (%p)\n", pool); /* If we have any pages left put them to the pool. */ - list_for_each_entry(p, &pool->list, lru) { + list_for_each_entry(p, &new_pages, lru) { ++cpages; } list_splice(&new_pages, &pool->list); -- cgit v1.2.3 From dd2342ad66659e759da0cba023fb13acb4b27406 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 16 May 2017 10:34:54 +0100 Subject: kvm: arm/arm64: Force reading uncached stage2 PGD commit 2952a6070e07ebdd5896f1f5b861acad677caded upstream. Make sure we don't use a cached value of the KVM stage2 PGD while resetting the PGD. Cc: Marc Zyngier Signed-off-by: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 0c060c5e844a..2206e0e00934 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -837,7 +837,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - pgd = kvm->arch.pgd; + pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; } spin_unlock(&kvm->mmu_lock); -- cgit v1.2.3 From d325f1f1e245f3fc19fa4008018a19d224328a63 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Sep 2017 18:55:33 +0200 Subject: epoll: fix race between ep_poll_callback(POLLFREE) and ep_free()/ep_remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 138e4ad67afd5c6c318b056b4d17c17f2c0ca5c0 upstream. The race was introduced by me in commit 971316f0503a ("epoll: ep_unregister_pollwait() can use the freed pwq->whead"). I did not realize that nothing can protect eventpoll after ep_poll_callback() sets ->whead = NULL, only whead->lock can save us from the race with ep_free() or ep_remove(). Move ->whead = NULL to the end of ep_poll_callback() and add the necessary barriers. TODO: cleanup the ewake/EPOLLEXCLUSIVE logic, it was confusing even before this patch. Hopefully this explains use-after-free reported by syzcaller: BUG: KASAN: use-after-free in debug_spin_lock_before ... _raw_spin_lock_irqsave+0x4a/0x60 kernel/locking/spinlock.c:159 ep_poll_callback+0x29f/0xff0 fs/eventpoll.c:1148 this is spin_lock(eventpoll->lock), ... Freed by task 17774: ... kfree+0xe8/0x2c0 mm/slub.c:3883 ep_free+0x22c/0x2a0 fs/eventpoll.c:865 Fixes: 971316f0503a ("epoll: ep_unregister_pollwait() can use the freed pwq->whead") Reported-by: 范龙飞 Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/eventpoll.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 10db91218933..3cbc30413add 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -523,8 +523,13 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq) wait_queue_head_t *whead; rcu_read_lock(); - /* If it is cleared by POLLFREE, it should be rcu-safe */ - whead = rcu_dereference(pwq->whead); + /* + * If it is cleared by POLLFREE, it should be rcu-safe. + * If we read NULL we need a barrier paired with + * smp_store_release() in ep_poll_callback(), otherwise + * we rely on whead->lock. + */ + whead = smp_load_acquire(&pwq->whead); if (whead) remove_wait_queue(whead, &pwq->wait); rcu_read_unlock(); @@ -1009,17 +1014,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct eventpoll *ep = epi->ep; int ewake = 0; - if ((unsigned long)key & POLLFREE) { - ep_pwq_from_wait(wait)->whead = NULL; - /* - * whead = NULL above can race with ep_remove_wait_queue() - * which can do another remove_wait_queue() after us, so we - * can't use __remove_wait_queue(). whead->lock is held by - * the caller. - */ - list_del_init(&wait->task_list); - } - spin_lock_irqsave(&ep->lock, flags); /* @@ -1101,10 +1095,26 @@ out_unlock: if (pwake) ep_poll_safewake(&ep->poll_wait); - if (epi->event.events & EPOLLEXCLUSIVE) - return ewake; + if (!(epi->event.events & EPOLLEXCLUSIVE)) + ewake = 1; + + if ((unsigned long)key & POLLFREE) { + /* + * If we race with ep_remove_wait_queue() it can miss + * ->whead = NULL and do another remove_wait_queue() after + * us, so we can't use __remove_wait_queue(). + */ + list_del_init(&wait->task_list); + /* + * ->whead != NULL protects us from the race with ep_free() + * or ep_remove(), ep_remove_wait_queue() takes whead->lock + * held by the caller. Once we nullify it, nothing protects + * ep/epi or even wait. + */ + smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); + } - return 1; + return ewake; } /* -- cgit v1.2.3 From 8a697a50c090053cf6c53a096972dd841332fe59 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 7 Sep 2017 08:35:53 +0200 Subject: Linux 4.9.48 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a0abbfc15a49..cfa188b427b1 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 47 +SUBLEVEL = 48 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From 2ea91c52ff5f8df5df3d5e2e16926049583c4cff Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 16 Aug 2017 10:53:20 +0800 Subject: usb: quirks: add delay init quirk for Corsair Strafe RGB keyboard commit de3af5bf259d7a0bfaac70441c8568ab5998d80c upstream. Corsair Strafe RGB keyboard has trouble to initialize: [ 1.679455] usb 3-6: new full-speed USB device number 4 using xhci_hcd [ 6.871136] usb 3-6: unable to read config index 0 descriptor/all [ 6.871138] usb 3-6: can't read configurations, error -110 [ 6.991019] usb 3-6: new full-speed USB device number 5 using xhci_hcd [ 12.246642] usb 3-6: unable to read config index 0 descriptor/all [ 12.246644] usb 3-6: can't read configurations, error -110 [ 12.366555] usb 3-6: new full-speed USB device number 6 using xhci_hcd [ 17.622145] usb 3-6: unable to read config index 0 descriptor/all [ 17.622147] usb 3-6: can't read configurations, error -110 [ 17.742093] usb 3-6: new full-speed USB device number 7 using xhci_hcd [ 22.997715] usb 3-6: unable to read config index 0 descriptor/all [ 22.997716] usb 3-6: can't read configurations, error -110 Although it may work after several times unpluging/pluging: [ 68.195240] usb 3-6: new full-speed USB device number 11 using xhci_hcd [ 68.337459] usb 3-6: New USB device found, idVendor=1b1c, idProduct=1b20 [ 68.337463] usb 3-6: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 68.337466] usb 3-6: Product: Corsair STRAFE RGB Gaming Keyboard [ 68.337468] usb 3-6: Manufacturer: Corsair [ 68.337470] usb 3-6: SerialNumber: 0F013021AEB8046755A93ED3F5001941 Tried three quirks: USB_QUIRK_DELAY_INIT, USB_QUIRK_NO_LPM and USB_QUIRK_DEVICE_QUALIFIER, user confirmed that USB_QUIRK_DELAY_INIT alone can workaround this issue. Hence add the quirk for Corsair Strafe RGB. BugLink: https://bugs.launchpad.net/bugs/1678477 Signed-off-by: Kai-Heng Feng Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 574da2b4529c..1ea5060dae69 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -217,6 +217,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Corsair Strafe RGB */ + { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Acer C120 LED Projector */ { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM }, -- cgit v1.2.3 From 773b93f4255f899fdfa417239f7b9ae2107f9793 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Tue, 29 Aug 2017 21:50:03 +0200 Subject: USB: serial: option: add support for D-Link DWM-157 C1 commit 169e86546f5712179709de23cd64bbb15f199fab upstream. This commit adds support (an ID, really) for D-Link DWM-157 hardware version C1 USB modem to option driver. According to manufacturer-provided Windows INF file the device has four serial ports: "D-Link HSPA+DataCard Diagnostics Interface" (interface 2; modem port), "D-Link HSPA+DataCard NMEA Device" (interface 3), "D-Link HSPA+DataCard Speech Port" (interface 4), "D-Link HSPA+DataCard Debug Port" (interface 5). usb-devices output: T: Bus=05 Lev=01 Prnt=01 Port=04 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2001 ProdID=7d0e Rev=03.00 S: Manufacturer=D-Link,Inc S: Product=D-Link DWM-157 C: #Ifs= 7 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=02 Prot=01 Driver=option I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 6 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Signed-off-by: Maciej S. Szmigiero Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index fe123153b1a5..2a9944326210 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2023,6 +2023,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */ + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d0e, 0xff) }, /* D-Link DWM-157 C1 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ -- cgit v1.2.3 From 99a22c84f51d9fff570beef1a8c788e0dc7a3a4d Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Fri, 25 Aug 2017 10:38:35 +0300 Subject: usb: Add device quirk for Logitech HD Pro Webcam C920-C commit a1279ef74eeeb5f627f091c71d80dd7ac766c99d upstream. Commit e0429362ab15 ("usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e") introduced quirk to workaround an issue with some Logitech webcams. Apparently model C920-C has the same issue so applying the same quirk as well. See aforementioned commit message for detailed explanation of the problem. Signed-off-by: Dmitry Fleytman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 1ea5060dae69..82806e311202 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -57,8 +57,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Logitech HD Pro Webcams C920 and C930e */ + /* Logitech HD Pro Webcams C920, C920-C and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ -- cgit v1.2.3 From 6b3b3a22ef206ba9c141e04c50b0f48762571bc9 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Thu, 24 Aug 2017 09:57:15 +0530 Subject: usb:xhci:Fix regression when ATI chipsets detected commit e6b422b88b46353cf596e0db6dc0e39d50d90d6e upstream. The following commit cause a regression on ATI chipsets. 'commit e788787ef4f9 ("usb:xhci:Add quirk for Certain failing HP keyboard on reset after resume")' This causes pinfo->smbus_dev to be wrongly set to NULL on systems with the ATI chipset that this function checks for first. Added conditional check for AMD chipsets to avoid the overwriting pinfo->smbus_dev. Reported-by: Ben Hutchings Fixes: e788787ef4f9 ("usb:xhci:Add quirk for Certain failing HP keyboard on reset after resume") cc: Nehal Shah Signed-off-by: Sandeep Singh Signed-off-by: Shyam Sundar S K Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 5f4ca7890435..58b9685eb21f 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -142,29 +142,30 @@ static int amd_chipset_sb_type_init(struct amd_chipset_info *pinfo) pinfo->sb_type.gen = AMD_CHIPSET_SB700; else if (rev >= 0x40 && rev <= 0x4f) pinfo->sb_type.gen = AMD_CHIPSET_SB800; - } - pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, - 0x145c, NULL); - if (pinfo->smbus_dev) { - pinfo->sb_type.gen = AMD_CHIPSET_TAISHAN; } else { pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS, NULL); - if (!pinfo->smbus_dev) { - pinfo->sb_type.gen = NOT_AMD_CHIPSET; - return 0; + if (pinfo->smbus_dev) { + rev = pinfo->smbus_dev->revision; + if (rev >= 0x11 && rev <= 0x14) + pinfo->sb_type.gen = AMD_CHIPSET_HUDSON2; + else if (rev >= 0x15 && rev <= 0x18) + pinfo->sb_type.gen = AMD_CHIPSET_BOLTON; + else if (rev >= 0x39 && rev <= 0x3a) + pinfo->sb_type.gen = AMD_CHIPSET_YANGTZE; + } else { + pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, + 0x145c, NULL); + if (pinfo->smbus_dev) { + rev = pinfo->smbus_dev->revision; + pinfo->sb_type.gen = AMD_CHIPSET_TAISHAN; + } else { + pinfo->sb_type.gen = NOT_AMD_CHIPSET; + return 0; + } } - - rev = pinfo->smbus_dev->revision; - if (rev >= 0x11 && rev <= 0x14) - pinfo->sb_type.gen = AMD_CHIPSET_HUDSON2; - else if (rev >= 0x15 && rev <= 0x18) - pinfo->sb_type.gen = AMD_CHIPSET_BOLTON; - else if (rev >= 0x39 && rev <= 0x3a) - pinfo->sb_type.gen = AMD_CHIPSET_YANGTZE; } - pinfo->sb_type.rev = rev; return 1; } -- cgit v1.2.3 From 80cdcd7f533585a62fa31f87a25ab5dbfd5e0e1a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 24 Aug 2017 11:38:36 -0500 Subject: USB: musb: fix external abort on suspend commit 082df8be455ade361748f0385aa6c9c8d07be167 upstream. Make sure that the controller is runtime resumed when system suspending to avoid an external abort when accessing the interrupt registers: Unhandled fault: external abort on non-linefetch (0x1008) at 0xd025840a ... [] (musb_default_readb) from [] (musb_disable_interrupts+0x84/0xa8) [] (musb_disable_interrupts) from [] (musb_suspend+0x38/0xb8) [] (musb_suspend) from [] (platform_pm_suspend+0x3c/0x64) This is easily reproduced on a BBB by enabling the peripheral port only (as the host port may enable the shared clock) and keeping it disconnected so that the controller is runtime suspended. (Well, you would also need to the not-yet-merged am33xx-suspend patches by Dave Gerlach to be able to suspend the BBB.) This is a regression that was introduced by commit 1c4d0b4e1806 ("usb: musb: Remove pm_runtime_set_irq_safe") which allowed the parent glue device to runtime suspend and thereby exposed a couple of older issues: Register accesses without explicitly making sure the controller is runtime resumed during suspend was first introduced by commit c338412b5ded ("usb: musb: unconditionally save and restore the context on suspend") in 3.14. Commit a1fc1920aaaa ("usb: musb: core: make sure musb is in RPM_ACTIVE on resume") later started setting the RPM status to active during resume, and this was also implicitly relying on the parent always being active. Since commit 71723f95463d ("PM / runtime: print error when activating a child to unactive parent") this now also results in the following warning: musb-hdrc musb-hdrc.0: runtime PM trying to activate child device musb-hdrc.0 but parent (47401400.usb) is not active This patch has been verified on 4.13-rc2, 4.12 and 4.9 using a BBB (the dsps glue would always be active also in 4.8). Fixes: c338412b5ded ("usb: musb: unconditionally save and restore the context on suspend") Fixes: a1fc1920aaaa ("usb: musb: core: make sure musb is in RPM_ACTIVE on resume") Fixes: 1c4d0b4e1806 ("usb: musb: Remove pm_runtime_set_irq_safe") Cc: Alan Stern Cc: Daniel Mack Cc: Dave Gerlach Cc: Rafael J. Wysocki Cc: Sebastian Andrzej Siewior Cc: Tony Lindgren Signed-off-by: Johan Hovold Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 261ed2ca28f9..a6b6b1cf1317 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2655,6 +2655,13 @@ static int musb_suspend(struct device *dev) { struct musb *musb = dev_to_musb(dev); unsigned long flags; + int ret; + + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } musb_platform_disable(musb); musb_generic_disable(musb); @@ -2703,14 +2710,6 @@ static int musb_resume(struct device *dev) if ((devctl & mask) != (musb->context.devctl & mask)) musb->port1_status = 0; - /* - * The USB HUB code expects the device to be in RPM_ACTIVE once it came - * out of suspend - */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - musb_start(musb); spin_lock_irqsave(&musb->lock, flags); @@ -2720,6 +2719,9 @@ static int musb_resume(struct device *dev) error); spin_unlock_irqrestore(&musb->lock, flags); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + return 0; } -- cgit v1.2.3 From afcfe0661a747bf1f447dd9f24be4ad67f1cf018 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 10 Aug 2017 15:42:22 -0700 Subject: USB: core: Avoid race of async_completed() w/ usbdev_release() commit ed62ca2f4f51c17841ea39d98c0c409cb53a3e10 upstream. While running reboot tests w/ a specific set of USB devices (and slub_debug enabled), I found that once every few hours my device would be crashed with a stack that looked like this: [ 14.012445] BUG: spinlock bad magic on CPU#0, modprobe/2091 [ 14.012460] lock: 0xffffffc0cb055978, .magic: ffffffc0, .owner: cryption contexts: %lu/%lu [ 14.012460] /1025536097, .owner_cpu: 0 [ 14.012466] CPU: 0 PID: 2091 Comm: modprobe Not tainted 4.4.79 #352 [ 14.012468] Hardware name: Google Kevin (DT) [ 14.012471] Call trace: [ 14.012483] [<....>] dump_backtrace+0x0/0x160 [ 14.012487] [<....>] show_stack+0x20/0x28 [ 14.012494] [<....>] dump_stack+0xb4/0xf0 [ 14.012500] [<....>] spin_dump+0x8c/0x98 [ 14.012504] [<....>] spin_bug+0x30/0x3c [ 14.012508] [<....>] do_raw_spin_lock+0x40/0x164 [ 14.012515] [<....>] _raw_spin_lock_irqsave+0x64/0x74 [ 14.012521] [<....>] __wake_up+0x2c/0x60 [ 14.012528] [<....>] async_completed+0x2d0/0x300 [ 14.012534] [<....>] __usb_hcd_giveback_urb+0xc4/0x138 [ 14.012538] [<....>] usb_hcd_giveback_urb+0x54/0xf0 [ 14.012544] [<....>] xhci_irq+0x1314/0x1348 [ 14.012548] [<....>] usb_hcd_irq+0x40/0x50 [ 14.012553] [<....>] handle_irq_event_percpu+0x1b4/0x3f0 [ 14.012556] [<....>] handle_irq_event+0x4c/0x7c [ 14.012561] [<....>] handle_fasteoi_irq+0x158/0x1c8 [ 14.012564] [<....>] generic_handle_irq+0x30/0x44 [ 14.012568] [<....>] __handle_domain_irq+0x90/0xbc [ 14.012572] [<....>] gic_handle_irq+0xcc/0x18c Investigation using kgdb() found that the wait queue that was passed into wake_up() had been freed (it was filled with slub_debug poison). I analyzed and instrumented the code and reproduced. My current belief is that this is happening: 1. async_completed() is called (from IRQ). Moves "as" onto the completed list. 2. On another CPU, proc_reapurbnonblock_compat() calls async_getcompleted(). Blocks on spinlock. 3. async_completed() releases the lock; keeps running; gets blocked midway through wake_up(). 4. proc_reapurbnonblock_compat() => async_getcompleted() gets the lock; removes "as" from completed list and frees it. 5. usbdev_release() is called. Frees "ps". 6. async_completed() finally continues running wake_up(). ...but wake_up() has a pointer to the freed "ps". The instrumentation that led me to believe this was based on adding some trace_printk() calls in a select few functions and then using kdb's "ftdump" at crash time. The trace follows (NOTE: in the trace below I cheated a little bit and added a udelay(1000) in async_completed() after releasing the spinlock because I wanted it to trigger quicker): <...>-2104 0d.h2 13759034us!: async_completed at start: as=ffffffc0cc638200 mtpd-2055 3.... 13759356us : async_getcompleted before spin_lock_irqsave mtpd-2055 3d..1 13759362us : async_getcompleted after list_del_init: as=ffffffc0cc638200 mtpd-2055 3.... 13759371us+: proc_reapurbnonblock_compat: free_async(ffffffc0cc638200) mtpd-2055 3.... 13759422us+: async_getcompleted before spin_lock_irqsave mtpd-2055 3.... 13759479us : usbdev_release at start: ps=ffffffc0cc042080 mtpd-2055 3.... 13759487us : async_getcompleted before spin_lock_irqsave mtpd-2055 3.... 13759497us!: usbdev_release after kfree(ps): ps=ffffffc0cc042080 <...>-2104 0d.h2 13760294us : async_completed before wake_up(): as=ffffffc0cc638200 To fix this problem we can just move the wake_up() under the ps->lock. There should be no issues there that I'm aware of. Signed-off-by: Douglas Anderson Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 840930b014f6..c8075eb3db26 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -629,6 +629,8 @@ static void async_completed(struct urb *urb) if (as->status < 0 && as->bulk_addr && as->status != -ECONNRESET && as->status != -ENOENT) cancel_bulk_urbs(ps, as->bulk_addr); + + wake_up(&ps->wait); spin_unlock(&ps->lock); if (signr) { @@ -636,8 +638,6 @@ static void async_completed(struct urb *urb) put_pid(pid); put_cred(cred); } - - wake_up(&ps->wait); } static void destroy_async(struct usb_dev_state *ps, struct list_head *list) -- cgit v1.2.3 From e58b04fb5b0bf2847f28b8e0cd879fabfec1355b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Aug 2017 14:34:16 +0100 Subject: staging/rts5208: fix incorrect shift to extract upper nybble commit 34ff1bf4920471cff66775dc39537b15c5f0feff upstream. The mask of sns_key_info1 suggests the upper nybble is being extracted however the following shift of 8 bits is too large and always results in 0. Fix this by shifting only by 4 bits to correctly get the upper nybble. Detected by CoverityScan, CID#142891 ("Operands don't affect result") Fixes: fa590c222fba ("staging: rts5208: add support for rts5208 and rts5288") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rts5208/rtsx_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rts5208/rtsx_scsi.c b/drivers/staging/rts5208/rtsx_scsi.c index becb4bba166c..b3790334fd3f 100644 --- a/drivers/staging/rts5208/rtsx_scsi.c +++ b/drivers/staging/rts5208/rtsx_scsi.c @@ -414,7 +414,7 @@ void set_sense_data(struct rtsx_chip *chip, unsigned int lun, u8 err_code, sense->ascq = ascq; if (sns_key_info0 != 0) { sense->sns_key_info[0] = SKSV | sns_key_info0; - sense->sns_key_info[1] = (sns_key_info1 & 0xf0) >> 8; + sense->sns_key_info[1] = (sns_key_info1 & 0xf0) >> 4; sense->sns_key_info[2] = sns_key_info1 & 0x0f; } } -- cgit v1.2.3 From 177d84e3a72abfc646fcc63e331b03dafa0ac943 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 21 Jul 2017 00:24:17 +0900 Subject: iio: adc: ti-ads1015: fix incorrect data rate setting update commit 0d106b74c558e3000aa0e058b4725cacb70ce77a upstream. The ti-ads1015 driver has eight iio voltage channels and each iio channel can hold own sampling frequency information. The ADS1015 device only have a single config register which contains an input multiplexer selection, PGA and data rate settings. So the driver should load the correct settings when the input multiplexer selection is changed. However, regardless of which channlel is currently selected, changing any iio channel's sampling frequency information immediately overwrites the current data rate setting in the config register. It breaks the current data rate setting if the different channel's sampling frequency information is changed because the data rate setting is not reloaded when the input multiplexer is switched. This removes the unexpected config register update and correctly load the data rate setting before getting adc result. Cc: Daniel Baluta Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads1015.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index cde6f130a99a..33bda2c5317c 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -251,9 +251,11 @@ int ads1015_get_adc_result(struct ads1015_data *data, int chan, int *val) ret = regmap_update_bits_check(data->regmap, ADS1015_CFG_REG, ADS1015_CFG_MUX_MASK | - ADS1015_CFG_PGA_MASK, + ADS1015_CFG_PGA_MASK | + ADS1015_CFG_DR_MASK, chan << ADS1015_CFG_MUX_SHIFT | - pga << ADS1015_CFG_PGA_SHIFT, + pga << ADS1015_CFG_PGA_SHIFT | + dr << ADS1015_CFG_DR_SHIFT, &change); if (ret < 0) return ret; @@ -324,25 +326,16 @@ static int ads1015_set_scale(struct ads1015_data *data, int chan, static int ads1015_set_data_rate(struct ads1015_data *data, int chan, int rate) { - int i, ret, rindex = -1; + int i; - for (i = 0; i < ARRAY_SIZE(ads1015_data_rate); i++) + for (i = 0; i < ARRAY_SIZE(ads1015_data_rate); i++) { if (data->data_rate[i] == rate) { - rindex = i; - break; + data->channel_data[chan].data_rate = i; + return 0; } - if (rindex < 0) - return -EINVAL; - - ret = regmap_update_bits(data->regmap, ADS1015_CFG_REG, - ADS1015_CFG_DR_MASK, - rindex << ADS1015_CFG_DR_SHIFT); - if (ret < 0) - return ret; - - data->channel_data[chan].data_rate = rindex; + } - return 0; + return -EINVAL; } static int ads1015_read_raw(struct iio_dev *indio_dev, -- cgit v1.2.3 From 115af6c3b155a2e6d1d9c7b3e1db81fbf8d48727 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 21 Jul 2017 00:24:18 +0900 Subject: iio: adc: ti-ads1015: fix scale information for ADS1115 commit 8d0e8e795623bd6229cf48bb7777a1c456c370ed upstream. The ti-ads1015 driver supports ADS1015 and ADS1115 devices. The same scale information is used for both devices in this driver, however they have actually different values and the ADS1115's one is not correct. These devices have the same full-scale input voltage range for each PGA selection. So instead of adding another hardcoded scale information, compute a correct scale on demand from each device's resolution. Cc: Daniel Baluta Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads1015.c | 48 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 33bda2c5317c..5b1ba5585ffd 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -80,18 +80,12 @@ static const unsigned int ads1115_data_rate[] = { 8, 16, 32, 64, 128, 250, 475, 860 }; -static const struct { - int scale; - int uscale; -} ads1015_scale[] = { - {3, 0}, - {2, 0}, - {1, 0}, - {0, 500000}, - {0, 250000}, - {0, 125000}, - {0, 125000}, - {0, 125000}, +/* + * Translation from PGA bits to full-scale positive and negative input voltage + * range in mV + */ +static int ads1015_fullscale_range[] = { + 6144, 4096, 2048, 1024, 512, 256, 256, 256 }; #define ADS1015_V_CHAN(_chan, _addr) { \ @@ -299,17 +293,20 @@ err: return IRQ_HANDLED; } -static int ads1015_set_scale(struct ads1015_data *data, int chan, +static int ads1015_set_scale(struct ads1015_data *data, + struct iio_chan_spec const *chan, int scale, int uscale) { int i, ret, rindex = -1; + int fullscale = div_s64((scale * 1000000LL + uscale) << + (chan->scan_type.realbits - 1), 1000000); - for (i = 0; i < ARRAY_SIZE(ads1015_scale); i++) - if (ads1015_scale[i].scale == scale && - ads1015_scale[i].uscale == uscale) { + for (i = 0; i < ARRAY_SIZE(ads1015_fullscale_range); i++) { + if (ads1015_fullscale_range[i] == fullscale) { rindex = i; break; } + } if (rindex < 0) return -EINVAL; @@ -319,7 +316,7 @@ static int ads1015_set_scale(struct ads1015_data *data, int chan, if (ret < 0) return ret; - data->channel_data[chan].pga = rindex; + data->channel_data[chan->address].pga = rindex; return 0; } @@ -377,9 +374,9 @@ static int ads1015_read_raw(struct iio_dev *indio_dev, } case IIO_CHAN_INFO_SCALE: idx = data->channel_data[chan->address].pga; - *val = ads1015_scale[idx].scale; - *val2 = ads1015_scale[idx].uscale; - ret = IIO_VAL_INT_PLUS_MICRO; + *val = ads1015_fullscale_range[idx]; + *val2 = chan->scan_type.realbits - 1; + ret = IIO_VAL_FRACTIONAL_LOG2; break; case IIO_CHAN_INFO_SAMP_FREQ: idx = data->channel_data[chan->address].data_rate; @@ -406,7 +403,7 @@ static int ads1015_write_raw(struct iio_dev *indio_dev, mutex_lock(&data->lock); switch (mask) { case IIO_CHAN_INFO_SCALE: - ret = ads1015_set_scale(data, chan->address, val, val2); + ret = ads1015_set_scale(data, chan, val, val2); break; case IIO_CHAN_INFO_SAMP_FREQ: ret = ads1015_set_data_rate(data, chan->address, val); @@ -438,7 +435,10 @@ static const struct iio_buffer_setup_ops ads1015_buffer_setup_ops = { .validate_scan_mask = &iio_validate_scan_mask_onehot, }; -static IIO_CONST_ATTR(scale_available, "3 2 1 0.5 0.25 0.125"); +static IIO_CONST_ATTR_NAMED(ads1015_scale_available, scale_available, + "3 2 1 0.5 0.25 0.125"); +static IIO_CONST_ATTR_NAMED(ads1115_scale_available, scale_available, + "0.1875 0.125 0.0625 0.03125 0.015625 0.007813"); static IIO_CONST_ATTR_NAMED(ads1015_sampling_frequency_available, sampling_frequency_available, "128 250 490 920 1600 2400 3300"); @@ -446,7 +446,7 @@ static IIO_CONST_ATTR_NAMED(ads1115_sampling_frequency_available, sampling_frequency_available, "8 16 32 64 128 250 475 860"); static struct attribute *ads1015_attributes[] = { - &iio_const_attr_scale_available.dev_attr.attr, + &iio_const_attr_ads1015_scale_available.dev_attr.attr, &iio_const_attr_ads1015_sampling_frequency_available.dev_attr.attr, NULL, }; @@ -456,7 +456,7 @@ static const struct attribute_group ads1015_attribute_group = { }; static struct attribute *ads1115_attributes[] = { - &iio_const_attr_scale_available.dev_attr.attr, + &iio_const_attr_ads1115_scale_available.dev_attr.attr, &iio_const_attr_ads1115_sampling_frequency_available.dev_attr.attr, NULL, }; -- cgit v1.2.3 From c72ad1a4fdf044c5253a72bc413e1869d6b16f39 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 21 Jul 2017 00:24:19 +0900 Subject: iio: adc: ti-ads1015: enable conversion when CONFIG_PM is not set commit e8245c68350104b6022b6783719e843d69ea7c43 upstream. The ADS1015 device have two operating modes, continuous conversion mode and single-shot mode. This driver assumes that the continuous conversion mode is selected by runtime resume callback when the ADC result is requested. If CONFIG_PM is disabled, the device is always in the default single-shot mode and no one begins a single conversion. So the conversion register doesn't contain valid ADC result. Fix it by changing the continuous mode in probe function. Cc: Daniel Baluta Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads1015.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 5b1ba5585ffd..c6eeb0cf7868 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -617,6 +617,13 @@ static int ads1015_probe(struct i2c_client *client, dev_err(&client->dev, "iio triggered buffer setup failed\n"); return ret; } + + ret = regmap_update_bits(data->regmap, ADS1015_CFG_REG, + ADS1015_CFG_MOD_MASK, + ADS1015_CONTINUOUS << ADS1015_CFG_MOD_SHIFT); + if (ret) + return ret; + ret = pm_runtime_set_active(&client->dev); if (ret) goto err_buffer_cleanup; -- cgit v1.2.3 From 1ed4565b7c7bb73d802800c333244498f0993274 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 21 Jul 2017 00:24:20 +0900 Subject: iio: adc: ti-ads1015: avoid getting stale result after runtime resume commit 73e3e3fc50de50cfd68e945d85679c983ed31bd9 upstream. This driver assumes that the device is operating in the continuous conversion mode which performs the conversion continuously. So this driver doesn't insert a wait time before reading the conversion register if the configuration is not changed from a previous request. This assumption is broken if the device is runtime suspended and entered a power-down state. The forthcoming request causes reading a stale result from the conversion register as the device is runtime resumed just before. Fix it by adding a flag to detect that condition and insert a necessary wait time. Cc: Daniel Baluta Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads1015.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index c6eeb0cf7868..abd3a10f4b1c 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -176,6 +176,12 @@ struct ads1015_data { struct ads1015_channel_data channel_data[ADS1015_CHANNELS]; unsigned int *data_rate; + /* + * Set to true when the ADC is switched to the continuous-conversion + * mode and exits from a power-down state. This flag is used to avoid + * getting the stale result from the conversion register. + */ + bool conv_invalid; }; static bool ads1015_is_writeable_reg(struct device *dev, unsigned int reg) @@ -254,9 +260,10 @@ int ads1015_get_adc_result(struct ads1015_data *data, int chan, int *val) if (ret < 0) return ret; - if (change) { + if (change || data->conv_invalid) { conv_time = DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr]); usleep_range(conv_time, conv_time + 1); + data->conv_invalid = false; } return regmap_read(data->regmap, ADS1015_CONV_REG, val); @@ -624,6 +631,8 @@ static int ads1015_probe(struct i2c_client *client, if (ret) return ret; + data->conv_invalid = true; + ret = pm_runtime_set_active(&client->dev); if (ret) goto err_buffer_cleanup; @@ -679,10 +688,15 @@ static int ads1015_runtime_resume(struct device *dev) { struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); struct ads1015_data *data = iio_priv(indio_dev); + int ret; - return regmap_update_bits(data->regmap, ADS1015_CFG_REG, + ret = regmap_update_bits(data->regmap, ADS1015_CFG_REG, ADS1015_CFG_MOD_MASK, ADS1015_CONTINUOUS << ADS1015_CFG_MOD_SHIFT); + if (!ret) + data->conv_invalid = true; + + return ret; } #endif -- cgit v1.2.3 From ff4a98e3bcb3756544b31a2610a24db77f5a20b4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 21 Jul 2017 00:24:21 +0900 Subject: iio: adc: ti-ads1015: don't return invalid value from buffer setup callbacks commit a6fe5e52d5ecfc98530034d6c9db73777cf41ede upstream. pm_runtime_get_sync() and pm_runtime_put_autosuspend() return 0 on success, 1 if the device's runtime PM status was already requested status or error code on failure. So a positive return value doesn't indicate an error condition. However, any non-zero return values from buffer preenable and postdisable callbacks are recognized as an error and this driver reuses the return value from pm_runtime_get_sync() and pm_runtime_put_autosuspend() in these callbacks. This change fixes the false error detections. Cc: Daniel Baluta Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads1015.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index abd3a10f4b1c..f943ed3f71d9 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -234,7 +234,7 @@ static int ads1015_set_power_state(struct ads1015_data *data, bool on) ret = pm_runtime_put_autosuspend(dev); } - return ret; + return ret < 0 ? ret : 0; } static -- cgit v1.2.3 From ffb58b875d241f4121335a7668da743f26c0514b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 21 Jul 2017 00:24:22 +0900 Subject: iio: adc: ti-ads1015: add adequate wait time to get correct conversion commit 4744d4e2afebf9644a439da9ca73d822fdd67bd9 upstream. This driver assumes that the device is operating in the continuous conversion mode which performs the conversion continuously. So this driver inserts a wait time before reading the conversion register if the configuration is changed from a previous request. Currently, the wait time is only the period required for a single conversion that is calculated as the reciprocal of the sampling frequency. However we also need to wait for the the previous conversion to complete. Otherwise we probably get the conversion result for the previous configuration when the sampling frequency is lower. Cc: Daniel Baluta Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads1015.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index f943ed3f71d9..472641fc890c 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -241,27 +241,34 @@ static int ads1015_get_adc_result(struct ads1015_data *data, int chan, int *val) { int ret, pga, dr, conv_time; - bool change; + unsigned int old, mask, cfg; if (chan < 0 || chan >= ADS1015_CHANNELS) return -EINVAL; + ret = regmap_read(data->regmap, ADS1015_CFG_REG, &old); + if (ret) + return ret; + pga = data->channel_data[chan].pga; dr = data->channel_data[chan].data_rate; + mask = ADS1015_CFG_MUX_MASK | ADS1015_CFG_PGA_MASK | + ADS1015_CFG_DR_MASK; + cfg = chan << ADS1015_CFG_MUX_SHIFT | pga << ADS1015_CFG_PGA_SHIFT | + dr << ADS1015_CFG_DR_SHIFT; - ret = regmap_update_bits_check(data->regmap, ADS1015_CFG_REG, - ADS1015_CFG_MUX_MASK | - ADS1015_CFG_PGA_MASK | - ADS1015_CFG_DR_MASK, - chan << ADS1015_CFG_MUX_SHIFT | - pga << ADS1015_CFG_PGA_SHIFT | - dr << ADS1015_CFG_DR_SHIFT, - &change); - if (ret < 0) + cfg = (old & ~mask) | (cfg & mask); + + ret = regmap_write(data->regmap, ADS1015_CFG_REG, cfg); + if (ret) return ret; - if (change || data->conv_invalid) { - conv_time = DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr]); + if (old != cfg || data->conv_invalid) { + int dr_old = (old & ADS1015_CFG_DR_MASK) >> + ADS1015_CFG_DR_SHIFT; + + conv_time = DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr_old]); + conv_time += DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr]); usleep_range(conv_time, conv_time + 1); data->conv_invalid = false; } -- cgit v1.2.3 From 5555eb956edc3c89f59ed7636b29a3a4d0dd8063 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 29 Aug 2017 21:23:49 +0200 Subject: driver core: bus: Fix a potential double free commit 0f9b011d3321ca1079c7a46c18cb1956fbdb7bcb upstream. The .release function of driver_ktype is 'driver_release()'. This function frees the container_of this kobject. So, this memory must not be freed explicitly in the error handling path of 'bus_add_driver()'. Otherwise a double free will occur. Signed-off-by: Christophe JAILLET Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 6470eb8088f4..e32a74eb28a3 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -736,7 +736,7 @@ int bus_add_driver(struct device_driver *drv) out_unregister: kobject_put(&priv->kobj); - kfree(drv->p); + /* drv->p is freed in driver_release() */ drv->p = NULL; out_put_bus: bus_put(bus); -- cgit v1.2.3 From a22d561178ee2e96ca4bf519669d533144e7145c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Jun 2016 16:11:13 +0300 Subject: intel_th: pci: Add Cannon Lake PCH-H support commit 84331e1390b6378a5129a3678c87a42c6f697d29 upstream. This adds Intel(R) Trace Hub PCI ID for Cannon Lake PCH-H. Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 0bba3842336e..3d8cc1d178f3 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -85,6 +85,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6), .driver_data = (kernel_ulong_t)0, }, + { + /* Cannon Lake H */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa326), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; -- cgit v1.2.3 From d8b992d93555af94d31264be3fbe73aca1945947 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Jun 2016 16:11:31 +0300 Subject: intel_th: pci: Add Cannon Lake PCH-LP support commit efb3669e14fe17d0ec4ecf57d0365039fe726f59 upstream. This adds Intel(R) Trace Hub PCI ID for Cannon Lake PCH-LP. Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 3d8cc1d178f3..63b5db4e4070 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -90,6 +90,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa326), .driver_data = (kernel_ulong_t)0, }, + { + /* Cannon Lake LP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; -- cgit v1.2.3 From 90a1e2e19ed731d66f95f5471c99d0e16a92021d Mon Sep 17 00:00:00 2001 From: Rakesh Pillai Date: Wed, 2 Aug 2017 16:03:37 +0530 Subject: ath10k: fix memory leak in rx ring buffer allocation commit f35a7f91f66af528b3ee1921de16bea31d347ab0 upstream. The rx ring buffers are added to a hash table if firmware support full rx reorder. If the full rx reorder support flag is not set before allocating the rx ring buffers, none of the buffers are added to the hash table. There is a race condition between rx ring refill and rx buffer replenish from napi poll. The interrupts are enabled in hif start, before the rx ring is refilled during init. We replenish buffers from napi poll due to the interrupts which get enabled after hif start. Hence before the entire rx ring is refilled during the init, the napi poll replenishes a few buffers in steps of 100 buffers per attempt. During this rx ring replenish from napi poll, the rx reorder flag has not been set due to which the replenished buffers are not added to the hash table Set the rx full reorder support flag before we allocate the rx ring buffer to avoid the memory leak. Signed-off-by: Rakesh Pillai Signed-off-by: Kalle Valo Cc: Christian Lamparter Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 972b5e224d5d..366d3dcb8e9d 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1852,6 +1852,12 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_wmi_detach; } + /* If firmware indicates Full Rx Reorder support it must be used in a + * slightly different manner. Let HTT code know. + */ + ar->htt.rx_ring.in_ord_rx = !!(test_bit(WMI_SERVICE_RX_FULL_REORDER, + ar->wmi.svc_map)); + status = ath10k_htt_rx_alloc(&ar->htt); if (status) { ath10k_err(ar, "failed to alloc htt rx: %d\n", status); @@ -1964,12 +1970,6 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, } } - /* If firmware indicates Full Rx Reorder support it must be used in a - * slightly different manner. Let HTT code know. - */ - ar->htt.rx_ring.in_ord_rx = !!(test_bit(WMI_SERVICE_RX_FULL_REORDER, - ar->wmi.svc_map)); - status = ath10k_htt_rx_ring_refill(ar); if (status) { ath10k_err(ar, "failed to refill htt rx ring: %d\n", status); -- cgit v1.2.3 From ed7a384a904f1d5031b4d10173ef32ab405333a1 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Tue, 18 Jul 2017 17:20:36 -0700 Subject: Input: trackpoint - assume 3 buttons when buttons detection fails commit 293b915fd9bebf33cdc906516fb28d54649a25ac upstream. Trackpoint buttons detection fails on ThinkPad 570 and 470 series, this makes the middle button of the trackpoint to not being recogized. As I don't believe there is any trackpoint with less than 3 buttons this patch just assumes three buttons when the extended button information read fails. Signed-off-by: Oscar Campos Acked-by: Peter Hutterer Signed-off-by: Dmitry Torokhov Signed-off-by: Aaron Ma Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/trackpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index ce6ff9b301bb..7e2dc5e56632 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -381,8 +381,8 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) return 0; if (trackpoint_read(&psmouse->ps2dev, TP_EXT_BTN, &button_info)) { - psmouse_warn(psmouse, "failed to get extended button data\n"); - button_info = 0; + psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); + button_info = 0x33; } psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); -- cgit v1.2.3 From b48f7183c64cd12b7451ea677f964e10acd6a52b Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sun, 30 Jul 2017 09:02:19 +0100 Subject: rtlwifi: rtl_pci_probe: Fix fail path of _rtl_pci_find_adapter commit fc81bab5eeb103711925d7510157cf5cd2b153f4 upstream. _rtl_pci_find_adapter fail path will jump to label fail3 for unsupported adapter types. However, on course for fail3 there will be call rtl_deinit_core before rtl_init_core. For the inclusion of checking pci_iounmap this fail can be moved to fail2. Fixes [ 4.492963] BUG: unable to handle kernel NULL pointer dereference at (null) [ 4.493067] IP: rtl_deinit_core+0x31/0x90 [rtlwifi] Signed-off-by: Malcolm Priestley Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 5be4fc96002d..75ffeaa54ed8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -2269,7 +2269,7 @@ int rtl_pci_probe(struct pci_dev *pdev, /* find adapter */ if (!_rtl_pci_find_adapter(pdev, hw)) { err = -ENODEV; - goto fail3; + goto fail2; } /* Init IO handler */ @@ -2339,10 +2339,10 @@ fail3: pci_set_drvdata(pdev, NULL); rtl_deinit_core(hw); +fail2: if (rtlpriv->io.pci_mem_start != 0) pci_iounmap(pdev, (void __iomem *)rtlpriv->io.pci_mem_start); -fail2: pci_release_regions(pdev); complete(&rtlpriv->firmware_loading_complete); -- cgit v1.2.3 From 747562619512d1fd9ad99d0836f692d47f9b2853 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Tue, 8 Aug 2017 14:09:02 +0300 Subject: Bluetooth: Add support of 13d3:3494 RTL8723BE device commit a81d72d2002d6a932bd83022cbf8c442b1b97512 upstream. T: Bus=02 Lev=01 Prnt=01 Port=03 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3494 Rev= 2.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index dd220fad366c..74e677ac8e37 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -342,6 +342,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3410), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3416), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3459), .driver_info = BTUSB_REALTEK }, + { USB_DEVICE(0x13d3, 0x3494), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8821AE Bluetooth devices */ { USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK }, -- cgit v1.2.3 From 23b7d4f52b694a0d25ad1f619e90a482898efc85 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 16 Aug 2017 08:47:38 +0300 Subject: iwlwifi: pci: add new PCI ID for 7265D commit 3f7a5e13e85026b6e460bbd6e87f87379421d272 upstream. We have a new PCI subsystem ID for 7265D. Add it to the list. Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 2f8134b2a504..177fd5be2811 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -429,6 +429,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095B, 0x520A, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9000, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9400, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x9E10, iwl7265_2ac_cfg)}, /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, -- cgit v1.2.3 From 5c23d3ed1190eb91ebb208d2fb8414b92b566a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 3 Aug 2017 10:30:06 +0100 Subject: dlm: avoid double-free on error path in dlm_device_{register,unregister} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 55acdd926f6b21a5cdba23da98a48aedf19ac9c3 upstream. Can be reproduced when running dlm_controld (tested on 4.4.x, 4.12.4): # seq 1 100 | xargs -P0 -n1 dlm_tool join # seq 1 100 | xargs -P0 -n1 dlm_tool leave misc_register fails due to duplicate sysfs entry, which causes dlm_device_register to free ls->ls_device.name. In dlm_device_deregister the name was freed again, causing memory corruption. According to the comment in dlm_device_deregister the name should've been set to NULL when registration fails, so this patch does that. sysfs: cannot create duplicate filename '/dev/char/10:1' ------------[ cut here ]------------ warning: cpu: 1 pid: 4450 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x56/0x70 modules linked in: msr rfcomm dlm ccm bnep dm_crypt uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core videodev btusb media btrtl btbcm btintel bluetooth ecdh_generic intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm snd_hda_codec_hdmi irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel thinkpad_acpi pcbc nvram snd_seq_midi snd_seq_midi_event aesni_intel snd_hda_codec_realtek snd_hda_codec_generic snd_rawmidi aes_x86_64 crypto_simd glue_helper snd_hda_intel snd_hda_codec cryptd intel_cstate arc4 snd_hda_core snd_seq snd_seq_device snd_hwdep iwldvm intel_rapl_perf mac80211 joydev input_leds iwlwifi serio_raw cfg80211 snd_pcm shpchp snd_timer snd mac_hid mei_me lpc_ich mei soundcore sunrpc parport_pc ppdev lp parport autofs4 i915 psmouse e1000e ahci libahci i2c_algo_bit sdhci_pci ptp drm_kms_helper sdhci pps_core syscopyarea sysfillrect sysimgblt fb_sys_fops drm wmi video cpu: 1 pid: 4450 comm: dlm_test.exe not tainted 4.12.4-041204-generic hardware name: lenovo 232425u/232425u, bios g2et82ww (2.02 ) 09/11/2012 task: ffff96b0cbabe140 task.stack: ffffb199027d0000 rip: 0010:sysfs_warn_dup+0x56/0x70 rsp: 0018:ffffb199027d3c58 eflags: 00010282 rax: 0000000000000038 rbx: ffff96b0e2c49158 rcx: 0000000000000006 rdx: 0000000000000000 rsi: 0000000000000086 rdi: ffff96b15e24dcc0 rbp: ffffb199027d3c70 r08: 0000000000000001 r09: 0000000000000721 r10: ffffb199027d3c00 r11: 0000000000000721 r12: ffffb199027d3cd1 r13: ffff96b1592088f0 r14: 0000000000000001 r15: ffffffffffffffef fs: 00007f78069c0700(0000) gs:ffff96b15e240000(0000) knlgs:0000000000000000 cs: 0010 ds: 0000 es: 0000 cr0: 0000000080050033 cr2: 000000178625ed28 cr3: 0000000091d3e000 cr4: 00000000001406e0 call trace: sysfs_do_create_link_sd.isra.2+0x9e/0xb0 sysfs_create_link+0x25/0x40 device_add+0x5a9/0x640 device_create_groups_vargs+0xe0/0xf0 device_create_with_groups+0x3f/0x60 ? snprintf+0x45/0x70 misc_register+0x140/0x180 device_write+0x6a8/0x790 [dlm] __vfs_write+0x37/0x160 ? apparmor_file_permission+0x1a/0x20 ? security_file_permission+0x3b/0xc0 vfs_write+0xb5/0x1a0 sys_write+0x55/0xc0 ? sys_fcntl+0x5d/0xb0 entry_syscall_64_fastpath+0x1e/0xa9 rip: 0033:0x7f78083454bd rsp: 002b:00007f78069bbd30 eflags: 00000293 orig_rax: 0000000000000001 rax: ffffffffffffffda rbx: 0000000000000006 rcx: 00007f78083454bd rdx: 000000000000009c rsi: 00007f78069bee00 rdi: 0000000000000005 rbp: 00007f77f8000a20 r08: 000000000000fcf0 r09: 0000000000000032 r10: 0000000000000024 r11: 0000000000000293 r12: 00007f78069bde00 r13: 00007f78069bee00 r14: 000000000000000a r15: 00007f78069bbd70 code: 85 c0 48 89 c3 74 12 b9 00 10 00 00 48 89 c2 31 f6 4c 89 ef e8 2c c8 ff ff 4c 89 e2 48 89 de 48 c7 c7 b0 8e 0c a8 e8 41 e8 ed ff <0f> ff 48 89 df e8 00 d5 f4 ff 5b 41 5c 41 5d 5d c3 66 0f 1f 84 ---[ end trace 40412246357cc9e0 ]--- dlm: 59f24629-ae39-44e2-9030-397ebc2eda26: leaving the lockspace group... bug: unable to handle kernel null pointer dereference at 0000000000000001 ip: [] kmem_cache_alloc+0x7a/0x140 pgd 0 oops: 0000 [#1] smp modules linked in: dlm 8021q garp mrp stp llc openvswitch nf_defrag_ipv6 nf_conntrack libcrc32c iptable_filter dm_multipath crc32_pclmul dm_mod aesni_intel psmouse aes_x86_64 sg ablk_helper cryptd lrw gf128mul glue_helper i2c_piix4 nls_utf8 tpm_tis tpm isofs nfsd auth_rpcgss oid_registry nfs_acl lockd grace sunrpc xen_wdt ip_tables x_tables autofs4 hid_generic usbhid hid sr_mod cdrom sd_mod ata_generic pata_acpi 8139too serio_raw ata_piix 8139cp mii uhci_hcd ehci_pci ehci_hcd libata scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh_alua scsi_mod ipv6 cpu: 0 pid: 394 comm: systemd-udevd tainted: g w 4.4.0+0 #1 hardware name: xen hvm domu, bios 4.7.2-2.2 05/11/2017 task: ffff880002410000 ti: ffff88000243c000 task.ti: ffff88000243c000 rip: e030:[] [] kmem_cache_alloc+0x7a/0x140 rsp: e02b:ffff88000243fd90 eflags: 00010202 rax: 0000000000000000 rbx: ffff8800029864d0 rcx: 000000000007b36c rdx: 000000000007b36b rsi: 00000000024000c0 rdi: ffff880036801c00 rbp: ffff88000243fdc0 r08: 0000000000018880 r09: 0000000000000054 r10: 000000000000004a r11: ffff880034ace6c0 r12: 00000000024000c0 r13: ffff880036801c00 r14: 0000000000000001 r15: ffffffff8118dcc2 fs: 00007f0ab77548c0(0000) gs:ffff880036e00000(0000) knlgs:0000000000000000 cs: e033 ds: 0000 es: 0000 cr0: 0000000080050033 cr2: 0000000000000001 cr3: 000000000332d000 cr4: 0000000000040660 stack: ffffffff8118dc90 ffff8800029864d0 0000000000000000 ffff88003430b0b0 ffff880034b78320 ffff88003430b0b0 ffff88000243fdf8 ffffffff8118dcc2 ffff8800349c6700 ffff8800029864d0 000000000000000b 00007f0ab7754b90 call trace: [] ? anon_vma_fork+0x60/0x140 [] anon_vma_fork+0x92/0x140 [] copy_process+0xcae/0x1a80 [] _do_fork+0x8b/0x2d0 [] sys_clone+0x19/0x20 [] entry_syscall_64_fastpath+0x12/0x71 ] code: f6 75 1c 4c 89 fa 44 89 e6 4c 89 ef e8 a7 e4 00 00 41 f7 c4 00 80 00 00 49 89 c6 74 47 eb 32 49 63 45 20 48 8d 4a 01 4d 8b 45 00 <49> 8b 1c 06 4c 89 f0 65 49 0f c7 08 0f 94 c0 84 c0 74 ac 49 63 rip [] kmem_cache_alloc+0x7a/0x140 rsp cr2: 0000000000000001 --[ end trace 70cb9fd1b164a0e8 ]-- Signed-off-by: Edwin Török Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/user.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 58c2f4a21b7f..9ac65914ab5b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -355,6 +355,10 @@ static int dlm_device_register(struct dlm_ls *ls, char *name) error = misc_register(&ls->ls_device); if (error) { kfree(ls->ls_device.name); + /* this has to be set to NULL + * to avoid a double-free in dlm_device_deregister + */ + ls->ls_device.name = NULL; } fail: return error; -- cgit v1.2.3 From 0e720cd70631968e506e13fa366da77d4cf770e1 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 29 Jun 2017 18:23:54 -0700 Subject: mwifiex: correct channel stat buffer overflows commit 4b5dde2d6234ff5bc68e97e6901d1f2a0a7f3749 upstream. mwifiex records information about various channels as it receives scan information. It does this by appending to a buffer that was sized to the max number of supported channels on any band, but there are numerous problems: (a) scans can return info from more than one band (e.g., both 2.4 and 5 GHz), so the determined "max" is not large enough (b) some firmware appears to return multiple results for a given channel, so the max *really* isn't large enough (c) there is no bounds checking when stashing these stats, so problems (a) and (b) can easily lead to buffer overflows Let's patch this by setting a slightly-more-correct max (that accounts for a combination of both 2.4G and 5G bands) and adding a bounds check when writing to our statistics buffer. Due to problem (b), we still might not properly report all known survey information (e.g., with "iw survey dump"), since duplicate results (or otherwise "larger than expected" results) will cause some truncation. But that's a problem for a future bugfix. (And because of this known deficiency, only log the excess at the WARN level, since that isn't visible by default in this driver and would otherwise be a bit too noisy.) Fixes: bf35443314ac ("mwifiex: channel statistics support for mwifiex") Cc: Avinash Patil Cc: Xinming Hu Signed-off-by: Brian Norris Reviewed-by: Dmitry Torokhov Reviewed-by: Ganapathi Bhat Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- drivers/net/wireless/marvell/mwifiex/scan.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index afdbbf59a278..8677a53ef725 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4188,7 +4188,7 @@ int mwifiex_init_channel_scan_gap(struct mwifiex_adapter *adapter) if (adapter->config_bands & BAND_A) n_channels_a = mwifiex_band_5ghz.n_channels; - adapter->num_in_chan_stats = max_t(u32, n_channels_bg, n_channels_a); + adapter->num_in_chan_stats = n_channels_bg + n_channels_a; adapter->chan_stats = vmalloc(sizeof(*adapter->chan_stats) * adapter->num_in_chan_stats); diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 97c9765b5bc6..78d59a67f7e1 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -2479,6 +2479,12 @@ mwifiex_update_chan_statistics(struct mwifiex_private *priv, sizeof(struct mwifiex_chan_stats); for (i = 0 ; i < num_chan; i++) { + if (adapter->survey_idx >= adapter->num_in_chan_stats) { + mwifiex_dbg(adapter, WARN, + "FW reported too many channel results (max %d)\n", + adapter->num_in_chan_stats); + return; + } chan_stats.chan_num = fw_chan_stats->chan_num; chan_stats.bandcfg = fw_chan_stats->bandcfg; chan_stats.flags = fw_chan_stats->flags; -- cgit v1.2.3 From c193becad9add8bb170e525872f95b80c99caca6 Mon Sep 17 00:00:00 2001 From: Michael Moese Date: Tue, 29 Aug 2017 14:47:24 +0200 Subject: MCB: add support for SC31 to mcb-lpc commit acf5e051ac44d5dc60b21bc4734ef1b844d55551 upstream. This patch adds the resources and DMI ID's for the MEN SC31, which uses a different address region to map the LPC bus than the one used for the existing SC24. Signed-off-by: Michael Moese [jth add stable tag] Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-lpc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/mcb/mcb-lpc.c b/drivers/mcb/mcb-lpc.c index d072c088ce73..945091a88354 100644 --- a/drivers/mcb/mcb-lpc.c +++ b/drivers/mcb/mcb-lpc.c @@ -114,6 +114,12 @@ static struct resource sc24_fpga_resource = { .flags = IORESOURCE_MEM, }; +static struct resource sc31_fpga_resource = { + .start = 0xf000e000, + .end = 0xf000e000 + CHAM_HEADER_SIZE, + .flags = IORESOURCE_MEM, +}; + static struct platform_driver mcb_lpc_driver = { .driver = { .name = "mcb-lpc", @@ -132,6 +138,15 @@ static const struct dmi_system_id mcb_lpc_dmi_table[] = { .driver_data = (void *)&sc24_fpga_resource, .callback = mcb_lpc_create_platform_device, }, + { + .ident = "SC31", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEN"), + DMI_MATCH(DMI_PRODUCT_VERSION, "14SC31"), + }, + .driver_data = (void *)&sc31_fpga_resource, + .callback = mcb_lpc_create_platform_device, + }, {} }; MODULE_DEVICE_TABLE(dmi, mcb_lpc_dmi_table); -- cgit v1.2.3 From 078866740e3503825e8717c7f7f0cecf55817e09 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 24 Aug 2017 12:55:08 +0200 Subject: s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs commit fa41ba0d08de7c975c3e94d0067553f9b934221f upstream. Right now there is a potential hang situation for postcopy migrations, if the guest is enabling storage keys on the target system during the postcopy process. For storage key virtualization, we have to forbid the empty zero page as the storage key is a property of the physical page frame. As we enable storage key handling lazily we then drop all mappings for empty zero pages for lazy refaulting later on. This does not work with the postcopy migration, which relies on the empty zero page never triggering a fault again in the future. The reason is that postcopy migration will simply read a page on the target system if that page is a known zero page to fault in an empty zero page. At the same time postcopy remembers that this page was already transferred - so any future userfault on that page will NOT be retransmitted again to avoid races. If now the guest enters the storage key mode while in postcopy, we will break this assumption of postcopy. The solution is to disable the empty zero page for KVM guests early on and not during storage key enablement. With this change, the postcopy migration process is guaranteed to start after no zero pages are left. As guest pages are very likely not empty zero pages anyway the memory overhead is also pretty small. While at it this also adds proper page table locking to the zero page removal. Signed-off-by: Christian Borntraeger Acked-by: Janosch Frank Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/mm/gmap.c | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0cea7026e4ff..d33f245af5e9 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -480,7 +480,7 @@ static inline int mm_alloc_pgste(struct mm_struct *mm) * In the case that a guest uses storage keys * faults should no longer be backed by zero pages */ -#define mm_forbids_zeropage mm_use_skey +#define mm_forbids_zeropage mm_has_pgste static inline int mm_use_skey(struct mm_struct *mm) { #ifdef CONFIG_PGSTE diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 3ba622702ce4..cb2cd04e6698 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2124,6 +2124,37 @@ static inline void thp_split_mm(struct mm_struct *mm) #endif } +/* + * Remove all empty zero pages from the mapping for lazy refaulting + * - This must be called after mm->context.has_pgste is set, to avoid + * future creation of zero pages + * - This must be called after THP was enabled + */ +static int __zap_zero_pages(pmd_t *pmd, unsigned long start, + unsigned long end, struct mm_walk *walk) +{ + unsigned long addr; + + for (addr = start; addr != end; addr += PAGE_SIZE) { + pte_t *ptep; + spinlock_t *ptl; + + ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (is_zero_pfn(pte_pfn(*ptep))) + ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID)); + pte_unmap_unlock(ptep, ptl); + } + return 0; +} + +static inline void zap_zero_pages(struct mm_struct *mm) +{ + struct mm_walk walk = { .pmd_entry = __zap_zero_pages }; + + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); +} + /* * switch on pgstes for its userspace process (for kvm) */ @@ -2141,6 +2172,7 @@ int s390_enable_sie(void) mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); + zap_zero_pages(mm); up_write(&mm->mmap_sem); return 0; } @@ -2153,13 +2185,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); static int __s390_enable_skey(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - /* - * Remove all zero page mappings, - * after establishing a policy to forbid zero page mappings - * following faults for that page will get fresh anonymous pages - */ - if (is_zero_pfn(pte_pfn(*pte))) - ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); /* Clear storage key */ ptep_zap_key(walk->mm, addr, pte); return 0; -- cgit v1.2.3 From 25bdc516b58e84b96d3755b55c73674778055a41 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 10 Aug 2017 12:13:40 -0400 Subject: drm/nouveau/pci/msi: disable MSI on big-endian platforms by default commit bc60c90f472b6e762ea96ef384072145adc8d4af upstream. It appears that MSI does not work on either G5 PPC nor on a E5500-based platform, where other hardware is reported to work fine with MSI. Both tests were conducted with NV4x hardware, so perhaps other (or even this) hardware can be made to work. It's still possible to force-enable with config=NvMSI=1 on load. Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index eb9b278198b2..a4cb82495cee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -192,6 +192,10 @@ nvkm_pci_new_(const struct nvkm_pci_func *func, struct nvkm_device *device, } } +#ifdef __BIG_ENDIAN + pci->msi = false; +#endif + pci->msi = nvkm_boolopt(device->cfgopt, "NvMSI", pci->msi); if (pci->msi && func->msi_rearm) { pci->msi = pci_enable_msi(pci->pdev) == 0; -- cgit v1.2.3 From ec552ece1f25e8ecadf0afafe87ce314d13dc3e7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 3 Sep 2017 01:18:41 +0100 Subject: workqueue: Fix flag collision commit fbf1c41fc0f4d3574ac2377245efd666c1fa3075 upstream. Commit 0a94efb5acbb ("workqueue: implicit ordered attribute should be overridable") introduced a __WQ_ORDERED_EXPLICIT flag but gave it the same value as __WQ_LEGACY. I don't believe these were intended to mean the same thing, so renumber __WQ_ORDERED_EXPLICIT. Fixes: 0a94efb5acbb ("workqueue: implicit ordered attribute should be ...") Signed-off-by: Ben Hutchings Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 733a21ef8da4..1061add575d2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -311,8 +311,8 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ - __WQ_ORDERED_EXPLICIT = 1 << 18, /* internal: alloc_ordered_workqueue() */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ + __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ -- cgit v1.2.3 From 5b9c6a54c629e38271acf833f75154f2f8333365 Mon Sep 17 00:00:00 2001 From: Andrey Korolyov Date: Thu, 10 Aug 2017 13:21:14 +0300 Subject: cs5536: add support for IDE controller variant commit 591b6bb605785c12a21e8b07a08a277065b655a5 upstream. Several legacy devices such as Geode-based Cisco ASA appliances and DB800 development board do possess CS5536 IDE controller with different PCI id than existing one. Using pata_generic is not always feasible as at least DB800 requires MSR quirk from pata_cs5536 to be used with vendor firmware. Signed-off-by: Andrey Korolyov Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/pata_amd.c | 1 + drivers/ata/pata_cs5536.c | 1 + include/linux/pci_ids.h | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c index 8d4d959a821c..8706533db57b 100644 --- a/drivers/ata/pata_amd.c +++ b/drivers/ata/pata_amd.c @@ -616,6 +616,7 @@ static const struct pci_device_id amd[] = { { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE), 8 }, { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE), 8 }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_IDE), 9 }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_DEV_IDE), 9 }, { }, }; diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c index 6c15a554efbe..dc1255294628 100644 --- a/drivers/ata/pata_cs5536.c +++ b/drivers/ata/pata_cs5536.c @@ -289,6 +289,7 @@ static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id) static const struct pci_device_id cs5536[] = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_IDE), }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_DEV_IDE), }, { }, }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 3e5dbbe75f70..43082049baf2 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -574,6 +574,7 @@ #define PCI_DEVICE_ID_AMD_CS5536_EHC 0x2095 #define PCI_DEVICE_ID_AMD_CS5536_UDC 0x2096 #define PCI_DEVICE_ID_AMD_CS5536_UOC 0x2097 +#define PCI_DEVICE_ID_AMD_CS5536_DEV_IDE 0x2092 #define PCI_DEVICE_ID_AMD_CS5536_IDE 0x209A #define PCI_DEVICE_ID_AMD_LX_VIDEO 0x2081 #define PCI_DEVICE_ID_AMD_LX_AES 0x2082 -- cgit v1.2.3 From b06e1abf1ff26046f0f3369b7abe8a221dadf26b Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Tue, 15 Aug 2017 22:41:08 -0700 Subject: scsi: sg: protect against races between mmap() and SG_SET_RESERVED_SIZE commit 6a8dadcca81fceff9976e8828cceb072873b7bd5 upstream. Take f_mutex around mmap() processing to protect against races with the SG_SET_RESERVED_SIZE ioctl. Ensure the reserve buffer length remains consistent during the mapping operation, and set the "mmap called" flag to prevent further changes to the reserved buffer size as an atomic operation with the mapping. [mkp: fixed whitespace] Signed-off-by: Todd Poynor Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index fed37aabf828..0506a776cc10 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1244,6 +1244,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) unsigned long req_sz, len, sa; Sg_scatter_hold *rsv_schp; int k, length; + int ret = 0; if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) return -ENXIO; @@ -1254,8 +1255,11 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) if (vma->vm_pgoff) return -EINVAL; /* want no offset */ rsv_schp = &sfp->reserve; - if (req_sz > rsv_schp->bufflen) - return -ENOMEM; /* cannot map more than reserved buffer */ + mutex_lock(&sfp->f_mutex); + if (req_sz > rsv_schp->bufflen) { + ret = -ENOMEM; /* cannot map more than reserved buffer */ + goto out; + } sa = vma->vm_start; length = 1 << (PAGE_SHIFT + rsv_schp->page_order); @@ -1269,7 +1273,9 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = sfp; vma->vm_ops = &sg_mmap_vm_ops; - return 0; +out: + mutex_unlock(&sfp->f_mutex); + return ret; } static void -- cgit v1.2.3 From 7791b59153cb2adfe2fa4273b2f9c8925c3238ed Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Tue, 15 Aug 2017 21:48:43 -0700 Subject: scsi: sg: recheck MMAP_IO request length with lock held commit 8d26f491116feaa0b16de370b6a7ba40a40fa0b4 upstream. Commit 1bc0eb044615 ("scsi: sg: protect accesses to 'reserved' page array") adds needed concurrency protection for the "reserve" buffer. Some checks that are initially made outside the lock are replicated once the lock is taken to ensure the checks and resulting decisions are made using consistent state. The check that a request with flag SG_FLAG_MMAP_IO set fits in the reserve buffer also needs to be performed again under the lock to ensure the reserve buffer length compared against matches the value in effect when the request is linked to the reserve buffer. An -ENOMEM should be returned in this case, instead of switching over to an indirect buffer as for non-MMAP_IO requests. Signed-off-by: Todd Poynor Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0506a776cc10..9236a13d5d2a 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1747,9 +1747,12 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) !sfp->res_in_use) { sfp->res_in_use = 1; sg_link_reserve(sfp, srp, dxfer_len); - } else if ((hp->flags & SG_FLAG_MMAP_IO) && sfp->res_in_use) { + } else if (hp->flags & SG_FLAG_MMAP_IO) { + res = -EBUSY; /* sfp->res_in_use == 1 */ + if (dxfer_len > rsv_schp->bufflen) + res = -ENOMEM; mutex_unlock(&sfp->f_mutex); - return -EBUSY; + return res; } else { res = sg_build_indirect(req_schp, sfp, dxfer_len); if (res) { -- cgit v1.2.3 From 8b5a7e4436228c98d6fc97071f3bddcd6a614915 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 16 Jan 2017 16:52:47 -0800 Subject: drm/bridge: adv7511: Use work_struct to defer hotplug handing to out of irq context commit 518cb7057a59b9441336d2e88a396d52b6ab0cce upstream. I was recently seeing issues with EDID probing, where the logic to wait for the EDID read bit to be set by the IRQ wasn't happening and the code would time out and fail. Digging deeper, I found this was due to the fact that IRQs were disabled as we were running in IRQ context from the HPD signal. Thus this patch changes the logic to handle the HPD signal via a work_struct so we can be out of irq context. With this patch, the EDID probing on hotplug does not time out. Cc: David Airlie Cc: Archit Taneja Cc: Wolfram Sang Cc: Lars-Peter Clausen Cc: Laurent Pinchart Cc: dri-devel@lists.freedesktop.org Reviewed-by: Laurent Pinchart Tested-by: Laurent Pinchart Signed-off-by: John Stultz Signed-off-by: Archit Taneja Link: http://patchwork.freedesktop.org/patch/msgid/1484614372-15342-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thong Ho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/adv7511/adv7511.h | 2 ++ drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index 161c923d6162..3e74e1a6584c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -315,6 +315,8 @@ struct adv7511 { bool edid_read; wait_queue_head_t wq; + struct work_struct hpd_work; + struct drm_bridge bridge; struct drm_connector connector; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 8ed3906dd411..b792536d9d64 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -402,6 +402,13 @@ static bool adv7511_hpd(struct adv7511 *adv7511) return false; } +static void adv7511_hpd_work(struct work_struct *work) +{ + struct adv7511 *adv7511 = container_of(work, struct adv7511, hpd_work); + + drm_helper_hpd_irq_event(adv7511->connector.dev); +} + static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd) { unsigned int irq0, irq1; @@ -419,7 +426,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd) regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1); if (process_hpd && irq0 & ADV7511_INT0_HPD && adv7511->bridge.encoder) - drm_helper_hpd_irq_event(adv7511->connector.dev); + schedule_work(&adv7511->hpd_work); if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) { adv7511->edid_read = true; @@ -1006,6 +1013,8 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) goto err_i2c_unregister_edid; } + INIT_WORK(&adv7511->hpd_work, adv7511_hpd_work); + if (i2c->irq) { init_waitqueue_head(&adv7511->wq); -- cgit v1.2.3 From 8bc67f67b7631122f1047cd8f3c7edbda106d48d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 16 Jan 2017 16:52:48 -0800 Subject: drm/bridge: adv7511: Switch to using drm_kms_helper_hotplug_event() commit 6d5104c5a6b56385426e15047050584794bb6254 upstream. In chasing down a previous issue with EDID probing from calling drm_helper_hpd_irq_event() from irq context, Laurent noticed that the DRM documentation suggests that drm_kms_helper_hotplug_event() should be used instead. Thus this patch replaces drm_helper_hpd_irq_event() with drm_kms_helper_hotplug_event(), which requires we update the connector.status entry and only call _hotplug_event() when the status changes. Cc: David Airlie Cc: Archit Taneja Cc: Wolfram Sang Cc: Lars-Peter Clausen Cc: Laurent Pinchart Cc: dri-devel@lists.freedesktop.org Reviewed-by: Laurent Pinchart Tested-by: Laurent Pinchart Signed-off-by: John Stultz Signed-off-by: Archit Taneja Link: http://patchwork.freedesktop.org/patch/msgid/1484614372-15342-3-git-send-email-john.stultz@linaro.org Signed-off-by: Thong Ho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index b792536d9d64..213d892b6fa3 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -405,8 +405,22 @@ static bool adv7511_hpd(struct adv7511 *adv7511) static void adv7511_hpd_work(struct work_struct *work) { struct adv7511 *adv7511 = container_of(work, struct adv7511, hpd_work); + enum drm_connector_status status; + unsigned int val; + int ret; - drm_helper_hpd_irq_event(adv7511->connector.dev); + ret = regmap_read(adv7511->regmap, ADV7511_REG_STATUS, &val); + if (ret < 0) + status = connector_status_disconnected; + else if (val & ADV7511_STATUS_HPD) + status = connector_status_connected; + else + status = connector_status_disconnected; + + if (adv7511->connector.status != status) { + adv7511->connector.status = status; + drm_kms_helper_hotplug_event(adv7511->connector.dev); + } } static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd) -- cgit v1.2.3 From f07cb3489cff38984a8df4d3a0fea5d0858c1eb0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 10 Sep 2017 07:49:19 +0200 Subject: Linux 4.9.49 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cfa188b427b1..1ebc553f5464 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 48 +SUBLEVEL = 49 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From 865162031c4ea66ef6a5ce448c818ba3784ad2a4 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 25 Nov 2016 11:32:32 +0100 Subject: mtd: nand: mxc: Fix mxc_v1 ooblayout commit 3bff08dffe3115a25ce04b95ea75f6d868572c60 upstream. Commit a894cf6c5a82 ("mtd: nand: mxc: switch to mtd_ooblayout_ops") introduced a bug in the OOB layout description. Even if the driver claims that 3 ECC bytes are reserved to protect 512 bytes of data, it's actually 5 ECC bytes to protect 512+6 bytes of data (some OOB bytes are also protected using extra ECC bytes). Fix the mxc_v1_ooblayout_{free,ecc}() functions to reflect this behavior. Signed-off-by: Boris Brezillon Fixes: a894cf6c5a82 ("mtd: nand: mxc: switch to mtd_ooblayout_ops") Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/mxc_nand.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index d7f724b24fd7..0c84ee80e5b6 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -877,6 +877,8 @@ static void mxc_do_addr_cycle(struct mtd_info *mtd, int column, int page_addr) } } +#define MXC_V1_ECCBYTES 5 + static int mxc_v1_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *oobregion) { @@ -886,7 +888,7 @@ static int mxc_v1_ooblayout_ecc(struct mtd_info *mtd, int section, return -ERANGE; oobregion->offset = (section * 16) + 6; - oobregion->length = nand_chip->ecc.bytes; + oobregion->length = MXC_V1_ECCBYTES; return 0; } @@ -908,8 +910,7 @@ static int mxc_v1_ooblayout_free(struct mtd_info *mtd, int section, oobregion->length = 4; } } else { - oobregion->offset = ((section - 1) * 16) + - nand_chip->ecc.bytes + 6; + oobregion->offset = ((section - 1) * 16) + MXC_V1_ECCBYTES + 6; if (section < nand_chip->ecc.steps) oobregion->length = (section * 16) + 6 - oobregion->offset; -- cgit v1.2.3 From f4a272d5783936b786b44e6d2afbf78dd6a1fc8c Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Fri, 11 Aug 2017 17:09:16 +0530 Subject: mtd: nand: qcom: fix read failure without complete bootchain commit d8a9b320a26c1ea28e51e4f3ecfb593d5aac2910 upstream. The NAND page read fails without complete boot chain since NAND_DEV_CMD_VLD value is not proper. The default power on reset value for this register is 0xe - ERASE_START_VALID | WRITE_START_VALID | READ_STOP_VALID The READ_START_VALID should be enabled for sending PAGE_READ command. READ_STOP_VALID should be cleared since normal NAND page read does not require READ_STOP command. Fixes: c76b78d8ec05a ("mtd: nand: Qualcomm NAND controller driver") Reviewed-by: Archit Taneja Signed-off-by: Abhishek Sahu Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/qcom_nandc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index 57d483ac5765..a45e61fe0a66 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -109,7 +109,11 @@ #define READ_ADDR 0 /* NAND_DEV_CMD_VLD bits */ -#define READ_START_VLD 0 +#define READ_START_VLD BIT(0) +#define READ_STOP_VLD BIT(1) +#define WRITE_START_VLD BIT(2) +#define ERASE_START_VLD BIT(3) +#define SEQ_READ_START_VLD BIT(4) /* NAND_EBI2_ECC_BUF_CFG bits */ #define NUM_STEPS 0 @@ -148,6 +152,10 @@ #define FETCH_ID 0xb #define RESET_DEVICE 0xd +/* Default Value for NAND_DEV_CMD_VLD */ +#define NAND_DEV_CMD_VLD_VAL (READ_START_VLD | WRITE_START_VLD | \ + ERASE_START_VLD | SEQ_READ_START_VLD) + /* * the NAND controller performs reads/writes with ECC in 516 byte chunks. * the driver calls the chunks 'step' or 'codeword' interchangeably @@ -672,8 +680,7 @@ static int nandc_param(struct qcom_nand_host *host) /* configure CMD1 and VLD for ONFI param probing */ nandc_set_reg(nandc, NAND_DEV_CMD_VLD, - (nandc->vld & ~(1 << READ_START_VLD)) - | 0 << READ_START_VLD); + (nandc->vld & ~READ_START_VLD)); nandc_set_reg(nandc, NAND_DEV_CMD1, (nandc->cmd1 & ~(0xFF << READ_ADDR)) | NAND_CMD_PARAM << READ_ADDR); @@ -1972,13 +1979,14 @@ static int qcom_nandc_setup(struct qcom_nand_controller *nandc) { /* kill onenand */ nandc_write(nandc, SFLASHC_BURST_CFG, 0); + nandc_write(nandc, NAND_DEV_CMD_VLD, NAND_DEV_CMD_VLD_VAL); /* enable ADM DMA */ nandc_write(nandc, NAND_FLASH_CHIP_SELECT, DM_EN); /* save the original values of these registers */ nandc->cmd1 = nandc_read(nandc, NAND_DEV_CMD1); - nandc->vld = nandc_read(nandc, NAND_DEV_CMD_VLD); + nandc->vld = NAND_DEV_CMD_VLD_VAL; return 0; } -- cgit v1.2.3 From b276bc66d439e6b510f54d4ae0c18da9fcd60319 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Thu, 3 Aug 2017 17:56:39 +0200 Subject: mtd: nand: qcom: fix config error for BCH commit 10777de570016471fd929869c7830a7772893e39 upstream. The configuration for BCH is not correct in the current driver. The ECC_CFG_ECC_DISABLE bit defines whether to enable or disable the BCH ECC in which 0x1 : BCH_DISABLED 0x0 : BCH_ENABLED But currently host->bch_enabled is being assigned to BCH_DISABLED. Fixes: c76b78d8ec05a ("mtd: nand: Qualcomm NAND controller driver") Signed-off-by: Abhishek Sahu Reviewed-by: Archit Taneja Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/qcom_nandc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index a45e61fe0a66..6f0fd1512ad2 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -1900,7 +1900,7 @@ static int qcom_nand_host_setup(struct qcom_nand_host *host) | wide_bus << WIDE_FLASH | 1 << DEV0_CFG1_ECC_DISABLE; - host->ecc_bch_cfg = host->bch_enabled << ECC_CFG_ECC_DISABLE + host->ecc_bch_cfg = !host->bch_enabled << ECC_CFG_ECC_DISABLE | 0 << ECC_SW_RESET | host->cw_data << ECC_NUM_DATA_BYTES | 1 << ECC_FORCE_CLK_OPEN -- cgit v1.2.3 From f52a535c8438d4761c9b42a63e1b6971415a5744 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Wed, 30 Aug 2017 15:18:19 -0700 Subject: nvme-fabrics: generate spec-compliant UUID NQNs commit 40a5fce495715c48c2e02668144e68a507ac5a30 upstream. The default host NQN, which is generated based on the host's UUID, does not follow the UUID-based NQN format laid out in the NVMe 1.3 specification. Remove the "NVMf:" portion of the NQN to match the spec. Signed-off-by: Daniel Verkamp Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5a3f008d3480..eef1a68e5d95 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -77,7 +77,7 @@ static struct nvmf_host *nvmf_host_default(void) kref_init(&host->ref); uuid_be_gen(&host->id); snprintf(host->nqn, NVMF_NQN_SIZE, - "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); + "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id); mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts); -- cgit v1.2.3 From 0f7dbc4d5bc88432ab1c8639c66628d4f5903ae9 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 4 Jul 2017 21:49:06 +1000 Subject: btrfs: resume qgroup rescan on rw remount commit 6c6b5a39c4bf3dbd8cf629c9f5450e983c19dbb9 upstream. Several distributions mount the "proper root" as ro during initrd and then remount it as rw before pivot_root(2). Thus, if a rescan had been aborted by a previous shutdown, the rescan would never be resumed. This issue would manifest itself as several btrfs ioctl(2)s causing the entire machine to hang when btrfs_qgroup_wait_for_completion was hit (due to the fs_info->qgroup_rescan_running flag being set but the rescan itself not being resumed). Notably, Docker's btrfs storage driver makes regular use of BTRFS_QUOTA_CTL_DISABLE and BTRFS_IOC_QUOTA_RESCAN_WAIT (causing this problem to be manifested on boot for some machines). Cc: Jeff Mahoney Fixes: b382a324b60f ("Btrfs: fix qgroup rescan resume on mount") Signed-off-by: Aleksa Sarai Reviewed-by: Nikolay Borisov Tested-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 74ed5aae6cea..f6e111984ce2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1834,6 +1834,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) goto restore; } + btrfs_qgroup_rescan_resume(fs_info); + if (!fs_info->uuid_root) { btrfs_info(fs_info, "creating UUID tree"); ret = btrfs_create_uuid_tree(fs_info); -- cgit v1.2.3 From ebf381be016f3610c483bca561262cd499b04e84 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 1 Aug 2017 07:11:36 -0700 Subject: selftests/x86/fsgsbase: Test selectors 1, 2, and 3 commit 23d98c204386a98d9ef9f9e744f41443ece4929f upstream. Those are funny cases. Make sure they work. (Something is screwy with signal handling if a selector is 1, 2, or 3. Anyone who wants to dive into that rabbit hole is welcome to do so.) Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chang Seok Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/fsgsbase.c | 41 +++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5b2b4b3c634c..9b4610c6d3fb 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -285,9 +285,12 @@ static void *threadproc(void *ctx) } } -static void set_gs_and_switch_to(unsigned long local, unsigned long remote) +static void set_gs_and_switch_to(unsigned long local, + unsigned short force_sel, + unsigned long remote) { unsigned long base; + unsigned short sel_pre_sched, sel_post_sched; bool hard_zero = false; if (local == HARD_ZERO) { @@ -297,6 +300,8 @@ static void set_gs_and_switch_to(unsigned long local, unsigned long remote) printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n", local, hard_zero ? " and clear gs" : "", remote); + if (force_sel) + printf("\tBefore schedule, set selector to 0x%hx\n", force_sel); if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0) err(1, "ARCH_SET_GS"); if (hard_zero) @@ -307,18 +312,35 @@ static void set_gs_and_switch_to(unsigned long local, unsigned long remote) printf("[FAIL]\tGSBASE wasn't set as expected\n"); } + if (force_sel) { + asm volatile ("mov %0, %%gs" : : "rm" (force_sel)); + sel_pre_sched = force_sel; + local = read_base(GS); + + /* + * Signal delivery seems to mess up weird selectors. Put it + * back. + */ + asm volatile ("mov %0, %%gs" : : "rm" (force_sel)); + } else { + asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched)); + } + remote_base = remote; ftx = 1; syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); while (ftx != 0) syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0); + asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched)); base = read_base(GS); - if (base == local) { - printf("[OK]\tGSBASE remained 0x%lx\n", local); + if (base == local && sel_pre_sched == sel_post_sched) { + printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n", + sel_pre_sched, local); } else { nerrs++; - printf("[FAIL]\tGSBASE changed to 0x%lx\n", base); + printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n", + sel_pre_sched, local, sel_post_sched, base); } } @@ -381,8 +403,15 @@ int main() for (int local = 0; local < 4; local++) { for (int remote = 0; remote < 4; remote++) { - set_gs_and_switch_to(bases_with_hard_zero[local], - bases_with_hard_zero[remote]); + for (unsigned short s = 0; s < 5; s++) { + unsigned short sel = s; + if (s == 4) + asm ("mov %%ss, %0" : "=rm" (sel)); + set_gs_and_switch_to( + bases_with_hard_zero[local], + sel, + bases_with_hard_zero[remote]); + } } } -- cgit v1.2.3 From 3c8381df2a56f097dd47c7c949f6cb57e70cc667 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 8 Sep 2017 16:13:12 -0700 Subject: mm/memory.c: fix mem_cgroup_oom_disable() call missing commit de0c799bba2610a8e1e9a50d76a28614520a4cd4 upstream. Seen while reading the code, in handle_mm_fault(), in the case arch_vma_access_permitted() is failing the call to mem_cgroup_oom_disable() is not made. To fix that, move the call to mem_cgroup_oom_enable() after calling arch_vma_access_permitted() as it should not have entered the memcg OOM. Link: http://lkml.kernel.org/r/1504625439-31313-1-git-send-email-ldufour@linux.vnet.ibm.com Fixes: bae473a423f6 ("mm: introduce fault_env") Signed-off-by: Laurent Dufour Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index d064caff9d7d..1aa63e7dd790 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3596,6 +3596,11 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* do counter updates before entering really critical section. */ check_sync_rss_stat(current); + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_INSTRUCTION, + flags & FAULT_FLAG_REMOTE)) + return VM_FAULT_SIGSEGV; + /* * Enable the memcg OOM handling for faults triggered in user * space. Kernel faults are handled more gracefully. @@ -3603,11 +3608,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (flags & FAULT_FLAG_USER) mem_cgroup_oom_enable(); - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, - flags & FAULT_FLAG_INSTRUCTION, - flags & FAULT_FLAG_REMOTE)) - return VM_FAULT_SIGSEGV; - if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else -- cgit v1.2.3 From d21f3eaa09c0dcbf7930ec3b127cbacbfba99bb5 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 10 Nov 2016 13:06:39 -0800 Subject: locktorture: Fix potential memory leak with rw lock test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f4dbba591945dc301c302672adefba9e2ec08dc5 upstream. When running locktorture module with the below commands with kmemleak enabled: $ modprobe locktorture torture_type=rw_lock_irq $ rmmod locktorture The below kmemleak got caught: root@10:~# echo scan > /sys/kernel/debug/kmemleak [ 323.197029] kmemleak: 2 new suspected memory leaks (see /sys/kernel/debug/kmemleak) root@10:~# cat /sys/kernel/debug/kmemleak unreferenced object 0xffffffc07592d500 (size 128): comm "modprobe", pid 368, jiffies 4294924118 (age 205.824s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c3 7b 02 00 00 00 00 00 .........{...... 00 00 00 00 00 00 00 00 d7 9b 02 00 00 00 00 00 ................ backtrace: [] create_object+0x110/0x288 [] kmemleak_alloc+0x58/0xa0 [] __kmalloc+0x234/0x318 [] 0xffffff80006fa130 [] do_one_initcall+0x44/0x138 [] do_init_module+0x68/0x1cc [] load_module+0x1a68/0x22e0 [] SyS_finit_module+0xe0/0xf0 [] el0_svc_naked+0x24/0x28 [] 0xffffffffffffffff unreferenced object 0xffffffc07592d480 (size 128): comm "modprobe", pid 368, jiffies 4294924118 (age 205.824s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 3b 6f 01 00 00 00 00 00 ........;o...... 00 00 00 00 00 00 00 00 23 6a 01 00 00 00 00 00 ........#j...... backtrace: [] create_object+0x110/0x288 [] kmemleak_alloc+0x58/0xa0 [] __kmalloc+0x234/0x318 [] 0xffffff80006fa22c [] do_one_initcall+0x44/0x138 [] do_init_module+0x68/0x1cc [] load_module+0x1a68/0x22e0 [] SyS_finit_module+0xe0/0xf0 [] el0_svc_naked+0x24/0x28 [] 0xffffffffffffffff It is because cxt.lwsa and cxt.lrsa don't get freed in module_exit, so free them in lock_torture_cleanup() and free writer_tasks if reader_tasks is failed at memory allocation. Signed-off-by: Yang Shi Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Cc: 石洋 Signed-off-by: Greg Kroah-Hartman --- kernel/locking/locktorture.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index f8c5af52a131..d3de04b12f8c 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -780,6 +780,10 @@ static void lock_torture_cleanup(void) else lock_torture_print_module_parms(cxt.cur_ops, "End of test: SUCCESS"); + + kfree(cxt.lwsa); + kfree(cxt.lrsa); + end: torture_cleanup_end(); } @@ -924,6 +928,8 @@ static int __init lock_torture_init(void) GFP_KERNEL); if (reader_tasks == NULL) { VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory"); + kfree(writer_tasks); + writer_tasks = NULL; firsterr = -ENOMEM; goto unwind; } -- cgit v1.2.3 From 03bea515b9a2f2a48d46a5a4bcc69be264afb6af Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 6 Jul 2017 12:34:40 +0200 Subject: ALSA: msnd: Optimize / harden DSP and MIDI loops commit 20e2b791796bd68816fa115f12be5320de2b8021 upstream. The ISA msnd drivers have loops fetching the ring-buffer head, tail and size values inside the loops. Such codes are inefficient and fragile. This patch optimizes it, and also adds the sanity check to avoid the endless loops. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196131 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196133 Signed-off-by: Takashi Iwai Signed-off-by: grygorii tertychnyi Signed-off-by: Greg Kroah-Hartman --- sound/isa/msnd/msnd_midi.c | 30 +++++++++++++++--------------- sound/isa/msnd/msnd_pinnacle.c | 23 ++++++++++++----------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/sound/isa/msnd/msnd_midi.c b/sound/isa/msnd/msnd_midi.c index ffc67fd80c23..58e59cd3c95c 100644 --- a/sound/isa/msnd/msnd_midi.c +++ b/sound/isa/msnd/msnd_midi.c @@ -120,24 +120,24 @@ void snd_msndmidi_input_read(void *mpuv) unsigned long flags; struct snd_msndmidi *mpu = mpuv; void *pwMIDQData = mpu->dev->mappedbase + MIDQ_DATA_BUFF; + u16 head, tail, size; spin_lock_irqsave(&mpu->input_lock, flags); - while (readw(mpu->dev->MIDQ + JQS_wTail) != - readw(mpu->dev->MIDQ + JQS_wHead)) { - u16 wTmp, val; - val = readw(pwMIDQData + 2 * readw(mpu->dev->MIDQ + JQS_wHead)); - - if (test_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER, - &mpu->mode)) - snd_rawmidi_receive(mpu->substream_input, - (unsigned char *)&val, 1); - - wTmp = readw(mpu->dev->MIDQ + JQS_wHead) + 1; - if (wTmp > readw(mpu->dev->MIDQ + JQS_wSize)) - writew(0, mpu->dev->MIDQ + JQS_wHead); - else - writew(wTmp, mpu->dev->MIDQ + JQS_wHead); + head = readw(mpu->dev->MIDQ + JQS_wHead); + tail = readw(mpu->dev->MIDQ + JQS_wTail); + size = readw(mpu->dev->MIDQ + JQS_wSize); + if (head > size || tail > size) + goto out; + while (head != tail) { + unsigned char val = readw(pwMIDQData + 2 * head); + + if (test_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER, &mpu->mode)) + snd_rawmidi_receive(mpu->substream_input, &val, 1); + if (++head > size) + head = 0; + writew(head, mpu->dev->MIDQ + JQS_wHead); } + out: spin_unlock_irqrestore(&mpu->input_lock, flags); } EXPORT_SYMBOL(snd_msndmidi_input_read); diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c index 4c072666115d..a31ea6c22d19 100644 --- a/sound/isa/msnd/msnd_pinnacle.c +++ b/sound/isa/msnd/msnd_pinnacle.c @@ -170,23 +170,24 @@ static irqreturn_t snd_msnd_interrupt(int irq, void *dev_id) { struct snd_msnd *chip = dev_id; void *pwDSPQData = chip->mappedbase + DSPQ_DATA_BUFF; + u16 head, tail, size; /* Send ack to DSP */ /* inb(chip->io + HP_RXL); */ /* Evaluate queued DSP messages */ - while (readw(chip->DSPQ + JQS_wTail) != readw(chip->DSPQ + JQS_wHead)) { - u16 wTmp; - - snd_msnd_eval_dsp_msg(chip, - readw(pwDSPQData + 2 * readw(chip->DSPQ + JQS_wHead))); - - wTmp = readw(chip->DSPQ + JQS_wHead) + 1; - if (wTmp > readw(chip->DSPQ + JQS_wSize)) - writew(0, chip->DSPQ + JQS_wHead); - else - writew(wTmp, chip->DSPQ + JQS_wHead); + head = readw(chip->DSPQ + JQS_wHead); + tail = readw(chip->DSPQ + JQS_wTail); + size = readw(chip->DSPQ + JQS_wSize); + if (head > size || tail > size) + goto out; + while (head != tail) { + snd_msnd_eval_dsp_msg(chip, readw(pwDSPQData + 2 * head)); + if (++head > size) + head = 0; + writew(head, chip->DSPQ + JQS_wHead); } + out: /* Send ack to DSP */ inb(chip->io + HP_RXL); return IRQ_HANDLED; -- cgit v1.2.3 From 6300c8bfafe032187f3cbaa43dbf7d306650c5ed Mon Sep 17 00:00:00 2001 From: Ben Seri Date: Sat, 9 Sep 2017 23:15:59 +0200 Subject: Bluetooth: Properly check L2CAP config option output buffer length commit e860d2c904d1a9f38a24eb44c9f34b8f915a6ea3 upstream. Validate the output buffer length for L2CAP config requests and responses to avoid overflowing the stack buffer used for building the option blocks. Signed-off-by: Ben Seri Signed-off-by: Marcel Holtmann Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 80 +++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 577f1c01454a..ffd09c1675d4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -58,7 +58,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data); -static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data); +static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size); static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, @@ -1473,7 +1473,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -2977,12 +2977,15 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, return len; } -static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) +static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val, size_t size) { struct l2cap_conf_opt *opt = *ptr; BT_DBG("type 0x%2.2x len %u val 0x%lx", type, len, val); + if (size < L2CAP_CONF_OPT_SIZE + len) + return; + opt->type = type; opt->len = len; @@ -3007,7 +3010,7 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) *ptr += L2CAP_CONF_OPT_SIZE + len; } -static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan) +static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan, size_t size) { struct l2cap_conf_efs efs; @@ -3035,7 +3038,7 @@ static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan) } l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, size); } static void l2cap_ack_timeout(struct work_struct *work) @@ -3181,11 +3184,12 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan) chan->ack_win = chan->tx_win; } -static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) +static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_req *req = data; struct l2cap_conf_rfc rfc = { .mode = chan->mode }; void *ptr = req->data; + void *endptr = data + data_size; u16 size; BT_DBG("chan %p", chan); @@ -3210,7 +3214,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) done: if (chan->imtu != L2CAP_DEFAULT_MTU) - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); switch (chan->mode) { case L2CAP_MODE_BASIC: @@ -3229,7 +3233,7 @@ done: rfc.max_pdu_size = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); break; case L2CAP_MODE_ERTM: @@ -3249,21 +3253,21 @@ done: L2CAP_DEFAULT_TX_WINDOW); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) - l2cap_add_opt_efs(&ptr, chan); + l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, - chan->tx_win); + chan->tx_win, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, - chan->fcs); + chan->fcs, endptr - ptr); } break; @@ -3281,17 +3285,17 @@ done: rfc.max_pdu_size = cpu_to_le16(size); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) - l2cap_add_opt_efs(&ptr, chan); + l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, - chan->fcs); + chan->fcs, endptr - ptr); } break; } @@ -3302,10 +3306,11 @@ done: return ptr - data; } -static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data) +static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_rsp *rsp = data; void *ptr = rsp->data; + void *endptr = data + data_size; void *req = chan->conf_req; int len = chan->conf_len; int type, hint, olen; @@ -3407,7 +3412,7 @@ done: return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); } if (result == L2CAP_CONF_SUCCESS) { @@ -3420,7 +3425,7 @@ done: chan->omtu = mtu; set_bit(CONF_MTU_DONE, &chan->conf_state); } - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu, endptr - ptr); if (remote_efs) { if (chan->local_stype != L2CAP_SERV_NOTRAFIC && @@ -3434,7 +3439,7 @@ done: l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, endptr - ptr); } else { /* Send PENDING Conf Rsp */ result = L2CAP_CONF_PENDING; @@ -3467,7 +3472,7 @@ done: set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); + sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; @@ -3481,7 +3486,7 @@ done: le32_to_cpu(efs.sdu_itime); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, endptr - ptr); } break; @@ -3495,7 +3500,7 @@ done: set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); break; @@ -3517,10 +3522,11 @@ done: } static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, - void *data, u16 *result) + void *data, size_t size, u16 *result) { struct l2cap_conf_req *req = data; void *ptr = req->data; + void *endptr = data + size; int type, olen; unsigned long val; struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; @@ -3538,13 +3544,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: chan->flush_to = val; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, - 2, chan->flush_to); + 2, chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: @@ -3558,13 +3564,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, chan->fcs = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); + sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, - chan->tx_win); + chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: @@ -3577,7 +3583,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: @@ -3682,7 +3688,7 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) return; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -3890,7 +3896,7 @@ sendresp: u8 buf[128]; set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -3968,7 +3974,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, break; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, req), req); + l2cap_build_conf_req(chan, req, sizeof(req)), req); chan->num_conf_req++; break; @@ -4080,7 +4086,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, } /* Complete config. */ - len = l2cap_parse_conf_req(chan, rsp); + len = l2cap_parse_conf_req(chan, rsp, sizeof(rsp)); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto unlock; @@ -4114,7 +4120,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, if (!test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) { u8 buf[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -4174,7 +4180,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, char buf[64]; len = l2cap_parse_conf_rsp(chan, rsp->data, len, - buf, &result); + buf, sizeof(buf), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; @@ -4204,7 +4210,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, /* throw out any old stored conf requests */ result = L2CAP_CONF_SUCCESS; len = l2cap_parse_conf_rsp(chan, rsp->data, len, - req, &result); + req, sizeof(req), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; @@ -4781,7 +4787,7 @@ static void l2cap_do_create(struct l2cap_chan *chan, int result, set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(chan->conn, l2cap_get_ident(chan->conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } } @@ -7457,7 +7463,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } -- cgit v1.2.3 From b40aa8b047b89c63b2040d3628eacea6eafe8708 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 1 Jul 2017 15:16:34 +0100 Subject: ARM64: dts: marvell: armada-37xx: Fix GIC maintenance interrupt commit 95696d292e204073433ed2ef3ff4d3d8f42a8248 upstream. The GIC-500 integrated in the Armada-37xx SoCs is compliant with the GICv3 architecture, and thus provides a maintenance interrupt that is required for hypervisors to function correctly. With the interrupt provided in the DT, KVM now works as it should. Tested on an Espressobin system. Fixes: adbc3695d9e4 ("arm64: dts: add the Marvell Armada 3700 family and a development board") Signed-off-by: Marc Zyngier Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index 49a5d8ccae27..68e6f88bdcfe 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -170,6 +170,7 @@ interrupt-controller; reg = <0x1d00000 0x10000>, /* GICD */ <0x1d40000 0x40000>; /* GICR */ + interrupts = ; }; }; -- cgit v1.2.3 From 301d91e03c9d76e9ae6442844f6c186030d01941 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 22 Aug 2017 11:36:17 +0100 Subject: ARM: 8692/1: mm: abort uaccess retries upon fatal signal commit 746a272e44141af24a02f6c9b0f65f4c4598ed42 upstream. When there's a fatal signal pending, arm's do_page_fault() implementation returns 0. The intent is that we'll return to the faulting userspace instruction, delivering the signal on the way. However, if we take a fatal signal during fixing up a uaccess, this results in a return to the faulting kernel instruction, which will be instantly retried, resulting in the same fault being taken forever. As the task never reaches userspace, the signal is not delivered, and the task is left unkillable. While the task is stuck in this state, it can inhibit the forward progress of the system. To avoid this, we must ensure that when a fatal signal is pending, we apply any necessary fixup for a faulting kernel instruction. Thus we will return to an error path, and it is up to that code to make forward progress towards delivering the fatal signal. Signed-off-by: Mark Rutland Reviewed-by: Steve Capper Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/fault.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 0122ad1a6027..f7861dc83182 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -314,8 +314,11 @@ retry: * signal first. We do not need to release the mmap_sem because * it would already be released in __lock_page_or_retry in * mm/filemap.c. */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + if (!user_mode(regs)) + goto no_context; return 0; + } /* * Major/minor page fault accounting is only done on the -- cgit v1.2.3 From a70912a6bfff1289a2461e6b99a97f462fd14756 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Sep 2017 21:28:11 -0400 Subject: NFS: Fix 2 use after free issues in the I/O code commit 196639ebbe63a037fe9a80669140bd292d8bcd80 upstream. The writeback code wants to send a commit after processing the pages, which is why we want to delay releasing the struct path until after that's done. Also, the layout code expects that we do not free the inode before we've put the layout segments in pnfs_writehdr_free() and pnfs_readhdr_free() Fixes: 919e3bd9a875 ("NFS: Ensure we commit after writeback is complete") Fixes: 4714fb51fd03 ("nfs: remove pgio_header refcount, related cleanup") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/internal.h | 1 - fs/nfs/pagelist.c | 26 ++++++++++++-------------- fs/nfs/pnfs.c | 2 -- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 80bcc0befb07..52ea41bce038 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -248,7 +248,6 @@ int nfs_iocounter_wait(struct nfs_lock_context *l_ctx); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 142a74f3c59b..3d17fc82b9fe 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -497,16 +497,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) } EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc); -/* - * nfs_pgio_header_free - Free a read or write header - * @hdr: The header to free - */ -void nfs_pgio_header_free(struct nfs_pgio_header *hdr) -{ - hdr->rw_ops->rw_free_header(hdr); -} -EXPORT_SYMBOL_GPL(nfs_pgio_header_free); - /** * nfs_pgio_data_destroy - make @hdr suitable for reuse * @@ -515,14 +505,24 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); * * @hdr: A header that has had nfs_generic_pgio called */ -void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) +static void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { if (hdr->args.context) put_nfs_open_context(hdr->args.context); if (hdr->page_array.pagevec != hdr->page_array.page_array) kfree(hdr->page_array.pagevec); } -EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); + +/* + * nfs_pgio_header_free - Free a read or write header + * @hdr: The header to free + */ +void nfs_pgio_header_free(struct nfs_pgio_header *hdr) +{ + nfs_pgio_data_destroy(hdr); + hdr->rw_ops->rw_free_header(hdr); +} +EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call @@ -636,7 +636,6 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); static void nfs_pgio_error(struct nfs_pgio_header *hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } @@ -647,7 +646,6 @@ static void nfs_pgio_error(struct nfs_pgio_header *hdr) static void nfs_pgio_release(void *calldata) { struct nfs_pgio_header *hdr = calldata; - nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 415d7e69bc5e..b7a07ba8783a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2145,7 +2145,6 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); mirror->pg_recoalesce = 1; } - nfs_pgio_data_destroy(hdr); hdr->release(hdr); } @@ -2257,7 +2256,6 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); mirror->pg_recoalesce = 1; } - nfs_pgio_data_destroy(hdr); hdr->release(hdr); } -- cgit v1.2.3 From 3885bc68ae143c9d69eec76066049bf33a89a9d6 Mon Sep 17 00:00:00 2001 From: "tarangg@amazon.com" Date: Thu, 7 Sep 2017 09:29:23 -0400 Subject: NFS: Sync the correct byte range during synchronous writes commit e973b1a5999e57da677ab50da5f5479fdc0f0c31 upstream. Since commit 18290650b1c8 ("NFS: Move buffered I/O locking into nfs_file_write()") nfs_file_write() has not flushed the correct byte range during synchronous writes. generic_write_sync() expects that iocb->ki_pos points to the right edge of the range rather than the left edge. To replicate the problem, open a file with O_DSYNC, have the client write at increasing offsets, and then print the successful offsets. Block port 2049 partway through that sequence, and observe that the client application indicates successful writes in advance of what the server received. Fixes: 18290650b1c8 ("NFS: Move buffered I/O locking into nfs_file_write()") Signed-off-by: Jacob Strauss Signed-off-by: Tarang Gupta Tested-by: Tarang Gupta Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 84c1cb9237d0..1eec947c562d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -636,11 +636,11 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result <= 0) goto out; - result = generic_write_sync(iocb, result); - if (result < 0) - goto out; written = result; iocb->ki_pos += written; + result = generic_write_sync(iocb, written); + if (result < 0) + goto out; /* Return error values */ if (nfs_need_check_write(file, inode)) { -- cgit v1.2.3 From 5b82e0e938af5d9dfb038e2483cb2a84e24584fd Mon Sep 17 00:00:00 2001 From: Richard Wareing Date: Wed, 13 Sep 2017 09:09:35 +1000 Subject: xfs: XFS_IS_REALTIME_INODE() should be false if no rt device present commit b31ff3cdf540110da4572e3e29bd172087af65cc upstream. If using a kernel with CONFIG_XFS_RT=y and we set the RHINHERIT flag on a directory in a filesystem that does not have a realtime device and create a new file in that directory, it gets marked as a real time file. When data is written and a fsync is issued, the filesystem attempts to flush a non-existent rt device during the fsync process. This results in a crash dereferencing a null buftarg pointer in xfs_blkdev_issue_flush(): BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: xfs_blkdev_issue_flush+0xd/0x20 ..... Call Trace: xfs_file_fsync+0x188/0x1c0 vfs_fsync_range+0x3b/0xa0 do_fsync+0x3d/0x70 SyS_fsync+0x10/0x20 do_syscall_64+0x4d/0xb0 entry_SYSCALL64_slow_path+0x25/0x25 Setting RT inode flags does not require special privileges so any unprivileged user can cause this oops to occur. To reproduce, confirm kernel is compiled with CONFIG_XFS_RT=y and run: # mkfs.xfs -f /dev/pmem0 # mount /dev/pmem0 /mnt/test # mkdir /mnt/test/foo # xfs_io -c 'chattr +t' /mnt/test/foo # xfs_io -f -c 'pwrite 0 5m' -c fsync /mnt/test/foo/bar Or just run xfstests with MKFS_OPTIONS="-d rtinherit=1" and wait. Kernels built with CONFIG_XFS_RT=n are not exposed to this bug. Fixes: f538d4da8d52 ("[XFS] write barrier support") Signed-off-by: Richard Wareing Signed-off-by: Dave Chinner Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_linux.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 1455b25205a8..3ebed168e508 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -363,7 +363,14 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) #endif /* DEBUG */ #ifdef CONFIG_XFS_RT -#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) + +/* + * make sure we ignore the inode flag if the filesystem doesn't have a + * configured realtime device. + */ +#define XFS_IS_REALTIME_INODE(ip) \ + (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \ + (ip)->i_mount->m_rtdev_targp) #else #define XFS_IS_REALTIME_INODE(ip) (0) #endif -- cgit v1.2.3 From 4ad5dcaca7428dd2bc1a6a40c948e3799c1e27ae Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Sep 2017 14:13:54 -0700 Subject: Linux 4.9.50 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1ebc553f5464..038d126a15fc 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 49 +SUBLEVEL = 50 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3 From dccb31be7ef8984b8fa636b65f74b662db6b3cb3 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 18 Aug 2017 14:40:53 +0200 Subject: ipv6: accept 64k - 1 packet length in ip6_find_1stfragopt() [ Upstream commit 3de33e1ba0506723ab25734e098cf280ecc34756 ] A packet length of exactly IPV6_MAXPLEN is allowed, we should refuse parsing options only if the size is 64KiB or more. While at it, remove one extra variable and one assignment which were also introduced by the commit that introduced the size check. Checking the sum 'offset + len' and only later adding 'len' to 'offset' doesn't provide any advantage over directly summing to 'offset' and checking it. Fixes: 6399f1fae4ec ("ipv6: avoid overflow of offset in ip6_find_1stfragopt") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/output_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index abb2c307fbe8..a338bbc33cf3 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -86,7 +86,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; - unsigned int len; switch (**nexthdr) { @@ -112,10 +111,9 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); - len = ipv6_optlen(exthdr); - if (len + offset >= IPV6_MAXPLEN) + offset += ipv6_optlen(exthdr); + if (offset > IPV6_MAXPLEN) return -EINVAL; - offset += len; *nexthdr = &exthdr->nexthdr; } -- cgit v1.2.3 From 7f8f23fc8026a7a4f29f49c18a2ebbb529ee3916 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 21 Aug 2017 09:47:10 -0700 Subject: ipv6: add rcu grace period before freeing fib6_node [ Upstream commit c5cff8561d2d0006e972bd114afd51f082fee77c ] We currently keep rt->rt6i_node pointing to the fib6_node for the route. And some functions make use of this pointer to dereference the fib6_node from rt structure, e.g. rt6_check(). However, as there is neither refcount nor rcu taken when dereferencing rt->rt6i_node, it could potentially cause crashes as rt->rt6i_node could be set to NULL by other CPUs when doing a route deletion. This patch introduces an rcu grace period before freeing fib6_node and makes sure the functions that dereference it takes rcu_read_lock(). Note: there is no "Fixes" tag because this bug was there in a very early stage. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_fib.h | 30 +++++++++++++++++++++++++++++- net/ipv6/ip6_fib.c | 20 ++++++++++++++++---- net/ipv6/route.c | 14 +++++++++++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a74e2aa40ef4..c17180118897 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -68,6 +68,7 @@ struct fib6_node { __u16 fn_flags; int fn_sernum; struct rt6_info *rr_ptr; + struct rcu_head rcu; }; #ifndef CONFIG_IPV6_SUBTREES @@ -165,13 +166,40 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } +/* Function to safely get fn->sernum for passed in rt + * and store result in passed in cookie. + * Return true if we can get cookie safely + * Return false if not + */ +static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, + u32 *cookie) +{ + struct fib6_node *fn; + bool status = false; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + + if (fn) { + *cookie = fn->fn_sernum; + status = true; + } + + rcu_read_unlock(); + return status; +} + static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + u32 cookie = 0; + if (rt->rt6i_flags & RTF_PCPU || (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); - return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + rt6_get_cookie_safe(rt, &cookie); + + return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ff389591a340..ed832b3d9b70 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -148,11 +148,23 @@ static struct fib6_node *node_alloc(void) return fn; } -static void node_free(struct fib6_node *fn) +static void node_free_immediate(struct fib6_node *fn) +{ + kmem_cache_free(fib6_node_kmem, fn); +} + +static void node_free_rcu(struct rcu_head *head) { + struct fib6_node *fn = container_of(head, struct fib6_node, rcu); + kmem_cache_free(fib6_node_kmem, fn); } +static void node_free(struct fib6_node *fn) +{ + call_rcu(&fn->rcu, node_free_rcu); +} + static void rt6_rcu_free(struct rt6_info *rt) { call_rcu(&rt->dst.rcu_head, dst_rcu_free); @@ -589,9 +601,9 @@ insert_above: if (!in || !ln) { if (in) - node_free(in); + node_free_immediate(in); if (ln) - node_free(ln); + node_free_immediate(ln); return ERR_PTR(-ENOMEM); } @@ -1020,7 +1032,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, root, and then (in failure) stale node in main tree. */ - node_free(sfn); + node_free_immediate(sfn); err = PTR_ERR(sn); goto failure; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5764a84465f8..632987ffc07d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1267,7 +1267,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt) static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) { - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + u32 rt_cookie; + + if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -1335,8 +1337,14 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt->rt6i_flags & RTF_CACHE) { dst_hold(&rt->dst); ip6_del_rt(rt); - } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { - rt->rt6i_node->fn_sernum = -1; + } else { + struct fib6_node *fn; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + if (fn && (rt->rt6i_flags & RTF_DEFAULT)) + fn->fn_sernum = -1; + rcu_read_unlock(); } } } -- cgit v1.2.3 From 43c792a8488087668f7e1052201e2eeb32150141 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 25 Aug 2017 15:03:10 -0700 Subject: ipv6: fix sparse warning on rt6i_node [ Upstream commit 4e587ea71bf924f7dac621f1351653bd41e446cb ] Commit c5cff8561d2d adds rcu grace period before freeing fib6_node. This generates a new sparse warning on rt->rt6i_node related code: net/ipv6/route.c:1394:30: error: incompatible types in comparison expression (different address spaces) ./include/net/ip6_fib.h:187:14: error: incompatible types in comparison expression (different address spaces) This commit adds "__rcu" tag for rt6i_node and makes sure corresponding rcu API is used for it. After this fix, sparse no longer generates the above warning. Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_fib.h | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 11 +++++++---- net/ipv6/route.c | 3 ++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c17180118897..a6bcb18ac4c3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -103,7 +103,7 @@ struct rt6_info { * the same cache line. */ struct fib6_table *rt6i_table; - struct fib6_node *rt6i_node; + struct fib6_node __rcu *rt6i_node; struct in6_addr rt6i_gateway; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b2cabda72320..cc101b1be903 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5443,7 +5443,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * our DAD process, so we don't need * to do it again */ - if (!(ifp->rt->rt6i_node)) + if (!rcu_access_pointer(ifp->rt->rt6i_node)) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ed832b3d9b70..af7442211ffb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -874,7 +874,7 @@ add: rt->dst.rt6_next = iter; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; @@ -899,7 +899,7 @@ add: return err; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); @@ -1459,8 +1459,9 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, int fib6_del(struct rt6_info *rt, struct nl_info *info) { + struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); struct net *net = info->nl_net; - struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; #if RT6_DEBUG >= 2 @@ -1649,7 +1650,9 @@ static int fib6_clean_node(struct fib6_walker *w) if (res) { #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", - __func__, rt, rt->rt6i_node, res); + __func__, rt, + rcu_access_pointer(rt->rt6i_node), + res); #endif continue; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 632987ffc07d..9c2dd3f77cdb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1361,7 +1361,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); + (rt->rt6i_flags & RTF_PCPU || + rcu_access_pointer(rt->rt6i_node)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, -- cgit v1.2.3 From 4b4a194a10e2a2dd7bf3f90016b56ac495a1d37e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 22 Aug 2017 15:36:08 +0200 Subject: macsec: add genl family module alias [ Upstream commit 78362998f58c7c271e2719dcd0aaced435c801f9 ] This helps tools such as wpa_supplicant can start even if the macsec module isn't loaded yet. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index a5d66e205bb2..2caac0c37059 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3510,6 +3510,7 @@ module_init(macsec_init); module_exit(macsec_exit); MODULE_ALIAS_RTNL_LINK("macsec"); +MODULE_ALIAS_GENL_FAMILY("macsec"); MODULE_DESCRIPTION("MACsec IEEE 802.1AE"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 1e39e5c6a2ea1f488ad13d351d6c55a5ef530666 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Aug 2017 09:39:28 -0700 Subject: udp: on peeking bad csum, drop packets even if not at head [ Upstream commit fd6055a806edc4019be1b9fb7d25262599bca5b1 ] When peeking, if a bad csum is discovered, the skb is unlinked from the queue with __sk_queue_drop_skb and the peek operation restarted. __sk_queue_drop_skb only drops packets that match the queue head. This fails if the skb was found after the head, using SO_PEEK_OFF socket option. This causes an infinite loop. We MUST drop this problematic skb, and we can simply check if skb was already removed by another thread, by looking at skb->next : This pointer is set to NULL by the __skb_unlink() operation, that might have happened only under the spinlock protection. Many thanks to syzkaller team (and particularly Dmitry Vyukov who provided us nice C reproducers exhibiting the lockup) and Willem de Bruijn who provided first version for this patch and a test program. Fixes: 627d2d6b5500 ("udp: enable MSG_PEEK at non-zero offset") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Willem de Bruijn Acked-by: Paolo Abeni Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 58dfa23d12ca..4fa4011feec1 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -351,7 +351,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) if (flags & MSG_PEEK) { err = -ENOENT; spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { + if (skb->next) { __skb_unlink(skb, &sk->sk_receive_queue); atomic_dec(&skb->users); err = 0; -- cgit v1.2.3 From 4d8ee1935bcd666360311dfdadeee235d682d69a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 22 Aug 2017 15:24:47 -0700 Subject: fsl/man: Inherit parent device and of_node [ Upstream commit a1a50c8e4c241a505b7270e1a3c6e50d94e794b1 ] Junote Cai reported that he was not able to get a DSA setup involving the Freescale DPAA/FMAN driver to work and narrowed it down to of_find_net_device_by_node(). This function requires the network device's device reference to be correctly set which is the case here, though we have lost any device_node association there. The problem is that dpaa_eth_add_device() allocates a "dpaa-ethernet" platform device, and later on dpaa_eth_probe() is called but SET_NETDEV_DEV() won't be propagating &pdev->dev.of_node properly. Fix this by inherenting both the parent device and the of_node when dpaa_eth_add_device() creates the platform device. Fixes: 3933961682a3 ("fsl/fman: Add FMan MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fman/mac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 736db9d9b0ad..81021f87e4f3 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -622,6 +622,9 @@ static struct platform_device *dpaa_eth_add_device(int fman_id, goto no_mem; } + pdev->dev.of_node = node; + pdev->dev.parent = priv->dev; + ret = platform_device_add_data(pdev, &data, sizeof(data)); if (ret) goto err; -- cgit v1.2.3 From 08d56d8a99bb82e134ba7704e4cfdabbcc16fc4f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 23 Aug 2017 13:27:13 +0200 Subject: sctp: Avoid out-of-bounds reads from address storage [ Upstream commit ee6c88bb754e3d363e568da78086adfedb692447 ] inet_diag_msg_sctp{,l}addr_fill() and sctp_get_sctp_info() copy sizeof(sockaddr_storage) bytes to fill in sockaddr structs used to export diagnostic information to userspace. However, the memory allocated to store sockaddr information is smaller than that and depends on the address family, so we leak up to 100 uninitialized bytes to userspace. Just use the size of the source structs instead, in all the three cases this is what userspace expects. Zero out the remaining memory. Unused bytes (i.e. when IPv4 addresses are used) in source structs sctp_sockaddr_entry and sctp_transport are already cleared by sctp_add_bind_addr() and sctp_transport_new(), respectively. Noticed while testing KASAN-enabled kernel with 'ss': [ 2326.885243] BUG: KASAN: slab-out-of-bounds in inet_sctp_diag_fill+0x42c/0x6c0 [sctp_diag] at addr ffff881be8779800 [ 2326.896800] Read of size 128 by task ss/9527 [ 2326.901564] CPU: 0 PID: 9527 Comm: ss Not tainted 4.11.0-22.el7a.x86_64 #1 [ 2326.909236] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017 [ 2326.917585] Call Trace: [ 2326.920312] dump_stack+0x63/0x8d [ 2326.924014] kasan_object_err+0x21/0x70 [ 2326.928295] kasan_report+0x288/0x540 [ 2326.932380] ? inet_sctp_diag_fill+0x42c/0x6c0 [sctp_diag] [ 2326.938500] ? skb_put+0x8b/0xd0 [ 2326.942098] ? memset+0x31/0x40 [ 2326.945599] check_memory_region+0x13c/0x1a0 [ 2326.950362] memcpy+0x23/0x50 [ 2326.953669] inet_sctp_diag_fill+0x42c/0x6c0 [sctp_diag] [ 2326.959596] ? inet_diag_msg_sctpasoc_fill+0x460/0x460 [sctp_diag] [ 2326.966495] ? __lock_sock+0x102/0x150 [ 2326.970671] ? sock_def_wakeup+0x60/0x60 [ 2326.975048] ? remove_wait_queue+0xc0/0xc0 [ 2326.979619] sctp_diag_dump+0x44a/0x760 [sctp_diag] [ 2326.985063] ? sctp_ep_dump+0x280/0x280 [sctp_diag] [ 2326.990504] ? memset+0x31/0x40 [ 2326.994007] ? mutex_lock+0x12/0x40 [ 2326.997900] __inet_diag_dump+0x57/0xb0 [inet_diag] [ 2327.003340] ? __sys_sendmsg+0x150/0x150 [ 2327.007715] inet_diag_dump+0x4d/0x80 [inet_diag] [ 2327.012979] netlink_dump+0x1e6/0x490 [ 2327.017064] __netlink_dump_start+0x28e/0x2c0 [ 2327.021924] inet_diag_handler_cmd+0x189/0x1a0 [inet_diag] [ 2327.028045] ? inet_diag_rcv_msg_compat+0x1b0/0x1b0 [inet_diag] [ 2327.034651] ? inet_diag_dump_compat+0x190/0x190 [inet_diag] [ 2327.040965] ? __netlink_lookup+0x1b9/0x260 [ 2327.045631] sock_diag_rcv_msg+0x18b/0x1e0 [ 2327.050199] netlink_rcv_skb+0x14b/0x180 [ 2327.054574] ? sock_diag_bind+0x60/0x60 [ 2327.058850] sock_diag_rcv+0x28/0x40 [ 2327.062837] netlink_unicast+0x2e7/0x3b0 [ 2327.067212] ? netlink_attachskb+0x330/0x330 [ 2327.071975] ? kasan_check_write+0x14/0x20 [ 2327.076544] netlink_sendmsg+0x5be/0x730 [ 2327.080918] ? netlink_unicast+0x3b0/0x3b0 [ 2327.085486] ? kasan_check_write+0x14/0x20 [ 2327.090057] ? selinux_socket_sendmsg+0x24/0x30 [ 2327.095109] ? netlink_unicast+0x3b0/0x3b0 [ 2327.099678] sock_sendmsg+0x74/0x80 [ 2327.103567] ___sys_sendmsg+0x520/0x530 [ 2327.107844] ? __get_locked_pte+0x178/0x200 [ 2327.112510] ? copy_msghdr_from_user+0x270/0x270 [ 2327.117660] ? vm_insert_page+0x360/0x360 [ 2327.122133] ? vm_insert_pfn_prot+0xb4/0x150 [ 2327.126895] ? vm_insert_pfn+0x32/0x40 [ 2327.131077] ? vvar_fault+0x71/0xd0 [ 2327.134968] ? special_mapping_fault+0x69/0x110 [ 2327.140022] ? __do_fault+0x42/0x120 [ 2327.144008] ? __handle_mm_fault+0x1062/0x17a0 [ 2327.148965] ? __fget_light+0xa7/0xc0 [ 2327.153049] __sys_sendmsg+0xcb/0x150 [ 2327.157133] ? __sys_sendmsg+0xcb/0x150 [ 2327.161409] ? SyS_shutdown+0x140/0x140 [ 2327.165688] ? exit_to_usermode_loop+0xd0/0xd0 [ 2327.170646] ? __do_page_fault+0x55d/0x620 [ 2327.175216] ? __sys_sendmsg+0x150/0x150 [ 2327.179591] SyS_sendmsg+0x12/0x20 [ 2327.183384] do_syscall_64+0xe3/0x230 [ 2327.187471] entry_SYSCALL64_slow_path+0x25/0x25 [ 2327.192622] RIP: 0033:0x7f41d18fa3b0 [ 2327.196608] RSP: 002b:00007ffc3b731218 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 2327.205055] RAX: ffffffffffffffda RBX: 00007ffc3b731380 RCX: 00007f41d18fa3b0 [ 2327.213017] RDX: 0000000000000000 RSI: 00007ffc3b731340 RDI: 0000000000000003 [ 2327.220978] RBP: 0000000000000002 R08: 0000000000000004 R09: 0000000000000040 [ 2327.228939] R10: 00007ffc3b730f30 R11: 0000000000000246 R12: 0000000000000003 [ 2327.236901] R13: 00007ffc3b731340 R14: 00007ffc3b7313d0 R15: 0000000000000084 [ 2327.244865] Object at ffff881be87797e0, in cache kmalloc-64 size: 64 [ 2327.251953] Allocated: [ 2327.254581] PID = 9484 [ 2327.257215] save_stack_trace+0x1b/0x20 [ 2327.261485] save_stack+0x46/0xd0 [ 2327.265179] kasan_kmalloc+0xad/0xe0 [ 2327.269165] kmem_cache_alloc_trace+0xe6/0x1d0 [ 2327.274138] sctp_add_bind_addr+0x58/0x180 [sctp] [ 2327.279400] sctp_do_bind+0x208/0x310 [sctp] [ 2327.284176] sctp_bind+0x61/0xa0 [sctp] [ 2327.288455] inet_bind+0x5f/0x3a0 [ 2327.292151] SYSC_bind+0x1a4/0x1e0 [ 2327.295944] SyS_bind+0xe/0x10 [ 2327.299349] do_syscall_64+0xe3/0x230 [ 2327.303433] return_from_SYSCALL_64+0x0/0x6a [ 2327.308194] Freed: [ 2327.310434] PID = 4131 [ 2327.313065] save_stack_trace+0x1b/0x20 [ 2327.317344] save_stack+0x46/0xd0 [ 2327.321040] kasan_slab_free+0x73/0xc0 [ 2327.325220] kfree+0x96/0x1a0 [ 2327.328530] dynamic_kobj_release+0x15/0x40 [ 2327.333195] kobject_release+0x99/0x1e0 [ 2327.337472] kobject_put+0x38/0x70 [ 2327.341266] free_notes_attrs+0x66/0x80 [ 2327.345545] mod_sysfs_teardown+0x1a5/0x270 [ 2327.350211] free_module+0x20/0x2a0 [ 2327.354099] SyS_delete_module+0x2cb/0x2f0 [ 2327.358667] do_syscall_64+0xe3/0x230 [ 2327.362750] return_from_SYSCALL_64+0x0/0x6a [ 2327.367510] Memory state around the buggy address: [ 2327.372855] ffff881be8779700: fc fc fc fc 00 00 00 00 00 00 00 00 fc fc fc fc [ 2327.380914] ffff881be8779780: fb fb fb fb fb fb fb fb fc fc fc fc 00 00 00 00 [ 2327.388972] >ffff881be8779800: 00 00 00 00 fc fc fc fc fb fb fb fb fb fb fb fb [ 2327.397031] ^ [ 2327.401792] ffff881be8779880: fc fc fc fc fb fb fb fb fb fb fb fb fc fc fc fc [ 2327.409850] ffff881be8779900: 00 00 00 00 00 04 fc fc fc fc fc fc 00 00 00 00 [ 2327.417907] ================================================================== This fixes CVE-2017-7558. References: https://bugzilla.redhat.com/show_bug.cgi?id=1480266 Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Cc: Xin Long Cc: Vlad Yasevich Cc: Neil Horman Signed-off-by: Stefano Brivio Acked-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sctp_diag.c | 7 +++++-- net/sctp/socket.c | 3 +-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 048954eee984..e8f56b7c5afb 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, info = nla_data(attr); list_for_each_entry_rcu(laddr, address_list, list) { - memcpy(info, &laddr->a, addrlen); + memcpy(info, &laddr->a, sizeof(laddr->a)); + memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; } @@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb, info = nla_data(attr); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { - memcpy(info, &from->ipaddr, addrlen); + memcpy(info, &from->ipaddr, sizeof(from->ipaddr)); + memset(info + sizeof(from->ipaddr), 0, + addrlen - sizeof(from->ipaddr)); info += addrlen; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9647e314d4fc..3ef725229449 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4373,8 +4373,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; prim = asoc->peer.primary_path; - memcpy(&info->sctpi_p_address, &prim->ipaddr, - sizeof(struct sockaddr_storage)); + memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr)); info->sctpi_p_state = prim->state; info->sctpi_p_cwnd = prim->cwnd; info->sctpi_p_srtt = prim->srtt; -- cgit v1.2.3 From 64dfc67548da52fe7891decf725342a8e87e32d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Aug 2017 15:59:49 +0200 Subject: qlge: avoid memcpy buffer overflow [ Upstream commit e58f95831e7468d25eb6e41f234842ecfe6f014f ] gcc-8.0.0 (snapshot) points out that we copy a variable-length string into a fixed length field using memcpy() with the destination length, and that ends up copying whatever follows the string: inlined from 'ql_core_dump' at drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:1106:2: drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:708:2: error: 'memcpy' reading 15 bytes from a region of size 14 [-Werror=stringop-overflow=] memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); Changing it to use strncpy() will instead zero-pad the destination, which seems to be the right thing to do here. The bug is probably harmless, but it seems like a good idea to address it in stable kernels as well, if only for the purpose of building with gcc-8 without warnings. Fixes: a61f80261306 ("qlge: Add ethtool register dump function.") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index 829be21f97b2..be258d90de9e 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -724,7 +724,7 @@ static void ql_build_coredump_seg_header( seg_hdr->cookie = MPI_COREDUMP_COOKIE; seg_hdr->segNum = seg_number; seg_hdr->segSize = seg_size; - memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); + strncpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); } /* -- cgit v1.2.3 From de2ecec26dba848c729e51faaf2b4daf35096330 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 24 Aug 2017 16:49:16 -0700 Subject: netvsc: fix deadlock betwen link status and removal [ Upstream commit 9b4e946ce14e20d7addbfb7d9139e604f9fda107 ] There is a deadlock possible when canceling the link status delayed work queue. The removal process is run with RTNL held, and the link status callback is acquring RTNL. Resolve the issue by using trylock and rescheduling. If cancel is in process, that block it from happening. Fixes: 122a5f6410f4 ("staging: hv: use delayed_work for netvsc_send_garp()") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index ff038e507fd6..36a04e182af1 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1084,7 +1084,12 @@ static void netvsc_link_change(struct work_struct *w) bool notify = false, reschedule = false; unsigned long flags, next_reconfig, delay; - rtnl_lock(); + /* if changes are happening, comeback later */ + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT); + return; + } + if (ndev_ctx->start_remove) goto out_unlock; -- cgit v1.2.3 From 2b3bd5972a5ce434b3f2211181e72033efe018d9 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 25 Aug 2017 22:48:48 +0200 Subject: cxgb4: Fix stack out-of-bounds read due to wrong size to t4_record_mbox() [ Upstream commit 0f3086868e8889a823a6e0f3d299102aa895d947 ] Passing commands for logging to t4_record_mbox() with size MBOX_LEN, when the actual command size is actually smaller, causes out-of-bounds stack accesses in t4_record_mbox() while copying command words here: for (i = 0; i < size / 8; i++) entry->cmd[i] = be64_to_cpu(cmd[i]); Up to 48 bytes from the stack are then leaked to debugfs. This happens whenever we send (and log) commands described by structs fw_sched_cmd (32 bytes leaked), fw_vi_rxmode_cmd (48), fw_hello_cmd (48), fw_bye_cmd (48), fw_initialize_cmd (48), fw_reset_cmd (48), fw_pfvf_cmd (32), fw_eq_eth_cmd (16), fw_eq_ctrl_cmd (32), fw_eq_ofld_cmd (32), fw_acl_mac_cmd(16), fw_rss_glb_config_cmd(32), fw_rss_vi_config_cmd(32), fw_devlog_cmd(32), fw_vi_enable_cmd(48), fw_port_cmd(32), fw_sched_cmd(32), fw_devlog_cmd(32). The cxgb4vf driver got this right instead. When we call t4_record_mbox() to log a command reply, a MBOX_LEN size can be used though, as get_mbox_rpl() will fill cmd_rpl up completely. Fixes: 7f080c3f2ff0 ("cxgb4: Add support to enable logging of firmware mailbox commands") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e8139514d32c..9e073fb6870a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -317,12 +317,12 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, if (v != MBOX_OWNER_DRV) { ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT; - t4_record_mbox(adap, cmd, MBOX_LEN, access, ret); + t4_record_mbox(adap, cmd, size, access, ret); return ret; } /* Copy in the new mailbox command and send it on its way ... */ - t4_record_mbox(adap, cmd, MBOX_LEN, access, 0); + t4_record_mbox(adap, cmd, size, access, 0); for (i = 0; i < size; i += 8) t4_write_reg64(adap, data_reg + i, be64_to_cpu(*p++)); @@ -371,7 +371,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, } ret = (pcie_fw & PCIE_FW_ERR_F) ? -ENXIO : -ETIMEDOUT; - t4_record_mbox(adap, cmd, MBOX_LEN, access, ret); + t4_record_mbox(adap, cmd, size, access, ret); dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n", *(const u8 *)cmd, mbox); t4_report_fw_error(adap); -- cgit v1.2.3 From 8c623e5d03692dc478277185a0b907d53aea1b43 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 28 Aug 2017 14:29:41 -0400 Subject: packet: Don't write vnet header beyond end of buffer [ Upstream commit edbd58be15a957f6a760c4a514cd475217eb97fd ] ... which may happen with certain values of tp_reserve and maclen. Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv") Signed-off-by: Benjamin Poirier Cc: Willem de Bruijn Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ae7bfd26cd91..35ba4b60d927 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2151,6 +2151,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; __u32 ts_status; bool is_drop_n_account = false; + bool do_vnet = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing @@ -2201,8 +2202,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; - if (po->has_vnet_hdr) + if (po->has_vnet_hdr) { netoff += sizeof(struct virtio_net_hdr); + do_vnet = true; + } macoff = netoff - maclen; } if (po->tp_version <= TPACKET_V2) { @@ -2219,8 +2222,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, skb_set_owner_r(copy_skb, sk); } snaplen = po->rx_ring.frame_size - macoff; - if ((int)snaplen < 0) + if ((int)snaplen < 0) { snaplen = 0; + do_vnet = false; + } } } else if (unlikely(macoff + snaplen > GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { @@ -2233,6 +2238,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely((int)snaplen < 0)) { snaplen = 0; macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; + do_vnet = false; } } spin_lock(&sk->sk_receive_queue.lock); @@ -2258,7 +2264,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } spin_unlock(&sk->sk_receive_queue.lock); - if (po->has_vnet_hdr) { + if (do_vnet) { if (__packet_rcv_vnet(skb, h.raw + macoff - sizeof(struct virtio_net_hdr))) { spin_lock(&sk->sk_receive_queue.lock); -- cgit v1.2.3 From af33da0ed95f6a7b652f774fbb07fb52d2c21a97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Aug 2017 09:29:31 -0700 Subject: kcm: do not attach PF_KCM sockets to avoid deadlock [ Upstream commit 351050ecd6523374b370341cc29fe61e2201556b ] syzkaller had no problem to trigger a deadlock, attaching a KCM socket to another one (or itself). (original syzkaller report was a very confusing lockdep splat during a sendmsg()) It seems KCM claims to only support TCP, but no enforcement is done, so we might need to add additional checks. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Acked-by: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index fecad1098cf8..7eb0e8fe3ca8 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1381,6 +1381,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock, if (!csk) return -EINVAL; + /* We must prevent loops or risk deadlock ! */ + if (csk->sk_family == PF_KCM) + return -EOPNOTSUPP; + psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); if (!psock) return -ENOMEM; -- cgit v1.2.3 From a6e51fda71a205fbd8f7b98da799c46e563c3db1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 30 Aug 2017 17:49:29 -0700 Subject: Revert "net: phy: Correctly process PHY_HALTED in phy_stop_machine()" [ Upstream commit ebc8254aeae34226d0bc8fda309fd9790d4dccfe ] This reverts commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ("net: phy: Correctly process PHY_HALTED in phy_stop_machine()") because it is creating the possibility for a NULL pointer dereference. David Daney provide the following call trace and diagram of events: When ndo_stop() is called we call: phy_disconnect() +---> phy_stop_interrupts() implies: phydev->irq = PHY_POLL; +---> phy_stop_machine() | +---> phy_state_machine() | +----> queue_delayed_work(): Work queued. +--->phy_detach() implies: phydev->attached_dev = NULL; Now at a later time the queued work does: phy_state_machine() +---->netif_carrier_off(phydev->attached_dev): Oh no! It is NULL: CPU 12 Unable to handle kernel paging request at virtual address 0000000000000048, epc == ffffffff80de37ec, ra == ffffffff80c7c Oops[#1]: CPU: 12 PID: 1502 Comm: kworker/12:1 Not tainted 4.9.43-Cavium-Octeon+ #1 Workqueue: events_power_efficient phy_state_machine task: 80000004021ed100 task.stack: 8000000409d70000 $ 0 : 0000000000000000 ffffffff84720060 0000000000000048 0000000000000004 $ 4 : 0000000000000000 0000000000000001 0000000000000004 0000000000000000 $ 8 : 0000000000000000 0000000000000000 00000000ffff98f3 0000000000000000 $12 : 8000000409d73fe0 0000000000009c00 ffffffff846547c8 000000000000af3b $16 : 80000004096bab68 80000004096babd0 0000000000000000 80000004096ba800 $20 : 0000000000000000 0000000000000000 ffffffff81090000 0000000000000008 $24 : 0000000000000061 ffffffff808637b0 $28 : 8000000409d70000 8000000409d73cf0 80000000271bd300 ffffffff80c7804c Hi : 000000000000002a Lo : 000000000000003f epc : ffffffff80de37ec netif_carrier_off+0xc/0x58 ra : ffffffff80c7804c phy_state_machine+0x48c/0x4f8 Status: 14009ce3 KX SX UX KERNEL EXL IE Cause : 00800008 (ExcCode 02) BadVA : 0000000000000048 PrId : 000d9501 (Cavium Octeon III) Modules linked in: Process kworker/12:1 (pid: 1502, threadinfo=8000000409d70000, task=80000004021ed100, tls=0000000000000000) Stack : 8000000409a54000 80000004096bab68 80000000271bd300 80000000271c1e00 0000000000000000 ffffffff808a1708 8000000409a54000 80000000271bd300 80000000271bd320 8000000409a54030 ffffffff80ff0f00 0000000000000001 ffffffff81090000 ffffffff808a1ac0 8000000402182080 ffffffff84650000 8000000402182080 ffffffff84650000 ffffffff80ff0000 8000000409a54000 ffffffff808a1970 0000000000000000 80000004099e8000 8000000402099240 0000000000000000 ffffffff808a8598 0000000000000000 8000000408eeeb00 8000000409a54000 00000000810a1d00 0000000000000000 8000000409d73de8 8000000409d73de8 0000000000000088 000000000c009c00 8000000409d73e08 8000000409d73e08 8000000402182080 ffffffff808a84d0 8000000402182080 ... Call Trace: [] netif_carrier_off+0xc/0x58 [] phy_state_machine+0x48c/0x4f8 [] process_one_work+0x158/0x368 [] worker_thread+0x150/0x4c0 [] kthread+0xc8/0xe0 [] ret_from_kernel_thread+0x14/0x1c The original motivation for this change originated from Marc Gonzales indicating that his network driver did not have its adjust_link callback executing with phydev->link = 0 while he was expecting it. PHYLIB has never made any such guarantees ever because phy_stop() merely just tells the workqueue to move into PHY_HALTED state which will happen asynchronously. Reported-by: Geert Uytterhoeven Reported-by: David Daney Fixes: 7ad813f20853 ("net: phy: Correctly process PHY_HALTED in phy_stop_machine()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 775a6e1fdef9..6e12401b5102 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -674,9 +674,6 @@ void phy_stop_machine(struct phy_device *phydev) if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); - - /* Now we can run the state machine synchronously */ - phy_state_machine(&phydev->state_queue.work); } /** -- cgit v1.2.3 From a10c510179b369f7d1e8cf77f43ee2db900c1ac9 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 18 May 2017 11:22:33 -0700 Subject: tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0 [ Upstream commit 499350a5a6e7512d9ed369ed63a4244b6536f4f8 ] When tcp_disconnect() is called, inet_csk_delack_init() sets icsk->icsk_ack.rcv_mss to 0. This could potentially cause tcp_recvmsg() => tcp_cleanup_rbuf() => __tcp_select_window() call path to have division by 0 issue. So this patch initializes rcv_mss to TCP_MIN_MSS instead of 0. Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1a4db27f5833..6b3d27e50317 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2297,6 +2297,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); -- cgit v1.2.3 From 73ee5a73e75f3c0e5d4ca0c5a362424e93413bb0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 1 Sep 2017 10:52:31 +0200 Subject: mlxsw: spectrum: Forbid linking to devices that have uppers [ Upstream commit 25cc72a33835ed8a6f53180a822cadab855852ac ] The mlxsw driver relies on NETDEV_CHANGEUPPER events to configure the device in case a port is enslaved to a master netdev such as bridge or bond. Since the driver ignores events unrelated to its ports and their uppers, it's possible to engineer situations in which the device's data path differs from the kernel's. One example to such a situation is when a port is enslaved to a bond that is already enslaved to a bridge. When the bond was enslaved the driver ignored the event - as the bond wasn't one of its uppers - and therefore a bridge port instance isn't created in the device. Until such configurations are supported forbid them by checking that the upper device doesn't have uppers of its own. Fixes: 0d65fc13042f ("mlxsw: spectrum: Implement LAG port join/leave") Signed-off-by: Ido Schimmel Reported-by: Nogah Frankel Tested-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ++++++ include/linux/netdevice.h | 2 ++ net/core/dev.c | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f902c4d3de99..1806b1fc6e4c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4172,6 +4172,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, return -EINVAL; if (!info->linking) break; + if (netdev_has_any_upper_dev(upper_dev)) + return -EINVAL; /* HW limitation forbids to put ports to multiple bridges. */ if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) @@ -4185,6 +4187,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) return -EINVAL; + if (!info->linking) + break; + if (netdev_has_any_upper_dev(upper_dev)) + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 780e7171f548..23db1ae37464 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3901,6 +3901,8 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +bool netdev_has_any_upper_dev(struct net_device *dev); + void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); void *netdev_lower_get_next_private_rcu(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 1d0a7369d5a2..ba7b8121a414 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5337,12 +5337,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -static bool netdev_has_any_upper_dev(struct net_device *dev) +bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->all_adj_list.upper); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device -- cgit v1.2.3 From b5a3ae8b127e692d6ebf4707c4ec6db68c413024 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 1 Sep 2017 12:22:25 +0300 Subject: bridge: switchdev: Clear forward mark when transmitting packet [ Upstream commit 79e99bdd60b484af9afe0147e85a13e66d5c1cdb ] Commit 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for stacked devices") added the 'offload_fwd_mark' bit to the skb in order to allow drivers to indicate to the bridge driver that they already forwarded the packet in L2. In case the bit is set, before transmitting the packet from each port, the port's mark is compared with the mark stored in the skb's control block. If both marks are equal, we know the packet arrived from a switch device that already forwarded the packet and it's not re-transmitted. However, if the packet is transmitted from the bridge device itself (e.g., br0), we should clear the 'offload_fwd_mark' bit as the mark stored in the skb's control block isn't valid. This scenario can happen in rare cases where a packet was trapped during L3 forwarding and forwarded by the kernel to a bridge device. Fixes: 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for stacked devices") Signed-off-by: Ido Schimmel Reported-by: Yotam Gigi Tested-by: Yotam Gigi Reviewed-by: Jiri Pirko Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 89a687f3c0a3..5f5e28f210e0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -53,6 +53,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); +#ifdef CONFIG_NET_SWITCHDEV + skb->offload_fwd_mark = 0; +#endif BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); -- cgit v1.2.3 From 5a7a40bad254d2571d93059ba4b3963dc448cdb0 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:08 +0200 Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting" [ Upstream commit fb452a1aa3fd4034d7999e309c5466ff2d7005aa ] This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2. There is a bug in fragmentation codes use of the percpu_counter API, that can cause issues on systems with many CPUs. The frag_mem_limit() just reads the global counter (fbc->count), without considering other CPUs can have upto batch size (130K) that haven't been subtracted yet. Due to the 3MBytes lower thresh limit, this become dangerous at >=24 CPUs (3*1024*1024/130000=24). The correct API usage would be to use __percpu_counter_compare() which does the right thing, and takes into account the number of (online) CPUs and batch size, to account for this and call __percpu_counter_sum() when needed. We choose to revert the use of the lib/percpu_counter API for frag memory accounting for several reasons: 1) On systems with CPUs > 24, the heavier fully locked __percpu_counter_sum() is always invoked, which will be more expensive than the atomic_t that is reverted to. Given systems with more than 24 CPUs are becoming common this doesn't seem like a good option. To mitigate this, the batch size could be decreased and thresh be increased. 2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX CPU, before SKBs are pushed into sockets on remote CPUs. Given NICs can only hash on L2 part of the IP-header, the NIC-RXq's will likely be limited. Thus, a fair chance that atomic add+dec happen on the same CPU. Revert note that commit 1d6119baf061 ("net: fix percpu memory leaks") removed init_frag_mem_limit() and instead use inet_frags_init_net(). After this revert, inet_frags_uninit_net() becomes empty. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 36 +++++++++--------------------------- net/ipv4/inet_fragment.c | 4 +--- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 909972aa3acd..3bb8dfec7725 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,14 +1,9 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ -#include - struct netns_frags { - /* The percpu_counter "mem" need to be cacheline aligned. - * mem.count must not share cacheline with other writers - */ - struct percpu_counter mem ____cacheline_aligned_in_smp; - + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ int timeout; int high_thresh; @@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { - return percpu_counter_init(&nf->mem, 0, GFP_KERNEL); + atomic_set(&nf->mem, 0); + return 0; } static inline void inet_frags_uninit_net(struct netns_frags *nf) { - percpu_counter_destroy(&nf->mem); } void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); @@ -140,37 +135,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -/* The default percpu_counter batch size is not big enough to scale to - * fragmentation mem acct sizes. - * The mem size of a 64K fragment is approx: - * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes - */ -static unsigned int frag_percpu_counter_batch = 130000; - static inline int frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_read(&nf->mem); + return atomic_read(&nf->mem); } static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); + atomic_sub(i, &nf->mem); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); + atomic_add(i, &nf->mem); } -static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) +static inline int sum_frag_mem_limit(struct netns_frags *nf) { - unsigned int res; - - local_bh_disable(); - res = percpu_counter_sum_positive(&nf->mem); - local_bh_enable(); - - return res; + return atomic_read(&nf->mem); } /* RFC 3168 support : diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b5e9317eaf9e..631c0d0d7cf8 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -234,10 +234,8 @@ evict_again: cond_resched(); if (read_seqretry(&f->rnd_seqlock, seq) || - percpu_counter_sum(&nf->mem)) + sum_frag_mem_limit(nf)) goto evict_again; - - percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); -- cgit v1.2.3 From 1bcf18718ec63ad5fb025b75a5d2439e1dcf1213 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:13 +0200 Subject: Revert "net: fix percpu memory leaks" [ Upstream commit 5a63643e583b6a9789d7a225ae076fb4e603991c ] This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb. After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") then here is no need for this fix-up patch. As percpu_counter is no longer used, it cannot memory leak it any-longer. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 7 +------ net/ieee802154/6lowpan/reassembly.c | 11 +++-------- net/ipv4/ip_fragment.c | 12 +++--------- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++--------- net/ipv6/reassembly.c | 12 +++--------- 5 files changed, 13 insertions(+), 41 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 3bb8dfec7725..634d19203e7d 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -103,15 +103,10 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) +static inline void inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); - return 0; } -static inline void inet_frags_uninit_net(struct netns_frags *nf) -{ -} - void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 30d875dff6b5..f85b08baff16 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&ieee802154_lowpan->frags); - if (res) - return res; - res = lowpan_frags_ns_sysctl_register(net); - if (res) - inet_frags_uninit_net(&ieee802154_lowpan->frags); - return res; + inet_frags_init_net(&ieee802154_lowpan->frags); + + return lowpan_frags_ns_sysctl_register(net); } static void __net_exit lowpan_frags_exit_net(struct net *net) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbe7f72db9c1..453db950dc9f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -835,8 +835,6 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { - int res; - /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -862,13 +860,9 @@ static int __net_init ipv4_frags_init_net(struct net *net) net->ipv4.frags.max_dist = 64; - res = inet_frags_init_net(&net->ipv4.frags); - if (res) - return res; - res = ip4_frags_ns_ctl_register(net); - if (res) - inet_frags_uninit_net(&net->ipv4.frags); - return res; + inet_frags_init_net(&net->ipv4.frags); + + return ip4_frags_ns_ctl_register(net); } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 986d4ca38832..b263bf3a19f7 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); static int nf_ct_net_init(struct net *net) { - int res; - net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&net->nf_frag.frags); - if (res) - return res; - res = nf_ct_frag6_sysctl_register(net); - if (res) - inet_frags_uninit_net(&net->nf_frag.frags); - return res; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); } static void nf_ct_net_exit(struct net *net) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3815e8505ed2..e585c0a2591c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -709,19 +709,13 @@ static void ip6_frags_sysctl_unregister(void) static int __net_init ipv6_frags_init_net(struct net *net) { - int res; - net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&net->ipv6.frags); - if (res) - return res; - res = ip6_frags_ns_sysctl_register(net); - if (res) - inet_frags_uninit_net(&net->ipv6.frags); - return res; + inet_frags_init_net(&net->ipv6.frags); + + return ip6_frags_ns_sysctl_register(net); } static void __net_exit ipv6_frags_exit_net(struct net *net) -- cgit v1.2.3 From 90406e68e42fa50c41b69a5d607fa979d0ab562b Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Mon, 4 Sep 2017 10:45:28 +0300 Subject: gianfar: Fix Tx flow control deactivation [ Upstream commit 5d621672bc1a1e5090c1ac5432a18c79e0e13e03 ] The wrong register is checked for the Tx flow control bit, it should have been maccfg1 not maccfg2. This went unnoticed for so long probably because the impact is hardly visible, not to mention the tangled code from adjust_link(). First, link flow control (i.e. handling of Rx/Tx link level pause frames) is disabled by default (needs to be enabled via 'ethtool -A'). Secondly, maccfg2 always returns 0 for tx_flow_oldval (except for a few old boards), which results in Tx flow control remaining always on once activated. Fixes: 45b679c9a3ccd9e34f28e6ec677b812a860eb8eb ("gianfar: Implement PAUSE frame generation support") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/gianfar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3f4e71148808..fd206889a433 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -3690,7 +3690,7 @@ static noinline void gfar_update_link_state(struct gfar_private *priv) u32 tempval1 = gfar_read(®s->maccfg1); u32 tempval = gfar_read(®s->maccfg2); u32 ecntrl = gfar_read(®s->ecntrl); - u32 tx_flow_oldval = (tempval & MACCFG1_TX_FLOW); + u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW); if (phydev->duplex != priv->oldduplex) { if (!(phydev->duplex)) -- cgit v1.2.3 From f5755c0e870056dd35c95a0b5c0a038cdb4382ee Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 5 Sep 2017 09:22:05 +0800 Subject: vhost_net: correctly check tx avail during rx busy polling [ Upstream commit 8b949bef9172ca69d918e93509a4ecb03d0355e0 ] We check tx avail through vhost_enable_notify() in the past which is wrong since it only checks whether or not guest has filled more available buffer since last avail idx synchronization which was just done by vhost_vq_avail_empty() before. What we really want is checking pending buffers in the avail ring. Fix this by calling vhost_vq_avail_empty() instead. This issue could be noticed by doing netperf TCP_RR benchmark as client from guest (but not host). With this fix, TCP_RR from guest to localhost restores from 1375.91 trans per sec to 55235.28 trans per sec on my laptop (Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz). Fixes: 030881372460 ("vhost_net: basic polling support") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 5dc128a8da83..96a0661011fd 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -537,8 +537,13 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) preempt_enable(); - if (vhost_enable_notify(&net->dev, vq)) + if (!vhost_vq_avail_empty(&net->dev, vq)) vhost_poll_queue(&vq->poll); + else if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); + vhost_poll_queue(&vq->poll); + } + mutex_unlock(&vq->mutex); len = peek_head_len(sk); -- cgit v1.2.3 From ca7d8a337bd3e3eda49ab1b4dfa09ac9b335a56b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 5 Sep 2017 17:26:33 +0800 Subject: ip6_gre: update mtu properly in ip6gre_err [ Upstream commit 5c25f30c93fdc5bf25e62101aeaae7a4f9b421b3 ] Now when probessing ICMPV6_PKT_TOOBIG, ip6gre_err only subtracts the offset of gre header from mtu info. The expected mtu of gre device should also subtract gre header. Otherwise, the next packets still can't be sent out. Jianlin found this issue when using the topo: client(ip6gre)<---->(nic1)route(nic2)<----->(ip6gre)server and reducing nic2's mtu, then both tcp and sctp's performance with big size data became 0. This patch is to fix it by also subtracting grehdr (tun->tun_hlen) from mtu info when updating gre device's mtu in ip6gre_err(). It also needs to subtract ETH_HLEN if gre dev'type is ARPHRD_ETHER. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d2844ee469cb..f78afe43bdff 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -432,7 +432,9 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: - mtu = be32_to_cpu(info) - offset; + mtu = be32_to_cpu(info) - offset - t->tun_hlen; + if (t->dev->type == ARPHRD_ETHER) + mtu -= ETH_HLEN; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; -- cgit v1.2.3 From c9335db792c04be68e553c6d0537c9df8b20e557 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 8 Sep 2017 10:26:19 +0200 Subject: ipv6: fix memory leak with multiple tables during netns destruction [ Upstream commit ba1cc08d9488c94cb8d94f545305688b72a2a300 ] fib6_net_exit only frees the main and local tables. If another table was created with fib6_alloc_table, we leak it when the netns is destroyed. Fix this in the same way ip_fib_net_exit cleans up tables, by walking through the whole hashtable of fib6_table's. We can get rid of the special cases for local and main, since they're also part of the hashtable. Reproducer: ip netns add x ip -net x -6 rule add from 6003:1::/64 table 100 ip netns del x Reported-by: Jianlin Shi Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index af7442211ffb..291ec5e2d3cb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -201,6 +201,12 @@ static void rt6_release(struct rt6_info *rt) } } +static void fib6_free_table(struct fib6_table *table) +{ + inetpeer_invalidate_tree(&table->tb6_peers); + kfree(table); +} + static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -1893,15 +1899,22 @@ out_timer: static void fib6_net_exit(struct net *net) { + unsigned int i; + rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); - kfree(net->ipv6.fib6_local_tbl); -#endif - inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); - kfree(net->ipv6.fib6_main_tbl); + for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[i]; + struct hlist_node *tmp; + struct fib6_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) { + hlist_del(&tb->tb6_hlist); + fib6_free_table(tb); + } + } + kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); } -- cgit v1.2.3 From bf8ed95d2ca9c99f0237fb3cf56c381b19130610 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Sep 2017 15:48:47 -0700 Subject: ipv6: fix typo in fib6_net_exit() [ Upstream commit 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b ] IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ. Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 291ec5e2d3cb..5da864997495 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1904,7 +1904,7 @@ static void fib6_net_exit(struct net *net) rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); - for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { struct hlist_head *head = &net->ipv6.fib_table_hash[i]; struct hlist_node *tmp; struct fib6_table *tb; -- cgit v1.2.3 From 3f60dadbe1781e292b560dd353d4a5a637ed192d Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 8 Sep 2017 11:35:21 -0300 Subject: sctp: fix missing wake ups in some situations [ Upstream commit 7906b00f5cd1cd484fced7fcda892176e3202c8a ] Commit fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible") minimized the number of wake ups that are triggered in case the association receives a packet with multiple data chunks on it and/or when io_events are enabled and then commit 0970f5b36659 ("sctp: signal sk_data_ready earlier on data chunks reception") moved the wake up to as soon as possible. It thus relies on the state machine running later to clean the flag that the event was already generated. The issue is that there are 2 call paths that calls sctp_ulpq_tail_event() outside of the state machine, causing the flag to linger and possibly omitting a needed wake up in the sequence. One of the call paths is when enabling SCTP_SENDER_DRY_EVENTS via setsockopt(SCTP_EVENTS), as noticed by Harald Welte. The other is when partial reliability triggers removal of chunks from the send queue when the application calls sendmsg(). This commit fixes it by not setting the flag in case the socket is not owned by the user, as it won't be cleaned later. This works for user-initiated calls and also for rx path processing. Fixes: fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible") Reported-by: Harald Welte Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ulpqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 84d0fdaf7de9..d3cfbf2f407d 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { - sp->data_ready_signalled = 1; + if (!sock_owned_by_user(sk)) + sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } return 1; -- cgit v1.2.3 From 60b94125a1fe4988f5392d8537305dad441ef43d Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Thu, 7 Sep 2017 14:08:34 +0800 Subject: ip_tunnel: fix setting ttl and tos value in collect_md mode [ Upstream commit 0f693f1995cf002432b70f43ce73f79bf8d0b6c9 ] ttl and tos variables are declared and assigned, but are not used in iptunnel_xmit() function. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Cc: Alexei Starovoitov Signed-off-by: Haishuang Yan Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5719d6ba0824..bd7f1836bb70 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -609,8 +609,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) ip_rt_put(rt); goto tx_dropped; } - iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos, - key->ttl, df, !net_eq(tunnel->net, dev_net(dev))); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return; tx_error: dev->stats.tx_errors++; -- cgit v1.2.3 From 0f90297cba9ba37eb37723423c2df022ce77704a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 10 Aug 2017 17:35:04 -0700 Subject: f2fs: let fill_super handle roll-forward errors commit afd2b4da40b3b567ef8d8e6881479345a2312a03 upstream. If we set CP_ERROR_FLAG in roll-forward error, f2fs is no longer to proceed any IOs due to f2fs_cp_error(). But, for example, if some stale data is involved on roll-forward process, we're able to get -ENOENT, getting fs stuck. If we get any error, let fill_super set SBI_NEED_FSCK and try to recover back to stable point. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/recovery.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2fc84a991325..66395f7c0309 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -626,8 +626,6 @@ out: } clear_sbi_flag(sbi, SBI_POR_DOING); - if (err) - set_ckpt_flags(sbi, CP_ERROR_FLAG); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ -- cgit v1.2.3 From cc9618c9fffe6bd362f048928e15effe04e5b6cd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 12 Aug 2017 21:33:23 -0700 Subject: f2fs: check hot_data for roll-forward recovery commit 125c9fb1ccb53eb2ea9380df40f3c743f3fb2fed upstream. We need to check HOT_DATA to truncate any previous data block when doing roll-forward recovery. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 66395f7c0309..98c1a63a4614 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -316,7 +316,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, return 0; /* Get the previous summary */ - for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); if (curseg->segno == segno) { sum = curseg->sum_blk->entries[blkoff]; -- cgit v1.2.3 From c7d1ddec251d39415cd488c29e9d60b22d4b61b7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 1 Aug 2017 07:11:34 -0700 Subject: x86/fsgsbase/64: Fully initialize FS and GS state in start_thread_common commit 767d035d838f4fd6b5a5bbd7a3f6d293b7f65a49 upstream. execve used to leak FSBASE and GSBASE on AMD CPUs. Fix it. The security impact of this bug is small but not quite zero -- it could weaken ASLR when a privileged task execs a less privileged program, but only if program changed bitness across the exec, or the child binary was highly unusual or actively malicious. A child program that was compromised after the exec would not have access to the leaked base. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chang Seok Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b3760b3c1ca0..02fa4701cc2e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -216,10 +216,19 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp, unsigned int _cs, unsigned int _ss, unsigned int _ds) { + WARN_ON_ONCE(regs != current_pt_regs()); + + if (static_cpu_has(X86_BUG_NULL_SEG)) { + /* Loading zero below won't clear the base. */ + loadsegment(fs, __USER_DS); + load_gs_index(__USER_DS); + } + loadsegment(fs, 0); loadsegment(es, _ds); loadsegment(ds, _ds); load_gs_index(0); + regs->ip = new_ip; regs->sp = new_sp; regs->cs = _cs; -- cgit v1.2.3 From 0caec70692a0f19538ed4ebb816df0d5585c8bd0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 1 Aug 2017 07:11:35 -0700 Subject: x86/fsgsbase/64: Report FSBASE and GSBASE correctly in core dumps commit 9584d98bed7a7a904d0702ad06bbcc94703cb5b4 upstream. In ELF_COPY_CORE_REGS, we're copying from the current task, so accessing thread.fsbase and thread.gsbase makes no sense. Just read the values from the CPU registers. In practice, the old code would have been correct most of the time simply because thread.fsbase and thread.gsbase usually matched the CPU registers. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chang Seok Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/elf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index b31761ecce63..7bcd138c3aa9 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -204,6 +204,7 @@ void set_personality_ia32(bool); #define ELF_CORE_COPY_REGS(pr_reg, regs) \ do { \ + unsigned long base; \ unsigned v; \ (pr_reg)[0] = (regs)->r15; \ (pr_reg)[1] = (regs)->r14; \ @@ -226,8 +227,8 @@ do { \ (pr_reg)[18] = (regs)->flags; \ (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ - (pr_reg)[21] = current->thread.fsbase; \ - (pr_reg)[22] = current->thread.gsbase; \ + rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \ + rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \ asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ -- cgit v1.2.3 From 3fddeb80034b2be27179cdc4e23167bc78d304d1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 1 Aug 2017 07:11:37 -0700 Subject: x86/switch_to/64: Rewrite FS/GS switching yet again to fix AMD CPUs commit e137a4d8f4dd2e277e355495b6b2cb241a8693c3 upstream. Switching FS and GS is a mess, and the current code is still subtly wrong: it assumes that "Loading a nonzero value into FS sets the index and base", which is false on AMD CPUs if the value being loaded is 1, 2, or 3. (The current code came from commit 3e2b68d752c9 ("x86/asm, sched/x86: Rewrite the FS and GS context switch code"), which made it better but didn't fully fix it.) Rewrite it to be much simpler and more obviously correct. This should fix it fully on AMD CPUs and shouldn't adversely affect performance. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chang Seok Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 227 +++++++++++++++++++++++-------------------- 1 file changed, 122 insertions(+), 105 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 02fa4701cc2e..0887d2ae3797 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -136,6 +136,123 @@ void release_thread(struct task_struct *dead_task) } } +enum which_selector { + FS, + GS +}; + +/* + * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are + * not available. The goal is to be reasonably fast on non-FSGSBASE systems. + * It's forcibly inlined because it'll generate better code and this function + * is hot. + */ +static __always_inline void save_base_legacy(struct task_struct *prev_p, + unsigned short selector, + enum which_selector which) +{ + if (likely(selector == 0)) { + /* + * On Intel (without X86_BUG_NULL_SEG), the segment base could + * be the pre-existing saved base or it could be zero. On AMD + * (with X86_BUG_NULL_SEG), the segment base could be almost + * anything. + * + * This branch is very hot (it's hit twice on almost every + * context switch between 64-bit programs), and avoiding + * the RDMSR helps a lot, so we just assume that whatever + * value is already saved is correct. This matches historical + * Linux behavior, so it won't break existing applications. + * + * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we + * report that the base is zero, it needs to actually be zero: + * see the corresponding logic in load_seg_legacy. + */ + } else { + /* + * If the selector is 1, 2, or 3, then the base is zero on + * !X86_BUG_NULL_SEG CPUs and could be anything on + * X86_BUG_NULL_SEG CPUs. In the latter case, Linux + * has never attempted to preserve the base across context + * switches. + * + * If selector > 3, then it refers to a real segment, and + * saving the base isn't necessary. + */ + if (which == FS) + prev_p->thread.fsbase = 0; + else + prev_p->thread.gsbase = 0; + } +} + +static __always_inline void save_fsgs(struct task_struct *task) +{ + savesegment(fs, task->thread.fsindex); + savesegment(gs, task->thread.gsindex); + save_base_legacy(task, task->thread.fsindex, FS); + save_base_legacy(task, task->thread.gsindex, GS); +} + +static __always_inline void loadseg(enum which_selector which, + unsigned short sel) +{ + if (which == FS) + loadsegment(fs, sel); + else + load_gs_index(sel); +} + +static __always_inline void load_seg_legacy(unsigned short prev_index, + unsigned long prev_base, + unsigned short next_index, + unsigned long next_base, + enum which_selector which) +{ + if (likely(next_index <= 3)) { + /* + * The next task is using 64-bit TLS, is not using this + * segment at all, or is having fun with arcane CPU features. + */ + if (next_base == 0) { + /* + * Nasty case: on AMD CPUs, we need to forcibly zero + * the base. + */ + if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { + loadseg(which, __USER_DS); + loadseg(which, next_index); + } else { + /* + * We could try to exhaustively detect cases + * under which we can skip the segment load, + * but there's really only one case that matters + * for performance: if both the previous and + * next states are fully zeroed, we can skip + * the load. + * + * (This assumes that prev_base == 0 has no + * false positives. This is the case on + * Intel-style CPUs.) + */ + if (likely(prev_index | next_index | prev_base)) + loadseg(which, next_index); + } + } else { + if (prev_index != next_index) + loadseg(which, next_index); + wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE, + next_base); + } + } else { + /* + * The next task is using a real segment. Loading the selector + * is sufficient. + */ + loadseg(which, next_index); + } +} + int copy_thread_tls(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct *p, unsigned long tls) { @@ -273,7 +390,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); - unsigned prev_fsindex, prev_gsindex; fpu_switch_t fpu_switch; fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); @@ -283,8 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * * (e.g. xen_load_tls()) */ - savesegment(fs, prev_fsindex); - savesegment(gs, prev_gsindex); + save_fsgs(prev_p); /* * Load TLS before restoring any segments so that segment loads @@ -323,108 +438,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (unlikely(next->ds | prev->ds)) loadsegment(ds, next->ds); - /* - * Switch FS and GS. - * - * These are even more complicated than DS and ES: they have - * 64-bit bases are that controlled by arch_prctl. The bases - * don't necessarily match the selectors, as user code can do - * any number of things to cause them to be inconsistent. - * - * We don't promise to preserve the bases if the selectors are - * nonzero. We also don't promise to preserve the base if the - * selector is zero and the base doesn't match whatever was - * most recently passed to ARCH_SET_FS/GS. (If/when the - * FSGSBASE instructions are enabled, we'll need to offer - * stronger guarantees.) - * - * As an invariant, - * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is - * impossible. - */ - if (next->fsindex) { - /* Loading a nonzero value into FS sets the index and base. */ - loadsegment(fs, next->fsindex); - } else { - if (next->fsbase) { - /* Next index is zero but next base is nonzero. */ - if (prev_fsindex) - loadsegment(fs, 0); - wrmsrl(MSR_FS_BASE, next->fsbase); - } else { - /* Next base and index are both zero. */ - if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { - /* - * We don't know the previous base and can't - * find out without RDMSR. Forcibly clear it. - */ - loadsegment(fs, __USER_DS); - loadsegment(fs, 0); - } else { - /* - * If the previous index is zero and ARCH_SET_FS - * didn't change the base, then the base is - * also zero and we don't need to do anything. - */ - if (prev->fsbase || prev_fsindex) - loadsegment(fs, 0); - } - } - } - /* - * Save the old state and preserve the invariant. - * NB: if prev_fsindex == 0, then we can't reliably learn the base - * without RDMSR because Intel user code can zero it without telling - * us and AMD user code can program any 32-bit value without telling - * us. - */ - if (prev_fsindex) - prev->fsbase = 0; - prev->fsindex = prev_fsindex; - - if (next->gsindex) { - /* Loading a nonzero value into GS sets the index and base. */ - load_gs_index(next->gsindex); - } else { - if (next->gsbase) { - /* Next index is zero but next base is nonzero. */ - if (prev_gsindex) - load_gs_index(0); - wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase); - } else { - /* Next base and index are both zero. */ - if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { - /* - * We don't know the previous base and can't - * find out without RDMSR. Forcibly clear it. - * - * This contains a pointless SWAPGS pair. - * Fixing it would involve an explicit check - * for Xen or a new pvop. - */ - load_gs_index(__USER_DS); - load_gs_index(0); - } else { - /* - * If the previous index is zero and ARCH_SET_GS - * didn't change the base, then the base is - * also zero and we don't need to do anything. - */ - if (prev->gsbase || prev_gsindex) - load_gs_index(0); - } - } - } - /* - * Save the old state and preserve the invariant. - * NB: if prev_gsindex == 0, then we can't reliably learn the base - * without RDMSR because Intel user code can zero it without telling - * us and AMD user code can program any 32-bit value without telling - * us. - */ - if (prev_gsindex) - prev->gsbase = 0; - prev->gsindex = prev_gsindex; + load_seg_legacy(prev->fsindex, prev->fsbase, + next->fsindex, next->fsbase, FS); + load_seg_legacy(prev->gsindex, prev->gsbase, + next->gsindex, next->gsbase, GS); switch_fpu_finish(next_fpu, fpu_switch); -- cgit v1.2.3 From 4c1d33c4cf864cd1fa14868440daa300a8494900 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 May 2017 16:36:24 -0700 Subject: xfs: Move handling of missing page into one place in xfs_find_get_desired_pgoff() commit a54fba8f5a0dc36161cacdf2aa90f007f702ec1a upstream. Currently several places in xfs_find_get_desired_pgoff() handle the case of a missing page. Make them all handled in one place after the loop has terminated. Signed-off-by: Jan Kara Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 38 ++++++++------------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index df206cfc21f7..2e04b1cdb0d2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1139,29 +1139,8 @@ xfs_find_get_desired_pgoff( want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); - /* - * No page mapped into given range. If we are searching holes - * and if this is the first time we got into the loop, it means - * that the given offset is landed in a hole, return it. - * - * If we have already stepped through some block buffers to find - * holes but they all contains data. In this case, the last - * offset is already updated and pointed to the end of the last - * mapped page, if it does not reach the endpoint to search, - * that means there should be a hole between them. - */ - if (nr_pages == 0) { - /* Data search found nothing */ - if (type == DATA_OFF) - break; - - ASSERT(type == HOLE_OFF); - if (lastoff == startoff || lastoff < endoff) { - found = true; - *offset = lastoff; - } + if (nr_pages == 0) break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1227,21 +1206,20 @@ xfs_find_get_desired_pgoff( /* * The number of returned pages less than our desired, search - * done. In this case, nothing was found for searching data, - * but we found a hole behind the last offset. + * done. */ - if (nr_pages < want) { - if (type == HOLE_OFF) { - *offset = lastoff; - found = true; - } + if (nr_pages < want) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + /* No page at lastoff and we are not done - we found a hole. */ + if (type == HOLE_OFF && lastoff < endoff) { + *offset = lastoff; + found = true; + } out: pagevec_release(&pvec); return found; -- cgit v1.2.3 From 85ab1b23d2d865049299f3c197ce550e80228fac Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 8 Jun 2017 08:23:07 -0700 Subject: xfs: fix spurious spin_is_locked() assert failures on non-smp kernels commit 95989c46d2a156365867b1d795fdefce71bce378 upstream. The 0-day kernel test robot reports assertion failures on !CONFIG_SMP kernels due to failed spin_is_locked() checks. As it turns out, spin_is_locked() is hardcoded to return zero on !CONFIG_SMP kernels and so this function cannot be relied on to verify spinlock state in this configuration. To avoid this problem, replace the associated asserts with lockdep variants that do the right thing regardless of kernel configuration. Drop the one assert that checks for an unlocked lock as there is no suitable lockdep variant for that case. This moves the spinlock checks from XFS debug code to lockdep, but generally provides the same level of protection. Reported-by: kbuild test robot Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 2 +- fs/xfs/xfs_icache.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 16269271ebd6..24940dd3baa8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -116,7 +116,7 @@ static inline void __xfs_buf_ioacct_dec( struct xfs_buf *bp) { - ASSERT(spin_is_locked(&bp->b_lock)); + lockdep_assert_held(&bp->b_lock); if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 74304b6ce84b..e279882de427 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -66,7 +66,6 @@ xfs_inode_alloc( XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); @@ -192,7 +191,7 @@ xfs_perag_set_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (pag->pag_ici_reclaimable++) return; @@ -214,7 +213,7 @@ xfs_perag_clear_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (--pag->pag_ici_reclaimable) return; -- cgit v1.2.3 From 7cb011bbacef6fcf1d26fe8cd8cc8079404b01f8 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 14 Jun 2017 21:21:45 -0700 Subject: xfs: push buffer of flush locked dquot to avoid quotacheck deadlock commit 7912e7fef2aebe577f0b46d3cba261f2783c5695 upstream. Reclaim during quotacheck can lead to deadlocks on the dquot flush lock: - Quotacheck populates a local delwri queue with the physical dquot buffers. - Quotacheck performs the xfs_qm_dqusage_adjust() bulkstat and dirties all of the dquots. - Reclaim kicks in and attempts to flush a dquot whose buffer is already queud on the quotacheck queue. The flush succeeds but queueing to the reclaim delwri queue fails as the backing buffer is already queued. The flush unlock is now deferred to I/O completion of the buffer from the quotacheck queue. - The dqadjust bulkstat continues and dirties the recently flushed dquot once again. - Quotacheck proceeds to the xfs_qm_flush_one() walk which requires the flush lock to update the backing buffers with the in-core recalculated values. It deadlocks on the redirtied dquot as the flush lock was already acquired by reclaim, but the buffer resides on the local delwri queue which isn't submitted until the end of quotacheck. This is reproduced by running quotacheck on a filesystem with a couple million inodes in low memory (512MB-1GB) situations. This is a regression as of commit 43ff2122e6 ("xfs: on-stack delayed write buffer lists"), which removed a trylock and buffer I/O submission from the quotacheck dquot flush sequence. Quotacheck first resets and collects the physical dquot buffers in a delwri queue. Then, it traverses the filesystem inodes via bulkstat, updates the in-core dquots, flushes the corrected dquots to the backing buffers and finally submits the delwri queue for I/O. Since the backing buffers are queued across the entire quotacheck operation, dquot reclaim cannot possibly complete a dquot flush before quotacheck completes. Therefore, quotacheck must submit the buffer for I/O in order to cycle the flush lock and flush the dirty in-core dquot to the buffer. Add a delwri queue buffer push mechanism to submit an individual buffer for I/O without losing the delwri queue status and use it from quotacheck to avoid the deadlock. This restores quotacheck behavior to as before the regression was introduced. Reported-by: Martin Svec Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_buf.h | 1 + fs/xfs/xfs_qm.c | 28 ++++++++++++++++++++++++- fs/xfs/xfs_trace.h | 1 + 4 files changed, 89 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 24940dd3baa8..eca7baecc9f0 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2022,6 +2022,66 @@ xfs_buf_delwri_submit( return error; } +/* + * Push a single buffer on a delwri queue. + * + * The purpose of this function is to submit a single buffer of a delwri queue + * and return with the buffer still on the original queue. The waiting delwri + * buffer submission infrastructure guarantees transfer of the delwri queue + * buffer reference to a temporary wait list. We reuse this infrastructure to + * transfer the buffer back to the original queue. + * + * Note the buffer transitions from the queued state, to the submitted and wait + * listed state and back to the queued state during this call. The buffer + * locking and queue management logic between _delwri_pushbuf() and + * _delwri_queue() guarantee that the buffer cannot be queued to another list + * before returning. + */ +int +xfs_buf_delwri_pushbuf( + struct xfs_buf *bp, + struct list_head *buffer_list) +{ + LIST_HEAD (submit_list); + int error; + + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); + + /* + * Isolate the buffer to a new local list so we can submit it for I/O + * independently from the rest of the original list. + */ + xfs_buf_lock(bp); + list_move(&bp->b_list, &submit_list); + xfs_buf_unlock(bp); + + /* + * Delwri submission clears the DELWRI_Q buffer flag and returns with + * the buffer on the wait list with an associated reference. Rather than + * bounce the buffer from a local wait list back to the original list + * after I/O completion, reuse the original list as the wait list. + */ + xfs_buf_delwri_submit_buffers(&submit_list, buffer_list); + + /* + * The buffer is now under I/O and wait listed as during typical delwri + * submission. Lock the buffer to wait for I/O completion. Rather than + * remove the buffer from the wait list and release the reference, we + * want to return with the buffer queued to the original list. The + * buffer already sits on the original list with a wait list reference, + * however. If we let the queue inherit that wait list reference, all we + * need to do is reset the DELWRI_Q flag. + */ + xfs_buf_lock(bp); + error = bp->b_error; + bp->b_flags |= _XBF_DELWRI_Q; + xfs_buf_unlock(bp); + + return error; +} + int __init xfs_buf_init(void) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index ad514a8025dd..f961b19b9cc2 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -333,6 +333,7 @@ extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); +extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); /* Buffer Daemon Setup Routines */ extern int xfs_buf_init(void); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 8b9a9f15f022..8068867a8183 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1247,6 +1247,7 @@ xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { + struct xfs_mount *mp = dqp->q_mount; struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; @@ -1257,7 +1258,32 @@ xfs_qm_flush_one( if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; - xfs_dqflock(dqp); + /* + * The only way the dquot is already flush locked by the time quotacheck + * gets here is if reclaim flushed it before the dqadjust walk dirtied + * it for the final time. Quotacheck collects all dquot bufs in the + * local delwri queue before dquots are dirtied, so reclaim can't have + * possibly queued it for I/O. The only way out is to push the buffer to + * cycle the flush lock. + */ + if (!xfs_dqflock_nowait(dqp)) { + /* buf is pinned in-core by delwri list */ + DEFINE_SINGLE_BUF_MAP(map, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen); + bp = _xfs_buf_find(mp->m_ddev_targp, &map, 1, 0, NULL); + if (!bp) { + error = -EINVAL; + goto out_unlock; + } + xfs_buf_unlock(bp); + + xfs_buf_delwri_pushbuf(bp, buffer_list); + xfs_buf_rele(bp); + + error = -EAGAIN; + goto out_unlock; + } + error = xfs_qm_dqflush(dqp, &bp); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 828f383df121..2df73f3a73c1 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -366,6 +366,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); +DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_bdstrat_shut); DEFINE_BUF_EVENT(xfs_buf_item_relse); -- cgit v1.2.3 From ce83e494d1bbbdd045aae236dcbb412cdd721319 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Jun 2017 21:25:57 -0700 Subject: xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent commit e1a4e37cc7b665b6804fba812aca2f4d7402c249 upstream. In a pathological scenario where we are trying to bunmapi a single extent in which every other block is shared, it's possible that trying to unmap the entire large extent in a single transaction can generate so many EFIs that we overflow the transaction reservation. Therefore, use a heuristic to guess at the number of blocks we can safely unmap from a reflink file's data fork in an single transaction. This should prevent problems such as the log head slamming into the tail and ASSERTs that trigger because we've exceeded the transaction reservation. Note that since bunmapi can fail to unmap the entire range, we must also teach the deferred unmap code to roll into a new transaction whenever we get low on reservation. Signed-off-by: Darrick J. Wong [hch: random edits, all bugs are my fault] Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 37 ++++++++++++++++++++++++++++--------- fs/xfs/libxfs/xfs_bmap.h | 2 +- fs/xfs/libxfs/xfs_refcount.c | 10 +--------- fs/xfs/libxfs/xfs_refcount.h | 16 ++++++++++++++++ fs/xfs/xfs_bmap_item.c | 17 +++++++++++++++-- fs/xfs/xfs_trans.h | 2 +- fs/xfs/xfs_trans_bmap.c | 11 +++++++++-- 7 files changed, 71 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2a8cbd15d5d1..b79719a87638 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5555,6 +5555,7 @@ __xfs_bunmapi( int whichfork; /* data or attribute fork */ xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ + xfs_fileoff_t max_len; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5576,6 +5577,16 @@ __xfs_bunmapi( ASSERT(len > 0); ASSERT(nexts >= 0); + /* + * Guesstimate how many blocks we can unmap without running the risk of + * blowing out the transaction with a mix of EFIs and reflink + * adjustments. + */ + if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); + else + max_len = len; + if (!(ifp->if_flags & XFS_IFEXTENTS) && (error = xfs_iread_extents(tp, ip, whichfork))) return error; @@ -5621,7 +5632,7 @@ __xfs_bunmapi( extno = 0; while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && - (nexts == 0 || extno < nexts)) { + (nexts == 0 || extno < nexts) && max_len > 0) { /* * Is the found extent after a hole in which bno lives? * Just back up to the previous extent, if so. @@ -5655,6 +5666,15 @@ __xfs_bunmapi( } if (del.br_startoff + del.br_blockcount > bno + 1) del.br_blockcount = bno + 1 - del.br_startoff; + + /* How much can we safely unmap? */ + if (max_len < del.br_blockcount) { + del.br_startoff += del.br_blockcount - max_len; + if (!wasdel) + del.br_startblock += del.br_blockcount - max_len; + del.br_blockcount = max_len; + } + sum = del.br_startblock + del.br_blockcount; if (isrt && (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { @@ -5835,6 +5855,7 @@ __xfs_bunmapi( if (!isrt && wasdel) xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false); + max_len -= del.br_blockcount; bno = del.br_startoff - 1; nodelete: /* @@ -6604,25 +6625,24 @@ xfs_bmap_finish_one( int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, + xfs_filblks_t *blockcount, xfs_exntst_t state) { struct xfs_bmbt_irec bmap; int nimaps = 1; xfs_fsblock_t firstfsb; int flags = XFS_BMAPI_REMAP; - int done; int error = 0; bmap.br_startblock = startblock; bmap.br_startoff = startoff; - bmap.br_blockcount = blockcount; + bmap.br_blockcount = *blockcount; bmap.br_state = state; trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), - ip->i_ino, whichfork, startoff, blockcount, state); + ip->i_ino, whichfork, startoff, *blockcount, state); if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) return -EFSCORRUPTED; @@ -6641,12 +6661,11 @@ xfs_bmap_finish_one( bmap.br_blockcount, flags, &firstfsb, bmap.br_blockcount, &bmap, &nimaps, dfops); + *blockcount = 0; break; case XFS_BMAP_UNMAP: - error = xfs_bunmapi(tp, ip, bmap.br_startoff, - bmap.br_blockcount, flags, 1, &firstfsb, - dfops, &done); - ASSERT(done); + error = __xfs_bunmapi(tp, ip, startoff, blockcount, + XFS_BMAPI_REMAP, 1, &firstfsb, dfops); break; default: ASSERT(0); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index e7d40b39f18f..db53ac7ff6df 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -265,7 +265,7 @@ struct xfs_bmap_intent { int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops, struct xfs_inode *ip, enum xfs_bmap_intent_type type, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, xfs_exntst_t state); + xfs_filblks_t *blockcount, xfs_exntst_t state); int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 82a38d86ebad..e17016163542 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -784,14 +784,6 @@ xfs_refcount_merge_extents( } /* - * While we're adjusting the refcounts records of an extent, we have - * to keep an eye on the number of extents we're dirtying -- run too - * many in a single transaction and we'll exceed the transaction's - * reservation and crash the fs. Each record adds 12 bytes to the - * log (plus any key updates) so we'll conservatively assume 24 bytes - * per record. We must also leave space for btree splits on both ends - * of the range and space for the CUD and a new CUI. - * * XXX: This is a pretty hand-wavy estimate. The penalty for guessing * true incorrectly is a shutdown FS; the penalty for guessing false * incorrectly is more transaction rolls than might be necessary. @@ -822,7 +814,7 @@ xfs_refcount_still_have_space( else if (overhead > cur->bc_tp->t_log_res) return false; return cur->bc_tp->t_log_res - overhead > - cur->bc_private.a.priv.refc.nr_ops * 32; + cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; } /* diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 098dc668ab2c..eafb9d1f3b37 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp, extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, xfs_agnumber_t agno); +/* + * While we're adjusting the refcounts records of an extent, we have + * to keep an eye on the number of extents we're dirtying -- run too + * many in a single transaction and we'll exceed the transaction's + * reservation and crash the fs. Each record adds 12 bytes to the + * log (plus any key updates) so we'll conservatively assume 32 bytes + * per record. We must also leave space for btree splits on both ends + * of the range and space for the CUD and a new CUI. + */ +#define XFS_REFCOUNT_ITEM_OVERHEAD 32 + +static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) +{ + return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; +} + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index c4b90e794e41..5a54dcd7e7b1 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -395,6 +395,7 @@ xfs_bui_recover( struct xfs_map_extent *bmap; xfs_fsblock_t startblock_fsb; xfs_fsblock_t inode_fsb; + xfs_filblks_t count; bool op_ok; struct xfs_bud_log_item *budp; enum xfs_bmap_intent_type type; @@ -403,6 +404,7 @@ xfs_bui_recover( struct xfs_trans *tp; struct xfs_inode *ip = NULL; struct xfs_defer_ops dfops; + struct xfs_bmbt_irec irec; xfs_fsblock_t firstfsb; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -480,13 +482,24 @@ xfs_bui_recover( } xfs_trans_ijoin(tp, ip, 0); + count = bmap->me_len; error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, ip, whichfork, bmap->me_startoff, - bmap->me_startblock, bmap->me_len, - state); + bmap->me_startblock, &count, state); if (error) goto err_dfops; + if (count > 0) { + ASSERT(type == XFS_BMAP_UNMAP); + irec.br_startblock = bmap->me_startblock; + irec.br_blockcount = count; + irec.br_startoff = bmap->me_startoff; + irec.br_state = state; + error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); + if (error) + goto err_dfops; + } + /* Finish transaction, free inodes. */ error = xfs_defer_finish(&tp, &dfops, NULL); if (error) diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 98024cb933ef..c0e72ab57741 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -277,6 +277,6 @@ int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops, enum xfs_bmap_intent_type type, struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, xfs_exntst_t state); + xfs_filblks_t *blockcount, xfs_exntst_t state); #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c index 6408e7d7c08c..14543d93cd4b 100644 --- a/fs/xfs/xfs_trans_bmap.c +++ b/fs/xfs/xfs_trans_bmap.c @@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update( int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, + xfs_filblks_t *blockcount, xfs_exntst_t state) { int error; @@ -196,16 +196,23 @@ xfs_bmap_update_finish_item( void **state) { struct xfs_bmap_intent *bmap; + xfs_filblks_t count; int error; bmap = container_of(item, struct xfs_bmap_intent, bi_list); + count = bmap->bi_bmap.br_blockcount; error = xfs_trans_log_finish_bmap_update(tp, done_item, dop, bmap->bi_type, bmap->bi_owner, bmap->bi_whichfork, bmap->bi_bmap.br_startoff, bmap->bi_bmap.br_startblock, - bmap->bi_bmap.br_blockcount, + &count, bmap->bi_bmap.br_state); + if (!error && count > 0) { + ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); + bmap->bi_bmap.br_blockcount = count; + return -EAGAIN; + } kmem_free(bmap); return error; } -- cgit v1.2.3 From 6c0ecde201d796363b92de79553b75089760d9a4 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 14 Jun 2017 21:35:35 -0700 Subject: xfs: release bli from transaction properly on fs shutdown commit 79e641ce29cfae5b8fc55fb77ac62d11d2d849c0 upstream. If a filesystem shutdown occurs with a buffer log item in the CIL and a log force occurs, the ->iop_unpin() handler is generally expected to tear down the bli properly. This entails freeing the bli memory and releasing the associated hold on the buffer so it can be released and the filesystem unmounted. If this sequence occurs while ->bli_refcount is elevated (i.e., another transaction is open and attempting to modify the buffer), however, ->iop_unpin() may not be responsible for releasing the bli. Instead, the transaction may release the final ->bli_refcount reference and thus xfs_trans_brelse() is responsible for tearing down the bli. While xfs_trans_brelse() does drop the reference count, it only attempts to release the bli if it is clean (i.e., not in the CIL/AIL). If the filesystem is shutdown and the bli is sitting dirty in the CIL as noted above, this ends up skipping the last opportunity to release the bli. In turn, this leaves the hold on the buffer and causes an unmount hang. This can be reproduced by running generic/388 in repetition. Update xfs_trans_brelse() to handle this shutdown corner case correctly. If the final bli reference is dropped and the filesystem is shutdown, remove the bli from the AIL (if necessary) and release the bli to drop the buffer hold and ensure an unmount does not hang. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_trans_buf.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8ee29ca132dc..86987d823d76 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -356,6 +356,7 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp) { xfs_buf_log_item_t *bip; + int freed; /* * Default to a normal brelse() call if the tp is NULL. @@ -419,16 +420,22 @@ xfs_trans_brelse(xfs_trans_t *tp, /* * Drop our reference to the buf log item. */ - atomic_dec(&bip->bli_refcount); + freed = atomic_dec_and_test(&bip->bli_refcount); /* - * If the buf item is not tracking data in the log, then - * we must free it before releasing the buffer back to the - * free pool. Before releasing the buffer to the free pool, - * clear the transaction pointer in b_fsprivate2 to dissolve - * its relation to this transaction. + * If the buf item is not tracking data in the log, then we must free it + * before releasing the buffer back to the free pool. + * + * If the fs has shutdown and we dropped the last reference, it may fall + * on us to release a (possibly dirty) bli if it never made it to the + * AIL (e.g., the aborted unpin already happened and didn't release it + * due to our reference). Since we're already shutdown and need xa_lock, + * just force remove from the AIL and release the bli here. */ - if (!xfs_buf_item_dirty(bip)) { + if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { + xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bp); + } else if (!xfs_buf_item_dirty(bip)) { /*** ASSERT(bp->b_pincount == 0); ***/ -- cgit v1.2.3 From 8913492d12b1e71bd89bb234408483b7c56700e0 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 14 Jun 2017 21:35:35 -0700 Subject: xfs: remove bli from AIL before release on transaction abort commit 3d4b4a3e30ae7a949c31e1e10268a3da4723d290 upstream. When a buffer is modified, logged and committed, it ultimately ends up sitting on the AIL with a dirty bli waiting for metadata writeback. If another transaction locks and invalidates the buffer (freeing an inode chunk, for example) in the meantime, the bli is flagged as stale, the dirty state is cleared and the bli remains in the AIL. If a shutdown occurs before the transaction that has invalidated the buffer is committed, the transaction is ultimately aborted. The log items are flagged as such and ->iop_unlock() handles the aborted items. Because the bli is clean (due to the invalidation), ->iop_unlock() unconditionally releases it. The log item may still reside in the AIL, however, which means the I/O completion handler may still run and attempt to access it. This results in assert failure due to the release of the bli while still present in the AIL and a subsequent NULL dereference and panic in the buffer I/O completion handling. This can be reproduced by running generic/388 in repetition. To avoid this problem, update xfs_buf_item_unlock() to first check whether the bli is aborted and if so, remove it from the AIL before it is released. This ensures that the bli is no longer accessed during the shutdown sequence after it has been freed. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 0306168af332..f6a8422e9562 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -636,20 +636,23 @@ xfs_buf_item_unlock( /* * Clean buffers, by definition, cannot be in the AIL. However, aborted - * buffers may be dirty and hence in the AIL. Therefore if we are - * aborting a buffer and we've just taken the last refernce away, we - * have to check if it is in the AIL before freeing it. We need to free - * it in this case, because an aborted transaction has already shut the - * filesystem down and this is the last chance we will have to do so. + * buffers may be in the AIL regardless of dirty state. An aborted + * transaction that invalidates a buffer already in the AIL may have + * marked it stale and cleared the dirty state, for example. + * + * Therefore if we are aborting a buffer and we've just taken the last + * reference away, we have to check if it is in the AIL before freeing + * it. We need to free it in this case, because an aborted transaction + * has already shut the filesystem down and this is the last chance we + * will have to do so. */ if (atomic_dec_and_test(&bip->bli_refcount)) { - if (clean) - xfs_buf_item_relse(bp); - else if (aborted) { + if (aborted) { ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); - } + } else if (clean) + xfs_buf_item_relse(bp); } if (!(flags & XFS_BLI_HOLD)) -- cgit v1.2.3 From 621d0b75a3476bce5f1d4e13bb99deaf57b9289d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 19 Jun 2017 13:19:08 -0700 Subject: xfs: don't allow bmap on rt files commit 61d819e7bcb7f33da710bf3f5dcb2bcf1e48203c upstream. bmap returns a dumb LBA address but not the block device that goes with that LBA. Swapfiles don't care about this and will blindly assume that the data volume is the correct blockdev, which is totally bogus for files on the rt subvolume. This results in the swap code doing IOs to arbitrary locations on the data device(!) if the passed in mapping is a realtime file, so just turn off bmap for rt files. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 578981412615..f750d888bd17 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1566,9 +1566,12 @@ xfs_vm_bmap( * The swap code (ab-)uses ->bmap to get a block mapping and then * bypasseѕ the file system for actual I/O. We really can't allow * that on reflinks inodes, so we have to skip out here. And yes, - * 0 is the magic code for a bmap error.. + * 0 is the magic code for a bmap error. + * + * Since we don't pass back blockdev info, we can't return bmap + * information for rt files either. */ - if (xfs_is_reflink_inode(ip)) { + if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip)) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return 0; } -- cgit v1.2.3 From 171192c92da616d5848e0e70c6cab4f14351d275 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:33 -0700 Subject: xfs: free uncommitted transactions during log recovery commit 39775431f82f890f4aaa08860a30883d081bffc7 upstream. Log recovery allocates in-core transaction and member item data structures on-demand as it processes the on-disk log. Transactions are allocated on first encounter on-disk and stored in a hash table structure where they are easily accessible for subsequent lookups. Transaction items are also allocated on demand and are attached to the associated transactions. When a commit record is encountered in the log, the transaction is committed to the fs and the in-core structures are freed. If a filesystem crashes or shuts down before all in-core log buffers are flushed to the log, however, not all transactions may have commit records in the log. As expected, the modifications in such an incomplete transaction are not replayed to the fs. The in-core data structures for the partial transaction are never freed, however, resulting in a memory leak. Update xlog_do_recovery_pass() to first correctly initialize the hash table array so empty lists can be distinguished from populated lists on function exit. Update xlog_recover_free_trans() to always remove the transaction from the list prior to freeing the associated memory. Finally, walk the hash table of transaction lists as the last step before it goes out of scope and free any transactions that may remain on the lists. This prevents a memory leak of partial transactions in the log. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9b3d7c76915d..e06aa2827ecb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4152,7 +4152,7 @@ xlog_recover_commit_trans( #define XLOG_RECOVER_COMMIT_QUEUE_MAX 100 - hlist_del(&trans->r_list); + hlist_del_init(&trans->r_list); error = xlog_recover_reorder_trans(log, trans, pass); if (error) @@ -4354,6 +4354,8 @@ xlog_recover_free_trans( xlog_recover_item_t *item, *n; int i; + hlist_del_init(&trans->r_list); + list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { /* Free the regions in the item. */ list_del(&item->ri_list); @@ -5222,12 +5224,16 @@ xlog_do_recovery_pass( int error2 = 0; int bblks, split_bblks; int hblks, split_hblks, wrapped_hblks; + int i; struct hlist_head rhash[XLOG_RHASH_SIZE]; LIST_HEAD (buffer_list); ASSERT(head_blk != tail_blk); rhead_blk = 0; + for (i = 0; i < XLOG_RHASH_SIZE; i++) + INIT_HLIST_HEAD(&rhash[i]); + /* * Read the header of the tail block and get the iclog buffer size from * h_size. Use this to tell how many sectors make up the log header. @@ -5464,6 +5470,19 @@ xlog_do_recovery_pass( if (error && first_bad) *first_bad = rhead_blk; + /* + * Transactions are freed at commit time but transactions without commit + * records on disk are never committed. Free any that may be left in the + * hash table. + */ + for (i = 0; i < XLOG_RHASH_SIZE; i++) { + struct hlist_node *tmp; + struct xlog_recover *trans; + + hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list) + xlog_recover_free_trans(trans); + } + return error ? error : error2; } -- cgit v1.2.3 From b46382f02aff8d9ac141714bc6ae4f972836816f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:34 -0700 Subject: xfs: free cowblocks and retry on buffered write ENOSPC commit cf2cb7845d6e101cb17bd62f8aa08cd514fc8988 upstream. XFS runs an eofblocks reclaim scan before returning an ENOSPC error to userspace for buffered writes. This facilitates aggressive speculative preallocation without causing user visible side effects such as premature ENOSPC. Run a cowblocks scan in the same situation to reclaim lingering COW fork preallocation throughout the filesystem. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2e04b1cdb0d2..586b398f268d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -729,6 +729,7 @@ write_retry: xfs_rw_iunlock(ip, iolock); eofb.eof_flags = XFS_EOF_FLAGS_SYNC; xfs_icache_free_eofblocks(ip->i_mount, &eofb); + xfs_icache_free_cowblocks(ip->i_mount, &eofb); goto write_retry; } -- cgit v1.2.3 From e76496fa85543c48858c537c1a6465068e18db8b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 17 Sep 2017 14:06:35 -0700 Subject: xfs: don't crash on unexpected holes in dir/attr btrees commit cd87d867920155911d0d2e6485b769d853547750 upstream. In quite a few places we call xfs_da_read_buf with a mappedbno that we don't control, then assume that the function passes back either an error code or a buffer pointer. Unfortunately, if mappedbno == -2 and bno maps to a hole, we get a return code of zero and a NULL buffer, which means that we crash if we actually try to use that buffer pointer. This happens immediately when we set the buffer type for transaction context. Therefore, check that we have no error code and a non-NULL bp before trying to use bp. This patch is a follow-up to an incomplete fix in 96a3aefb8ffde231 ("xfs: don't crash if reading a directory results in an unexpected hole"). Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr_leaf.c | 2 +- fs/xfs/libxfs/xfs_da_btree.c | 2 +- fs/xfs/libxfs/xfs_dir2_block.c | 2 +- fs/xfs/libxfs/xfs_dir2_leaf.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 2852521fc8ec..c6c15e5717e4 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -351,7 +351,7 @@ xfs_attr3_leaf_read( err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 1bdf2888295b..b305dbfd81c4 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -263,7 +263,7 @@ xfs_da3_node_read( err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, which_fork, &xfs_da3_node_buf_ops); - if (!err && tp) { + if (!err && tp && *bpp) { struct xfs_da_blkinfo *info = (*bpp)->b_addr; int type; diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index aa17cb788946..43c902f7a68d 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -139,7 +139,7 @@ xfs_dir3_block_read( err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, XFS_DATA_FORK, &xfs_dir3_block_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index b887fb2a2bcf..f2e342e05365 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -268,7 +268,7 @@ xfs_dir3_leaf_read( err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF); return err; } @@ -285,7 +285,7 @@ xfs_dir3_leafn_read( err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF); return err; } -- cgit v1.2.3 From a6247b0189fab0edbe065ab42e76eddb2a03a631 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 17 Sep 2017 14:06:36 -0700 Subject: xfs: check _btree_check_block value commit 1e86eabe73b73c82e1110c746ed3ec6d5e1c0a0d upstream. Check the _btree_check_block return value for the firstrec and lastrec functions, since we have the ability to signal that the repositioning did not succeed. Fixes-coverity-id: 114067 Fixes-coverity-id: 114068 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 91c68913d495..e9f26a09a0be 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -714,7 +714,8 @@ xfs_btree_firstrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ @@ -743,7 +744,8 @@ xfs_btree_lastrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ -- cgit v1.2.3 From c32b1ec8a266476494f04843434538cdb25d9190 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 17 Sep 2017 14:06:37 -0700 Subject: xfs: set firstfsb to NULLFSBLOCK before feeding it to _bmapi_write commit 4c1a67bd3606540b9b42caff34a1d5cd94b1cf65 upstream. We must initialize the firstfsb parameter to _bmapi_write so that it doesn't incorrectly treat stack garbage as a restriction on which AGs it can search for free space. Fixes-coverity-id: 1402025 Fixes-coverity-id: 1415167 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 9 +++++++++ fs/xfs/xfs_reflink.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b79719a87638..73571fb4dfed 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -6639,6 +6639,15 @@ xfs_bmap_finish_one( bmap.br_blockcount = *blockcount; bmap.br_state = state; + /* + * firstfsb is tied to the transaction lifetime and is used to + * ensure correct AG locking order and schedule work item + * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us + * to only making one bmap call per transaction, so it should + * be safe to have it as a local variable here. + */ + firstfsb = NULLFSBLOCK; + trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 29a75ecb2425..350fc64441b1 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -333,7 +333,7 @@ xfs_reflink_convert_cow_extent( struct xfs_defer_ops *dfops) { struct xfs_bmbt_irec irec = *imap; - xfs_fsblock_t first_block; + xfs_fsblock_t first_block = NULLFSBLOCK; int nimaps = 1; if (imap->br_state == XFS_EXT_NORM) -- cgit v1.2.3 From 01bc132048cf9505ed49152cc82e583b18c5538d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 17 Sep 2017 14:06:38 -0700 Subject: xfs: check _alloc_read_agf buffer pointer before using commit 10479e2dea83d4c421ad05dfc55d918aa8dfc0cd upstream. In some circumstances, _alloc_read_agf can return an error code of zero but also a null AGF buffer pointer. Check for this and jump out. Fixes-coverity-id: 1415250 Fixes-coverity-id: 1415320 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_refcount.c | 4 ++++ fs/xfs/xfs_reflink.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index e17016163542..d71cb63cdea3 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1640,6 +1640,10 @@ xfs_refcount_recover_cow_leftovers( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) goto out_trans; + if (!agbp) { + error = -ENOMEM; + goto out_trans; + } cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 350fc64441b1..0015c19c7455 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -169,6 +169,8 @@ xfs_reflink_find_shared( error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; + if (!agbp) + return -ENOMEM; cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); -- cgit v1.2.3 From 81e27c94f9ab86c04ba4ca5f1d2bcf9e61f7b5af Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:39 -0700 Subject: xfs: fix quotacheck dquot id overflow infinite loop commit cfaf2d034360166e569a4929dd83ae9698bed856 upstream. If a dquot has an id of U32_MAX, the next lookup index increment overflows the uint32_t back to 0. This starts the lookup sequence over from the beginning, repeats indefinitely and results in a livelock. Update xfs_qm_dquot_walk() to explicitly check for the lookup overflow and exit the loop. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_qm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 8068867a8183..1fdd3face2d9 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -111,6 +111,9 @@ restart: skipped = 0; break; } + /* we're done if id overflows back to zero */ + if (!next_index) + break; } if (skipped) { -- cgit v1.2.3 From 229980158f95098ba82e7bec91ce8ada18335bdc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Sep 2017 14:06:40 -0700 Subject: xfs: fix multi-AG deadlock in xfs_bunmapi commit 5b094d6dac0451ad89b1dc088395c7b399b7e9e8 upstream. Just like in the allocator we must avoid touching multiple AGs out of order when freeing blocks, as freeing still locks the AGF and can cause the same AB-BA deadlocks as in the allocation path. Signed-off-by: Christoph Hellwig Reported-by: Nikolay Borisov Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 73571fb4dfed..2ab50caca14c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5556,6 +5556,7 @@ __xfs_bunmapi( xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t max_len; + xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5658,6 +5659,17 @@ __xfs_bunmapi( ASSERT(ep != NULL); del = got; wasdel = isnullstartblock(del.br_startblock); + + /* + * Make sure we don't touch multiple AGF headers out of order + * in a single transaction, as that could cause AB-BA deadlocks. + */ + if (!wasdel) { + agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); + if (prev_agno != NULLAGNUMBER && prev_agno > agno) + break; + prev_agno = agno; + } if (got.br_startoff < start) { del.br_startoff = start; del.br_blockcount -= start - got.br_startoff; -- cgit v1.2.3 From f90756d75d69cb05d82a061c799c54dc46e1db1b Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sun, 17 Sep 2017 14:06:41 -0700 Subject: xfs: Fix per-inode DAX flag inheritance commit 56bdf855e676f1f2ed7033f288f57dfd315725ba upstream. According to the commit that implemented per-inode DAX flag: commit 58f88ca2df72 ("xfs: introduce per-inode DAX enablement") the flag is supposed to act as "inherit flag". Currently this only works in the situations where parent directory already has a flag in di_flags set, otherwise inheritance does not work. This is because setting the XFS_DIFLAG2_DAX flag is done in a wrong branch designated for di_flags, not di_flags2. Fix this by moving the code to branch designated for setting di_flags2, which does test for flags in di_flags2. Fixes: 58f88ca2df72 ("xfs: introduce per-inode DAX enablement") Signed-off-by: Lukas Czerner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7a0b4eeb99e4..98cd905eadca 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -881,7 +881,6 @@ xfs_ialloc( case S_IFREG: case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { - uint64_t di_flags2 = 0; uint di_flags = 0; if (S_ISDIR(mode)) { @@ -918,20 +917,23 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_NODEFRAG; if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) di_flags |= XFS_DIFLAG_FILESTREAM; - if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) - di_flags2 |= XFS_DIFLAG2_DAX; ip->i_d.di_flags |= di_flags; - ip->i_d.di_flags2 |= di_flags2; } if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) && pip->i_d.di_version == 3 && ip->i_d.di_version == 3) { + uint64_t di_flags2 = 0; + if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { - ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; } + if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) + di_flags2 |= XFS_DIFLAG2_DAX; + + ip->i_d.di_flags2 |= di_flags2; } /* FALLTHROUGH */ case S_IFLNK: -- cgit v1.2.3 From 8edd73a13dc03d4bdcb25d9273908a901f880d09 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 17 Sep 2017 14:06:42 -0700 Subject: xfs: fix inobt inode allocation search optimization commit c44245b3d5435f533ca8346ece65918f84c057f9 upstream. When we try to allocate a free inode by searching the inobt, we try to find the inode nearest the parent inode by searching chunks both left and right of the chunk containing the parent. As an optimization, we cache the leftmost and rightmost records that we previously searched; if we do another allocation with the same parent inode, we'll pick up the search where it last left off. There's a bug in the case where we found a free inode to the left of the parent's chunk: we need to update the cached left and right records, but because we already reassigned the right record to point to the left, we end up assigning the left record to both the cached left and right records. This isn't a correctness problem strictly, but it can result in the next allocation rechecking chunks unnecessarily or allocating inodes further away from the parent than it needs to. Fix it by swapping the record pointer after we update the cached left and right records. Fixes: bd169565993b ("xfs: speed up free inode search") Signed-off-by: Omar Sandoval Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index a2818f6e8598..af6acd5f276c 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1236,13 +1236,13 @@ xfs_dialloc_ag_inobt( /* free inodes to the left? */ if (useleft && trec.ir_freecount) { - rec = trec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); cur = tcur; pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; + rec = trec; goto alloc_inode; } -- cgit v1.2.3 From 7fb3e5e373bb45342c6909ea8320010c461b4082 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 17 Sep 2017 14:06:43 -0700 Subject: xfs: clear MS_ACTIVE after finishing log recovery commit 8204f8ddaafafcae074746fcf2a05a45e6827603 upstream. Way back when we established inode block-map redo log items, it was discovered that we needed to prevent the VFS from evicting inodes during log recovery because any given inode might be have bmap redo items to replay even if the inode has no link count and is ultimately deleted, and any eviction of an unlinked inode causes the inode to be truncated and freed too early. To make this possible, we set MS_ACTIVE so that inodes would not be torn down immediately upon release. Unfortunately, this also results in the quota inodes not being released at all if a later part of the mount process should fail, because we never reclaim the inodes. So, set MS_ACTIVE right before we do the last part of log recovery and clear it immediately after we finish the log recovery so that everything will be torn down properly if we abort the mount. Fixes: 17c12bcd30 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 11 +++++++++++ fs/xfs/xfs_mount.c | 10 ---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b57ab34fbf3c..c235170f3a07 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -749,9 +749,20 @@ xfs_log_mount_finish( return 0; } + /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. + * xfs_fs_drop_inode needs to be able to prevent the deletion + * of inodes before we're done replaying log items on those + * inodes. Turn it off immediately after recovery finishes + * so that we don't leak the quota inodes if subsequent mount + * activities fail. + */ + mp->m_super->s_flags |= MS_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); + mp->m_super->s_flags &= ~MS_ACTIVE; return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 13796f212f98..ab058c7dc598 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -924,15 +924,6 @@ xfs_mountfs( } } - /* - * During the second phase of log recovery, we need iget and - * iput to behave like they do for an active filesystem. - * xfs_fs_drop_inode needs to be able to prevent the deletion - * of inodes before we're done replaying log items on those - * inodes. - */ - mp->m_super->s_flags |= MS_ACTIVE; - /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently @@ -1008,7 +999,6 @@ xfs_mountfs( out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: - mp->m_super->s_flags &= ~MS_ACTIVE; xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); -- cgit v1.2.3 From e1a7b7e1f6c294f64602b9cb1c15d44432f48561 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 17 Sep 2017 14:06:44 -0700 Subject: xfs: don't leak quotacheck dquots when cow recovery commit 77aff8c76425c8f49b50d0b9009915066739e7d2 upstream. If we fail a mount on account of cow recovery errors, it's possible that a previous quotacheck left some dquots in memory. The bailout clause of xfs_mountfs forgets to purge these, and so we leak them. Fix that. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_mount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ab058c7dc598..d4ce8d277992 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1004,6 +1004,8 @@ xfs_mountfs( IRELE(rip); cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); + /* Clean out dquots that might be in memory after quotacheck. */ + xfs_qm_unmount(mp); out_log_dealloc: mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); -- cgit v1.2.3 From ec0d46ef8b7e35b4f7c82bcf12afbe96b711350f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Sep 2017 14:06:45 -0700 Subject: iomap: fix integer truncation issues in the zeroing and dirtying helpers commit e28ae8e428fefe2facd72cea9f29906ecb9c861d upstream. Fix the min_t calls in the zeroing and dirtying helpers to perform the comparisms on 64-bit types, which prevents them from incorrectly being truncated, and larger zeroing operations being stuck in a never ending loop. Special thanks to Markus Stockhausen for spotting the bug. Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 798c291cbc75..a49db8806a3a 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -281,7 +281,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long bytes; /* Bytes to write to page */ offset = (pos & (PAGE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_SIZE - offset, length); + bytes = min_t(loff_t, PAGE_SIZE - offset, length); rpage = __iomap_read_page(inode, pos); if (IS_ERR(rpage)) @@ -376,7 +376,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, unsigned offset, bytes; offset = pos & (PAGE_SIZE - 1); /* Within page */ - bytes = min_t(unsigned, PAGE_SIZE - offset, count); + bytes = min_t(loff_t, PAGE_SIZE - offset, count); if (IS_DAX(inode)) status = iomap_dax_zero(pos, offset, bytes, iomap); -- cgit v1.2.3 From 01d38e380746e5880d744c634f0c459ea6646dd9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 17 Sep 2017 14:06:46 -0700 Subject: xfs: write unmount record for ro mounts commit 757a69ef6cf2bf839bd4088e5609ddddd663b0c4 upstream. There are dueling comments in the xfs code about intent for log writes when unmounting a readonly filesystem. In xfs_mountfs, we see the intent: /* * Now the log is fully replayed, we can transition to full read-only * mode for read-only mounts. This will sync all the metadata and clean * the log so that the recovery we just performed does not have to be * replayed again on the next mount. */ and it calls xfs_quiesce_attr(), but by the time we get to xfs_log_unmount_write(), it returns early for a RDONLY mount: * Don't write out unmount record on read-only mounts. Because of this, sequential ro mounts of a filesystem with a dirty log will replay the log each time, which seems odd. Fix this by writing an unmount record even for RO mounts, as long as norecovery wasn't specified (don't write a clean log record if a dirty log may still be there!) and the log device is writable. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c235170f3a07..4f59cbc0b185 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -812,11 +812,14 @@ xfs_log_unmount_write(xfs_mount_t *mp) int error; /* - * Don't write out unmount record on read-only mounts. + * Don't write out unmount record on norecovery mounts or ro devices. * Or, if we are doing a forced umount (typically because of IO errors). */ - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (mp->m_flags & XFS_MOUNT_NORECOVERY || + xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { + ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); return 0; + } error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); -- cgit v1.2.3 From 9a3f752290907e7bfa80a333e4965574932f5670 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 17 Sep 2017 14:06:47 -0700 Subject: xfs: toggle readonly state around xfs_log_mount_finish commit 6f4a1eefdd0ad4561543270a7fceadabcca075dd upstream. When we do log recovery on a readonly mount, unlinked inode processing does not happen due to the readonly checks in xfs_inactive(), which are trying to prevent any I/O on a readonly mount. This is misguided - we do I/O on readonly mounts all the time, for consistency; for example, log recovery. So do the same RDONLY flag twiddling around xfs_log_mount_finish() as we do around xfs_log_mount(), for the same reason. This all cries out for a big rework but for now this is a simple fix to an obvious problem. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4f59cbc0b185..ebe20f1591f1 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -743,10 +743,14 @@ xfs_log_mount_finish( struct xfs_mount *mp) { int error = 0; + bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY); if (mp->m_flags & XFS_MOUNT_NORECOVERY) { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); return 0; + } else if (readonly) { + /* Allow unlinked processing to proceed */ + mp->m_flags &= ~XFS_MOUNT_RDONLY; } /* @@ -764,6 +768,9 @@ xfs_log_mount_finish( xfs_log_work_queue(mp); mp->m_super->s_flags &= ~MS_ACTIVE; + if (readonly) + mp->m_flags |= XFS_MOUNT_RDONLY; + return error; } -- cgit v1.2.3 From 1ba04933408e4b4567f557d363f7bdecfabe9399 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Sep 2017 14:06:48 -0700 Subject: xfs: remove xfs_trans_ail_delete_bulk commit 27af1bbf524459962d1477a38ac6e0b7f79aaecc upstream. xfs_iflush_done uses an on-stack variable length array to pass the log items to be deleted to xfs_trans_ail_delete_bulk. On-stack VLAs are a nasty gcc extension that can lead to unbounded stack allocations, but fortunately we can easily avoid them by simply open coding xfs_trans_ail_delete_bulk in xfs_iflush_done, which is the only caller of it except for the single-item xfs_trans_ail_delete. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode_item.c | 29 +++++++++++--------- fs/xfs/xfs_trans_ail.c | 71 ++++++++++++++++++++++++------------------------- fs/xfs/xfs_trans_priv.h | 15 +++-------- 3 files changed, 55 insertions(+), 60 deletions(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index d90e7811ccdd..08cb7d1a4a3a 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -731,22 +731,27 @@ xfs_iflush_done( * holding the lock before removing the inode from the AIL. */ if (need_ail) { - struct xfs_log_item *log_items[need_ail]; - int i = 0; + bool mlip_changed = false; + + /* this is an opencoded batch version of xfs_trans_ail_delete */ spin_lock(&ailp->xa_lock); for (blip = lip; blip; blip = blip->li_bio_list) { - iip = INODE_ITEM(blip); - if (iip->ili_logged && - blip->li_lsn == iip->ili_flush_lsn) { - log_items[i++] = blip; - } - ASSERT(i <= need_ail); + if (INODE_ITEM(blip)->ili_logged && + blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) + mlip_changed |= xfs_ail_delete_one(ailp, blip); } - /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ - xfs_trans_ail_delete_bulk(ailp, log_items, i, - SHUTDOWN_CORRUPT_INCORE); - } + if (mlip_changed) { + if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) + xlog_assign_tail_lsn_locked(ailp->xa_mount); + if (list_empty(&ailp->xa_ail)) + wake_up_all(&ailp->xa_empty); + } + spin_unlock(&ailp->xa_lock); + + if (mlip_changed) + xfs_log_space_wake(ailp->xa_mount); + } /* * clean up and unlock the flush lock now we are done. We can clear the diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d6c9c3e9e02b..9056c0f34a3c 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -684,8 +684,23 @@ xfs_trans_ail_update_bulk( } } -/* - * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL +bool +xfs_ail_delete_one( + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_log_item *mlip = xfs_ail_min(ailp); + + trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); + xfs_ail_delete(ailp, lip); + lip->li_flags &= ~XFS_LI_IN_AIL; + lip->li_lsn = 0; + + return mlip == lip; +} + +/** + * Remove a log items from the AIL * * @xfs_trans_ail_delete_bulk takes an array of log items that all need to * removed from the AIL. The caller is already holding the AIL lock, and done @@ -706,52 +721,36 @@ xfs_trans_ail_update_bulk( * before returning. */ void -xfs_trans_ail_delete_bulk( +xfs_trans_ail_delete( struct xfs_ail *ailp, - struct xfs_log_item **log_items, - int nr_items, + struct xfs_log_item *lip, int shutdown_type) __releases(ailp->xa_lock) { - xfs_log_item_t *mlip; - int mlip_changed = 0; - int i; + struct xfs_mount *mp = ailp->xa_mount; + bool mlip_changed; - mlip = xfs_ail_min(ailp); - - for (i = 0; i < nr_items; i++) { - struct xfs_log_item *lip = log_items[i]; - if (!(lip->li_flags & XFS_LI_IN_AIL)) { - struct xfs_mount *mp = ailp->xa_mount; - - spin_unlock(&ailp->xa_lock); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_alert_tag(mp, XFS_PTAG_AILDELETE, - "%s: attempting to delete a log item that is not in the AIL", - __func__); - xfs_force_shutdown(mp, shutdown_type); - } - return; + if (!(lip->li_flags & XFS_LI_IN_AIL)) { + spin_unlock(&ailp->xa_lock); + if (!XFS_FORCED_SHUTDOWN(mp)) { + xfs_alert_tag(mp, XFS_PTAG_AILDELETE, + "%s: attempting to delete a log item that is not in the AIL", + __func__); + xfs_force_shutdown(mp, shutdown_type); } - - trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); - xfs_ail_delete(ailp, lip); - lip->li_flags &= ~XFS_LI_IN_AIL; - lip->li_lsn = 0; - if (mlip == lip) - mlip_changed = 1; + return; } + mlip_changed = xfs_ail_delete_one(ailp, lip); if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - xlog_assign_tail_lsn_locked(ailp->xa_mount); + if (!XFS_FORCED_SHUTDOWN(mp)) + xlog_assign_tail_lsn_locked(mp); if (list_empty(&ailp->xa_ail)) wake_up_all(&ailp->xa_empty); - spin_unlock(&ailp->xa_lock); + } + spin_unlock(&ailp->xa_lock); + if (mlip_changed) xfs_log_space_wake(ailp->xa_mount); - } else { - spin_unlock(&ailp->xa_lock); - } } int diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 49931b72da8a..d91706c56c63 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -106,18 +106,9 @@ xfs_trans_ail_update( xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } -void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, - struct xfs_log_item **log_items, int nr_items, - int shutdown_type) - __releases(ailp->xa_lock); -static inline void -xfs_trans_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip, - int shutdown_type) __releases(ailp->xa_lock) -{ - xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type); -} +bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); +void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, + int shutdown_type) __releases(ailp->xa_lock); static inline void xfs_trans_ail_remove( -- cgit v1.2.3 From 7942f605c3086abe6c9f61f29e9326c48d5c8095 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Sun, 17 Sep 2017 14:06:49 -0700 Subject: xfs: Add infrastructure needed for error propagation during buffer IO failure commit 0b80ae6ed13169bd3a244e71169f2cc020b0c57a upstream. With the current code, XFS never re-submit a failed buffer for IO, because the failed item in the buffer is kept in the flush locked state forever. To be able to resubmit an log item for IO, we need a way to mark an item as failed, if, for any reason the buffer which the item belonged to failed during writeback. Add a new log item callback to be used after an IO completion failure and make the needed clean ups. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 32 +++++++++++++++++++++++++++++++- fs/xfs/xfs_trans.h | 7 +++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index f6a8422e9562..7573a1f0bc9a 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -29,6 +29,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_log.h" +#include "xfs_inode.h" kmem_zone_t *xfs_buf_item_zone; @@ -1054,6 +1055,31 @@ xfs_buf_do_callbacks( } } +/* + * Invoke the error state callback for each log item affected by the failed I/O. + * + * If a metadata buffer write fails with a non-permanent error, the buffer is + * eventually resubmitted and so the completion callbacks are not run. The error + * state may need to be propagated to the log items attached to the buffer, + * however, so the next AIL push of the item knows hot to handle it correctly. + */ +STATIC void +xfs_buf_do_callbacks_fail( + struct xfs_buf *bp) +{ + struct xfs_log_item *next; + struct xfs_log_item *lip = bp->b_fspriv; + struct xfs_ail *ailp = lip->li_ailp; + + spin_lock(&ailp->xa_lock); + for (; lip; lip = next) { + next = lip->li_bio_list; + if (lip->li_ops->iop_error) + lip->li_ops->iop_error(lip, bp); + } + spin_unlock(&ailp->xa_lock); +} + static bool xfs_buf_iodone_callback_error( struct xfs_buf *bp) @@ -1123,7 +1149,11 @@ xfs_buf_iodone_callback_error( if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) goto permanent_error; - /* still a transient error, higher layers will retry */ + /* + * Still a transient error, run IO completion failure callbacks and let + * the higher layers retry the buffer. + */ + xfs_buf_do_callbacks_fail(bp); xfs_buf_ioerror(bp, 0); xfs_buf_relse(bp); return true; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c0e72ab57741..22fddad9fe11 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -65,11 +65,13 @@ typedef struct xfs_log_item { } xfs_log_item_t; #define XFS_LI_IN_AIL 0x1 -#define XFS_LI_ABORTED 0x2 +#define XFS_LI_ABORTED 0x2 +#define XFS_LI_FAILED 0x4 #define XFS_LI_FLAGS \ { XFS_LI_IN_AIL, "IN_AIL" }, \ - { XFS_LI_ABORTED, "ABORTED" } + { XFS_LI_ABORTED, "ABORTED" }, \ + { XFS_LI_FAILED, "FAILED" } struct xfs_item_ops { void (*iop_size)(xfs_log_item_t *, int *, int *); @@ -80,6 +82,7 @@ struct xfs_item_ops { void (*iop_unlock)(xfs_log_item_t *); xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); + void (*iop_error)(xfs_log_item_t *, xfs_buf_t *); }; void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, -- cgit v1.2.3 From 0800356def7f3ede34986adeeb03235176297eb0 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Sun, 17 Sep 2017 14:06:50 -0700 Subject: xfs: Properly retry failed inode items in case of error during buffer writeback commit d3a304b6292168b83b45d624784f973fdc1ca674 upstream. When a buffer has been failed during writeback, the inode items into it are kept flush locked, and are never resubmitted due the flush lock, so, if any buffer fails to be written, the items in AIL are never written to disk and never unlocked. This causes unmount operation to hang due these items flush locked in AIL, but this also causes the items in AIL to never be written back, even when the IO device comes back to normal. I've been testing this patch with a DM-thin device, creating a filesystem larger than the real device. When writing enough data to fill the DM-thin device, XFS receives ENOSPC errors from the device, and keep spinning on xfsaild (when 'retry forever' configuration is set). At this point, the filesystem can not be unmounted because of the flush locked items in AIL, but worse, the items in AIL are never retried at all (once xfs_inode_item_push() will skip the items that are flush locked), even if the underlying DM-thin device is expanded to the proper size. This patch fixes both cases, retrying any item that has been failed previously, using the infra-structure provided by the previous patch. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 28 ++++++++++++++++++++++++++++ fs/xfs/xfs_buf_item.h | 3 +++ fs/xfs/xfs_inode_item.c | 47 +++++++++++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_trans.h | 1 + fs/xfs/xfs_trans_ail.c | 3 ++- fs/xfs/xfs_trans_priv.h | 31 +++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 7573a1f0bc9a..573fc72c3f23 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1234,3 +1234,31 @@ xfs_buf_iodone( xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); xfs_buf_item_free(BUF_ITEM(lip)); } + +/* + * Requeue a failed buffer for writeback + * + * Return true if the buffer has been re-queued properly, false otherwise + */ +bool +xfs_buf_resubmit_failed_buffers( + struct xfs_buf *bp, + struct xfs_log_item *lip, + struct list_head *buffer_list) +{ + struct xfs_log_item *next; + + /* + * Clear XFS_LI_FAILED flag from all items before resubmit + * + * XFS_LI_FAILED set/clear is protected by xa_lock, caller this + * function already have it acquired + */ + for (; lip; lip = next) { + next = lip->li_bio_list; + xfs_clear_li_failed(lip); + } + + /* Add this buffer back to the delayed write list */ + return xfs_buf_delwri_queue(bp, buffer_list); +} diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index f7eba99d19dd..530686e1afb9 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -70,6 +70,9 @@ void xfs_buf_attach_iodone(struct xfs_buf *, xfs_log_item_t *); void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); +bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, + struct xfs_log_item *, + struct list_head *); extern kmem_zone_t *xfs_buf_item_zone; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 08cb7d1a4a3a..94915747042c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -27,6 +27,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_trans_priv.h" +#include "xfs_buf_item.h" #include "xfs_log.h" @@ -475,6 +476,23 @@ xfs_inode_item_unpin( wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * This informs the AIL that the inode is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_inode_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_inode_item_push( struct xfs_log_item *lip, @@ -484,13 +502,28 @@ xfs_inode_item_push( { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (xfs_ipincount(ip) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO. + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) return XFS_ITEM_LOCKED; @@ -622,7 +655,8 @@ static const struct xfs_item_ops xfs_inode_item_ops = { .iop_unlock = xfs_inode_item_unlock, .iop_committed = xfs_inode_item_committed, .iop_push = xfs_inode_item_push, - .iop_committing = xfs_inode_item_committing + .iop_committing = xfs_inode_item_committing, + .iop_error = xfs_inode_item_error }; @@ -710,7 +744,8 @@ xfs_iflush_done( * the AIL lock. */ iip = INODE_ITEM(blip); - if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) + if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || + lip->li_flags & XFS_LI_FAILED) need_ail++; blip = next; @@ -718,7 +753,8 @@ xfs_iflush_done( /* make sure we capture the state of the initial inode. */ iip = INODE_ITEM(lip); - if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) + if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) || + lip->li_flags & XFS_LI_FAILED) need_ail++; /* @@ -739,6 +775,9 @@ xfs_iflush_done( if (INODE_ITEM(blip)->ili_logged && blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) mlip_changed |= xfs_ail_delete_one(ailp, blip); + else { + xfs_clear_li_failed(blip); + } } if (mlip_changed) { diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 22fddad9fe11..0318e92aed66 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -50,6 +50,7 @@ typedef struct xfs_log_item { struct xfs_ail *li_ailp; /* ptr to AIL */ uint li_type; /* item type */ uint li_flags; /* misc flags */ + struct xfs_buf *li_buf; /* real buffer pointer */ struct xfs_log_item *li_bio_list; /* buffer item list */ void (*li_cb)(struct xfs_buf *, struct xfs_log_item *); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 9056c0f34a3c..70f5ab017323 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -687,12 +687,13 @@ xfs_trans_ail_update_bulk( bool xfs_ail_delete_one( struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip) { struct xfs_log_item *mlip = xfs_ail_min(ailp); trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); xfs_ail_delete(ailp, lip); + xfs_clear_li_failed(lip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index d91706c56c63..b317a3644c00 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -164,4 +164,35 @@ xfs_trans_ail_copy_lsn( *dst = *src; } #endif + +static inline void +xfs_clear_li_failed( + struct xfs_log_item *lip) +{ + struct xfs_buf *bp = lip->li_buf; + + ASSERT(lip->li_flags & XFS_LI_IN_AIL); + lockdep_assert_held(&lip->li_ailp->xa_lock); + + if (lip->li_flags & XFS_LI_FAILED) { + lip->li_flags &= ~XFS_LI_FAILED; + lip->li_buf = NULL; + xfs_buf_rele(bp); + } +} + +static inline void +xfs_set_li_failed( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + lockdep_assert_held(&lip->li_ailp->xa_lock); + + if (!(lip->li_flags & XFS_LI_FAILED)) { + xfs_buf_hold(bp); + lip->li_flags |= XFS_LI_FAILED; + lip->li_buf = bp; + } +} + #endif /* __XFS_TRANS_PRIV_H__ */ -- cgit v1.2.3 From 35093926c2f8bd259e50b73685f638095cc59c89 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:51 -0700 Subject: xfs: fix recovery failure when log record header wraps log end commit 284f1c2c9bebf871861184b0e2c40fa921dd380b upstream. The high-level log recovery algorithm consists of two loops that walk the physical log and process log records from the tail to the head. The first loop handles the case where the tail is beyond the head and processes records up to the end of the physical log. The subsequent loop processes records from the beginning of the physical log to the head. Because log records can wrap around the end of the physical log, the first loop mentioned above must handle this case appropriately. Records are processed from in-core buffers, which means that this algorithm must split the reads of such records into two partial I/Os: 1.) from the beginning of the record to the end of the log and 2.) from the beginning of the log to the end of the record. This is further complicated by the fact that the log record header and log record data are read into independent buffers. The current handling of each buffer correctly splits the reads when either the header or data starts before the end of the log and wraps around the end. The data read does not correctly handle the case where the prior header read wrapped or ends on the physical log end boundary. blk_no is incremented to or beyond the log end after the header read to point to the record data, but the split data read logic triggers, attempts to read from an invalid log block and ultimately causes log recovery to fail. This can be reproduced fairly reliably via xfstests tests generic/047 and generic/388 with large iclog sizes (256k) and small (10M) logs. If the record header read has pushed beyond the end of the physical log, the subsequent data read is actually contiguous. Update the data read logic to detect the case where blk_no has wrapped, mod it against the log size to read from the correct address and issue one contiguous read for the log data buffer. The log record is processed as normal from the buffer(s), the loop exits after the current iteration and the subsequent loop picks up with the first new record after the start of the log. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index e06aa2827ecb..9cef8916314a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -5216,7 +5216,7 @@ xlog_do_recovery_pass( xfs_daddr_t *first_bad) /* out: first bad log rec */ { xlog_rec_header_t *rhead; - xfs_daddr_t blk_no; + xfs_daddr_t blk_no, rblk_no; xfs_daddr_t rhead_blk; char *offset; xfs_buf_t *hbp, *dbp; @@ -5369,9 +5369,19 @@ xlog_do_recovery_pass( bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); blk_no += hblks; - /* Read in data for log record */ - if (blk_no + bblks <= log->l_logBBsize) { - error = xlog_bread(log, blk_no, bblks, dbp, + /* + * Read the log record data in multiple reads if it + * wraps around the end of the log. Note that if the + * header already wrapped, blk_no could point past the + * end of the log. The record data is contiguous in + * that case. + */ + if (blk_no + bblks <= log->l_logBBsize || + blk_no >= log->l_logBBsize) { + /* mod blk_no in case the header wrapped and + * pushed it beyond the end of the log */ + rblk_no = do_mod(blk_no, log->l_logBBsize); + error = xlog_bread(log, rblk_no, bblks, dbp, &offset); if (error) goto bread_err2; -- cgit v1.2.3 From e34b72a2381e6432b9eab07a3ec285b59a80e45f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:52 -0700 Subject: xfs: always verify the log tail during recovery commit 5297ac1f6d7cbf45464a49b9558831f271dfc559 upstream. Log tail verification currently only occurs when torn writes are detected at the head of the log. This was introduced because a change in the head block due to torn writes can lead to a change in the tail block (each log record header references the current tail) and the tail block should be verified before log recovery proceeds. Tail corruption is possible outside of torn write scenarios, however. For example, partial log writes can be detected and cleared during the initial head/tail block discovery process. If the partial write coincides with a tail overwrite, the log tail is corrupted and recovery fails. To facilitate correct handling of log tail overwites, update log recovery to always perform tail verification. This is necessary to detect potential tail overwrite conditions when torn writes may not have occurred. This changes normal (i.e., no torn writes) recovery behavior slightly to detect and return CRC related errors near the tail before actual recovery starts. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9cef8916314a..1457fa0f8637 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1183,31 +1183,11 @@ xlog_verify_head( ASSERT(0); return 0; } - - /* - * Now verify the tail based on the updated head. This is - * required because the torn writes trimmed from the head could - * have been written over the tail of a previous record. Return - * any errors since recovery cannot proceed if the tail is - * corrupt. - * - * XXX: This leaves a gap in truly robust protection from torn - * writes in the log. If the head is behind the tail, the tail - * pushes forward to create some space and then a crash occurs - * causing the writes into the previous record's tail region to - * tear, log recovery isn't able to recover. - * - * How likely is this to occur? If possible, can we do something - * more intelligent here? Is it safe to push the tail forward if - * we can determine that the tail is within the range of the - * torn write (e.g., the kernel can only overwrite the tail if - * it has actually been pushed forward)? Alternatively, could we - * somehow prevent this condition at runtime? - */ - error = xlog_verify_tail(log, *head_blk, *tail_blk); } + if (error) + return error; - return error; + return xlog_verify_tail(log, *head_blk, *tail_blk); } /* -- cgit v1.2.3 From 47db1fc608b89820f712ab7806b0bd4d4ed69c16 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:53 -0700 Subject: xfs: fix log recovery corruption error due to tail overwrite commit 4a4f66eac4681378996a1837ad1ffec3a2e2981f upstream. If we consider the case where the tail (T) of the log is pinned long enough for the head (H) to push and block behind the tail, we can end up blocked in the following state without enough free space (f) in the log to satisfy a transaction reservation: 0 phys. log N [-------HffT---H'--T'---] The last good record in the log (before H) refers to T. The tail eventually pushes forward (T') leaving more free space in the log for writes to H. At this point, suppose space frees up in the log for the maximum of 8 in-core log buffers to start flushing out to the log. If this pushes the head from H to H', these next writes overwrite the previous tail T. This is safe because the items logged from T to T' have been written back and removed from the AIL. If the next log writes (H -> H') happen to fail and result in partial records in the log, the filesystem shuts down having overwritten T with invalid data. Log recovery correctly locates H on the subsequent mount, but H still refers to the now corrupted tail T. This results in log corruption errors and recovery failure. Since the tail overwrite results from otherwise correct runtime behavior, it is up to log recovery to try and deal with this situation. Update log recovery tail verification to run a CRC pass from the first record past the tail to the head. This facilitates error detection at T and moves the recovery tail to the first good record past H' (similar to truncating the head on torn write detection). If corruption is detected beyond the range possibly affected by the max number of iclogs, the log is legitimately corrupted and log recovery failure is expected. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 108 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 31 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1457fa0f8637..fdad8c9ecbb3 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1029,61 +1029,106 @@ out_error: } /* - * Check the log tail for torn writes. This is required when torn writes are - * detected at the head and the head had to be walked back to a previous record. - * The tail of the previous record must now be verified to ensure the torn - * writes didn't corrupt the previous tail. + * Calculate distance from head to tail (i.e., unused space in the log). + */ +static inline int +xlog_tail_distance( + struct xlog *log, + xfs_daddr_t head_blk, + xfs_daddr_t tail_blk) +{ + if (head_blk < tail_blk) + return tail_blk - head_blk; + + return tail_blk + (log->l_logBBsize - head_blk); +} + +/* + * Verify the log tail. This is particularly important when torn or incomplete + * writes have been detected near the front of the log and the head has been + * walked back accordingly. + * + * We also have to handle the case where the tail was pinned and the head + * blocked behind the tail right before a crash. If the tail had been pushed + * immediately prior to the crash and the subsequent checkpoint was only + * partially written, it's possible it overwrote the last referenced tail in the + * log with garbage. This is not a coherency problem because the tail must have + * been pushed before it can be overwritten, but appears as log corruption to + * recovery because we have no way to know the tail was updated if the + * subsequent checkpoint didn't write successfully. * - * Return an error if CRC verification fails as recovery cannot proceed. + * Therefore, CRC check the log from tail to head. If a failure occurs and the + * offending record is within max iclog bufs from the head, walk the tail + * forward and retry until a valid tail is found or corruption is detected out + * of the range of a possible overwrite. */ STATIC int xlog_verify_tail( struct xlog *log, xfs_daddr_t head_blk, - xfs_daddr_t tail_blk) + xfs_daddr_t *tail_blk, + int hsize) { struct xlog_rec_header *thead; struct xfs_buf *bp; xfs_daddr_t first_bad; - int count; int error = 0; bool wrapped; - xfs_daddr_t tmp_head; + xfs_daddr_t tmp_tail; + xfs_daddr_t orig_tail = *tail_blk; bp = xlog_get_bp(log, 1); if (!bp) return -ENOMEM; /* - * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get - * a temporary head block that points after the last possible - * concurrently written record of the tail. + * Make sure the tail points to a record (returns positive count on + * success). */ - count = xlog_seek_logrec_hdr(log, head_blk, tail_blk, - XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead, - &wrapped); - if (count < 0) { - error = count; + error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, + &tmp_tail, &thead, &wrapped); + if (error < 0) goto out; - } + if (*tail_blk != tmp_tail) + *tail_blk = tmp_tail; /* - * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran - * into the actual log head. tmp_head points to the start of the record - * so update it to the actual head block. + * Run a CRC check from the tail to the head. We can't just check + * MAX_ICLOGS records past the tail because the tail may point to stale + * blocks cleared during the search for the head/tail. These blocks are + * overwritten with zero-length records and thus record count is not a + * reliable indicator of the iclog state before a crash. */ - if (count < XLOG_MAX_ICLOGS + 1) - tmp_head = head_blk; - - /* - * We now have a tail and temporary head block that covers at least - * XLOG_MAX_ICLOGS records from the tail. We need to verify that these - * records were completely written. Run a CRC verification pass from - * tail to head and return the result. - */ - error = xlog_do_recovery_pass(log, tmp_head, tail_blk, + first_bad = 0; + error = xlog_do_recovery_pass(log, head_blk, *tail_blk, XLOG_RECOVER_CRCPASS, &first_bad); + while (error == -EFSBADCRC && first_bad) { + int tail_distance; + + /* + * Is corruption within range of the head? If so, retry from + * the next record. Otherwise return an error. + */ + tail_distance = xlog_tail_distance(log, head_blk, first_bad); + if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize)) + break; + /* skip to the next record; returns positive count on success */ + error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, + &tmp_tail, &thead, &wrapped); + if (error < 0) + goto out; + + *tail_blk = tmp_tail; + first_bad = 0; + error = xlog_do_recovery_pass(log, head_blk, *tail_blk, + XLOG_RECOVER_CRCPASS, &first_bad); + } + + if (!error && *tail_blk != orig_tail) + xfs_warn(log->l_mp, + "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", + orig_tail, *tail_blk); out: xlog_put_bp(bp); return error; @@ -1187,7 +1232,8 @@ xlog_verify_head( if (error) return error; - return xlog_verify_tail(log, *head_blk, *tail_blk); + return xlog_verify_tail(log, *head_blk, tail_blk, + be32_to_cpu((*rhead)->h_size)); } /* -- cgit v1.2.3 From 7549e7c01fb0220e47515ad3ee52f46e2742f178 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:54 -0700 Subject: xfs: handle -EFSCORRUPTED during head/tail verification commit a4c9b34d6a17081005ec459b57b8effc08f4c731 upstream. Torn write and tail overwrite detection both trigger only on -EFSBADCRC errors. While this is the most likely failure scenario for each condition, -EFSCORRUPTED is still possible in certain cases depending on what ends up on disk when a torn write or partial tail overwrite occurs. For example, an invalid log record h_len can lead to an -EFSCORRUPTED error when running the log recovery CRC pass. Therefore, update log head and tail verification to trigger the associated head/tail fixups in the event of -EFSCORRUPTED errors along with -EFSBADCRC. Also, -EFSCORRUPTED can currently be returned from xlog_do_recovery_pass() before rhead_blk is initialized if the first record encountered happens to be corrupted. This leads to an incorrect 'first_bad' return value. Initialize rhead_blk earlier in the function to address that problem as well. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index fdad8c9ecbb3..83e90bf1899f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1102,7 +1102,7 @@ xlog_verify_tail( first_bad = 0; error = xlog_do_recovery_pass(log, head_blk, *tail_blk, XLOG_RECOVER_CRCPASS, &first_bad); - while (error == -EFSBADCRC && first_bad) { + while ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) { int tail_distance; /* @@ -1188,7 +1188,7 @@ xlog_verify_head( */ error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk, XLOG_RECOVER_CRCPASS, &first_bad); - if (error == -EFSBADCRC) { + if ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) { /* * We've hit a potential torn write. Reset the error and warn * about it. @@ -5255,7 +5255,7 @@ xlog_do_recovery_pass( LIST_HEAD (buffer_list); ASSERT(head_blk != tail_blk); - rhead_blk = 0; + blk_no = rhead_blk = tail_blk; for (i = 0; i < XLOG_RHASH_SIZE; i++) INIT_HLIST_HEAD(&rhash[i]); @@ -5333,7 +5333,6 @@ xlog_do_recovery_pass( } memset(rhash, 0, sizeof(rhash)); - blk_no = rhead_blk = tail_blk; if (tail_blk > head_blk) { /* * Perform recovery around the end of the physical log. -- cgit v1.2.3 From 6b6505d90b77f98b0ce08a8332f03cb62f97c78f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:55 -0700 Subject: xfs: add log recovery tracepoint for head/tail commit e67d3d4246e5fbb0c7c700426d11241ca9c6f473 upstream. Torn write detection and tail overwrite detection can shift the log head and tail respectively in the event of CRC mismatch or corruption errors. Add a high-level log recovery tracepoint to dump the final log head/tail and make those values easily attainable in debug/diagnostic situations. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 2 ++ fs/xfs/xfs_trace.h | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 83e90bf1899f..edd849b8f14d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -5596,6 +5596,8 @@ xlog_do_recover( xfs_buf_t *bp; xfs_sb_t *sbp; + trace_xfs_log_recover(log, head_blk, tail_blk); + /* * First replay the images in the log. */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2df73f3a73c1..6221c3818c6e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1991,6 +1991,24 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); +TRACE_EVENT(xfs_log_recover, + TP_PROTO(struct xlog *log, xfs_daddr_t headblk, xfs_daddr_t tailblk), + TP_ARGS(log, headblk, tailblk), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, headblk) + __field(xfs_daddr_t, tailblk) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->headblk = headblk; + __entry->tailblk = tailblk; + ), + TP_printk("dev %d:%d headblk 0x%llx tailblk 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->headblk, + __entry->tailblk) +) + TRACE_EVENT(xfs_log_recover_record, TP_PROTO(struct xlog *log, struct xlog_rec_header *rhead, int pass), TP_ARGS(log, rhead, pass), -- cgit v1.2.3 From 536932f39e93411c48a165c9c859e806c8989301 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Sun, 17 Sep 2017 14:06:56 -0700 Subject: xfs: stop searching for free slots in an inode chunk when there are none commit 2d32311cf19bfb8c1d2b4601974ddd951f9cfd0b upstream. In a filesystem without finobt, the Space manager selects an AG to alloc a new inode, where xfs_dialloc_ag_inobt() will search the AG for the free slot chunk. When the new inode is in the same AG as its parent, the btree will be searched starting on the parent's record, and then retried from the top if no slot is available beyond the parent's record. To exit this loop though, xfs_dialloc_ag_inobt() relies on the fact that the btree must have a free slot available, once its callers relied on the agi->freecount when deciding how/where to allocate this new inode. In the case when the agi->freecount is corrupted, showing available inodes in an AG, when in fact there is none, this becomes an infinite loop. Add a way to stop the loop when a free slot is not found in the btree, making the function to fall into the whole AG scan which will then, be able to detect the corruption and shut the filesystem down. As pointed by Brian, this might impact performance, giving the fact we don't reset the search distance anymore when we reach the end of the tree, giving it fewer tries before falling back to the whole AG search, but it will only affect searches that start within 10 records to the end of the tree. Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ialloc.c | 55 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index af6acd5f276c..4536ac588fa3 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1123,6 +1123,7 @@ xfs_dialloc_ag_inobt( int error; int offset; int i, j; + int searchdistance = 10; pag = xfs_perag_get(mp, agno); @@ -1149,7 +1150,6 @@ xfs_dialloc_ag_inobt( if (pagno == agno) { int doneleft; /* done, to the left */ int doneright; /* done, to the right */ - int searchdistance = 10; error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); if (error) @@ -1210,21 +1210,9 @@ xfs_dialloc_ag_inobt( /* * Loop until we find an inode chunk with a free inode. */ - while (!doneleft || !doneright) { + while (--searchdistance > 0 && (!doneleft || !doneright)) { int useleft; /* using left inode chunk this time */ - if (!--searchdistance) { - /* - * Not in range - save last search - * location and allocate a new inode - */ - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - pag->pagl_leftrec = trec.ir_startino; - pag->pagl_rightrec = rec.ir_startino; - pag->pagl_pagino = pagino; - goto newino; - } - /* figure out the closer block if both are valid. */ if (!doneleft && !doneright) { useleft = pagino - @@ -1268,26 +1256,37 @@ xfs_dialloc_ag_inobt( goto error1; } - /* - * We've reached the end of the btree. because - * we are only searching a small chunk of the - * btree each search, there is obviously free - * inodes closer to the parent inode than we - * are now. restart the search again. - */ - pag->pagl_pagino = NULLAGINO; - pag->pagl_leftrec = NULLAGINO; - pag->pagl_rightrec = NULLAGINO; - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - goto restart_pagno; + if (searchdistance <= 0) { + /* + * Not in range - save last search + * location and allocate a new inode + */ + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + + } else { + /* + * We've reached the end of the btree. because + * we are only searching a small chunk of the + * btree each search, there is obviously free + * inodes closer to the parent inode than we + * are now. restart the search again. + */ + pag->pagl_pagino = NULLAGINO; + pag->pagl_leftrec = NULLAGINO; + pag->pagl_rightrec = NULLAGINO; + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + goto restart_pagno; + } } /* * In a different AG from the parent. * See if the most recently allocated block has any free. */ -newino: if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), XFS_LOOKUP_EQ, &i); -- cgit v1.2.3 From 63d184d2955bab0584acc10b502e415ce23394b1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 17 Sep 2017 14:06:57 -0700 Subject: xfs: evict all inodes involved with log redo item commit 799ea9e9c59949008770aab4e1da87f10e99dbe4 upstream. When we introduced the bmap redo log items, we set MS_ACTIVE on the mountpoint and XFS_IRECOVERY on the inode to prevent unlinked inodes from being truncated prematurely during log recovery. This also had the effect of putting linked inodes on the lru instead of evicting them. Unfortunately, we neglected to find all those unreferenced lru inodes and evict them after finishing log recovery, which means that we leak them if anything goes wrong in the rest of xfs_mountfs, because the lru is only cleaned out on unmount. Therefore, evict unreferenced inodes in the lru list immediately after clearing MS_ACTIVE. Fixes: 17c12bcd30 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") Signed-off-by: Darrick J. Wong Cc: viro@ZenIV.linux.org.uk Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 1 + fs/internal.h | 1 - fs/xfs/xfs_log.c | 12 ++++++++++++ include/linux/fs.h | 1 + 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index 88110fd0b282..920aa0b1c6b0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -637,6 +637,7 @@ again: dispose_list(&dispose); } +EXPORT_SYMBOL_GPL(evict_inodes); /** * invalidate_inodes - attempt to free all inodes on a superblock diff --git a/fs/internal.h b/fs/internal.h index f4da3341b4a3..8b7143b0211c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -136,7 +136,6 @@ extern bool atime_needs_update_rcu(const struct path *, struct inode *); extern void inode_io_list_del(struct inode *inode); extern long get_nr_dirty_inodes(void); -extern void evict_inodes(struct super_block *); extern int invalidate_inodes(struct super_block *, bool); /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ebe20f1591f1..fe5f3df8b253 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -761,12 +761,24 @@ xfs_log_mount_finish( * inodes. Turn it off immediately after recovery finishes * so that we don't leak the quota inodes if subsequent mount * activities fail. + * + * We let all inodes involved in redo item processing end up on + * the LRU instead of being evicted immediately so that if we do + * something to an unlinked inode, the irele won't cause + * premature truncation and freeing of the inode, which results + * in log recovery failure. We have to evict the unreferenced + * lru inodes after clearing MS_ACTIVE because we don't + * otherwise clean up the lru if there's a subsequent failure in + * xfs_mountfs, which leads to us leaking the inodes if nothing + * else (e.g. quotacheck) references the inodes before the + * mount failure occurs. */ mp->m_super->s_flags |= MS_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); mp->m_super->s_flags &= ~MS_ACTIVE; + evict_inodes(mp->m_super); if (readonly) mp->m_flags |= XFS_MOUNT_RDONLY; diff --git a/include/linux/fs.h b/include/linux/fs.h index dd88ded27fc8..d705ae084edd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2760,6 +2760,7 @@ static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); extern unsigned int get_next_ino(void); +extern void evict_inodes(struct super_block *sb); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); -- cgit v1.2.3 From 81286ade81f73e895fe2edf89f3e8054a595ebe5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 17 Sep 2017 14:06:58 -0700 Subject: xfs: check for race with xfs_reclaim_inode() in xfs_ifree_cluster() commit f2e9ad212def50bcf4c098c6288779dd97fff0f0 upstream. After xfs_ifree_cluster() finds an inode in the radix tree and verifies that the inode number is what it expected, xfs_reclaim_inode() can swoop in and free it. xfs_ifree_cluster() will then happily continue working on the freed inode. Most importantly, it will mark the inode stale, which will probably be overwritten when the inode slab object is reallocated, but if it has already been reallocated then we can end up with an inode spuriously marked stale. In 8a17d7ddedb4 ("xfs: mark reclaimed inodes invalid earlier") we added a second check to xfs_iflush_cluster() to detect this race, but the similar RCU lookup in xfs_ifree_cluster() needs the same treatment. Signed-off-by: Omar Sandoval Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 10 +++++----- fs/xfs/xfs_inode.c | 23 ++++++++++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index e279882de427..86a4911520cc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1078,11 +1078,11 @@ reclaim: * Because we use RCU freeing we need to ensure the inode always appears * to be reclaimed with an invalid inode number when in the free state. * We do this as early as possible under the ILOCK so that - * xfs_iflush_cluster() can be guaranteed to detect races with us here. - * By doing this, we guarantee that once xfs_iflush_cluster has locked - * XFS_ILOCK that it will see either a valid, flushable inode that will - * serialise correctly, or it will see a clean (and invalid) inode that - * it can skip. + * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to + * detect races with us here. By doing this, we guarantee that once + * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that + * it will see either a valid inode that will serialise correctly, or it + * will see an invalid inode that it can skip. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 98cd905eadca..9e795ab08a53 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2368,11 +2368,24 @@ retry: * already marked stale. If we can't lock it, back off * and retry. */ - if (ip != free_ip && - !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - rcu_read_unlock(); - delay(1); - goto retry; + if (ip != free_ip) { + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + rcu_read_unlock(); + delay(1); + goto retry; + } + + /* + * Check the inode number again in case we're + * racing with freeing in xfs_reclaim_inode(). + * See the comments in that function for more + * information as to why the initial check is + * not sufficient. + */ + if (ip->i_ino != inum + i) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; + } } rcu_read_unlock(); -- cgit v1.2.3 From 0f5af7eae8846fd73d01ecbe0d60309560084a74 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:06:59 -0700 Subject: xfs: open-code xfs_buf_item_dirty() commit a4f6cf6b2b6b60ec2a05a33a32e65caa4149aa2b upstream. It checks a single flag and has one caller. It probably isn't worth its own function. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 11 ----------- fs/xfs/xfs_buf_item.h | 1 - fs/xfs/xfs_trans_buf.c | 2 +- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 573fc72c3f23..cdae0ad5e0a5 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -945,17 +945,6 @@ xfs_buf_item_log( } -/* - * Return 1 if the buffer has been logged or ordered in a transaction (at any - * point, not just the current transaction) and 0 if not. - */ -uint -xfs_buf_item_dirty( - xfs_buf_log_item_t *bip) -{ - return (bip->bli_flags & XFS_BLI_DIRTY); -} - STATIC void xfs_buf_item_free( xfs_buf_log_item_t *bip) diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 530686e1afb9..e0e744aefaa8 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -64,7 +64,6 @@ typedef struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); -uint xfs_buf_item_dirty(xfs_buf_log_item_t *); void xfs_buf_attach_iodone(struct xfs_buf *, void(*)(struct xfs_buf *, xfs_log_item_t *), xfs_log_item_t *); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 86987d823d76..cac8abbeca3f 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -435,7 +435,7 @@ xfs_trans_brelse(xfs_trans_t *tp, if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); - } else if (!xfs_buf_item_dirty(bip)) { + } else if (!(bip->bli_flags & XFS_BLI_DIRTY)) { /*** ASSERT(bp->b_pincount == 0); ***/ -- cgit v1.2.3 From ba986b3c84987bbc5e52d8ab83a851e613ce4001 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:00 -0700 Subject: xfs: remove unnecessary dirty bli format check for ordered bufs commit 6453c65d3576bc3e602abb5add15f112755c08ca upstream. xfs_buf_item_unlock() historically checked the dirty state of the buffer by manually checking the buffer log formats for dirty segments. The introduction of ordered buffers invalidated this check because ordered buffers have dirty bli's but no dirty (logged) segments. The check was updated to accommodate ordered buffers by looking at the bli state first and considering the blf only if the bli is clean. This logic is safe but unnecessary. There is no valid case where the bli is clean yet the blf has dirty segments. The bli is set dirty whenever the blf is logged (via xfs_trans_log_buf()) and the blf is cleared in the only place BLI_DIRTY is cleared (xfs_trans_binval()). Remove the conditional blf dirty checks and replace with an assert that should catch any discrepencies between bli and blf dirty states. Refactor the old blf dirty check into a helper function to be used by the assert. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 62 ++++++++++++++++++++++++++------------------------- fs/xfs/xfs_buf_item.h | 1 + 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index cdae0ad5e0a5..ff076d11804a 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -575,26 +575,18 @@ xfs_buf_item_unlock( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool clean; - bool aborted; - int flags; + bool aborted = !!(lip->li_flags & XFS_LI_ABORTED); + bool hold = !!(bip->bli_flags & XFS_BLI_HOLD); + bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY); + bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED); /* Clear the buffer's association with this transaction. */ bp->b_transp = NULL; /* - * If this is a transaction abort, don't return early. Instead, allow - * the brelse to happen. Normally it would be done for stale - * (cancelled) buffers at unpin time, but we'll never go through the - * pin/unpin cycle if we abort inside commit. + * The per-transaction state has been copied above so clear it from the + * bli. */ - aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false; - /* - * Before possibly freeing the buf item, copy the per-transaction state - * so we can reference it safely later after clearing it from the - * buffer log item. - */ - flags = bip->bli_flags; bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); /* @@ -602,7 +594,7 @@ xfs_buf_item_unlock( * unlock the buffer and free the buf item when the buffer is unpinned * for the last time. */ - if (flags & XFS_BLI_STALE) { + if (bip->bli_flags & XFS_BLI_STALE) { trace_xfs_buf_item_unlock_stale(bip); ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); if (!aborted) { @@ -620,20 +612,11 @@ xfs_buf_item_unlock( * regardless of whether it is dirty or not. A dirty abort implies a * shutdown, anyway. * - * Ordered buffers are dirty but may have no recorded changes, so ensure - * we only release clean items here. + * The bli dirty state should match whether the blf has logged segments + * except for ordered buffers, where only the bli should be dirty. */ - clean = (flags & XFS_BLI_DIRTY) ? false : true; - if (clean) { - int i; - for (i = 0; i < bip->bli_format_count; i++) { - if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, - bip->bli_formats[i].blf_map_size)) { - clean = false; - break; - } - } - } + ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) || + (ordered && dirty && !xfs_buf_item_dirty_format(bip))); /* * Clean buffers, by definition, cannot be in the AIL. However, aborted @@ -652,11 +635,11 @@ xfs_buf_item_unlock( ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); - } else if (clean) + } else if (!dirty) xfs_buf_item_relse(bp); } - if (!(flags & XFS_BLI_HOLD)) + if (!hold) xfs_buf_relse(bp); } @@ -945,6 +928,25 @@ xfs_buf_item_log( } +/* + * Return true if the buffer has any ranges logged/dirtied by a transaction, + * false otherwise. + */ +bool +xfs_buf_item_dirty_format( + struct xfs_buf_log_item *bip) +{ + int i; + + for (i = 0; i < bip->bli_format_count; i++) { + if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, + bip->bli_formats[i].blf_map_size)) + return true; + } + + return false; +} + STATIC void xfs_buf_item_free( xfs_buf_log_item_t *bip) diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index e0e744aefaa8..9690ce62c9a7 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -64,6 +64,7 @@ typedef struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); +bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, void(*)(struct xfs_buf *, xfs_log_item_t *), xfs_log_item_t *); -- cgit v1.2.3 From 93b64516019249fa196cc3cf4c9040270cf4106f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:01 -0700 Subject: xfs: ordered buffer log items are never formatted commit e9385cc6fb7edf23702de33a2dc82965d92d9392 upstream. Ordered buffers pass through the logging infrastructure without ever being written to the log. The way this works is that the ordered buffer status is transferred to the log vector at commit time via the ->iop_size() callback. In xlog_cil_insert_format_items(), ordered log vectors bypass ->iop_format() processing altogether. Therefore it is unnecessary for xfs_buf_item_format() to handle ordered buffers. Remove the unnecessary logic and assert that an ordered buffer never reaches this point. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 12 ++---------- fs/xfs/xfs_trace.h | 1 - 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index ff076d11804a..ef2c1375f092 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -323,6 +323,8 @@ xfs_buf_item_format( ASSERT((bip->bli_flags & XFS_BLI_STALE) || (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF)); + ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED) || + (bip->bli_flags & XFS_BLI_STALE)); /* @@ -347,16 +349,6 @@ xfs_buf_item_format( bip->bli_flags &= ~XFS_BLI_INODE_BUF; } - if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) == - XFS_BLI_ORDERED) { - /* - * The buffer has been logged just to order it. It is not being - * included in the transaction commit, so don't format it. - */ - trace_xfs_buf_item_format_ordered(bip); - return; - } - for (i = 0; i < bip->bli_format_count; i++) { xfs_buf_item_format_segment(bip, lv, &vecp, offset, &bip->bli_formats[i]); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 6221c3818c6e..bdf69e1c7410 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -520,7 +520,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); -- cgit v1.2.3 From 19a87a9407654b6e46fff9f325cac0a11dec75f7 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:02 -0700 Subject: xfs: refactor buffer logging into buffer dirtying helper commit 9684010d38eccda733b61106765e9357cf436f65 upstream. xfs_trans_log_buf() is responsible for logging the dirty segments of a buffer along with setting all of the necessary state on the transaction, buffer, bli, etc., to ensure that the associated items are marked as dirty and prepared for I/O. We have a couple use cases that need to to dirty a buffer in a transaction without actually logging dirty ranges of the buffer. One existing use case is ordered buffers, which are currently logged with arbitrary ranges to accomplish this even though the content of ordered buffers is never written to the log. Another pending use case is to relog an already dirty buffer across rolled transactions within the deferred operations infrastructure. This is required to prevent a held (XFS_BLI_HOLD) buffer from pinning the tail of the log. Refactor xfs_trans_log_buf() into a new function that contains all of the logic responsible to dirty the transaction, lidp, buffer and bli. This new function can be used in the future for the use cases outlined above. This patch does not introduce functional changes. Signed-off-by: Brian Foster Reviewed-by: Allison Henderson Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_trans.h | 4 +++- fs/xfs/xfs_trans_buf.c | 46 ++++++++++++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0318e92aed66..40555bcaa277 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -222,7 +222,9 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); -void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); +void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint, + uint); +void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); void xfs_extent_free_init_defer_op(void); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index cac8abbeca3f..8c99813e5377 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -493,25 +493,17 @@ xfs_trans_bhold_release(xfs_trans_t *tp, } /* - * This is called to mark bytes first through last inclusive of the given - * buffer as needing to be logged when the transaction is committed. - * The buffer must already be associated with the given transaction. - * - * First and last are numbers relative to the beginning of this buffer, - * so the first byte in the buffer is numbered 0 regardless of the - * value of b_blkno. + * Mark a buffer dirty in the transaction. */ void -xfs_trans_log_buf(xfs_trans_t *tp, - xfs_buf_t *bp, - uint first, - uint last) +xfs_trans_dirty_buf( + struct xfs_trans *tp, + struct xfs_buf *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_fspriv; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); - ASSERT(first <= last && last < BBTOB(bp->b_length)); ASSERT(bp->b_iodone == NULL || bp->b_iodone == xfs_buf_iodone_callbacks); @@ -531,8 +523,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, bp->b_iodone = xfs_buf_iodone_callbacks; bip->bli_item.li_cb = xfs_buf_iodone; - trace_xfs_trans_log_buf(bip); - /* * If we invalidated the buffer within this transaction, then * cancel the invalidation now that we're dirtying the buffer @@ -545,15 +535,39 @@ xfs_trans_log_buf(xfs_trans_t *tp, bp->b_flags &= ~XBF_STALE; bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL; } + bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED; tp->t_flags |= XFS_TRANS_DIRTY; bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; +} + +/* + * This is called to mark bytes first through last inclusive of the given + * buffer as needing to be logged when the transaction is committed. + * The buffer must already be associated with the given transaction. + * + * First and last are numbers relative to the beginning of this buffer, + * so the first byte in the buffer is numbered 0 regardless of the + * value of b_blkno. + */ +void +xfs_trans_log_buf( + struct xfs_trans *tp, + struct xfs_buf *bp, + uint first, + uint last) +{ + struct xfs_buf_log_item *bip = bp->b_fspriv; + + ASSERT(first <= last && last < BBTOB(bp->b_length)); + + xfs_trans_dirty_buf(tp, bp); /* * If we have an ordered buffer we are not logging any dirty range but * it still needs to be marked dirty and that it has been logged. */ - bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED; + trace_xfs_trans_log_buf(bip); if (!(bip->bli_flags & XFS_BLI_ORDERED)) xfs_buf_item_log(bip, first, last); } -- cgit v1.2.3 From fe211e1744db41fb23b0a85f7cda87de8fab5ea2 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:03 -0700 Subject: xfs: don't log dirty ranges for ordered buffers commit 8dc518dfa7dbd079581269e51074b3c55a65a880 upstream. Ordered buffers are attached to transactions and pushed through the logging infrastructure just like normal buffers with the exception that they are not actually written to the log. Therefore, we don't need to log dirty ranges of ordered buffers. xfs_trans_log_buf() is called on ordered buffers to set up all of the dirty state on the transaction, buffer and log item and prepare the buffer for I/O. Now that xfs_trans_dirty_buf() is available, call it from xfs_trans_ordered_buf() so the latter is now mutually exclusive with xfs_trans_log_buf(). This reflects the implementation of ordered buffers and helps eliminate confusion over the need to log ranges of ordered buffers just to set up internal log state. Signed-off-by: Brian Foster Reviewed-by: Allison Henderson Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree.c | 6 ++---- fs/xfs/libxfs/xfs_ialloc.c | 2 -- fs/xfs/xfs_trans_buf.c | 26 ++++++++++++++------------ 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index e9f26a09a0be..69c3f428b582 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4447,12 +4447,10 @@ xfs_btree_block_change_owner( * though, so everything is consistent in memory. */ if (bp) { - if (cur->bc_tp) { + if (cur->bc_tp) xfs_trans_ordered_buf(cur->bc_tp, bp); - xfs_btree_log_block(cur, bp, XFS_BB_OWNER); - } else { + else xfs_buf_delwri_queue(bp, bbcoi->buffer_list); - } } else { ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); ASSERT(level == cur->bc_nlevels - 1); diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 4536ac588fa3..42fef0731e2a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -368,8 +368,6 @@ xfs_ialloc_inode_init( * transaction and pin the log appropriately. */ xfs_trans_ordered_buf(tp, fbuf); - xfs_trans_log_buf(tp, fbuf, 0, - BBTOB(fbuf->b_length) - 1); } } else { fbuf->b_flags |= XBF_DONE; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8c99813e5377..3089e8015369 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -560,16 +560,12 @@ xfs_trans_log_buf( struct xfs_buf_log_item *bip = bp->b_fspriv; ASSERT(first <= last && last < BBTOB(bp->b_length)); + ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); xfs_trans_dirty_buf(tp, bp); - /* - * If we have an ordered buffer we are not logging any dirty range but - * it still needs to be marked dirty and that it has been logged. - */ trace_xfs_trans_log_buf(bip); - if (!(bip->bli_flags & XFS_BLI_ORDERED)) - xfs_buf_item_log(bip, first, last); + xfs_buf_item_log(bip, first, last); } @@ -722,12 +718,11 @@ xfs_trans_inode_alloc_buf( } /* - * Mark the buffer as ordered for this transaction. This means - * that the contents of the buffer are not recorded in the transaction - * but it is tracked in the AIL as though it was. This allows us - * to record logical changes in transactions rather than the physical - * changes we make to the buffer without changing writeback ordering - * constraints of metadata buffers. + * Mark the buffer as ordered for this transaction. This means that the contents + * of the buffer are not recorded in the transaction but it is tracked in the + * AIL as though it was. This allows us to record logical changes in + * transactions rather than the physical changes we make to the buffer without + * changing writeback ordering constraints of metadata buffers. */ void xfs_trans_ordered_buf( @@ -739,9 +734,16 @@ xfs_trans_ordered_buf( ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); + ASSERT(!xfs_buf_item_dirty_format(bip)); bip->bli_flags |= XFS_BLI_ORDERED; trace_xfs_buf_item_ordered(bip); + + /* + * We don't log a dirty range of an ordered buffer but it still needs + * to be marked dirty and that it has been logged. + */ + xfs_trans_dirty_buf(tp, bp); } /* -- cgit v1.2.3 From f9e583edf1a71b7b40d5c5c492319a07ebe82d71 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:04 -0700 Subject: xfs: skip bmbt block ino validation during owner change commit 99c794c639a65cc7b74f30a674048fd100fe9ac8 upstream. Extent swap uses xfs_btree_visit_blocks() to fix up bmbt block owners on v5 (!rmapbt) filesystems. The bmbt scan uses xfs_btree_lookup_get_block() to read bmbt blocks which verifies the current owner of the block against the parent inode of the bmbt. This works during extent swap because the bmbt owners are updated to the opposite inode number before the inode extent forks are swapped. The modified bmbt blocks are marked as ordered buffers which allows everything to commit in a single transaction. If the transaction commits to the log and the system crashes such that recovery of the extent swap is required, log recovery restarts the bmbt scan to fix up any bmbt blocks that may have not been written back before the crash. The log recovery bmbt scan occurs after the inode forks have been swapped, however. This causes the bmbt block owner verification to fail, leads to log recovery failure and requires xfs_repair to zap the log to recover. Define a new invalid inode owner flag to inform the btree block lookup mechanism that the current inode may be invalid with respect to the current owner of the bmbt block. Set this flag on the cursor used for change owner scans to allow this operation to work at runtime and during log recovery. Signed-off-by: Brian Foster Fixes: bb3be7e7c ("xfs: check for bogus values in btree block headers") Cc: stable@vger.kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap_btree.c | 1 + fs/xfs/libxfs/xfs_btree.c | 1 + fs/xfs/libxfs/xfs_btree.h | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 5c3918678bb6..9968a746c649 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -888,6 +888,7 @@ xfs_bmbt_change_owner( cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); if (!cur) return -ENOMEM; + cur->bc_private.b.flags |= XFS_BTCUR_BPRV_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 69c3f428b582..1df747fadc3a 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1774,6 +1774,7 @@ xfs_btree_lookup_get_block( /* Check the inode owner since the verifiers don't. */ if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && + !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) && (cur->bc_flags & XFS_BTREE_LONG_PTRS) && be64_to_cpu((*blkp)->bb_u.l.bb_owner) != cur->bc_private.b.ip->i_ino) diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 3b0fc1afada5..33c7be2357b9 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -268,7 +268,8 @@ typedef struct xfs_btree_cur short forksize; /* fork's inode space */ char whichfork; /* data or attr fork */ char flags; /* flags */ -#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ +#define XFS_BTCUR_BPRV_WASDEL (1<<0) /* was delayed */ +#define XFS_BTCUR_BPRV_INVALID_OWNER (1<<1) /* for ext swap */ } b; } bc_private; /* per-btree type data */ } xfs_btree_cur_t; -- cgit v1.2.3 From a51e3e2cf3cbb306faa16784fd4f1791ee304816 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:05 -0700 Subject: xfs: move bmbt owner change to last step of extent swap commit 6fb10d6d22094bc4062f92b9ccbcee2f54033d04 upstream. The extent swap operation currently resets bmbt block owners before the inode forks are swapped. The bmbt buffers are marked as ordered so they do not have to be physically logged in the transaction. This use of ordered buffers is not safe as bmbt buffers may have been previously physically logged. The bmbt owner change algorithm needs to be updated to physically log buffers that are already dirty when/if they are encountered. This means that an extent swap will eventually require multiple rolling transactions to handle large btrees. In addition, all inode related changes must be logged before the bmbt owner change scan begins and can roll the transaction for the first time to preserve fs consistency via log recovery. In preparation for such fixes to the bmbt owner change algorithm, refactor the bmbt scan out of the extent fork swap code to the last operation before the transaction is committed. Update xfs_swap_extent_forks() to only set the inode log flags when an owner change scan is necessary. Update xfs_swap_extents() to trigger the owner change based on the inode log flags. Note that since the owner change now occurs after the extent fork swap, the inode btrees must be fixed up with the inode number of the current inode (similar to log recovery). Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 87b495e2f15a..15cd36f29fc4 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1825,29 +1825,18 @@ xfs_swap_extent_forks( } /* - * Before we've swapped the forks, lets set the owners of the forks - * appropriately. We have to do this as we are demand paging the btree - * buffers, and so the validation done on read will expect the owner - * field to be correctly set. Once we change the owners, we can swap the - * inode forks. + * Btree format (v3) inodes have the inode number stamped in the bmbt + * block headers. We can't start changing the bmbt blocks until the + * inode owner change is logged so recovery does the right thing in the + * event of a crash. Set the owner change log flags now and leave the + * bmbt scan as the last step. */ if (ip->i_d.di_version == 3 && - ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { + ip->i_d.di_format == XFS_DINODE_FMT_BTREE) (*target_log_flags) |= XFS_ILOG_DOWNER; - error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, - tip->i_ino, NULL); - if (error) - return error; - } - if (tip->i_d.di_version == 3 && - tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { + tip->i_d.di_format == XFS_DINODE_FMT_BTREE) (*src_log_flags) |= XFS_ILOG_DOWNER; - error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, - ip->i_ino, NULL); - if (error) - return error; - } /* * Swap the data forks of the inodes @@ -2076,6 +2065,25 @@ xfs_swap_extents( xfs_trans_log_inode(tp, ip, src_log_flags); xfs_trans_log_inode(tp, tip, target_log_flags); + /* + * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems + * have inode number owner values in the bmbt blocks that still refer to + * the old inode. Scan each bmbt to fix up the owner values with the + * inode number of the current inode. + */ + if (src_log_flags & XFS_ILOG_DOWNER) { + error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, + ip->i_ino, NULL); + if (error) + goto out_trans_cancel; + } + if (target_log_flags & XFS_ILOG_DOWNER) { + error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, + tip->i_ino, NULL); + if (error) + goto out_trans_cancel; + } + /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. -- cgit v1.2.3 From e2bb92633615ad801c4ab56fdb3eba3c701b2a3c Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:06 -0700 Subject: xfs: disallow marking previously dirty buffers as ordered commit a5814bceea48ee1c57c4db2bd54b0c0246daf54a upstream. Ordered buffers are used in situations where the buffer is not physically logged but must pass through the transaction/logging pipeline for a particular transaction. As a result, ordered buffers are not unpinned and written back until the transaction commits to the log. Ordered buffers have a strict requirement that the target buffer must not be currently dirty and resident in the log pipeline at the time it is marked ordered. If a dirty+ordered buffer is committed, the buffer is reinserted to the AIL but not physically relogged at the LSN of the associated checkpoint. The buffer log item is assigned the LSN of the latest checkpoint and the AIL effectively releases the previously logged buffer content from the active log before the buffer has been written back. If the tail pushes forward and a filesystem crash occurs while in this state, an inconsistent filesystem could result. It is currently the caller responsibility to ensure an ordered buffer is not already dirty from a previous modification. This is unclear and error prone when not used in situations where it is guaranteed a buffer has not been previously modified (such as new metadata allocations). To facilitate general purpose use of ordered buffers, update xfs_trans_ordered_buf() to conditionally order the buffer based on state of the log item and return the status of the result. If the bli is dirty, do not order the buffer and return false. The caller must either physically log the buffer (having acquired the appropriate log reservation) or push it from the AIL to clean it before it can be marked ordered in the current transaction. Note that ordered buffers are currently only used in two situations: 1.) inode chunk allocation where previously logged buffers are not possible and 2.) extent swap which will be updated to handle ordered buffer failures in a separate patch. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_trans.h | 2 +- fs/xfs/xfs_trans_buf.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 40555bcaa277..5669cf00bae0 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -217,7 +217,7 @@ void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); -void xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *); +bool xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 3089e8015369..3ba7a96a8abd 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -724,7 +724,7 @@ xfs_trans_inode_alloc_buf( * transactions rather than the physical changes we make to the buffer without * changing writeback ordering constraints of metadata buffers. */ -void +bool xfs_trans_ordered_buf( struct xfs_trans *tp, struct xfs_buf *bp) @@ -734,7 +734,9 @@ xfs_trans_ordered_buf( ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); - ASSERT(!xfs_buf_item_dirty_format(bip)); + + if (xfs_buf_item_dirty_format(bip)) + return false; bip->bli_flags |= XFS_BLI_ORDERED; trace_xfs_buf_item_ordered(bip); @@ -744,6 +746,7 @@ xfs_trans_ordered_buf( * to be marked dirty and that it has been logged. */ xfs_trans_dirty_buf(tp, bp); + return true; } /* -- cgit v1.2.3 From a46cf59265cf5282be0a488abc913e94db924e87 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Sep 2017 14:07:07 -0700 Subject: xfs: relog dirty buffers during swapext bmbt owner change commit 2dd3d709fc4338681a3aa61658122fa8faa5a437 upstream. The owner change bmbt scan that occurs during extent swap operations does not handle ordered buffer failures. Buffers that cannot be marked ordered must be physically logged so previously dirty ranges of the buffer can be relogged in the transaction. Since the bmbt scan may need to process and potentially log a large number of blocks, we can't expect to complete this operation in a single transaction. Update extent swap to use a permanent transaction with enough log reservation to physically log a buffer. Update the bmbt scan to physically log any buffers that cannot be ordered and to terminate the scan with -EAGAIN. On -EAGAIN, the caller rolls the transaction and restarts the scan. Finally, update the bmbt scan helper function to skip bmbt blocks that already match the expected owner so they are not reprocessed after scan restarts. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig [darrick: fix the xfs_trans_roll call] Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree.c | 26 ++++++++++++++------- fs/xfs/xfs_bmap_util.c | 59 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 1df747fadc3a..4ad1e214b1b2 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4435,10 +4435,15 @@ xfs_btree_block_change_owner( /* modify the owner */ block = xfs_btree_get_block(cur, level, &bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) + return 0; block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); - else + } else { + if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) + return 0; block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); + } /* * If the block is a root block hosted in an inode, we might not have a @@ -4447,14 +4452,19 @@ xfs_btree_block_change_owner( * block is formatted into the on-disk inode fork. We still change it, * though, so everything is consistent in memory. */ - if (bp) { - if (cur->bc_tp) - xfs_trans_ordered_buf(cur->bc_tp, bp); - else - xfs_buf_delwri_queue(bp, bbcoi->buffer_list); - } else { + if (!bp) { ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); ASSERT(level == cur->bc_nlevels - 1); + return 0; + } + + if (cur->bc_tp) { + if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { + xfs_btree_log_block(cur, bp, XFS_BB_OWNER); + return -EAGAIN; + } + } else { + xfs_buf_delwri_queue(bp, bbcoi->buffer_list); } return 0; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 15cd36f29fc4..5ffefac081f7 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1914,6 +1914,48 @@ xfs_swap_extent_forks( return 0; } +/* + * Fix up the owners of the bmbt blocks to refer to the current inode. The + * change owner scan attempts to order all modified buffers in the current + * transaction. In the event of ordered buffer failure, the offending buffer is + * physically logged as a fallback and the scan returns -EAGAIN. We must roll + * the transaction in this case to replenish the fallback log reservation and + * restart the scan. This process repeats until the scan completes. + */ +static int +xfs_swap_change_owner( + struct xfs_trans **tpp, + struct xfs_inode *ip, + struct xfs_inode *tmpip) +{ + int error; + struct xfs_trans *tp = *tpp; + + do { + error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino, + NULL); + /* success or fatal error */ + if (error != -EAGAIN) + break; + + error = xfs_trans_roll(tpp, NULL); + if (error) + break; + tp = *tpp; + + /* + * Redirty both inodes so they can relog and keep the log tail + * moving forward. + */ + xfs_trans_ijoin(tp, ip, 0); + xfs_trans_ijoin(tp, tmpip, 0); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE); + } while (true); + + return error; +} + int xfs_swap_extents( struct xfs_inode *ip, /* target inode */ @@ -1927,8 +1969,8 @@ xfs_swap_extents( int error = 0; int lock_flags; struct xfs_ifork *cowfp; - __uint64_t f; - int resblks; + uint64_t f; + int resblks = 0; /* * Lock the inodes against other IO, page faults and truncate to @@ -1976,11 +2018,8 @@ xfs_swap_extents( XFS_SWAP_RMAP_SPACE_RES(mp, XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK), XFS_DATA_FORK); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, - 0, 0, &tp); - } else - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, - 0, 0, &tp); + } + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); if (error) goto out_unlock; @@ -2072,14 +2111,12 @@ xfs_swap_extents( * inode number of the current inode. */ if (src_log_flags & XFS_ILOG_DOWNER) { - error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, - ip->i_ino, NULL); + error = xfs_swap_change_owner(&tp, ip, tip); if (error) goto out_trans_cancel; } if (target_log_flags & XFS_ILOG_DOWNER) { - error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, - tip->i_ino, NULL); + error = xfs_swap_change_owner(&tp, tip, ip); if (error) goto out_trans_cancel; } -- cgit v1.2.3 From 0e8d7e364ec546c44762664d30f4b1f6fd912197 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Sep 2017 14:07:08 -0700 Subject: xfs: disable per-inode DAX flag commit 742d84290739ae908f1b61b7d17ea382c8c0073a upstream. Currently flag switching can be used to easily crash the kernel. Disable the per-inode DAX flag until that is sorted out. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 73cfc7179124..be54216027b6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1005,11 +1005,12 @@ xfs_diflags_to_linux( inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; +#if 0 /* disabled until the flag switching races are sorted out */ if (xflags & FS_XFLAG_DAX) inode->i_flags |= S_DAX; else inode->i_flags &= ~S_DAX; - +#endif } static int -- cgit v1.2.3 From f46a61f686b0a8042ee4b7cb108ece81e3fb9401 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 17 Sep 2017 14:07:09 -0700 Subject: xfs: fix incorrect log_flushed on fsync commit 47c7d0b19502583120c3f396c7559e7a77288a68 upstream. When calling into _xfs_log_force{,_lsn}() with a pointer to log_flushed variable, log_flushed will be set to 1 if: 1. xlog_sync() is called to flush the active log buffer AND/OR 2. xlog_wait() is called to wait on a syncing log buffers xfs_file_fsync() checks the value of log_flushed after _xfs_log_force_lsn() call to optimize away an explicit PREFLUSH request to the data block device after writing out all the file's pages to disk. This optimization is incorrect in the following sequence of events: Task A Task B ------------------------------------------------------- xfs_file_fsync() _xfs_log_force_lsn() xlog_sync() [submit PREFLUSH] xfs_file_fsync() file_write_and_wait_range() [submit WRITE X] [endio WRITE X] _xfs_log_force_lsn() xlog_wait() [endio PREFLUSH] The write X is not guarantied to be on persistent storage when PREFLUSH request in completed, because write A was submitted after the PREFLUSH request, but xfs_file_fsync() of task A will be notified of log_flushed=1 and will skip explicit flush. If the system crashes after fsync of task A, write X may not be present on disk after reboot. This bug was discovered and demonstrated using Josef Bacik's dm-log-writes target, which can be used to record block io operations and then replay a subset of these operations onto the target device. The test goes something like this: - Use fsx to execute ops of a file and record ops on log device - Every now and then fsync the file, store md5 of file and mark the location in the log - Then replay log onto device for each mark, mount fs and compare md5 of file to stored value Cc: Christoph Hellwig Cc: Josef Bacik Cc: Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index fe5f3df8b253..33c9a3aae948 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3337,8 +3337,6 @@ maybe_sleep: */ if (iclog->ic_state & XLOG_STATE_IOERROR) return -EIO; - if (log_flushed) - *log_flushed = 1; } else { no_sleep: @@ -3442,8 +3440,6 @@ try_again: xlog_wait(&iclog->ic_prev->ic_write_wait, &log->l_icloglock); - if (log_flushed) - *log_flushed = 1; already_slept = 1; goto try_again; } @@ -3477,9 +3473,6 @@ try_again: */ if (iclog->ic_state & XLOG_STATE_IOERROR) return -EIO; - - if (log_flushed) - *log_flushed = 1; } else { /* just return */ spin_unlock(&log->l_icloglock); } -- cgit v1.2.3 From bb69e8a228a74c9aa7b70f6624e5c4fa1af70533 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Sep 2017 14:07:10 -0700 Subject: xfs: don't set v3 xflags for v2 inodes commit dd60687ee541ca3f6df8758f38e6f22f57c42a37 upstream. Reject attempts to set XFLAGS that correspond to di_flags2 inode flags if the inode isn't a v3 inode, because di_flags2 only exists on v3. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index be54216027b6..bce2e260f55e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -928,16 +928,15 @@ xfs_ioc_fsgetxattr( return 0; } -STATIC void -xfs_set_diflags( +STATIC uint16_t +xfs_flags2diflags( struct xfs_inode *ip, unsigned int xflags) { - unsigned int di_flags; - uint64_t di_flags2; - /* can't set PREALLOC this way, just preserve it */ - di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + uint16_t di_flags = + (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + if (xflags & FS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; if (xflags & FS_XFLAG_APPEND) @@ -967,19 +966,24 @@ xfs_set_diflags( if (xflags & FS_XFLAG_EXTSIZE) di_flags |= XFS_DIFLAG_EXTSIZE; } - ip->i_d.di_flags = di_flags; - /* diflags2 only valid for v3 inodes. */ - if (ip->i_d.di_version < 3) - return; + return di_flags; +} + +STATIC uint64_t +xfs_flags2diflags2( + struct xfs_inode *ip, + unsigned int xflags) +{ + uint64_t di_flags2 = + (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK); - di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; if (xflags & FS_XFLAG_COWEXTSIZE) di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; - ip->i_d.di_flags2 = di_flags2; + return di_flags2; } STATIC void @@ -1020,6 +1024,7 @@ xfs_ioctl_setattr_xflags( struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; + uint64_t di_flags2; /* Can't change realtime flag if any extents are allocated. */ if ((ip->i_d.di_nextents || ip->i_delayed_blks) && @@ -1050,7 +1055,14 @@ xfs_ioctl_setattr_xflags( !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - xfs_set_diflags(ip, fa->fsx_xflags); + /* diflags2 only valid for v3 inodes. */ + di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); + if (di_flags2 && ip->i_d.di_version < 3) + return -EINVAL; + + ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags); + ip->i_d.di_flags2 = di_flags2; + xfs_diflags_to_linux(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -- cgit v1.2.3 From 772003c6a4282211487c9d33958594d7f2be7dd2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Sep 2017 14:07:11 -0700 Subject: xfs: open code end_buffer_async_write in xfs_finish_page_writeback commit 8353a814f2518dcfa79a5bb77afd0e7dfa391bb1 upstream. Our loop in xfs_finish_page_writeback, which iterates over all buffer heads in a page and then calls end_buffer_async_write, which also iterates over all buffers in the page to check if any I/O is in flight is not only inefficient, but also potentially dangerous as end_buffer_async_write can cause the page and all buffers to be freed. Replace it with a single loop that does the work of end_buffer_async_write on a per-page basis. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 72 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f750d888bd17..d23889e0bedc 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -90,11 +90,11 @@ xfs_find_bdev_for_inode( * associated buffer_heads, paying attention to the start and end offsets that * we need to process on the page. * - * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last - * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or - * the page at all, as we may be racing with memory reclaim and it can free both - * the bufferhead chain and the page as it will see the page as clean and - * unused. + * Note that we open code the action in end_buffer_async_write here so that we + * only have to iterate over the buffers attached to the page once. This is not + * only more efficient, but also ensures that we only calls end_page_writeback + * at the end of the iteration, and thus avoids the pitfall of having the page + * and buffers potentially freed after every call to end_buffer_async_write. */ static void xfs_finish_page_writeback( @@ -102,29 +102,45 @@ xfs_finish_page_writeback( struct bio_vec *bvec, int error) { - unsigned int end = bvec->bv_offset + bvec->bv_len - 1; - struct buffer_head *head, *bh, *next; + struct buffer_head *head = page_buffers(bvec->bv_page), *bh = head; + bool busy = false; unsigned int off = 0; - unsigned int bsize; + unsigned long flags; ASSERT(bvec->bv_offset < PAGE_SIZE); ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); - ASSERT(end < PAGE_SIZE); + ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE); ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); - bh = head = page_buffers(bvec->bv_page); - - bsize = bh->b_size; + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &head->b_state); do { - if (off > end) - break; - next = bh->b_this_page; - if (off < bvec->bv_offset) - goto next_bh; - bh->b_end_io(bh, !error); -next_bh: - off += bsize; - } while ((bh = next) != head); + if (off >= bvec->bv_offset && + off < bvec->bv_offset + bvec->bv_len) { + ASSERT(buffer_async_write(bh)); + ASSERT(bh->b_end_io == NULL); + + if (error) { + mapping_set_error(bvec->bv_page->mapping, -EIO); + set_buffer_write_io_error(bh); + clear_buffer_uptodate(bh); + SetPageError(bvec->bv_page); + } else { + set_buffer_uptodate(bh); + } + clear_buffer_async_write(bh); + unlock_buffer(bh); + } else if (buffer_async_write(bh)) { + ASSERT(buffer_locked(bh)); + busy = true; + } + off += bh->b_size; + } while ((bh = bh->b_this_page) != head); + bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); + local_irq_restore(flags); + + if (!busy) + end_page_writeback(bvec->bv_page); } /* @@ -138,8 +154,10 @@ xfs_destroy_ioend( int error) { struct inode *inode = ioend->io_inode; - struct bio *last = ioend->io_bio; - struct bio *bio, *next; + struct bio *bio = &ioend->io_inline_bio; + struct bio *last = ioend->io_bio, *next; + u64 start = bio->bi_iter.bi_sector; + bool quiet = bio_flagged(bio, BIO_QUIET); for (bio = &ioend->io_inline_bio; bio; bio = next) { struct bio_vec *bvec; @@ -160,6 +178,11 @@ xfs_destroy_ioend( bio_put(bio); } + + if (unlikely(error && !quiet)) { + xfs_err_ratelimited(XFS_I(inode)->i_mount, + "writeback error on sector %llu", start); + } } /* @@ -427,7 +450,8 @@ xfs_start_buffer_writeback( ASSERT(!buffer_delay(bh)); ASSERT(!buffer_unwritten(bh)); - mark_buffer_async_write(bh); + bh->b_end_io = NULL; + set_buffer_async_write(bh); set_buffer_uptodate(bh); clear_buffer_dirty(bh); } -- cgit v1.2.3 From 81cb6f1a2a1964ed4d93604d1a3d49d92db2a01b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 17 Sep 2017 14:07:12 -0700 Subject: xfs: use kmem_free to free return value of kmem_zalloc commit 6c370590cfe0c36bcd62d548148aa65c984540b7 upstream. In function xfs_test_remount_options(), kfree() is used to free memory allocated by kmem_zalloc(). But it is better to use kmem_free(). Signed-off-by: Pan Bian Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 882fb8524fcb..67d589e0a49f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1214,7 +1214,7 @@ xfs_test_remount_options( tmp_mp->m_super = sb; error = xfs_parseargs(tmp_mp, options); xfs_free_fsname(tmp_mp); - kfree(tmp_mp); + kmem_free(tmp_mp); return error; } -- cgit v1.2.3 From 7b5fcb7fc05bdbce87e5bec9e358b059317ffb5f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 24 Aug 2017 09:53:59 -0700 Subject: md/raid5: release/flush io in raid5_do_work() commit 9c72a18e46ebe0f09484cce8ebf847abdab58498 upstream. In raid5, there are scenarios where some ios are deferred to a later time, and some IO need a flush to complete. To make sure we make progress with these IOs, we need to call the following functions: flush_deferred_bios(conf); r5l_flush_stripe_to_raid(conf->log); Both of these functions are called in raid5d(), but missing in raid5_do_work(). As a result, these functions are not called when multi-threading (group_thread_cnt > 0) is enabled. This patch adds calls to these function to raid5_do_work(). Note for stable branches: r5l_flush_stripe_to_raid(conf->log) is need for 4.4+ flush_deferred_bios(conf) is only needed for 4.11+ Signed-off-by: Song Liu Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 383f19c6bf24..549b4afd12e1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5844,6 +5844,8 @@ static void raid5_do_work(struct work_struct *work) spin_unlock_irq(&conf->device_lock); + r5l_flush_stripe_to_raid(conf->log); + async_tx_issue_pending_all(); blk_finish_plug(&plug); -- cgit v1.2.3 From ae04a8c4c6fc5b4aabfb166588045e2845b4d4e7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 31 Aug 2017 15:11:06 -0700 Subject: xfs: fix compiler warnings commit 7bf7a193a90cadccaad21c5970435c665c40fe27 upstream. Fix up all the compiler warnings that have crept in. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- fs/xfs/libxfs/xfs_inode_fork.c | 9 +++------ fs/xfs/xfs_buf_item.c | 2 ++ fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_log_recover.c | 4 ++++ 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2ab50caca14c..d2f4ab175096 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -579,7 +579,7 @@ xfs_bmap_validate_ret( #else #define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) -#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) +#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0) #endif /* DEBUG */ /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 8a37efe04de3..4e30448c4465 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -1539,14 +1539,11 @@ xfs_iext_realloc_indirect( xfs_ifork_t *ifp, /* inode fork pointer */ int new_size) /* new indirection array size */ { - int nlists; /* number of irec's (ex lists) */ - int size; /* current indirection array size */ - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - size = nlists * sizeof(xfs_ext_irec_t); ASSERT(ifp->if_real_bytes); - ASSERT((new_size >= 0) && (new_size != size)); + ASSERT((new_size >= 0) && + (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) * + sizeof(xfs_ext_irec_t)))); if (new_size == 0) { xfs_iext_destroy(ifp); } else { diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index ef2c1375f092..e0a0af0946f2 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -570,7 +570,9 @@ xfs_buf_item_unlock( bool aborted = !!(lip->li_flags & XFS_LI_ABORTED); bool hold = !!(bip->bli_flags & XFS_BLI_HOLD); bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY); +#if defined(DEBUG) || defined(XFS_WARN) bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED); +#endif /* Clear the buffer's association with this transaction. */ bp->b_transp = NULL; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index a1247c3c1efb..5b81f7f41b80 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -802,7 +802,7 @@ xfs_vn_setattr_nonsize( * Caution: The caller of this function is responsible for calling * setattr_prepare() or otherwise verifying the change is fine. */ -int +STATIC int xfs_setattr_size( struct xfs_inode *ip, struct iattr *iattr) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index edd849b8f14d..05909269f973 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4827,12 +4827,16 @@ xlog_recover_process_intents( int error = 0; struct xfs_ail_cursor cur; struct xfs_ail *ailp; +#if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; +#endif ailp = log->l_ailp; spin_lock(&ailp->xa_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); +#if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); +#endif while (lip != NULL) { /* * We're done when we see something other than an intent. -- cgit v1.2.3 From 7829684088a216b8b53894768cd4f483c246cb94 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 25 Aug 2017 09:05:42 +0200 Subject: ipv6: Fix may be used uninitialized warning in rt6_check commit 3614364527daa870264f6dde77f02853cdecd02c upstream. rt_cookie might be used uninitialized, fix this by initializing it. Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c2dd3f77cdb..61729641e027 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1267,7 +1267,7 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt) static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) { - u32 rt_cookie; + u32 rt_cookie = 0; if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) return NULL; -- cgit v1.2.3 From 089d7720383d7bc9ca6b8824a05dfa66f80d1f41 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 Sep 2017 08:20:15 +0200 Subject: Linux 4.9.51 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 038d126a15fc..b48aebbe187f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 50 +SUBLEVEL = 51 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3