From 193038d913f47563594814c1011a3c40e15b86ac Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 20 Apr 2018 12:19:07 -0500 Subject: cifs: do not allow creating sockets except with SMB1 posix exensions commit 1d0cffa674cfa7d185a302c8c6850fc50b893bed upstream. RHBZ: 1453123 Since at least the 3.10 kernel and likely a lot earlier we have not been able to create unix domain sockets in a cifs share when mounted using the SFU mount option (except when mounted with the cifs unix extensions to Samba e.g.) Trying to create a socket, for example using the af_unix command from xfstests will cause : BUG: unable to handle kernel NULL pointer dereference at 00000000 00000040 Since no one uses or depends on being able to create unix domains sockets on a cifs share the easiest fix to stop this vulnerability is to simply not allow creation of any other special files than char or block devices when sfu is used. Added update to Ronnie's patch to handle a tcon link leak, and to address a buf leak noticed by Gustavo and Colin. Acked-by: Gustavo A. R. Silva CC: Colin Ian King Reviewed-by: Pavel Shilovsky Reported-by: Eryu Guan Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/cifs/dir.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d9cbda269462..331ddd07e505 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -673,6 +673,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, goto mknod_out; } + if (!S_ISCHR(mode) && !S_ISBLK(mode)) + goto mknod_out; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) goto mknod_out; @@ -681,10 +684,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { - kfree(full_path); rc = -ENOMEM; - free_xid(xid); - return rc; + goto mknod_out; } if (backup_cred(cifs_sb)) @@ -731,7 +732,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, pdev->minor = cpu_to_le64(MINOR(device_number)); rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, &bytes_written, iov, 1); - } /* else if (S_ISFIFO) */ + } tcon->ses->server->ops->close(xid, tcon, &fid); d_drop(direntry); -- cgit v1.2.3 From 01eabcde1be5b9d0d88e98d67a97589b275a8ba6 Mon Sep 17 00:00:00 2001 From: Xiaoming Gao Date: Fri, 13 Apr 2018 17:48:08 +0800 Subject: x86/tsc: Prevent 32bit truncation in calc_hpet_ref() commit d3878e164dcd3925a237a20e879432400e369172 upstream. The TSC calibration code uses HPET as reference. The conversion normalizes the delta of two HPET timestamps: hpetref = ((tshpet1 - tshpet2) * HPET_PERIOD) / 1e6 and then divides the normalized delta of the corresponding TSC timestamps by the result to calulate the TSC frequency. tscfreq = ((tstsc1 - tstsc2 ) * 1e6) / hpetref This uses do_div() which takes an u32 as the divisor, which worked so far because the HPET frequency was low enough that 'hpetref' never exceeded 32bit. On Skylake machines the HPET frequency increased so 'hpetref' can exceed 32bit. do_div() truncates the divisor, which causes the calibration to fail. Use div64_u64() to avoid the problem. [ tglx: Fixes whitespace mangled patch and rewrote changelog ] Signed-off-by: Xiaoming Gao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: peterz@infradead.org Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/38894564-4fc9-b8ec-353f-de702839e44e@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index bbfb03eccb7f..da6a287a11e4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -409,7 +409,7 @@ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) hpet2 -= hpet1; tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); do_div(tmp, 1000000); - do_div(deltatsc, tmp); + deltatsc = div64_u64(deltatsc, tmp); return (unsigned long) deltatsc; } -- cgit v1.2.3 From 5ddab9f7be78b36ded2a7a93d45642dff46fd409 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Mon, 2 Apr 2018 15:10:35 +0800 Subject: drm/vc4: Fix memory leak during BO teardown commit c0db1b677e1d584fab5d7ac76a32e1c0157542e0 upstream. During BO teardown, an indirect list 'uniform_addr_offsets' wasn't being freed leading to leaking many 128B allocations. Fix the memory leak by releasing it at teardown time. Cc: stable@vger.kernel.org Fixes: 6d45c81d229d ("drm/vc4: Add support for branching in shader validation.") Signed-off-by: Daniel J Blueman Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180402071035.25356-1-daniel@quora.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_bo.c | 2 ++ drivers/gpu/drm/vc4/vc4_validate_shaders.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index ec9023bd935b..d53e805d392f 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -80,6 +80,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo) struct vc4_dev *vc4 = to_vc4_dev(obj->dev); if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; @@ -328,6 +329,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) } if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 917321ce832f..19a5bde8e490 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -874,6 +874,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) fail: kfree(validation_state.branch_targets); if (validated_shader) { + kfree(validated_shader->uniform_addr_offsets); kfree(validated_shader->texture_samples); kfree(validated_shader); } -- cgit v1.2.3 From 9c87602abe12b555e5176403dc7e8b4ddb8ace33 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 16 Apr 2018 18:53:09 +0300 Subject: drm/i915: Fix LSPCON TMDS output buffer enabling from low-power state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7eb2c4dd54ff841f2fe509a84973eb25fa20bda2 upstream. LSPCON adapters in low-power state may ignore the first I2C write during TMDS output buffer enabling, resulting in a blank screen even with an otherwise enabled pipe. Fix this by reading back and validating the written value a few times. The problem was noticed on GLK machines with an onboard LSPCON adapter after entering/exiting DC5 power state. Doing an I2C read of the adapter ID as the first transaction - instead of the I2C write to enable the TMDS buffers - returns the correct value. Based on this we assume that the transaction itself is sent properly, it's only the adapter that is not ready for some reason to accept this first write after waking from low-power state. In my case the second I2C write attempt always succeeded. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105854 Cc: Clinton Taylor Cc: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Imre Deak Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180416155309.11100-1-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_dual_mode_helper.c | 39 +++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c index a7b2a751f6fe..cdb53586c8fe 100644 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c @@ -322,19 +322,44 @@ int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type, { uint8_t tmds_oen = enable ? 0 : DP_DUAL_MODE_TMDS_DISABLE; ssize_t ret; + int retry; if (type < DRM_DP_DUAL_MODE_TYPE2_DVI) return 0; - ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, - &tmds_oen, sizeof(tmds_oen)); - if (ret) { - DRM_DEBUG_KMS("Failed to %s TMDS output buffers\n", - enable ? "enable" : "disable"); - return ret; + /* + * LSPCON adapters in low-power state may ignore the first write, so + * read back and verify the written value a few times. + */ + for (retry = 0; retry < 3; retry++) { + uint8_t tmp; + + ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmds_oen, sizeof(tmds_oen)); + if (ret) { + DRM_DEBUG_KMS("Failed to %s TMDS output buffers (%d attempts)\n", + enable ? "enable" : "disable", + retry + 1); + return ret; + } + + ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmp, sizeof(tmp)); + if (ret) { + DRM_DEBUG_KMS("I2C read failed during TMDS output buffer %s (%d attempts)\n", + enable ? "enabling" : "disabling", + retry + 1); + return ret; + } + + if (tmp == tmds_oen) + return 0; } - return 0; + DRM_DEBUG_KMS("I2C write value mismatch during TMDS output buffer %s\n", + enable ? "enabling" : "disabling"); + + return -EIO; } EXPORT_SYMBOL(drm_dp_dual_mode_set_tmds_output); -- cgit v1.2.3 From fbd45e2957fbf7ed392deab29cd3050a1ab2562f Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 13 Oct 2016 14:10:35 +0200 Subject: i2c: i801: store and restore the SLVCMD register at load and unload commit 22e94bd6779e1140350c0792e85c79552ec43673 upstream. Also do not override any other configuration in this register. Signed-off-by: Benjamin Tissoires Signed-off-by: Wolfram Sang Cc: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e6fe21a6135b..e7c91494d1cd 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -243,6 +243,7 @@ struct i801_priv { struct i2c_adapter adapter; unsigned long smba; unsigned char original_hstcfg; + unsigned char original_slvcmd; struct pci_dev *pci_dev; unsigned int features; @@ -962,13 +963,26 @@ static int i801_enable_host_notify(struct i2c_adapter *adapter) if (!priv->host_notify) return -ENOMEM; - outb_p(SMBSLVCMD_HST_NTFY_INTREN, SMBSLVCMD(priv)); + priv->original_slvcmd = inb_p(SMBSLVCMD(priv)); + + if (!(SMBSLVCMD_HST_NTFY_INTREN & priv->original_slvcmd)) + outb_p(SMBSLVCMD_HST_NTFY_INTREN | priv->original_slvcmd, + SMBSLVCMD(priv)); + /* clear Host Notify bit to allow a new notification */ outb_p(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv)); return 0; } +static void i801_disable_host_notify(struct i801_priv *priv) +{ + if (!(priv->features & FEATURE_HOST_NOTIFY)) + return; + + outb_p(priv->original_slvcmd, SMBSLVCMD(priv)); +} + static const struct i2c_algorithm smbus_algorithm = { .smbus_xfer = i801_access, .functionality = i801_func, @@ -1666,6 +1680,7 @@ static void i801_remove(struct pci_dev *dev) pm_runtime_forbid(&dev->dev); pm_runtime_get_noresume(&dev->dev); + i801_disable_host_notify(priv); i801_del_mux(priv); i2c_del_adapter(&priv->adapter); i801_acpi_remove(priv); -- cgit v1.2.3 From 53defab7a561973cc3b815b87ce0ddbb9bc3ee18 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 11 Apr 2018 18:03:31 +0200 Subject: i2c: i801: Save register SMBSLVCMD value only once commit a086bb8317303dd74725dca933b9b29575159382 upstream. Saving the original value of register SMBSLVCMD in i801_enable_host_notify() doesn't work, because this function is called not only at probe time but also at resume time. Do it in i801_probe() instead, so that the saved value is not overwritten at resume time. Signed-off-by: Jean Delvare Fixes: 22e94bd6779e ("i2c: i801: store and restore the SLVCMD register at load and unload") Reviewed-by: Benjamin Tissoires Tested-by: Jason Andryuk Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e7c91494d1cd..fcffd60410eb 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -963,8 +963,6 @@ static int i801_enable_host_notify(struct i2c_adapter *adapter) if (!priv->host_notify) return -ENOMEM; - priv->original_slvcmd = inb_p(SMBSLVCMD(priv)); - if (!(SMBSLVCMD_HST_NTFY_INTREN & priv->original_slvcmd)) outb_p(SMBSLVCMD_HST_NTFY_INTREN | priv->original_slvcmd, SMBSLVCMD(priv)); @@ -1603,6 +1601,10 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) outb_p(inb_p(SMBAUXCTL(priv)) & ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv)); + /* Remember original Host Notify setting */ + if (priv->features & FEATURE_HOST_NOTIFY) + priv->original_slvcmd = inb_p(SMBSLVCMD(priv)); + /* Default timeout in interrupt mode: 200 ms */ priv->adapter.timeout = HZ / 5; -- cgit v1.2.3 From bd8505f3f3d408da000957452d119b97f6eb2bba Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 11 Apr 2018 18:05:34 +0200 Subject: i2c: i801: Restore configuration at shutdown commit f7f6d915a10f7f2bce17e3b1b7d3376562395a28 upstream. On some systems, the BIOS expects certain SMBus register values to match the hardware defaults. Restore these configuration registers at shutdown time to avoid confusing the BIOS. This avoids hard-locking such systems upon reboot. Signed-off-by: Jean Delvare Tested-by: Jason Andryuk Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index fcffd60410eb..b32bf7eac3c8 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1696,6 +1696,15 @@ static void i801_remove(struct pci_dev *dev) */ } +static void i801_shutdown(struct pci_dev *dev) +{ + struct i801_priv *priv = pci_get_drvdata(dev); + + /* Restore config registers to avoid hard hang on some systems */ + i801_disable_host_notify(priv); + pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg); +} + #ifdef CONFIG_PM static int i801_suspend(struct device *dev) { @@ -1728,6 +1737,7 @@ static struct pci_driver i801_driver = { .id_table = i801_ids, .probe = i801_probe, .remove = i801_remove, + .shutdown = i801_shutdown, .driver = { .pm = &i801_pm_ops, }, -- cgit v1.2.3 From c274101cd18bcf4d93941edc911b89fdb8065816 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Tue, 20 Feb 2018 07:30:10 -0600 Subject: usb: musb: fix enumeration after resume commit 17539f2f4f0b7fa906b508765c8ada07a1e45f52 upstream. On dm3730 there are enumeration problems after resume. Investigation led to the cause that the MUSB_POWER_SOFTCONN bit is not set. If it was set before suspend (because it was enabled via musb_pullup()), it is set in musb_restore_context() so the pullup is enabled. But then musb_start() is called which overwrites MUSB_POWER and therefore disables MUSB_POWER_SOFTCONN, so no pullup is enabled and the device is not enumerated. So let's do a subset of what musb_start() does in the same way as musb_suspend() does it. Platform-specific stuff it still called as there might be some phy-related stuff which needs to be enabled. Also interrupts are enabled, as it was the original idea of calling musb_start() in musb_resume() according to Commit 6fc6f4b87cb3 ("usb: musb: Disable interrupts on suspend, enable them on resume") Signed-off-by: Andreas Kemnade Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 2d9a8067eaca..0736cc27b5e7 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2710,7 +2710,8 @@ static int musb_resume(struct device *dev) if ((devctl & mask) != (musb->context.devctl & mask)) musb->port1_status = 0; - musb_start(musb); + musb_enable_interrupts(musb); + musb_platform_enable(musb); spin_lock_irqsave(&musb->lock, flags); error = musb_run_resume_work(musb); -- cgit v1.2.3 From 9e565114f4bc9459606dfc59d84df11fa6dc75a4 Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Mon, 5 Mar 2018 11:35:10 -0600 Subject: usb: musb: call pm_runtime_{get,put}_sync before reading vbus registers commit df6b074dbe248d8c43a82131e8fd429e401841a5 upstream. Without pm_runtime_{get,put}_sync calls in place, reading vbus status via /sys causes the following error: Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa0ab060 pgd = b333e822 [fa0ab060] *pgd=48011452(bad) [] (musb_default_readb) from [] (musb_vbus_show+0x58/0xe4) [] (musb_vbus_show) from [] (dev_attr_show+0x20/0x44) [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x80/0xdc) [] (sysfs_kf_seq_show) from [] (seq_read+0x250/0x448) [] (seq_read) from [] (__vfs_read+0x1c/0x118) [] (__vfs_read) from [] (vfs_read+0x90/0x144) [] (vfs_read) from [] (SyS_read+0x3c/0x74) [] (SyS_read) from [] (ret_fast_syscall+0x0/0x54) Solution was suggested by Tony Lindgren . Signed-off-by: Merlijn Wajer Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 0736cc27b5e7..d4c9e9544ba0 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1774,6 +1774,7 @@ musb_vbus_show(struct device *dev, struct device_attribute *attr, char *buf) int vbus; u8 devctl; + pm_runtime_get_sync(dev); spin_lock_irqsave(&musb->lock, flags); val = musb->a_wait_bcon; vbus = musb_platform_get_vbus_status(musb); @@ -1787,6 +1788,7 @@ musb_vbus_show(struct device *dev, struct device_attribute *attr, char *buf) vbus = 0; } spin_unlock_irqrestore(&musb->lock, flags); + pm_runtime_put_sync(dev); return sprintf(buf, "Vbus %s, timeout %lu msec\n", vbus ? "on" : "off", val); -- cgit v1.2.3 From 4dcd6ce1dff4903a6f3a928962dd03b0a7ecef15 Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Tue, 13 Mar 2018 09:48:40 -0500 Subject: usb: musb: Fix external abort in musb_remove on omap2430 commit 94e46a4f2d5eb14059e42f313c098d4854847376 upstream. This fixes an oops on unbind / module unload (on the musb omap2430 platform). musb_remove function now calls musb_platform_exit before disabling runtime pm. Signed-off-by: Merlijn Wajer Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index d4c9e9544ba0..579aa9accafc 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2485,10 +2485,11 @@ static int musb_remove(struct platform_device *pdev) musb_generic_disable(musb); spin_unlock_irqrestore(&musb->lock, flags); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); + musb_platform_exit(musb); + pm_runtime_dont_use_autosuspend(musb->controller); pm_runtime_put_sync(musb->controller); pm_runtime_disable(musb->controller); - musb_platform_exit(musb); musb_phy_callback = NULL; if (musb->dma_controller) musb_dma_controller_destroy(musb->dma_controller); -- cgit v1.2.3 From 5955f16e49ecf3d9ab8ffe2c46756ba4c228ebb2 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 23 Jan 2017 14:08:13 +0000 Subject: MIPS: Generic: Fix big endian CPUs on generic machine commit a3078e593b74fe196e69f122f03ff0b32f652c53 upstream. Big endian CPUs require SWAP_IO_SPACE enabled to swap accesses to little endian peripherals. Without this patch, big endian kernels fail to communicate with little endian periperals, such as PCI devices, on QEMU and FPGA based platforms. Signed-off-by: Matt Redfearn Fixes: eed0eabd12ef ("MIPS: generic: Introduce generic DT-based board support") Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15105/ Signed-off-by: Ralf Baechle Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2d2fd79ced9d..34fbbf8fdeaa 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -95,6 +95,7 @@ config MIPS_GENERIC select PCI_DRIVERS_GENERIC select PINCTRL select SMP_UP if SMP + select SWAP_IO_SPACE select SYS_HAS_CPU_MIPS32_R1 select SYS_HAS_CPU_MIPS32_R2 select SYS_HAS_CPU_MIPS32_R6 -- cgit v1.2.3 From 98b62bd6823dc7da57a940b84c5e16fd459dc50e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 10 Dec 2016 22:56:21 -0800 Subject: Input: drv260x - fix initializing overdrive voltage commit 74c82dae6c474933f2be401976e1530b5f623221 upstream. We were accidentally initializing haptics->rated_voltage twice, and did not initialize overdrive voltage. Acked-by: Dan Murphy Signed-off-by: Dmitry Torokhov Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/drv260x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index 930424e55439..251d64ca41ce 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -521,7 +521,7 @@ static int drv260x_probe(struct i2c_client *client, if (!haptics) return -ENOMEM; - haptics->rated_voltage = DRV260X_DEF_OD_CLAMP_VOLT; + haptics->overdrive_voltage = DRV260X_DEF_OD_CLAMP_VOLT; haptics->rated_voltage = DRV260X_DEF_RATED_VOLT; if (pdata) { -- cgit v1.2.3 From f9437fa5d27bd475017107a59a246c7eb4045b57 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 20 Jan 2017 13:25:06 +0000 Subject: power: supply: bq2415x: check for NULL acpi_id to avoid null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a1b94355ea3fde5e13db7ff37c0272fcde4e29b2 upstream. acpi_match_device can potentially return NULL, so it is prudent to check if acpi_id is null before it is dereferenced. Add a check and an error message to indicate the failure. Signed-off-by: Colin Ian King Reviewed-by: Pali Rohár Signed-off-by: Sebastian Reichel Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/bq2415x_charger.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 73e2f0b79dd4..c4770a94cc8e 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -1569,6 +1569,11 @@ static int bq2415x_probe(struct i2c_client *client, acpi_id = acpi_match_device(client->dev.driver->acpi_match_table, &client->dev); + if (!acpi_id) { + dev_err(&client->dev, "failed to match device name\n"); + ret = -ENODEV; + goto error_1; + } name = kasprintf(GFP_KERNEL, "%s-%d", acpi_id->id, num); } if (!name) { -- cgit v1.2.3 From d539f0aa5d1c07f312136363a020b7bde0d31094 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Nov 2016 09:11:54 -0200 Subject: stk-webcam: fix an endian bug in stk_camera_read_reg() commit d08876f524a605d64c7ca32cb8e9f5be3839e82e upstream. We pass an int pointer to stk_camera_read_reg() but only write to the highest byte. It's a bug on big endian systems and generally a nasty thing to do and doesn't match the write function either. Signed-off-by: Dan Carpenter Signed-off-by: Mauro Carvalho Chehab Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/stkwebcam/stk-sensor.c | 6 +++--- drivers/media/usb/stkwebcam/stk-webcam.c | 11 ++++++----- drivers/media/usb/stkwebcam/stk-webcam.h | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/media/usb/stkwebcam/stk-sensor.c b/drivers/media/usb/stkwebcam/stk-sensor.c index e546b014d7ad..2dcc8d0be9e7 100644 --- a/drivers/media/usb/stkwebcam/stk-sensor.c +++ b/drivers/media/usb/stkwebcam/stk-sensor.c @@ -228,7 +228,7 @@ static int stk_sensor_outb(struct stk_camera *dev, u8 reg, u8 val) { int i = 0; - int tmpval = 0; + u8 tmpval = 0; if (stk_camera_write_reg(dev, STK_IIC_TX_INDEX, reg)) return 1; @@ -253,7 +253,7 @@ static int stk_sensor_outb(struct stk_camera *dev, u8 reg, u8 val) static int stk_sensor_inb(struct stk_camera *dev, u8 reg, u8 *val) { int i = 0; - int tmpval = 0; + u8 tmpval = 0; if (stk_camera_write_reg(dev, STK_IIC_RX_INDEX, reg)) return 1; @@ -274,7 +274,7 @@ static int stk_sensor_inb(struct stk_camera *dev, u8 reg, u8 *val) if (stk_camera_read_reg(dev, STK_IIC_RX_VALUE, &tmpval)) return 1; - *val = (u8) tmpval; + *val = tmpval; return 0; } diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c index 22a9aae16291..1c48f2f1e14a 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.c +++ b/drivers/media/usb/stkwebcam/stk-webcam.c @@ -144,7 +144,7 @@ int stk_camera_write_reg(struct stk_camera *dev, u16 index, u8 value) return 0; } -int stk_camera_read_reg(struct stk_camera *dev, u16 index, int *value) +int stk_camera_read_reg(struct stk_camera *dev, u16 index, u8 *value) { struct usb_device *udev = dev->udev; unsigned char *buf; @@ -163,7 +163,7 @@ int stk_camera_read_reg(struct stk_camera *dev, u16 index, int *value) sizeof(u8), 500); if (ret >= 0) - memcpy(value, buf, sizeof(u8)); + *value = *buf; kfree(buf); return ret; @@ -171,9 +171,10 @@ int stk_camera_read_reg(struct stk_camera *dev, u16 index, int *value) static int stk_start_stream(struct stk_camera *dev) { - int value; + u8 value; int i, ret; - int value_116, value_117; + u8 value_116, value_117; + if (!is_present(dev)) return -ENODEV; @@ -213,7 +214,7 @@ static int stk_start_stream(struct stk_camera *dev) static int stk_stop_stream(struct stk_camera *dev) { - int value; + u8 value; int i; if (is_present(dev)) { stk_camera_read_reg(dev, 0x0100, &value); diff --git a/drivers/media/usb/stkwebcam/stk-webcam.h b/drivers/media/usb/stkwebcam/stk-webcam.h index 9bbfa3d9bfdd..92bb48e3c74e 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.h +++ b/drivers/media/usb/stkwebcam/stk-webcam.h @@ -129,7 +129,7 @@ struct stk_camera { #define vdev_to_camera(d) container_of(d, struct stk_camera, vdev) int stk_camera_write_reg(struct stk_camera *, u16, u8); -int stk_camera_read_reg(struct stk_camera *, u16, int *); +int stk_camera_read_reg(struct stk_camera *, u16, u8 *); int stk_sensor_init(struct stk_camera *); int stk_sensor_configure(struct stk_camera *); -- cgit v1.2.3 From faf6fd7539a104a6ad2d263090738ab46c1828c5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 23 Sep 2016 16:38:27 +0100 Subject: OF: Prevent unaligned access in of_alias_scan() commit de96ec2a77c6d06a423c2c495bb4a2f4299f3d9e upstream. When allocating a struct alias_prop, of_alias_scan() only requested that it be aligned on a 4 byte boundary. The struct contains pointers which leads to us attempting 64 bit writes on 64 bit systems, and if the CPU doesn't support unaligned memory accesses then this causes problems - for example on some MIPS64r2 CPUs including the "mips64r2-generic" QEMU emulated CPU it will trigger an address error exception. Fix this by requesting alignment for the struct alias_prop allocation matching that which the compiler expects, using the __alignof__ keyword. Signed-off-by: Paul Burton Acked-by: Rob Herring Reviewed-by: Grant Likely Cc: Frank Rowand Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14306/ Signed-off-by: Ralf Baechle Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/of/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index a0bccb54a9bd..466b285cef3e 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -2109,7 +2109,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) continue; /* Allocate an alias_prop with enough space for the stem */ - ap = dt_alloc(sizeof(*ap) + len + 1, 4); + ap = dt_alloc(sizeof(*ap) + len + 1, __alignof__(*ap)); if (!ap) continue; memset(ap, 0, sizeof(*ap) + len + 1); -- cgit v1.2.3 From ff738afb8a847414f73b729cb25df624bd743089 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 2 Feb 2017 10:14:51 +0100 Subject: ath9k_hw: check if the chip failed to wake up commit a34d0a0da1abae46a5f6ebd06fb0ec484ca099d9 upstream. In an RFC patch, Sven Eckelmann and Simon Wunderlich reported: "QCA 802.11n chips (especially AR9330/AR9340) sometimes end up in a state in which a read of AR_CFG always returns 0xdeadbeef. This should not happen when when the power_mode of the device is ATH9K_PM_AWAKE." Include the check for the default register state in the existing MAC hang check. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index a35f78be8dec..acef4ec928c1 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1603,6 +1603,10 @@ bool ath9k_hw_check_alive(struct ath_hw *ah) int count = 50; u32 reg, last_val; + /* Check if chip failed to wake up */ + if (REG_READ(ah, AR_CFG) == 0xdeadbeef) + return false; + if (AR_SREV_9300(ah)) return !ath9k_hw_detect_mac_hang(ah); -- cgit v1.2.3 From cff3a5f282ff1813352a146c487f4d8b472c250e Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 1 Feb 2017 20:49:35 -0500 Subject: jbd2: fix use after free in kjournald2() commit dbfcef6b0f4012c57bc0b6e0e660d5ed12a5eaed upstream. Below is the synchronization issue between unmount and kjournald2 contexts, which results into use after free issue in kjournald2(). Fix this issue by using journal->j_state_lock to synchronize the wait_event() done in journal_kill_thread() and the wake_up() done in kjournald2(). TASK 1: umount cmd: |--jbd2_journal_destroy() { |--journal_kill_thread() { write_lock(&journal->j_state_lock); journal->j_flags |= JBD2_UNMOUNT; ... write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_commit); TASK 2 wakes up here: kjournald2() { ... checks JBD2_UNMOUNT flag and calls goto end-loop; ... end_loop: write_unlock(&journal->j_state_lock); journal->j_task = NULL; --> If this thread gets pre-empted here, then TASK 1 wait_event will exit even before this thread is completely done. wait_event(journal->j_wait_done_commit, journal->j_task == NULL); ... write_lock(&journal->j_state_lock); write_unlock(&journal->j_state_lock); } |--kfree(journal); } } wake_up(&journal->j_wait_done_commit); --> this step now results into use after free issue. } Signed-off-by: Sahitya Tummala Signed-off-by: Theodore Ts'o Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 542e33d29088..d10bb2c30bf8 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -276,11 +276,11 @@ loop: goto loop; end_loop: - write_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd_debug(1, "Journal thread exiting.\n"); + write_unlock(&journal->j_state_lock); return 0; } -- cgit v1.2.3 From fdc2090bdadf829d5459570f4e908d47e6ec6a1c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 24 Apr 2018 15:08:40 +0200 Subject: Revert "perf tools: Decompress kernel module when reading DSO data" This reverts commit e2d054998b151e85b6305aa72264f67097bd78e9 which is commit 1d6b3c9ba756a5134fd7ad1959acac776d17404b upstream. It breaks the build, so obviously none of us actually tested it :( Reported-by: Maxime Hadjinlian Reported-by: Akemi Yagi Cc: Namhyung Kim Cc: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Peter Zijlstra Cc: Wang Nan Cc: kernel-team@lge.com Cc: Arnaldo Carvalho de Melo Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/dso.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 4bc58822416c..d2c6cdd9d42b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -366,23 +366,7 @@ static int __open_dso(struct dso *dso, struct machine *machine) if (!is_regular_file(name)) return -EINVAL; - if (dso__needs_decompress(dso)) { - char newpath[KMOD_DECOMP_LEN]; - size_t len = sizeof(newpath); - - if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) { - free(name); - return -dso->load_errno; - } - - strcpy(name, newpath); - } - fd = do_open(name); - - if (dso__needs_decompress(dso)) - unlink(name); - free(name); return fd; } -- cgit v1.2.3 From ac6f0cb331e2ddc5d40b9059908218349064ec5c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Apr 2018 11:23:51 +0200 Subject: perf: Fix sample_max_stack maximum check commit 5af44ca53d019de47efe6dbc4003dd518e5197ed upstream. The syzbot hit KASAN bug in perf_callchain_store having the entry stored behind the allocated bounds [1]. We miss the sample_max_stack check for the initial event that allocates callchain buffers. This missing check allows to create an event with sample_max_stack value bigger than the global sysctl maximum: # sysctl -a | grep perf_event_max_stack kernel.perf_event_max_stack = 127 # perf record -vv -C 1 -e cycles/max-stack=256/ kill ... perf_event_attr: size 112 ... sample_max_stack 256 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 = 4 Note the '-C 1', which forces perf record to create just single event. Otherwise it opens event for every cpu, then the sample_max_stack check fails on the second event and all's fine. The fix is to run the sample_max_stack check also for the first event with callchains. [1] https://marc.info/?l=linux-kernel&m=152352732920874&w=2 Reported-by: syzbot+7c449856228b63ac951e@syzkaller.appspotmail.com Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: H. Peter Anvin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: syzkaller-bugs@googlegroups.com Cc: x86@kernel.org Fixes: 97c79a38cd45 ("perf core: Per event callchain limit") Link: http://lkml.kernel.org/r/20180415092352.12403-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- kernel/events/callchain.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 411226b26bca..04988d6466bf 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -117,19 +117,22 @@ int get_callchain_buffers(int event_max_stack) goto exit; } + /* + * If requesting per event more than the global cap, + * return a different error to help userspace figure + * this out. + * + * And also do it here so that we have &callchain_mutex held. + */ + if (event_max_stack > sysctl_perf_event_max_stack) { + err = -EOVERFLOW; + goto exit; + } + if (count > 1) { /* If the allocation failed, give up */ if (!callchain_cpus_entries) err = -ENOMEM; - /* - * If requesting per event more than the global cap, - * return a different error to help userspace figure - * this out. - * - * And also do it here so that we have &callchain_mutex held. - */ - if (event_max_stack > sysctl_perf_event_max_stack) - err = -EOVERFLOW; goto exit; } -- cgit v1.2.3 From 9acdfe4eecf29d38fd7c7d342b120f430c950400 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Apr 2018 11:23:50 +0200 Subject: perf: Return proper values for user stack errors commit 78b562fbfa2cf0a9fcb23c3154756b690f4905c1 upstream. Return immediately when we find issue in the user stack checks. The error value could get overwritten by following check for PERF_SAMPLE_REGS_INTR. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: H. Peter Anvin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: syzkaller-bugs@googlegroups.com Cc: x86@kernel.org Fixes: 60e2364e60e8 ("perf: Add ability to sample machine state on interrupt") Link: http://lkml.kernel.org/r/20180415092352.12403-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 74710fad35d5..b1d6b9888fba 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9456,9 +9456,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, * __u16 sample size limit. */ if (attr->sample_stack_user >= USHRT_MAX) - ret = -EINVAL; + return -EINVAL; else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) - ret = -EINVAL; + return -EINVAL; } if (attr->sample_type & PERF_SAMPLE_REGS_INTR) -- cgit v1.2.3 From 9ffa6fb2583b2c9afdb83c14948a9e3e2192bf1f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 11 Mar 2018 13:51:32 +0200 Subject: RDMA/mlx5: Fix NULL dereference while accessing XRC_TGT QPs commit 75a4598209cbe45540baa316c3b51d9db222e96e upstream. mlx5 modify_qp() relies on FW that the error will be thrown if wrong state is supplied. The missing check in FW causes the following crash while using XRC_TGT QPs. [ 14.769632] BUG: unable to handle kernel NULL pointer dereference at (null) [ 14.771085] IP: mlx5_ib_modify_qp+0xf60/0x13f0 [ 14.771894] PGD 800000001472e067 P4D 800000001472e067 PUD 14529067 PMD 0 [ 14.773126] Oops: 0002 [#1] SMP PTI [ 14.773763] CPU: 0 PID: 365 Comm: ubsan Not tainted 4.16.0-rc1-00038-g8151138c0793 #119 [ 14.775192] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 14.777522] RIP: 0010:mlx5_ib_modify_qp+0xf60/0x13f0 [ 14.778417] RSP: 0018:ffffbf48001c7bd8 EFLAGS: 00010246 [ 14.779346] RAX: 0000000000000000 RBX: ffff9a8f9447d400 RCX: 0000000000000000 [ 14.780643] RDX: 0000000000000000 RSI: 000000000000000a RDI: 0000000000000000 [ 14.781930] RBP: 0000000000000000 R08: 00000000000217b0 R09: ffffffffbc9c1504 [ 14.783214] R10: fffff4a180519480 R11: ffff9a8f94523600 R12: ffff9a8f9493e240 [ 14.784507] R13: ffff9a8f9447d738 R14: 000000000000050a R15: 0000000000000000 [ 14.785800] FS: 00007f545b466700(0000) GS:ffff9a8f9fc00000(0000) knlGS:0000000000000000 [ 14.787073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 14.787792] CR2: 0000000000000000 CR3: 00000000144be000 CR4: 00000000000006b0 [ 14.788689] Call Trace: [ 14.789007] _ib_modify_qp+0x71/0x120 [ 14.789475] modify_qp.isra.20+0x207/0x2f0 [ 14.790010] ib_uverbs_modify_qp+0x90/0xe0 [ 14.790532] ib_uverbs_write+0x1d2/0x3c0 [ 14.791049] ? __handle_mm_fault+0x93c/0xe40 [ 14.791644] __vfs_write+0x36/0x180 [ 14.792096] ? handle_mm_fault+0xc1/0x210 [ 14.792601] vfs_write+0xad/0x1e0 [ 14.793018] SyS_write+0x52/0xc0 [ 14.793422] do_syscall_64+0x75/0x180 [ 14.793888] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 14.794527] RIP: 0033:0x7f545ad76099 [ 14.794975] RSP: 002b:00007ffd78787468 EFLAGS: 00000287 ORIG_RAX: 0000000000000001 [ 14.795958] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f545ad76099 [ 14.797075] RDX: 0000000000000078 RSI: 0000000020009000 RDI: 0000000000000003 [ 14.798140] RBP: 00007ffd78787470 R08: 00007ffd78787480 R09: 00007ffd78787480 [ 14.799207] R10: 00007ffd78787480 R11: 0000000000000287 R12: 00005599ada98760 [ 14.800277] R13: 00007ffd78787560 R14: 0000000000000000 R15: 0000000000000000 [ 14.801341] Code: 4c 8b 1c 24 48 8b 83 70 02 00 00 48 c7 83 cc 02 00 00 00 00 00 00 48 c7 83 24 03 00 00 00 00 00 00 c7 83 2c 03 00 00 00 00 00 00 00 00 00 00 00 48 8b 83 70 02 00 00 c7 40 04 00 00 00 00 4c [ 14.804012] RIP: mlx5_ib_modify_qp+0xf60/0x13f0 RSP: ffffbf48001c7bd8 [ 14.804838] CR2: 0000000000000000 [ 14.805288] ---[ end trace 3f1da0df5c8b7c37 ]--- Cc: syzkaller Reported-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 403df3591d29..5b8909d1b55e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2848,7 +2848,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ - if (new_state == IB_QPS_RESET && !ibqp->uobject) { + if (new_state == IB_QPS_RESET && + !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) { mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) -- cgit v1.2.3 From 2d1264b5e665909a3217a16c362802c14ee807c0 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Jan 2018 16:29:38 +0200 Subject: drm/i915/bxt, glk: Increase PCODE timeouts during CDCLK freq changing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5e1df40f40ee45a97bb1066c3d71f0ae920a9672 upstream. Currently we see sporadic timeouts during CDCLK changing both on BXT and GLK as reported by the Bugzilla: ticket. It's easy to reproduce this by changing the frequency in a tight loop after blanking the display. The upper bound for the completion time is 800us based on my tests, so increase it from the current 500us to 2ms; with that I couldn't trigger the problem either on BXT or GLK. Note that timeouts happened during both the change notification and the voltage level setting PCODE request. (For the latter one BSpec doesn't require us to wait for completion before further HW programming.) This issue is similar to commit 2c7d0602c815 ("drm/i915/gen9: Fix PCODE polling during CDCLK change notification") but there the PCODE request does complete (as shown by the mbox busy flag), only the reply we get from PCODE indicates a failure. So there we keep resending the request until a success reply, here we just have to increase the timeout for the one PCODE request we send. v2: - s/snb_pcode_request/sandybridge_pcode_write_timeout/ (Ville) Cc: Chris Wilson Cc: Ville Syrjälä Cc: # v4.9 Acked-by: Chris Wilson (v1) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103326 Reviewed-by: Ville Syrjälä Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20180130142939.17983-1-imre.deak@intel.com (cherry picked from commit e76019a81921e87a4d9e7b3d86102bc708a6c227) Signed-off-by: Rodrigo Vivi (Rebased for v4.9 stable tree due to upstream intel_cdclk.c, cdclk_state and pcu_lock change) Signed-off-by: Imre Deak Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 6 +++++- drivers/gpu/drm/i915/intel_display.c | 9 +++++---- drivers/gpu/drm/i915/intel_pm.c | 6 +++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 36a665f0e5c9..e23748cca0c0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3681,7 +3681,11 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, struct intel_display_error_state *error); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); -int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val); +int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox, + u32 val, int timeout_us); +#define sandybridge_pcode_write(dev_priv, mbox, val) \ + sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500) + int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ce32303b3013..c185625d67f2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6012,8 +6012,8 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) /* Inform power controller of upcoming frequency change */ mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000); + ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000, 2000); mutex_unlock(&dev_priv->rps.hw_lock); if (ret) { @@ -6044,8 +6044,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) I915_WRITE(CDCLK_CTL, val); mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + DIV_ROUND_UP(cdclk, 25000), 2000); mutex_unlock(&dev_priv->rps.hw_lock); if (ret) { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 49de4760cc16..05427d292457 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7913,8 +7913,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val return 0; } -int sandybridge_pcode_write(struct drm_i915_private *dev_priv, - u32 mbox, u32 val) +int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, + u32 mbox, u32 val, int timeout_us) { int status; @@ -7935,7 +7935,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, if (intel_wait_for_register_fw(dev_priv, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500)) { + timeout_us)) { DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); return -ETIMEDOUT; } -- cgit v1.2.3 From 21670a464c6b05d92d24e64332e4c57b984d8701 Mon Sep 17 00:00:00 2001 From: Benjamin Beichler Date: Wed, 7 Mar 2018 18:11:07 +0100 Subject: mac80211_hwsim: fix use-after-free bug in hwsim_exit_net commit 8cfd36a0b53aeb4ec21d81eb79706697b84dfc3d upstream. When destroying a net namespace, all hwsim interfaces, which are not created in default namespace are deleted. But the async deletion of the interfaces could last longer than the actual destruction of the namespace, which results to an use after free bug. Therefore use synchronous deletion in this case. Fixes: 100cb9ff40e0 ("mac80211_hwsim: Allow managing radios from non-initial namespaces") Reported-by: syzbot+70ce058e01259de7bb1d@syzkaller.appspotmail.com Signed-off-by: Benjamin Beichler Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4182c3775a72..2681b5339810 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3346,8 +3346,11 @@ static void __net_exit hwsim_exit_net(struct net *net) continue; list_del(&data->list); - INIT_WORK(&data->destroy_work, destroy_radio); - schedule_work(&data->destroy_work); + spin_unlock_bh(&hwsim_radio_lock); + mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), + NULL); + spin_lock_bh(&hwsim_radio_lock); + } spin_unlock_bh(&hwsim_radio_lock); } -- cgit v1.2.3 From 403e7bd6ed4aa2515b05b6f5942a8bbad322d3e8 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Thu, 28 Sep 2017 11:35:00 -0700 Subject: r8152: add Linksys USB3GIGV1 id commit 90841047a01b452cc8c3f9b990698b264143334a upstream. This linksys dongle by default comes up in cdc_ether mode. This patch allows r8152 to claim the device: Bus 002 Device 002: ID 13b1:0041 Linksys Signed-off-by: Grant Grundler Reviewed-by: Douglas Anderson Signed-off-by: David S. Miller [krzk: Rebase on v4.4] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ether.c | 10 ++++++++++ drivers/net/usb/r8152.c | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 4fb468666b19..99424c87b464 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -530,6 +530,7 @@ static const struct driver_info wwan_info = { #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef +#define LINKSYS_VENDOR_ID 0x13b1 #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 @@ -719,6 +720,15 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +#if IS_ENABLED(CONFIG_USB_RTL8152) +/* Linksys USB3GIGV1 Ethernet Adapter */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LINKSYS_VENDOR_ID, 0x0041, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, +#endif + /* Lenovo Thinkpad USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7205, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b2d7c7e32250..3cdfa2465e3f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -519,6 +519,7 @@ enum rtl8152_flags { #define VENDOR_ID_REALTEK 0x0bda #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_LINKSYS 0x13b1 #define VENDOR_ID_NVIDIA 0x0955 #define MCU_TYPE_PLA 0x0100 @@ -4506,6 +4507,7 @@ static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {} }; -- cgit v1.2.3 From b5145685a8bbe0756e5ab8440b5012d74c0daf5b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 26 Apr 2018 17:28:00 +0200 Subject: Revert "pinctrl: intel: Initialize GPIO properly when used through irqchip" This reverts commit f5a26acf0162477af6ee4c11b4fb9cffe5d3e257 Mike writes: It seems that commit f5a26acf0162 ("pinctrl: intel: Initialize GPIO properly when used through irqchip") can cause problems on some Skylake systems with Sunrisepoint PCH-H. Namely on certain systems it may turn the backlight PWM pin from native mode to GPIO which makes the screen blank during boot. There is more information here: https://bugzilla.redhat.com/show_bug.cgi?id=1543769 The actual reason is that GPIO numbering used in BIOS is using "Windows" numbers meaning that they don't match the hardware 1:1 and because of this a wrong pin (backlight PWM) is picked and switched to GPIO mode. There is a proper fix for this but since it has quite many dependencies on commits that cannot be considered stable material, I suggest we revert commit f5a26acf0162 from stable trees 4.9, 4.14 and 4.15 to prevent the backlight issue. Reported-by: Mika Westerberg Fixes: f5a26acf0162 ("pinctrl: intel: Initialize GPIO properly when used through irqchip") Cc: Daniel Drake Cc: Chris Chiu Cc: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-intel.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index df63b7d997e8..b40a074822cf 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -368,18 +368,6 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } -static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) -{ - u32 value; - - /* Put the pad into GPIO mode */ - value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; - /* Disable SCI/SMI/NMI generation */ - value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); - value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); - writel(value, padcfg0); -} - static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) @@ -387,6 +375,7 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); void __iomem *padcfg0; unsigned long flags; + u32 value; raw_spin_lock_irqsave(&pctrl->lock, flags); @@ -396,7 +385,13 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, } padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0); - intel_gpio_set_gpio_mode(padcfg0); + /* Put the pad into GPIO mode */ + value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; + /* Disable SCI/SMI/NMI generation */ + value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); + value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); + writel(value, padcfg0); + /* Disable TX buffer and enable RX (this will be input) */ __intel_gpio_set_direction(padcfg0, true); @@ -775,8 +770,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned type) raw_spin_lock_irqsave(&pctrl->lock, flags); - intel_gpio_set_gpio_mode(reg); - value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); -- cgit v1.2.3 From 237b5a362399bdd81422ef9f9ac9b9f034161efd Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 27 Mar 2018 11:25:29 +0300 Subject: Revert "ath10k: send (re)assoc peer command when NSS changed" commit 55cc11da69895a680940c1733caabc37be685f5e upstream. This reverts commit 55884c045d31a29cf69db8332d1064a1b61dd159. When Ath10k is in AP mode and an unassociated STA sends a VHT action frame (Operating Mode Notification for the NSS change) periodically to AP this causes ath10k to call ath10k_station_assoc() which sends WMI_PEER_ASSOC_CMDID during NSS update. Over the time (with a certain client it can happen within 15 mins when there are over 500 of these VHT action frames) continuous calls of WMI_PEER_ASSOC_CMDID cause firmware to assert due to resource exhaust. To my knowledge setting WMI_PEER_NSS peer param itself enough to handle NSS updates and no need to call ath10k_station_assoc(). So revert the original commit from 2014 as it's unclear why the change was really needed. Now the firmware assert doesn't happen anymore. Issue observed in QCA9984 platform with firmware version:10.4-3.5.3-00053. This Change tested in QCA9984 with firmware version: 10.4-3.5.3-00053 and QCA988x platform with firmware version: 10.2.4-1.0-00036. Firmware Assert log: ath10k_pci 0002:01:00.0: firmware crashed! (guid e61f1274-9acd-4c5b-bcca-e032ea6e723c) ath10k_pci 0002:01:00.0: qca9984/qca9994 hw1.0 target 0x01000000 chip_id 0x00000000 sub 168c:cafe ath10k_pci 0002:01:00.0: kconfig debug 1 debugfs 1 tracing 0 dfs 1 testmode 1 ath10k_pci 0002:01:00.0: firmware ver 10.4-3.5.3-00053 api 5 features no-p2p,mfp,peer-flow-ctrl,btcoex-param,allows-mesh-bcast crc32 4c56a386 ath10k_pci 0002:01:00.0: board_file api 2 bmi_id 0:4 crc32 c2271344 ath10k_pci 0002:01:00.0: htt-ver 2.2 wmi-op 6 htt-op 4 cal otp max-sta 512 raw 0 hwcrypto 1 ath10k_pci 0002:01:00.0: firmware register dump: ath10k_pci 0002:01:00.0: [00]: 0x0000000A 0x000015B3 0x00981E5F 0x00975B31 ath10k_pci 0002:01:00.0: [04]: 0x00981E5F 0x00060530 0x00000011 0x00446C60 ath10k_pci 0002:01:00.0: [08]: 0x0042F1FC 0x00458080 0x00000017 0x00000000 ath10k_pci 0002:01:00.0: [12]: 0x00000009 0x00000000 0x00973ABC 0x00973AD2 ath10k_pci 0002:01:00.0: [16]: 0x00973AB0 0x00960E62 0x009606CA 0x00000000 ath10k_pci 0002:01:00.0: [20]: 0x40981E5F 0x004066DC 0x00400000 0x00981E34 ath10k_pci 0002:01:00.0: [24]: 0x80983B48 0x0040673C 0x000000C0 0xC0981E5F ath10k_pci 0002:01:00.0: [28]: 0x80993DEB 0x0040676C 0x00431AB8 0x0045D0C4 ath10k_pci 0002:01:00.0: [32]: 0x80993E5C 0x004067AC 0x004303C0 0x0045D0C4 ath10k_pci 0002:01:00.0: [36]: 0x80994AAB 0x004067DC 0x00000000 0x0045D0C4 ath10k_pci 0002:01:00.0: [40]: 0x809971A0 0x0040681C 0x004303C0 0x00441B00 ath10k_pci 0002:01:00.0: [44]: 0x80991904 0x0040688C 0x004303C0 0x0045D0C4 ath10k_pci 0002:01:00.0: [48]: 0x80963AD3 0x00406A7C 0x004303C0 0x009918FC ath10k_pci 0002:01:00.0: [52]: 0x80960E80 0x00406A9C 0x0000001F 0x00400000 ath10k_pci 0002:01:00.0: [56]: 0x80960E51 0x00406ACC 0x00400000 0x00000000 ath10k_pci 0002:01:00.0: Copy Engine register dump: ath10k_pci 0002:01:00.0: index: addr: sr_wr_idx: sr_r_idx: dst_wr_idx: dst_r_idx: ath10k_pci 0002:01:00.0: [00]: 0x0004a000 15 15 3 3 ath10k_pci 0002:01:00.0: [01]: 0x0004a400 17 17 212 213 ath10k_pci 0002:01:00.0: [02]: 0x0004a800 21 21 20 21 ath10k_pci 0002:01:00.0: [03]: 0x0004ac00 25 25 27 25 ath10k_pci 0002:01:00.0: [04]: 0x0004b000 515 515 144 104 ath10k_pci 0002:01:00.0: [05]: 0x0004b400 28 28 155 156 ath10k_pci 0002:01:00.0: [06]: 0x0004b800 12 12 12 12 ath10k_pci 0002:01:00.0: [07]: 0x0004bc00 1 1 1 1 ath10k_pci 0002:01:00.0: [08]: 0x0004c000 0 0 127 0 ath10k_pci 0002:01:00.0: [09]: 0x0004c400 1 1 1 1 ath10k_pci 0002:01:00.0: [10]: 0x0004c800 0 0 0 0 ath10k_pci 0002:01:00.0: [11]: 0x0004cc00 0 0 0 0 ath10k_pci 0002:01:00.0: CE[1] write_index 212 sw_index 213 hw_index 0 nentries_mask 0x000001ff ath10k_pci 0002:01:00.0: CE[2] write_index 20 sw_index 21 hw_index 0 nentries_mask 0x0000007f ath10k_pci 0002:01:00.0: CE[5] write_index 155 sw_index 156 hw_index 0 nentries_mask 0x000001ff ath10k_pci 0002:01:00.0: DMA addr: nbytes: meta data: byte swap: gather: ath10k_pci 0002:01:00.0: [455]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [456]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [457]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [458]: 0x594a0038 0 0 0 1 ath10k_pci 0002:01:00.0: [459]: 0x580c0a42 0 0 0 0 ath10k_pci 0002:01:00.0: [460]: 0x594a0060 0 0 0 1 ath10k_pci 0002:01:00.0: [461]: 0x580c0c42 0 0 0 0 ath10k_pci 0002:01:00.0: [462]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [463]: 0x580c0c42 0 0 0 0 ath10k_pci 0002:01:00.0: [464]: 0x594a0038 0 0 0 1 ath10k_pci 0002:01:00.0: [465]: 0x580c0a42 0 0 0 0 ath10k_pci 0002:01:00.0: [466]: 0x594a0060 0 0 0 1 ath10k_pci 0002:01:00.0: [467]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [468]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [469]: 0x580c1c42 0 0 0 0 ath10k_pci 0002:01:00.0: [470]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [471]: 0x580c1c42 0 0 0 0 ath10k_pci 0002:01:00.0: [472]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [473]: 0x580c1c42 0 0 0 0 ath10k_pci 0002:01:00.0: [474]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [475]: 0x580c0642 0 0 0 0 ath10k_pci 0002:01:00.0: [476]: 0x594a0038 0 0 0 1 ath10k_pci 0002:01:00.0: [477]: 0x580c0842 0 0 0 0 ath10k_pci 0002:01:00.0: [478]: 0x594a0060 0 0 0 1 ath10k_pci 0002:01:00.0: [479]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [480]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [481]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [482]: 0x594a0038 0 0 0 1 ath10k_pci 0002:01:00.0: [483]: 0x580c0842 0 0 0 0 ath10k_pci 0002:01:00.0: [484]: 0x594a0060 0 0 0 1 ath10k_pci 0002:01:00.0: [485]: 0x580c0642 0 0 0 0 ath10k_pci 0002:01:00.0: [486]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [487]: 0x580c0642 0 0 0 0 ath10k_pci 0002:01:00.0: [488]: 0x594a0038 0 0 0 1 ath10k_pci 0002:01:00.0: [489]: 0x580c0842 0 0 0 0 ath10k_pci 0002:01:00.0: [490]: 0x594a0060 0 0 0 1 ath10k_pci 0002:01:00.0: [491]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [492]: 0x58174040 0 1 0 0 ath10k_pci 0002:01:00.0: [493]: 0x5a946040 0 1 0 0 ath10k_pci 0002:01:00.0: [494]: 0x59909040 0 1 0 0 ath10k_pci 0002:01:00.0: [495]: 0x5ae5a040 0 1 0 0 ath10k_pci 0002:01:00.0: [496]: 0x58096040 0 1 0 0 ath10k_pci 0002:01:00.0: [497]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [498]: 0x580c0642 0 0 0 0 ath10k_pci 0002:01:00.0: [499]: 0x5c1e0040 0 1 0 0 ath10k_pci 0002:01:00.0: [500]: 0x58153040 0 1 0 0 ath10k_pci 0002:01:00.0: [501]: 0x58129040 0 1 0 0 ath10k_pci 0002:01:00.0: [502]: 0x5952f040 0 1 0 0 ath10k_pci 0002:01:00.0: [503]: 0x59535040 0 1 0 0 ath10k_pci 0002:01:00.0: [504]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [505]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [506]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [507]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [508]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [509]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [510]: 0x594a0010 0 0 0 1 ath10k_pci 0002:01:00.0: [511]: 0x580c0042 0 0 0 0 ath10k_pci 0002:01:00.0: [512]: 0x5adcc040 0 1 0 0 ath10k_pci 0002:01:00.0: [513]: 0x5cf3d040 0 1 0 0 ath10k_pci 0002:01:00.0: [514]: 0x5c1e9040 64 1 0 0 ath10k_pci 0002:01:00.0: [515]: 0x00000000 0 0 0 0 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Cc: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index a497bf31953d..5aa5df24f4dc 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5819,9 +5819,8 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk) sta->addr, smps, err); } - if (changed & IEEE80211_RC_SUPP_RATES_CHANGED || - changed & IEEE80211_RC_NSS_CHANGED) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM supp rates/nss\n", + if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) { + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM supp rates\n", sta->addr); err = ath10k_station_assoc(ar, arvif->vif, sta, true); -- cgit v1.2.3 From c8b1584e5e37fdbf654113a746076d37e9a93206 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 29 Aug 2017 14:45:45 -0500 Subject: PCI: Wait up to 60 seconds for device to become ready after FLR commit 821cdad5c46cae94ce65b9a98614c70a6ff021f8 upstream. Sporadic reset issues have been observed with an Intel 750 NVMe drive while assigning the physical function to the guest machine. The sequence of events observed is as follows: - perform a Function Level Reset (FLR) - sleep up to 1000ms total - read ~0 from PCI_COMMAND (CRS completion for config read) - warn that the device didn't return from FLR - touch the device before it's ready - device drops config writes when we restore register settings (there's no mechanism for software to learn about CRS completions for writes) - incomplete register restore leaves device in inconsistent state - device probe fails because device is in inconsistent state After reset, an endpoint may respond to config requests with Configuration Request Retry Status (CRS) to indicate that it is not ready to accept new requests. See PCIe r3.1, sec 2.3.1 and 6.6.2. Increase the timeout value from 1 second to 60 seconds to cover the period where device responds with CRS and also report polling progress. Signed-off-by: Sinan Kaya [bhelgaas: include the mandatory 100ms in the delays we print] Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 52 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a87c8e1aef68..9c13aeeeb973 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3756,27 +3756,49 @@ int pci_wait_for_pending_transaction(struct pci_dev *dev) } EXPORT_SYMBOL(pci_wait_for_pending_transaction); -/* - * We should only need to wait 100ms after FLR, but some devices take longer. - * Wait for up to 1000ms for config space to return something other than -1. - * Intel IGD requires this when an LCD panel is attached. We read the 2nd - * dword because VFs don't implement the 1st dword. - */ static void pci_flr_wait(struct pci_dev *dev) { - int i = 0; + int delay = 1, timeout = 60000; u32 id; - do { - msleep(100); + /* + * Per PCIe r3.1, sec 6.6.2, a device must complete an FLR within + * 100ms, but may silently discard requests while the FLR is in + * progress. Wait 100ms before trying to access the device. + */ + msleep(100); + + /* + * After 100ms, the device should not silently discard config + * requests, but it may still indicate that it needs more time by + * responding to them with CRS completions. The Root Port will + * generally synthesize ~0 data to complete the read (except when + * CRS SV is enabled and the read was for the Vendor ID; in that + * case it synthesizes 0x0001 data). + * + * Wait for the device to return a non-CRS completion. Read the + * Command register instead of Vendor ID so we don't have to + * contend with the CRS SV value. + */ + pci_read_config_dword(dev, PCI_COMMAND, &id); + while (id == ~0) { + if (delay > timeout) { + dev_warn(&dev->dev, "not ready %dms after FLR; giving up\n", + 100 + delay - 1); + return; + } + + if (delay > 1000) + dev_info(&dev->dev, "not ready %dms after FLR; waiting\n", + 100 + delay - 1); + + msleep(delay); + delay *= 2; pci_read_config_dword(dev, PCI_COMMAND, &id); - } while (i++ < 10 && id == ~0); + } - if (id == ~0) - dev_warn(&dev->dev, "Failed to return from FLR\n"); - else if (i > 1) - dev_info(&dev->dev, "Required additional %dms to return from FLR\n", - (i - 1) * 100); + if (delay > 1000) + dev_info(&dev->dev, "ready %dms after FLR\n", 100 + delay - 1); } static int pcie_flr(struct pci_dev *dev, int probe) -- cgit v1.2.3 From d69aa5e682c3ada69902936e6b7d8cd18827297a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 27 Apr 2018 07:36:39 +0200 Subject: s390: introduce CPU alternatives [ Upstream commit 686140a1a9c41d85a4212a1c26d671139b76404b ] Implement CPU alternatives, which allows to optionally patch newer instructions at runtime, based on CPU facilities availability. A new kernel boot parameter "noaltinstr" disables patching. Current implementation is derived from x86 alternatives. Although ideal instructions padding (when altinstr is longer then oldinstr) is added at compile time, and no oldinstr nops optimization has to be done at runtime. Also couple of compile time sanity checks are done: 1. oldinstr and altinstr must be <= 254 bytes long, 2. oldinstr and altinstr must not have an odd length. alternative(oldinstr, altinstr, facility); alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2); Both compile time and runtime padding consists of either 6/4/2 bytes nop or a jump (brcl) + 2 bytes nop filler if padding is longer then 6 bytes. .altinstructions and .altinstr_replacement sections are part of __init_begin : __init_end region and are freed after initialization. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 3 + arch/s390/Kconfig | 17 ++++ arch/s390/include/asm/alternative.h | 163 ++++++++++++++++++++++++++++++++++++ arch/s390/kernel/Makefile | 1 + arch/s390/kernel/alternative.c | 110 ++++++++++++++++++++++++ arch/s390/kernel/module.c | 17 ++++ arch/s390/kernel/setup.c | 3 + arch/s390/kernel/vmlinux.lds.S | 23 +++++ 8 files changed, 337 insertions(+) create mode 100644 arch/s390/include/asm/alternative.h create mode 100644 arch/s390/kernel/alternative.c diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 466c039c622b..5f9e51436a99 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2640,6 +2640,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noalign [KNL,ARM] + noaltinstr [S390] Disables alternative instructions patching + (CPU alternatives feature). + noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9aa0d04c9dcc..f3e43262db9d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -704,6 +704,22 @@ config SECCOMP If unsure, say Y. +config ALTERNATIVES + def_bool y + prompt "Patch optimized instructions for running CPU type" + help + When enabled the kernel code is compiled with additional + alternative instructions blocks optimized for newer CPU types. + These alternative instructions blocks are patched at kernel boot + time when running CPU supports them. This mechanism is used to + optimize some critical code paths (i.e. spinlocks) for newer CPUs + even if kernel is build to support older machine generations. + + This mechanism could be disabled by appending "noaltinstr" + option to the kernel command line. + + If unsure, say Y. + endmenu menu "Power Management" @@ -753,6 +769,7 @@ config PFAULT config SHARED_KERNEL bool "VM shared kernel support" depends on !JUMP_LABEL + depends on !ALTERNATIVES help Select this option, if you want to share the text segment of the Linux kernel between different VM guests. This reduces memory diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h new file mode 100644 index 000000000000..6c268f6a51d3 --- /dev/null +++ b/arch/s390/include/asm/alternative.h @@ -0,0 +1,163 @@ +#ifndef _ASM_S390_ALTERNATIVE_H +#define _ASM_S390_ALTERNATIVE_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 facility; /* facility bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +#ifdef CONFIG_ALTERNATIVES +extern void apply_alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +#else +static inline void apply_alternative_instructions(void) {}; +static inline void apply_alternatives(struct alt_instr *start, + struct alt_instr *end) {}; +#endif +/* + * |661: |662: |6620 |663: + * +-----------+---------------------+ + * | oldinstr | oldinstr_padding | + * | +----------+----------+ + * | | | | + * | | >6 bytes |6/4/2 nops| + * | |6 bytes jg-----------> + * +-----------+---------------------+ + * ^^ static padding ^^ + * + * .altinstr_replacement section + * +---------------------+-----------+ + * |6641: |6651: + * | alternative instr 1 | + * +-----------+---------+- - - - - -+ + * |6642: |6652: | + * | alternative instr 2 | padding + * +---------------------+- - - - - -+ + * ^ runtime ^ + * + * .altinstructions section + * +---------------------------------+ + * | alt_instr entries for each | + * | alternative instr | + * +---------------------------------+ + */ + +#define b_altinstr(num) "664"#num +#define e_altinstr(num) "665"#num + +#define e_oldinstr_pad_end "663" +#define oldinstr_len "662b-661b" +#define oldinstr_total_len e_oldinstr_pad_end"b-661b" +#define altinstr_len(num) e_altinstr(num)"b-"b_altinstr(num)"b" +#define oldinstr_pad_len(num) \ + "-(((" altinstr_len(num) ")-(" oldinstr_len ")) > 0) * " \ + "((" altinstr_len(num) ")-(" oldinstr_len "))" + +#define INSTR_LEN_SANITY_CHECK(len) \ + ".if " len " > 254\n" \ + "\t.error \"cpu alternatives does not support instructions " \ + "blocks > 254 bytes\"\n" \ + ".endif\n" \ + ".if (" len ") %% 2\n" \ + "\t.error \"cpu alternatives instructions length is odd\"\n" \ + ".endif\n" + +#define OLDINSTR_PADDING(oldinstr, num) \ + ".if " oldinstr_pad_len(num) " > 6\n" \ + "\tjg " e_oldinstr_pad_end "f\n" \ + "6620:\n" \ + "\t.fill (" oldinstr_pad_len(num) " - (6620b-662b)) / 2, 2, 0x0700\n" \ + ".else\n" \ + "\t.fill " oldinstr_pad_len(num) " / 6, 6, 0xc0040000\n" \ + "\t.fill " oldinstr_pad_len(num) " %% 6 / 4, 4, 0x47000000\n" \ + "\t.fill " oldinstr_pad_len(num) " %% 6 %% 4 / 2, 2, 0x0700\n" \ + ".endif\n" + +#define OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + OLDINSTR_PADDING(oldinstr, num) \ + e_oldinstr_pad_end ":\n" \ + INSTR_LEN_SANITY_CHECK(oldinstr_len) + +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".if " altinstr_len(num1) " < " altinstr_len(num2) "\n" \ + OLDINSTR_PADDING(oldinstr, num2) \ + ".else\n" \ + OLDINSTR_PADDING(oldinstr, num1) \ + ".endif\n" \ + e_oldinstr_pad_end ":\n" \ + INSTR_LEN_SANITY_CHECK(oldinstr_len) + +#define ALTINSTR_ENTRY(facility, num) \ + "\t.long 661b - .\n" /* old instruction */ \ + "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \ + "\t.word " __stringify(facility) "\n" /* facility bit */ \ + "\t.byte " oldinstr_total_len "\n" /* source len */ \ + "\t.byte " altinstr_len(num) "\n" /* alt instruction len */ + +#define ALTINSTR_REPLACEMENT(altinstr, num) /* replacement */ \ + b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \ + INSTR_LEN_SANITY_CHECK(altinstr_len(num)) + +#ifdef CONFIG_ALTERNATIVES +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, altinstr, facility) \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(altinstr, 1) \ + ".popsection\n" \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(facility, 1) \ + ".popsection\n" + +#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(altinstr1, 1) \ + ALTINSTR_REPLACEMENT(altinstr2, 2) \ + ".popsection\n" \ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(facility1, 1) \ + ALTINSTR_ENTRY(facility2, 2) \ + ".popsection\n" +#else +/* Alternative instructions are disabled, let's put just oldinstr in */ +#define ALTERNATIVE(oldinstr, altinstr, facility) \ + oldinstr "\n" + +#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ + oldinstr "\n" +#endif + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * oldinstr is padded with jump and nops at compile time if altinstr is + * longer. altinstr is padded with jump and nops at run-time during patching. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, altinstr, facility) \ + asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") + +#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ + asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ + altinstr2, facility2) ::: "memory") + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1f0fe98f6db9..383855ebbfb0 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_ALTERNATIVES) += alternative.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c new file mode 100644 index 000000000000..315986a06cf5 --- /dev/null +++ b/arch/s390/kernel/alternative.c @@ -0,0 +1,110 @@ +#include +#include +#include + +#define MAX_PATCH_LEN (255 - 1) + +static int __initdata_or_module alt_instr_disabled; + +static int __init disable_alternative_instructions(char *str) +{ + alt_instr_disabled = 1; + return 0; +} + +early_param("noaltinstr", disable_alternative_instructions); + +struct brcl_insn { + u16 opc; + s32 disp; +} __packed; + +static u16 __initdata_or_module nop16 = 0x0700; +static u32 __initdata_or_module nop32 = 0x47000000; +static struct brcl_insn __initdata_or_module nop48 = { + 0xc004, 0 +}; + +static const void *nops[] __initdata_or_module = { + &nop16, + &nop32, + &nop48 +}; + +static void __init_or_module add_jump_padding(void *insns, unsigned int len) +{ + struct brcl_insn brcl = { + 0xc0f4, + len / 2 + }; + + memcpy(insns, &brcl, sizeof(brcl)); + insns += sizeof(brcl); + len -= sizeof(brcl); + + while (len > 0) { + memcpy(insns, &nop16, 2); + insns += 2; + len -= 2; + } +} + +static void __init_or_module add_padding(void *insns, unsigned int len) +{ + if (len > 6) + add_jump_padding(insns, len); + else if (len >= 2) + memcpy(insns, nops[len / 2 - 1], len); +} + +static void __init_or_module __apply_alternatives(struct alt_instr *start, + struct alt_instr *end) +{ + struct alt_instr *a; + u8 *instr, *replacement; + u8 insnbuf[MAX_PATCH_LEN]; + + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + */ + for (a = start; a < end; a++) { + int insnbuf_sz = 0; + + instr = (u8 *)&a->instr_offset + a->instr_offset; + replacement = (u8 *)&a->repl_offset + a->repl_offset; + + if (!test_facility(a->facility)) + continue; + + if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) { + WARN_ONCE(1, "cpu alternatives instructions length is " + "odd, skipping patching\n"); + continue; + } + + memcpy(insnbuf, replacement, a->replacementlen); + insnbuf_sz = a->replacementlen; + + if (a->instrlen > a->replacementlen) { + add_padding(insnbuf + a->replacementlen, + a->instrlen - a->replacementlen); + insnbuf_sz += a->instrlen - a->replacementlen; + } + + s390_kernel_write(instr, insnbuf, insnbuf_sz); + } +} + +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) +{ + if (!alt_instr_disabled) + __apply_alternatives(start, end); +} + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +void __init apply_alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); +} diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index fbc07891f9e7..186fdb7f0f3d 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -31,6 +31,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -428,6 +429,22 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { + const Elf_Shdr *s; + char *secstrings; + + if (IS_ENABLED(CONFIG_ALTERNATIVES)) { + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".altinstructions", + secstrings + s->sh_name)) { + /* patch .altinstructions */ + void *aseg = (void *)s->sh_addr; + + apply_alternatives(aseg, aseg + s->sh_size); + } + } + } + jump_label_apply_nops(me); return 0; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e974e53ab597..c3bdb3e80380 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "entry.h" /* @@ -931,6 +932,8 @@ void __init setup_arch(char **cmdline_p) conmode_default(); set_preferred_console(); + apply_alternative_instructions(); + /* Setup zfcpdump support */ setup_zfcpdump(); diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 115bda280d50..b8ec50cb1b6f 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -99,6 +99,29 @@ SECTIONS EXIT_DATA } + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + * Note, that it is a part of __init region. + */ + . = ALIGN(8); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + /* + * And here are the replacement instructions. The linker sticks + * them as binary blobs. The .altinstructions has enough data to + * get the address and the length of them to patch the kernel safely. + * Note, that it is a part of __init region. + */ + .altinstr_replacement : { + *(.altinstr_replacement) + } + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) -- cgit v1.2.3 From 420fd816bd206f266cf3a1300d5590790b5cdcb8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 27 Apr 2018 07:36:40 +0200 Subject: s390: enable CPU alternatives unconditionally [ Upstream commit 049a2c2d486e8cc82c5cd79fa479c5b105b109e9 ] Remove the CPU_ALTERNATIVES config option and enable the code unconditionally. The config option was only added to avoid a conflict with the named saved segment support. Since that code is gone there is no reason to keep the CPU_ALTERNATIVES config option. Just enable it unconditionally to also reduce the number of config options and make it less likely that something breaks. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/Kconfig | 16 ---------------- arch/s390/include/asm/alternative.h | 20 +++----------------- arch/s390/kernel/Makefile | 3 +-- arch/s390/kernel/module.c | 15 ++++++--------- 4 files changed, 10 insertions(+), 44 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f3e43262db9d..f5a154d438ee 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -704,22 +704,6 @@ config SECCOMP If unsure, say Y. -config ALTERNATIVES - def_bool y - prompt "Patch optimized instructions for running CPU type" - help - When enabled the kernel code is compiled with additional - alternative instructions blocks optimized for newer CPU types. - These alternative instructions blocks are patched at kernel boot - time when running CPU supports them. This mechanism is used to - optimize some critical code paths (i.e. spinlocks) for newer CPUs - even if kernel is build to support older machine generations. - - This mechanism could be disabled by appending "noaltinstr" - option to the kernel command line. - - If unsure, say Y. - endmenu menu "Power Management" diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 6c268f6a51d3..a72002056b54 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -15,14 +15,9 @@ struct alt_instr { u8 replacementlen; /* length of new instruction */ } __packed; -#ifdef CONFIG_ALTERNATIVES -extern void apply_alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); -#else -static inline void apply_alternative_instructions(void) {}; -static inline void apply_alternatives(struct alt_instr *start, - struct alt_instr *end) {}; -#endif +void apply_alternative_instructions(void); +void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + /* * |661: |662: |6620 |663: * +-----------+---------------------+ @@ -109,7 +104,6 @@ static inline void apply_alternatives(struct alt_instr *start, b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \ INSTR_LEN_SANITY_CHECK(altinstr_len(num)) -#ifdef CONFIG_ALTERNATIVES /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, altinstr, facility) \ ".pushsection .altinstr_replacement, \"ax\"\n" \ @@ -130,14 +124,6 @@ static inline void apply_alternatives(struct alt_instr *start, ALTINSTR_ENTRY(facility1, 1) \ ALTINSTR_ENTRY(facility2, 2) \ ".popsection\n" -#else -/* Alternative instructions are disabled, let's put just oldinstr in */ -#define ALTERNATIVE(oldinstr, altinstr, facility) \ - oldinstr "\n" - -#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - oldinstr "\n" -#endif /* * Alternative instructions for different CPU types or capabilities. diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 383855ebbfb0..d4b73f28c395 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,7 @@ obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o -obj-y += entry.o reipl.o relocate_kernel.o +obj-y += entry.o reipl.o relocate_kernel.o alternative.o extra-y += head.o head64.o vmlinux.lds @@ -75,7 +75,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o -obj-$(CONFIG_ALTERNATIVES) += alternative.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 186fdb7f0f3d..6f68259bf80e 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -432,16 +432,13 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s; char *secstrings; - if (IS_ENABLED(CONFIG_ALTERNATIVES)) { - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".altinstructions", - secstrings + s->sh_name)) { - /* patch .altinstructions */ - void *aseg = (void *)s->sh_addr; + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".altinstructions", secstrings + s->sh_name)) { + /* patch .altinstructions */ + void *aseg = (void *)s->sh_addr; - apply_alternatives(aseg, aseg + s->sh_size); - } + apply_alternatives(aseg, aseg + s->sh_size); } } -- cgit v1.2.3 From 4d9c2b611ff27fa1556414adf104418cba230daa Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Apr 2018 07:36:41 +0200 Subject: KVM: s390: wire up bpb feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 35b3fde6203b932b2b1a5b53b3d8808abc9c4f60 ] The new firmware interfaces for branch prediction behaviour changes are transparently available for the guest. Nevertheless, there is new state attached that should be migrated and properly resetted. Provide a mechanism for handling reset, migration and VSIE. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck [Changed capability number to 152. - Radim] Signed-off-by: Radim Krčmář Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/kvm_host.h | 3 ++- arch/s390/include/uapi/asm/kvm.h | 5 ++++- arch/s390/kvm/kvm-s390.c | 13 ++++++++++++- arch/s390/kvm/vsie.c | 10 ++++++++++ include/uapi/linux/kvm.h | 1 + 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a41faf34b034..5792590d0e7c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -181,7 +181,8 @@ struct kvm_s390_sie_block { __u16 ipa; /* 0x0056 */ __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ - __u8 reserved60; /* 0x0060 */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ __u8 ecb; /* 0x0061 */ __u8 ecb2; /* 0x0062 */ #define ECB3_AES 0x04 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a2ffec4139ad..81c02e198527 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -197,6 +197,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_BPBC (1UL << 10) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -217,7 +218,9 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 bpbc : 1; /* bp mode */ + __u8 reserved2 : 7; + __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a70ff09b4982..2032ab81b2d7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -401,6 +401,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_RI: r = test_facility(64); break; + case KVM_CAP_S390_BPB: + r = test_facility(82); + break; default: r = 0; } @@ -1713,6 +1716,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 82)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; /* fprs can be synchronized via vrs, even if the guest has no vx. With * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. */ @@ -1829,7 +1834,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; - save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.enabled_gmap); @@ -1877,6 +1881,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) @@ -2744,6 +2749,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (riccb->valid) vcpu->arch.sie_block->ecb3 |= 0x01; } + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && + test_kvm_facility(vcpu->kvm, 82)) { + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; + vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; + } kvm_run->kvm_dirty_regs = 0; } @@ -2762,6 +2772,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->s.regs.pft = vcpu->arch.pfault_token; kvm_run->s.regs.pfs = vcpu->arch.pfault_select; kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; + kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; } int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d8673e243f13..e56b7724054c 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -217,6 +217,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) memcpy(scb_o->gcr, scb_s->gcr, 128); scb_o->pp = scb_s->pp; + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) { + scb_o->fpf &= ~FPF_BPBC; + scb_o->fpf |= scb_s->fpf & FPF_BPBC; + } + /* interrupt intercept */ switch (scb_s->icptcode) { case ICPT_PROGI: @@ -259,6 +265,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ecb3 = 0; scb_s->ecd = 0; scb_s->fac = 0; + scb_s->fpf = 0; rc = prepare_cpuflags(vcpu, vsie_page); if (rc) @@ -316,6 +323,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix_unmapped(vsie_page); scb_s->ecb |= scb_o->ecb & 0x10U; } + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) + scb_s->fpf |= scb_o->fpf & FPF_BPBC; /* SIMD */ if (test_kvm_facility(vcpu->kvm, 129)) { scb_s->eca |= scb_o->eca & 0x00020000U; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4ee67cb99143..05b9bb63dbec 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -870,6 +870,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_INSTR0 130 #define KVM_CAP_MSI_DEVID 131 #define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_S390_BPB 152 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From c225febe980f6f112fc0bfce58148d8b390d4cbf Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:42 +0200 Subject: s390: scrub registers on kernel entry and KVM exit [ Upstream commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 ] Clear all user space registers on entry to the kernel and all KVM guest registers on KVM guest exit if the register does not contain either a parameter or a result value. Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3bc2825173ef..04bff4d0e8f2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -255,6 +255,12 @@ ENTRY(sie64a) sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + xgr %r0,%r0 # clear guest registers to + xgr %r1,%r1 # prevent speculative use + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_EMPTY+16(%r15) # return exit reason code br %r14 @@ -290,6 +296,8 @@ ENTRY(system_call) .Lsysc_vtime: UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled register to prevent speculative use + xgr %r0,%r0 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC @@ -517,6 +525,15 @@ ENTRY(pgm_check_handler) mvc __THREAD_trap_tdb(256,%r14),0(%r13) 3: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC @@ -580,6 +597,16 @@ ENTRY(io_int_handler) lmg %r8,%r9,__LC_IO_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID @@ -755,6 +782,16 @@ ENTRY(ext_int_handler) lmg %r8,%r9,__LC_EXT_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) lghi %r1,__LC_EXT_PARAMS2 @@ -925,6 +962,16 @@ ENTRY(mcck_int_handler) .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) -- cgit v1.2.3 From 2dcf46aa49acc4c432ebefe3d4d413f428adc120 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:43 +0200 Subject: s390: add optimized array_index_mask_nospec [ Upstream commit e2dd833389cc4069a96b57bdd24227b5f52288f5 ] Add an optimized version of the array_index_mask_nospec function for s390 based on a compare and a subtract with borrow. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/barrier.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 5c8db3ce61c8..03b2e5bf1206 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -48,6 +48,30 @@ do { \ #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() +/** + * array_index_mask_nospec - generate a mask for array_idx() that is + * ~0UL when the bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + */ +#define array_index_mask_nospec array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + if (__builtin_constant_p(size) && size > 0) { + asm(" clgr %2,%1\n" + " slbgr %0,%0\n" + :"=d" (mask) : "d" (size-1), "d" (index) :"cc"); + return mask; + } + asm(" clgr %1,%2\n" + " slbgr %0,%0\n" + :"=d" (mask) : "d" (size), "d" (index) :"cc"); + return ~mask; +} + #include #endif /* __ASM_BARRIER_H */ -- cgit v1.2.3 From 46bd2c0ffd5cf7642a9dd87b294be9d2e51adb69 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:44 +0200 Subject: s390/alternative: use a copy of the facility bit mask [ Upstream commit cf1489984641369611556bf00c48f945c77bcf02 ] To be able to switch off specific CPU alternatives with kernel parameters make a copy of the facility bit mask provided by STFLE and use the copy for the decision to apply an alternative. Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/facility.h | 18 ++++++++++++++++++ arch/s390/include/asm/lowcore.h | 3 ++- arch/s390/kernel/alternative.c | 3 ++- arch/s390/kernel/early.c | 3 +++ arch/s390/kernel/setup.c | 4 +++- arch/s390/kernel/smp.c | 4 +++- 6 files changed, 31 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 09b406db7529..7a8a1457dbb8 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -17,6 +17,24 @@ #define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ +static inline void __set_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr = (unsigned char *) facilities; + + if (nr >= MAX_FACILITY_BIT) + return; + ptr[nr >> 3] |= 0x80 >> (nr & 7); +} + +static inline void __clear_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr = (unsigned char *) facilities; + + if (nr >= MAX_FACILITY_BIT) + return; + ptr[nr >> 3] &= ~(0x80 >> (nr & 7)); +} + static inline int __test_facility(unsigned long nr, void *facilities) { unsigned char *ptr; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 7b93b78f423c..d52e7efea7d6 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -150,7 +150,8 @@ struct lowcore { __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ /* Extended facility list */ - __u64 stfle_fac_list[32]; /* 0x0f00 */ + __u64 stfle_fac_list[16]; /* 0x0f00 */ + __u64 alt_stfle_fac_list[16]; /* 0x0f80 */ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ /* Pointer to vector register save area */ diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 315986a06cf5..f5060af61176 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -74,7 +74,8 @@ static void __init_or_module __apply_alternatives(struct alt_instr *start, instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - if (!test_facility(a->facility)) + if (!__test_facility(a->facility, + S390_lowcore.alt_stfle_fac_list)) continue; if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) { diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62578989c74d..171aaa221e4b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -299,6 +299,9 @@ static noinline __init void setup_facility_list(void) { stfle(S390_lowcore.stfle_fac_list, ARRAY_SIZE(S390_lowcore.stfle_fac_list)); + memcpy(S390_lowcore.alt_stfle_fac_list, + S390_lowcore.stfle_fac_list, + sizeof(S390_lowcore.alt_stfle_fac_list)); } static __init void detect_diag9c(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c3bdb3e80380..433b380896b9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -336,7 +336,9 @@ static void __init setup_lowcore(void) lc->machine_flags = S390_lowcore.machine_flags; lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); + sizeof(lc->stfle_fac_list)); + memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, + sizeof(lc->alt_stfle_fac_list)); if (MACHINE_HAS_VX) lc->vector_save_area_addr = (unsigned long) &lc->vector_save_area; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 35531fe1c5ea..63c8a840d56b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -253,7 +253,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) __ctl_store(lc->cregs_save_area, 0, 15); save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); + sizeof(lc->stfle_fac_list)); + memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, + sizeof(lc->alt_stfle_fac_list)); } static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) -- cgit v1.2.3 From 07f65a834a814fb1a1b428b26a671f9196031b60 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:45 +0200 Subject: s390: add options to change branch prediction behaviour for the kernel [ Upstream commit d768bd892fc8f066cd3aa000eb1867bcf32db0ee ] Add the PPA instruction to the system entry and exit path to switch the kernel to a different branch prediction behaviour. The instructions are added via CPU alternatives and can be disabled with the "nospec" or the "nobp=0" kernel parameter. If the default behaviour selected with CONFIG_KERNEL_NOBP is set to "n" then the "nobp=1" parameter can be used to enable the changed kernel branch prediction. Acked-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/Kconfig | 17 ++++++++++++++ arch/s390/include/asm/processor.h | 1 + arch/s390/kernel/alternative.c | 23 +++++++++++++++++++ arch/s390/kernel/early.c | 2 ++ arch/s390/kernel/entry.S | 48 +++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/smp.c | 2 ++ 7 files changed, 94 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f5a154d438ee..02eb7a91ad35 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -704,6 +704,23 @@ config SECCOMP If unsure, say Y. +config KERNEL_NOBP + def_bool n + prompt "Enable modified branch prediction for the kernel by default" + help + If this option is selected the kernel will switch to a modified + branch prediction mode if the firmware interface is available. + The modified branch prediction mode improves the behaviour in + regard to speculative execution. + + With the option enabled the kernel parameter "nobp=0" or "nospec" + can be used to run the kernel in the normal branch prediction mode. + + With the option disabled the modified branch prediction mode is + enabled with the "nobp=1" kernel parameter. + + If unsure, say N. + endmenu menu "Power Management" diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6bcbbece082b..44de5d0014c5 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -84,6 +84,7 @@ void cpu_detect_mhz_feature(void); extern const struct seq_operations cpuinfo_op; extern int sysctl_ieee_emulation_warnings; extern void execve_tail(void); +extern void __bpon(void); /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index f5060af61176..ed21de98b79e 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -14,6 +14,29 @@ static int __init disable_alternative_instructions(char *str) early_param("noaltinstr", disable_alternative_instructions); +static int __init nobp_setup_early(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + if (enabled && test_facility(82)) + __set_facility(82, S390_lowcore.alt_stfle_fac_list); + else + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + return 0; +} +early_param("nobp", nobp_setup_early); + +static int __init nospec_setup_early(char *str) +{ + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + return 0; +} +early_param("nospec", nospec_setup_early); + struct brcl_insn { u16 opc; s32 disp; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 171aaa221e4b..0c7a7d5d95f1 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -302,6 +302,8 @@ static noinline __init void setup_facility_list(void) memcpy(S390_lowcore.alt_stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(S390_lowcore.alt_stfle_fac_list)); + if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); } static __init void detect_diag9c(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 04bff4d0e8f2..bfcf04de5808 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -163,6 +163,34 @@ _PIF_WORK = (_PIF_PER_TRAP) tm off+\addr, \mask .endm + .macro BPOFF + .pushsection .altinstr_replacement, "ax" +660: .long 0xb2e8c000 + .popsection +661: .long 0x47000000 + .pushsection .altinstructions, "a" + .long 661b - . + .long 660b - . + .word 82 + .byte 4 + .byte 4 + .popsection + .endm + + .macro BPON + .pushsection .altinstr_replacement, "ax" +662: .long 0xb2e8d000 + .popsection +663: .long 0x47000000 + .pushsection .altinstructions, "a" + .long 663b - . + .long 662b - . + .word 82 + .byte 4 + .byte 4 + .popsection + .endm + .section .kprobes.text, "ax" .Ldummy: /* @@ -175,6 +203,11 @@ _PIF_WORK = (_PIF_PER_TRAP) */ nop 0 +ENTRY(__bpon) + .globl __bpon + BPON + br %r14 + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -234,7 +267,10 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed + BPON sie 0(%r14) +.Lsie_exit: + BPOFF .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce @@ -286,6 +322,7 @@ ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER .Lsysc_stmg: stmg %r8,%r15,__LC_SAVE_AREA_SYNC + BPOFF lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO lghi %r14,_PIF_SYSCALL @@ -332,6 +369,7 @@ ENTRY(system_call) jnz .Lsysc_work # check for work TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lsysc_work + BPON .Lsysc_restore: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) @@ -492,6 +530,7 @@ ENTRY(kernel_thread_starter) ENTRY(pgm_check_handler) stpt __LC_SYNC_ENTER_TIMER + BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO @@ -590,6 +629,7 @@ ENTRY(pgm_check_handler) ENTRY(io_int_handler) STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER + BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO @@ -641,9 +681,13 @@ ENTRY(io_int_handler) lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno .Lio_exit_kernel + BPON .Lio_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +.Lio_exit_kernel: lmg %r11,%r15,__PT_R11(%r11) lpswe __LC_RETURN_PSW .Lio_done: @@ -775,6 +819,7 @@ ENTRY(io_int_handler) ENTRY(ext_int_handler) STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER + BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO @@ -824,6 +869,7 @@ ENTRY(psw_idle) .Lpsw_idle_stcctm: #endif oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT + BPON STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: @@ -931,6 +977,7 @@ load_fpu_regs: */ ENTRY(mcck_int_handler) STCK __LC_MCCK_CLOCK + BPOFF la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs @@ -997,6 +1044,7 @@ ENTRY(mcck_int_handler) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f + BPON stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 39127b691b78..df49f2a1a7e5 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -563,6 +563,7 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + __bpon(); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63c8a840d56b..b34ff0798d38 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -304,6 +304,7 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), mem_assign_absolute(lc->restart_fn, (unsigned long) func); mem_assign_absolute(lc->restart_data, (unsigned long) data); mem_assign_absolute(lc->restart_source, source_cpu); + __bpon(); asm volatile( "0: sigp 0,%0,%2 # sigp restart to target cpu\n" " brc 2,0b # busy, try again\n" @@ -877,6 +878,7 @@ void __cpu_die(unsigned int cpu) void __noreturn cpu_die(void) { idle_task_exit(); + __bpon(); pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); for (;;) ; } -- cgit v1.2.3 From a1f44e7917a67bcd48a3c4bf5c13fd150850dc25 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:46 +0200 Subject: s390: run user space and KVM guests with modified branch prediction [ Upstream commit 6b73044b2b0081ee3dd1cd6eaab7dee552601efb ] Define TIF_ISOLATE_BP and TIF_ISOLATE_BP_GUEST and add the necessary plumbing in entry.S to be able to run user space and KVM guests with limited branch prediction. To switch a user space process to limited branch prediction the s390_isolate_bp() function has to be call, and to run a vCPU of a KVM guest associated with the current task with limited branch prediction call s390_isolate_bp_guest(). Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/processor.h | 3 +++ arch/s390/include/asm/thread_info.h | 4 +++ arch/s390/kernel/entry.S | 49 ++++++++++++++++++++++++++++++++++--- arch/s390/kernel/processor.c | 18 ++++++++++++++ 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 44de5d0014c5..d5842126ec70 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -360,6 +360,9 @@ extern void memcpy_absolute(void *, void *, size_t); memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ } +extern int s390_isolate_bp(void); +extern int s390_isolate_bp_guest(void); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index f15c0398c363..84f2ae44b4e9 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -79,6 +79,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_SECCOMP 5 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_UPROBE 7 /* breakpointed or single-stepping */ +#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ +#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ @@ -94,6 +96,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define _TIF_SECCOMP _BITUL(TIF_SECCOMP) #define _TIF_SYSCALL_TRACEPOINT _BITUL(TIF_SYSCALL_TRACEPOINT) #define _TIF_UPROBE _BITUL(TIF_UPROBE) +#define _TIF_ISOLATE_BP _BITUL(TIF_ISOLATE_BP) +#define _TIF_ISOLATE_BP_GUEST _BITUL(TIF_ISOLATE_BP_GUEST) #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index bfcf04de5808..0dd787aabac7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -105,6 +105,7 @@ _PIF_WORK = (_PIF_PER_TRAP) j 3f 1: LAST_BREAK %r14 UPDATE_VTIME %r14,%r15,\timer + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP 2: lg %r15,__LC_ASYNC_STACK # load async stack 3: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm @@ -191,6 +192,40 @@ _PIF_WORK = (_PIF_PER_TRAP) .popsection .endm + .macro BPENTER tif_ptr,tif_mask + .pushsection .altinstr_replacement, "ax" +662: .word 0xc004, 0x0000, 0x0000 # 6 byte nop + .word 0xc004, 0x0000, 0x0000 # 6 byte nop + .popsection +664: TSTMSK \tif_ptr,\tif_mask + jz . + 8 + .long 0xb2e8d000 + .pushsection .altinstructions, "a" + .long 664b - . + .long 662b - . + .word 82 + .byte 12 + .byte 12 + .popsection + .endm + + .macro BPEXIT tif_ptr,tif_mask + TSTMSK \tif_ptr,\tif_mask + .pushsection .altinstr_replacement, "ax" +662: jnz . + 8 + .long 0xb2e8d000 + .popsection +664: jz . + 8 + .long 0xb2e8c000 + .pushsection .altinstructions, "a" + .long 664b - . + .long 662b - . + .word 82 + .byte 8 + .byte 8 + .popsection + .endm + .section .kprobes.text, "ax" .Ldummy: /* @@ -248,9 +283,11 @@ ENTRY(__switch_to) */ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + lg %r12,__LC_CURRENT stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0 + mvc __SF_EMPTY+24(8,%r15),__TI_flags(%r12) # copy thread flags TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ? jno .Lsie_load_guest_gprs brasl %r14,load_fpu_regs # load guest fp/vx regs @@ -267,10 +304,11 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed - BPON + BPEXIT __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) sie 0(%r14) .Lsie_exit: BPOFF + BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce @@ -332,6 +370,7 @@ ENTRY(system_call) LAST_BREAK %r13 .Lsysc_vtime: UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP stmg %r0,%r7,__PT_R0(%r11) # clear user controlled register to prevent speculative use xgr %r0,%r0 @@ -369,7 +408,7 @@ ENTRY(system_call) jnz .Lsysc_work # check for work TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lsysc_work - BPON + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP .Lsysc_restore: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) @@ -555,6 +594,7 @@ ENTRY(pgm_check_handler) j 3f 2: LAST_BREAK %r14 UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK lg %r14,__TI_task(%r12) aghi %r14,__TASK_thread # pointer to thread_struct @@ -683,7 +723,7 @@ ENTRY(io_int_handler) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel - BPON + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP .Lio_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER @@ -1044,7 +1084,7 @@ ENTRY(mcck_int_handler) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f - BPON + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) @@ -1165,6 +1205,7 @@ cleanup_critical: .quad .Lsie_done .Lcleanup_sie: + BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_EMPTY(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 81d0808085e6..d856263fd768 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -179,3 +179,21 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; + +int s390_isolate_bp(void) +{ + if (!test_facility(82)) + return -EOPNOTSUPP; + set_thread_flag(TIF_ISOLATE_BP); + return 0; +} +EXPORT_SYMBOL(s390_isolate_bp); + +int s390_isolate_bp_guest(void) +{ + if (!test_facility(82)) + return -EOPNOTSUPP; + set_thread_flag(TIF_ISOLATE_BP_GUEST); + return 0; +} +EXPORT_SYMBOL(s390_isolate_bp_guest); -- cgit v1.2.3 From 24fbc4eee899f1a8d291ffe47333a673a3ed6c01 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:47 +0200 Subject: s390: introduce execute-trampolines for branches [ Upstream commit f19fbd5ed642dc31c809596412dab1ed56f2f156 ] Add CONFIG_EXPOLINE to enable the use of the new -mindirect-branch= and -mfunction_return= compiler options to create a kernel fortified against the specte v2 attack. With CONFIG_EXPOLINE=y all indirect branches will be issued with an execute type instruction. For z10 or newer the EXRL instruction will be used, for older machines the EX instruction. The typical indirect call basr %r14,%r1 is replaced with a PC relative call to a new thunk brasl %r14,__s390x_indirect_jump_r1 The thunk contains the EXRL/EX instruction to the indirect branch __s390x_indirect_jump_r1: exrl 0,0f j . 0: br %r1 The detour via the execute type instruction has a performance impact. To get rid of the detour the new kernel parameter "nospectre_v2" and "spectre_v2=[on,off,auto]" can be used. If the parameter is specified the kernel and module code will be patched at runtime. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/Kconfig | 28 +++++++++ arch/s390/Makefile | 10 +++ arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/nospec-branch.h | 18 ++++++ arch/s390/kernel/Makefile | 4 ++ arch/s390/kernel/entry.S | 113 ++++++++++++++++++++++++++-------- arch/s390/kernel/module.c | 62 ++++++++++++++++--- arch/s390/kernel/nospec-branch.c | 101 ++++++++++++++++++++++++++++++ arch/s390/kernel/setup.c | 4 ++ arch/s390/kernel/smp.c | 1 + arch/s390/kernel/vmlinux.lds.S | 14 +++++ drivers/s390/char/Makefile | 2 + 12 files changed, 326 insertions(+), 35 deletions(-) create mode 100644 arch/s390/include/asm/nospec-branch.h create mode 100644 arch/s390/kernel/nospec-branch.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 02eb7a91ad35..c88bb4a50db7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -721,6 +721,34 @@ config KERNEL_NOBP If unsure, say N. +config EXPOLINE + def_bool n + prompt "Avoid speculative indirect branches in the kernel" + help + Compile the kernel with the expoline compiler options to guard + against kernel-to-user data leaks by avoiding speculative indirect + branches. + Requires a compiler with -mindirect-branch=thunk support for full + protection. The kernel may run slower. + + If unsure, say N. + +choice + prompt "Expoline default" + depends on EXPOLINE + default EXPOLINE_FULL + +config EXPOLINE_OFF + bool "spectre_v2=off" + +config EXPOLINE_MEDIUM + bool "spectre_v2=auto" + +config EXPOLINE_FULL + bool "spectre_v2=on" + +endchoice + endmenu menu "Power Management" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 54e00526b8df..d241a9fddf43 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -79,6 +79,16 @@ ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack endif +ifdef CONFIG_EXPOLINE + ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y) + CC_FLAGS_EXPOLINE := -mindirect-branch=thunk + CC_FLAGS_EXPOLINE += -mfunction-return=thunk + CC_FLAGS_EXPOLINE += -mindirect-branch-table + export CC_FLAGS_EXPOLINE + cflags-y += $(CC_FLAGS_EXPOLINE) + endif +endif + ifdef CONFIG_FUNCTION_TRACER # make use of hotpatch feature if the compiler supports it cc_hotpatch := -mhotpatch=0,3 diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index d52e7efea7d6..ad4e0cee1557 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -135,7 +135,9 @@ struct lowcore { /* Per cpu primary space access list */ __u32 paste[16]; /* 0x0400 */ - __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */ + /* br %r1 trampoline */ + __u16 br_r1_trampoline; /* 0x0440 */ + __u8 pad_0x0442[0x0e00-0x0442]; /* 0x0442 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h new file mode 100644 index 000000000000..7df48e5cf36f --- /dev/null +++ b/arch/s390/include/asm/nospec-branch.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_EXPOLINE_H +#define _ASM_S390_EXPOLINE_H + +#ifndef __ASSEMBLY__ + +#include + +extern int nospec_call_disable; +extern int nospec_return_disable; + +void nospec_init_branches(void); +void nospec_call_revert(s32 *start, s32 *end); +void nospec_return_revert(s32 *start, s32 *end); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_EXPOLINE_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index d4b73f28c395..1c3e5d529cc1 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -42,6 +42,7 @@ ifneq ($(CC_FLAGS_MARCH),-march=z900) CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH) CFLAGS_sclp.o += -march=z900 CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE) CFLAGS_als.o += -march=z900 AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) AFLAGS_head.o += -march=z900 @@ -61,6 +62,9 @@ obj-y += entry.o reipl.o relocate_kernel.o alternative.o extra-y += head.o head64.o vmlinux.lds +obj-$(CONFIG_EXPOLINE) += nospec-branch.o +CFLAGS_REMOVE_expoline.o += $(CC_FLAGS_EXPOLINE) + obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 0dd787aabac7..8cbe75d7353d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -226,6 +226,68 @@ _PIF_WORK = (_PIF_PER_TRAP) .popsection .endm +#ifdef CONFIG_EXPOLINE + + .macro GEN_BR_THUNK name,reg,tmp + .section .text.\name,"axG",@progbits,\name,comdat + .globl \name + .hidden \name + .type \name,@function +\name: + .cfi_startproc +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,0f +#else + larl \tmp,0f + ex 0,0(\tmp) +#endif + j . +0: br \reg + .cfi_endproc + .endm + + GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 + GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1 + GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11 + + .macro BASR_R14_R9 +0: brasl %r14,__s390x_indirect_jump_r1use_r9 + .pushsection .s390_indirect_branches,"a",@progbits + .long 0b-. + .popsection + .endm + + .macro BR_R1USE_R14 +0: jg __s390x_indirect_jump_r1use_r14 + .pushsection .s390_indirect_branches,"a",@progbits + .long 0b-. + .popsection + .endm + + .macro BR_R11USE_R14 +0: jg __s390x_indirect_jump_r11use_r14 + .pushsection .s390_indirect_branches,"a",@progbits + .long 0b-. + .popsection + .endm + +#else /* CONFIG_EXPOLINE */ + + .macro BASR_R14_R9 + basr %r14,%r9 + .endm + + .macro BR_R1USE_R14 + br %r14 + .endm + + .macro BR_R11USE_R14 + br %r14 + .endm + +#endif /* CONFIG_EXPOLINE */ + + .section .kprobes.text, "ax" .Ldummy: /* @@ -241,7 +303,7 @@ _PIF_WORK = (_PIF_PER_TRAP) ENTRY(__bpon) .globl __bpon BPON - br %r14 + BR_R1USE_R14 /* * Scheduler resume function, called by switch_to @@ -269,9 +331,9 @@ ENTRY(__switch_to) mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP - bzr %r14 + jz 0f .insn s,0xb2800000,__LC_LPP # set program parameter - br %r14 +0: BR_R1USE_R14 .L__critical_start: @@ -337,7 +399,7 @@ sie_exit: xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_EMPTY+16(%r15) # return exit reason code - br %r14 + BR_R1USE_R14 .Lsie_fault: lghi %r14,-EFAULT stg %r14,__SF_EMPTY+16(%r15) # set exit reason code @@ -396,7 +458,7 @@ ENTRY(system_call) lgf %r9,0(%r8,%r10) # get system call add. TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys - basr %r14,%r9 # call sys_xxxx + BASR_R14_R9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: @@ -536,7 +598,7 @@ ENTRY(system_call) lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) - basr %r14,%r9 # call sys_xxx + BASR_R14_R9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_tracenogo: TSTMSK __TI_flags(%r12),_TIF_TRACE @@ -560,7 +622,7 @@ ENTRY(ret_from_fork) lmg %r9,%r10,__PT_R9(%r11) # load gprs ENTRY(kernel_thread_starter) la %r2,0(%r10) - basr %r14,%r9 + BASR_R14_R9 j .Lsysc_tracenogo /* @@ -634,9 +696,9 @@ ENTRY(pgm_check_handler) nill %r10,0x007f sll %r10,2 je .Lpgm_return - lgf %r1,0(%r10,%r1) # load address of handler routine + lgf %r9,0(%r10,%r1) # load address of handler routine lgr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # branch to interrupt-handler + BASR_R14_R9 # branch to interrupt-handler .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? @@ -914,7 +976,7 @@ ENTRY(psw_idle) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) - br %r14 + BR_R1USE_R14 .Lpsw_idle_end: /* @@ -928,7 +990,7 @@ ENTRY(save_fpu_regs) lg %r2,__LC_CURRENT aghi %r2,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bor %r14 + jo .Lsave_fpu_regs_exit stfpc __THREAD_FPU_fpc(%r2) .Lsave_fpu_regs_fpc_end: lg %r3,__THREAD_FPU_regs(%r2) @@ -958,7 +1020,8 @@ ENTRY(save_fpu_regs) std 15,120(%r3) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU - br %r14 +.Lsave_fpu_regs_exit: + BR_R1USE_R14 .Lsave_fpu_regs_end: #if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL(save_fpu_regs) @@ -978,7 +1041,7 @@ load_fpu_regs: lg %r4,__LC_CURRENT aghi %r4,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bnor %r14 + jno .Lload_fpu_regs_exit lfpc __THREAD_FPU_fpc(%r4) TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area @@ -1007,7 +1070,8 @@ load_fpu_regs: ld 15,120(%r4) .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU - br %r14 +.Lload_fpu_regs_exit: + BR_R1USE_R14 .Lload_fpu_regs_end: .L__critical_end: @@ -1180,7 +1244,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: br %r14 +0: BR_R11USE_R14 .align 8 .Lcleanup_table: @@ -1210,7 +1274,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - br %r14 + BR_R11USE_R14 #endif .Lcleanup_system_call: @@ -1267,7 +1331,7 @@ cleanup_critical: stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,.Lsysc_do_svc - br %r14 + BR_R11USE_R14 .Lcleanup_system_call_insn: .quad system_call .quad .Lsysc_stmg @@ -1277,7 +1341,7 @@ cleanup_critical: .Lcleanup_sysc_tif: larl %r9,.Lsysc_tif - br %r14 + BR_R11USE_R14 .Lcleanup_sysc_restore: # check if stpt has been executed @@ -1294,14 +1358,14 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - br %r14 + BR_R11USE_R14 .Lcleanup_sysc_restore_insn: .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: larl %r9,.Lio_tif - br %r14 + BR_R11USE_R14 .Lcleanup_io_restore: # check if stpt has been executed @@ -1315,7 +1379,7 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - br %r14 + BR_R11USE_R14 .Lcleanup_io_restore_insn: .quad .Lio_exit_timer .quad .Lio_done - 4 @@ -1368,17 +1432,17 @@ cleanup_critical: # prepare return psw nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle - br %r14 + BR_R11USE_R14 .Lcleanup_idle_insn: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: larl %r9,save_fpu_regs - br %r14 + BR_R11USE_R14 .Lcleanup_load_fpu_regs: larl %r9,load_fpu_regs - br %r14 + BR_R11USE_R14 /* * Integer constants @@ -1394,7 +1458,6 @@ cleanup_critical: .Lsie_critical_length: .quad .Lsie_done - .Lsie_gmap #endif - .section .rodata, "a" #define SYSCALL(esame,emu) .long esame .globl sys_call_table diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 6f68259bf80e..f461b09b3e85 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #if 0 #define DEBUGP printk @@ -168,7 +170,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, me->arch.got_offset = me->core_layout.size; me->core_layout.size += me->arch.got_size; me->arch.plt_offset = me->core_layout.size; - me->core_layout.size += me->arch.plt_size; + if (me->arch.plt_size) { + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable) + me->arch.plt_size += PLT_ENTRY_SIZE; + me->core_layout.size += me->arch.plt_size; + } return 0; } @@ -322,9 +328,21 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, unsigned int *ip; ip = me->core_layout.base + me->arch.plt_offset + info->plt_offset; - ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ - ip[1] = 0x100a0004; - ip[2] = 0x07f10000; + ip[0] = 0x0d10e310; /* basr 1,0 */ + ip[1] = 0x100a0004; /* lg 1,10(1) */ + if (IS_ENABLED(CONFIG_EXPOLINE) && + !nospec_call_disable) { + unsigned int *ij; + ij = me->core_layout.base + + me->arch.plt_offset + + me->arch.plt_size - PLT_ENTRY_SIZE; + ip[2] = 0xa7f40000 + /* j __jump_r1 */ + (unsigned int)(u16) + (((unsigned long) ij - 8 - + (unsigned long) ip) / 2); + } else { + ip[2] = 0x07f10000; /* br %r1 */ + } ip[3] = (unsigned int) (val >> 32); ip[4] = (unsigned int) val; info->plt_initialized = 1; @@ -430,16 +448,42 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { const Elf_Shdr *s; - char *secstrings; + char *secstrings, *secname; + void *aseg; + + if (IS_ENABLED(CONFIG_EXPOLINE) && + !nospec_call_disable && me->arch.plt_size) { + unsigned int *ij; + + ij = me->core_layout.base + me->arch.plt_offset + + me->arch.plt_size - PLT_ENTRY_SIZE; + if (test_facility(35)) { + ij[0] = 0xc6000000; /* exrl %r0,.+10 */ + ij[1] = 0x0005a7f4; /* j . */ + ij[2] = 0x000007f1; /* br %r1 */ + } else { + ij[0] = 0x44000000 | (unsigned int) + offsetof(struct lowcore, br_r1_trampoline); + ij[1] = 0xa7f40000; /* j . */ + } + } secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".altinstructions", secstrings + s->sh_name)) { - /* patch .altinstructions */ - void *aseg = (void *)s->sh_addr; + aseg = (void *) s->sh_addr; + secname = secstrings + s->sh_name; + if (!strcmp(".altinstructions", secname)) + /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size); - } + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (!strcmp(".nospec_call_table", secname))) + nospec_call_revert(aseg, aseg + s->sh_size); + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (!strcmp(".nospec_return_table", secname))) + nospec_return_revert(aseg, aseg + s->sh_size); } jump_label_apply_nops(me); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c new file mode 100644 index 000000000000..86ee26a612cf --- /dev/null +++ b/arch/s390/kernel/nospec-branch.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF); +int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL); + +static int __init nospectre_v2_setup_early(char *str) +{ + nospec_call_disable = 1; + nospec_return_disable = 1; + return 0; +} +early_param("nospectre_v2", nospectre_v2_setup_early); + +static int __init spectre_v2_setup_early(char *str) +{ + if (str && !strncmp(str, "on", 2)) { + nospec_call_disable = 0; + nospec_return_disable = 0; + } + if (str && !strncmp(str, "off", 3)) { + nospec_call_disable = 1; + nospec_return_disable = 1; + } + if (str && !strncmp(str, "auto", 4)) { + nospec_call_disable = 0; + nospec_return_disable = 1; + } + return 0; +} +early_param("spectre_v2", spectre_v2_setup_early); + +static void __init_or_module __nospec_revert(s32 *start, s32 *end) +{ + enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type; + u8 *instr, *thunk, *br; + u8 insnbuf[6]; + s32 *epo; + + /* Second part of the instruction replace is always a nop */ + memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4); + for (epo = start; epo < end; epo++) { + instr = (u8 *) epo + *epo; + if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) + type = BRCL_EXPOLINE; /* brcl instruction */ + else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05) + type = BRASL_EXPOLINE; /* brasl instruction */ + else + continue; + thunk = instr + (*(int *)(instr + 2)) * 2; + if (thunk[0] == 0xc6 && thunk[1] == 0x00) + /* exrl %r0, */ + br = thunk + (*(int *)(thunk + 2)) * 2; + else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 && + thunk[6] == 0x44 && thunk[7] == 0x00 && + (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 && + (thunk[1] & 0xf0) == (thunk[8] & 0xf0)) + /* larl %rx, + ex %r0,0(%rx) */ + br = thunk + (*(int *)(thunk + 2)) * 2; + else + continue; + if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + continue; + switch (type) { + case BRCL_EXPOLINE: + /* brcl to thunk, replace with br + nop */ + insnbuf[0] = br[0]; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + case BRASL_EXPOLINE: + /* brasl to thunk, replace with basr + nop */ + insnbuf[0] = 0x0d; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + } + + s390_kernel_write(instr, insnbuf, 6); + } +} + +void __init_or_module nospec_call_revert(s32 *start, s32 *end) +{ + if (nospec_call_disable) + __nospec_revert(start, end); +} + +void __init_or_module nospec_return_revert(s32 *start, s32 *end) +{ + if (nospec_return_disable) + __nospec_revert(start, end); +} + +extern s32 __nospec_call_start[], __nospec_call_end[]; +extern s32 __nospec_return_start[], __nospec_return_end[]; +void __init nospec_init_branches(void) +{ + nospec_call_revert(__nospec_call_start, __nospec_call_end); + nospec_return_revert(__nospec_return_start, __nospec_return_end); +} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 433b380896b9..bc25366f0244 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -64,6 +64,7 @@ #include #include #include +#include #include "entry.h" /* @@ -375,6 +376,7 @@ static void __init setup_lowcore(void) #ifdef CONFIG_SMP lc->spinlock_lockval = arch_spin_lockval(0); #endif + lc->br_r1_trampoline = 0x07f1; /* br %r1 */ set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; @@ -935,6 +937,8 @@ void __init setup_arch(char **cmdline_p) set_preferred_console(); apply_alternative_instructions(); + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_init_branches(); /* Setup zfcpdump support */ setup_zfcpdump(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b34ff0798d38..0a31110f41f6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -205,6 +205,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->br_r1_trampoline = 0x07f1; /* br %r1 */ if (MACHINE_HAS_VX) lc->vector_save_area_addr = (unsigned long) &lc->vector_save_area; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b8ec50cb1b6f..dd96b467946b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -122,6 +122,20 @@ SECTIONS *(.altinstr_replacement) } + /* + * Table with the patch locations to undo expolines + */ + .nospec_call_table : { + __nospec_call_start = . ; + *(.s390_indirect*) + __nospec_call_end = . ; + } + .nospec_return_table : { + __nospec_return_start = . ; + *(.s390_return*) + __nospec_return_end = . ; + } + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 41e28b23b26a..8ac27efe34fc 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -2,6 +2,8 @@ # S/390 character devices # +CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_EXPOLINE) + obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \ sclp_early.o -- cgit v1.2.3 From 2a0b0e13a8dedcc366fa59656fbcad45c632ac92 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Apr 2018 07:36:48 +0200 Subject: KVM: s390: force bp isolation for VSIE [ Upstream commit f315104ad8b0c32be13eac628569ae707c332cb5 ] If the guest runs with bp isolation when doing a SIE instruction, we must also run the nested guest with bp isolation when emulating that SIE instruction. This is done by activating BPBC in the lpar, which acts as an override for lower level guests. Signed-off-by: Christian Borntraeger Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/vsie.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index e56b7724054c..ced6c9b8f04d 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -764,6 +764,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + int guest_bp_isolation; int rc; handle_last_fault(vcpu, vsie_page); @@ -774,6 +775,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) s390_handle_mcck(); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + + /* save current guest state of bp isolation override */ + guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST); + + /* + * The guest is running with BPBC, so we have to force it on for our + * nested guest. This is done by enabling BPBC globally, so the BPBC + * control in the SCB (which the nested guest can modify) is simply + * ignored. + */ + if (test_kvm_facility(vcpu->kvm, 82) && + vcpu->arch.sie_block->fpf & FPF_BPBC) + set_thread_flag(TIF_ISOLATE_BP_GUEST); + local_irq_disable(); guest_enter_irqoff(); local_irq_enable(); @@ -783,6 +798,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) local_irq_disable(); guest_exit_irqoff(); local_irq_enable(); + + /* restore guest state for bp isolation override */ + if (!guest_bp_isolation) + clear_thread_flag(TIF_ISOLATE_BP_GUEST); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); if (rc > 0) -- cgit v1.2.3 From 49a48a0ec7c1f078fc6215ccaf2a5d277c5495c0 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Fri, 27 Apr 2018 07:36:49 +0200 Subject: s390: Replace IS_ENABLED(EXPOLINE_*) with IS_ENABLED(CONFIG_EXPOLINE_*) [ Upstream commit 2cb370d615e9fbed9e95ed222c2c8f337181aa90 ] I've accidentally stumbled upon the IS_ENABLED(EXPOLINE_*) lines, which obviously always evaluate to false. Fix this. Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches") Signed-off-by: Eugeniu Rosca Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/nospec-branch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 86ee26a612cf..57f55c24c21c 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -3,8 +3,8 @@ #include #include -int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF); -int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL); +int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); +int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL); static int __init nospectre_v2_setup_early(char *str) { -- cgit v1.2.3 From 7ae60d00c31e5db697a1518ce94749a0852c1d0c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:50 +0200 Subject: s390: do not bypass BPENTER for interrupt system calls [ Upstream commit d5feec04fe578c8dbd9e2e1439afc2f0af761ed4 ] The system call path can be interrupted before the switch back to the standard branch prediction with BPENTER has been done. The critical section cleanup code skips forward to .Lsysc_do_svc and bypasses the BPENTER. In this case the kernel and all subsequent code will run with the limited branch prediction. Fixes: eacf67eb9b32 ("s390: run user space and KVM guests with modified branch prediction") Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 8cbe75d7353d..c3e28af207cf 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1316,7 +1316,8 @@ cleanup_critical: srag %r9,%r9,23 jz 0f mvc __TI_last_break(8,%r12),16(%r11) -0: # set up saved register r11 +0: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + # set up saved register r11 lg %r15,__LC_KERNEL_STACK la %r9,STACK_FRAME_OVERHEAD(%r15) stg %r9,24(%r11) # r11 pt_regs pointer -- cgit v1.2.3 From 7634817fc5fad945d67c578f04c731ce25dc86bc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Apr 2018 07:36:51 +0200 Subject: s390/entry.S: fix spurious zeroing of r0 [ Upstream commit d3f468963cd6fd6d2aa5e26aed8b24232096d0e1 ] when a system call is interrupted we might call the critical section cleanup handler that re-does some of the operations. When we are between .Lsysc_vtime and .Lsysc_do_svc we might also redo the saving of the problem state registers r0-r7: .Lcleanup_system_call: [...] 0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER # set up saved register r11 lg %r15,__LC_KERNEL_STACK la %r9,STACK_FRAME_OVERHEAD(%r15) stg %r9,24(%r11) # r11 pt_regs pointer # fill pt_regs mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC ---> stmg %r0,%r7,__PT_R0(%r9) The problem is now, that we might have already zeroed out r0. The fix is to move the zeroing of r0 after sysc_do_svc. Reported-by: Farhan Ali Fixes: 7041d28115e91 ("s390: scrub registers on kernel entry and KVM exit") Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c3e28af207cf..1996afeb2e81 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -434,13 +434,13 @@ ENTRY(system_call) UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP stmg %r0,%r7,__PT_R0(%r11) - # clear user controlled register to prevent speculative use - xgr %r0,%r0 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC stg %r14,__PT_FLAGS(%r11) .Lsysc_do_svc: + # clear user controlled register to prevent speculative use + xgr %r0,%r0 lg %r10,__TI_sysc_table(%r12) # address of system call table llgh %r8,__PT_INT_CODE+2(%r11) slag %r8,%r8,2 # shift and test for svc 0 -- cgit v1.2.3 From 2afb4e9dfc0a44a4b616e4461fbf972e5ad4b0fe Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:52 +0200 Subject: s390: move nobp parameter functions to nospec-branch.c [ Upstream commit b2e2f43a01bace1a25bdbae04c9f9846882b727a ] Keep the code for the nobp parameter handling with the code for expolines. Both are related to the spectre v2 mitigation. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/Makefile | 4 ++-- arch/s390/kernel/alternative.c | 23 ----------------------- arch/s390/kernel/nospec-branch.c | 27 +++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1c3e5d529cc1..0501cac2ab95 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -59,11 +59,11 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o obj-y += entry.o reipl.o relocate_kernel.o alternative.o +obj-y += nospec-branch.o extra-y += head.o head64.o vmlinux.lds -obj-$(CONFIG_EXPOLINE) += nospec-branch.o -CFLAGS_REMOVE_expoline.o += $(CC_FLAGS_EXPOLINE) +CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index ed21de98b79e..f5060af61176 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -14,29 +14,6 @@ static int __init disable_alternative_instructions(char *str) early_param("noaltinstr", disable_alternative_instructions); -static int __init nobp_setup_early(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (rc) - return rc; - if (enabled && test_facility(82)) - __set_facility(82, S390_lowcore.alt_stfle_fac_list); - else - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); - return 0; -} -early_param("nobp", nobp_setup_early); - -static int __init nospec_setup_early(char *str) -{ - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); - return 0; -} -early_param("nospec", nospec_setup_early); - struct brcl_insn { u16 opc; s32 disp; diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 57f55c24c21c..3ce59d044a4d 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -3,6 +3,31 @@ #include #include +static int __init nobp_setup_early(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + if (enabled && test_facility(82)) + __set_facility(82, S390_lowcore.alt_stfle_fac_list); + else + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + return 0; +} +early_param("nobp", nobp_setup_early); + +static int __init nospec_setup_early(char *str) +{ + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + return 0; +} +early_param("nospec", nospec_setup_early); + +#ifdef CONFIG_EXPOLINE + int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL); @@ -99,3 +124,5 @@ void __init nospec_init_branches(void) nospec_call_revert(__nospec_call_start, __nospec_call_end); nospec_return_revert(__nospec_return_start, __nospec_return_end); } + +#endif /* CONFIG_EXPOLINE */ -- cgit v1.2.3 From 3e17958c6b3159cebc09f24344176e9a12b15e30 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:53 +0200 Subject: s390: add automatic detection of the spectre defense [ Upstream commit 6e179d64126b909f0b288fa63cdbf07c531e9b1d ] Automatically decide between nobp vs. expolines if the spectre_v2=auto kernel parameter is specified or CONFIG_EXPOLINE_AUTO=y is set. The decision made at boot time due to CONFIG_EXPOLINE_AUTO=y being set can be overruled with the nobp, nospec and spectre_v2 kernel parameters. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/Kconfig | 2 +- arch/s390/Makefile | 2 +- arch/s390/include/asm/nospec-branch.h | 6 ++-- arch/s390/kernel/alternative.c | 1 + arch/s390/kernel/module.c | 11 +++--- arch/s390/kernel/nospec-branch.c | 68 +++++++++++++++++++++-------------- 6 files changed, 52 insertions(+), 38 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c88bb4a50db7..fbb8fca6f9c2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -741,7 +741,7 @@ choice config EXPOLINE_OFF bool "spectre_v2=off" -config EXPOLINE_MEDIUM +config EXPOLINE_AUTO bool "spectre_v2=auto" config EXPOLINE_FULL diff --git a/arch/s390/Makefile b/arch/s390/Makefile index d241a9fddf43..bef67c0f63e2 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -85,7 +85,7 @@ ifdef CONFIG_EXPOLINE CC_FLAGS_EXPOLINE += -mfunction-return=thunk CC_FLAGS_EXPOLINE += -mindirect-branch-table export CC_FLAGS_EXPOLINE - cflags-y += $(CC_FLAGS_EXPOLINE) + cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE endif endif diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index 7df48e5cf36f..35bf28fe4c64 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -6,12 +6,10 @@ #include -extern int nospec_call_disable; -extern int nospec_return_disable; +extern int nospec_disable; void nospec_init_branches(void); -void nospec_call_revert(s32 *start, s32 *end); -void nospec_return_revert(s32 *start, s32 *end); +void nospec_revert(s32 *start, s32 *end); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index f5060af61176..b57b293998dc 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,6 +1,7 @@ #include #include #include +#include #define MAX_PATCH_LEN (255 - 1) diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index f461b09b3e85..f3f7321bef1a 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -171,7 +171,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, me->core_layout.size += me->arch.got_size; me->arch.plt_offset = me->core_layout.size; if (me->arch.plt_size) { - if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable) + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) me->arch.plt_size += PLT_ENTRY_SIZE; me->core_layout.size += me->arch.plt_size; } @@ -330,8 +330,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, info->plt_offset; ip[0] = 0x0d10e310; /* basr 1,0 */ ip[1] = 0x100a0004; /* lg 1,10(1) */ - if (IS_ENABLED(CONFIG_EXPOLINE) && - !nospec_call_disable) { + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { unsigned int *ij; ij = me->core_layout.base + me->arch.plt_offset + @@ -452,7 +451,7 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg; if (IS_ENABLED(CONFIG_EXPOLINE) && - !nospec_call_disable && me->arch.plt_size) { + !nospec_disable && me->arch.plt_size) { unsigned int *ij; ij = me->core_layout.base + me->arch.plt_offset + @@ -479,11 +478,11 @@ int module_finalize(const Elf_Ehdr *hdr, if (IS_ENABLED(CONFIG_EXPOLINE) && (!strcmp(".nospec_call_table", secname))) - nospec_call_revert(aseg, aseg + s->sh_size); + nospec_revert(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && (!strcmp(".nospec_return_table", secname))) - nospec_return_revert(aseg, aseg + s->sh_size); + nospec_revert(aseg, aseg + s->sh_size); } jump_label_apply_nops(me); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 3ce59d044a4d..daa36a38a8b7 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -11,10 +11,17 @@ static int __init nobp_setup_early(char *str) rc = kstrtobool(str, &enabled); if (rc) return rc; - if (enabled && test_facility(82)) + if (enabled && test_facility(82)) { + /* + * The user explicitely requested nobp=1, enable it and + * disable the expoline support. + */ __set_facility(82, S390_lowcore.alt_stfle_fac_list); - else + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_disable = 1; + } else { __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } return 0; } early_param("nobp", nobp_setup_early); @@ -28,31 +35,46 @@ early_param("nospec", nospec_setup_early); #ifdef CONFIG_EXPOLINE -int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); -int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL); +int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); static int __init nospectre_v2_setup_early(char *str) { - nospec_call_disable = 1; - nospec_return_disable = 1; + nospec_disable = 1; return 0; } early_param("nospectre_v2", nospectre_v2_setup_early); +static int __init spectre_v2_auto_early(void) +{ + if (IS_ENABLED(CC_USING_EXPOLINE)) { + /* + * The kernel has been compiled with expolines. + * Keep expolines enabled and disable nobp. + */ + nospec_disable = 0; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } + /* + * If the kernel has not been compiled with expolines the + * nobp setting decides what is done, this depends on the + * CONFIG_KERNEL_NP option and the nobp/nospec parameters. + */ + return 0; +} +#ifdef CONFIG_EXPOLINE_AUTO +early_initcall(spectre_v2_auto_early); +#endif + static int __init spectre_v2_setup_early(char *str) { if (str && !strncmp(str, "on", 2)) { - nospec_call_disable = 0; - nospec_return_disable = 0; - } - if (str && !strncmp(str, "off", 3)) { - nospec_call_disable = 1; - nospec_return_disable = 1; - } - if (str && !strncmp(str, "auto", 4)) { - nospec_call_disable = 0; - nospec_return_disable = 1; + nospec_disable = 0; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); } + if (str && !strncmp(str, "off", 3)) + nospec_disable = 1; + if (str && !strncmp(str, "auto", 4)) + spectre_v2_auto_early(); return 0; } early_param("spectre_v2", spectre_v2_setup_early); @@ -105,15 +127,9 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) } } -void __init_or_module nospec_call_revert(s32 *start, s32 *end) -{ - if (nospec_call_disable) - __nospec_revert(start, end); -} - -void __init_or_module nospec_return_revert(s32 *start, s32 *end) +void __init_or_module nospec_revert(s32 *start, s32 *end) { - if (nospec_return_disable) + if (nospec_disable) __nospec_revert(start, end); } @@ -121,8 +137,8 @@ extern s32 __nospec_call_start[], __nospec_call_end[]; extern s32 __nospec_return_start[], __nospec_return_end[]; void __init nospec_init_branches(void) { - nospec_call_revert(__nospec_call_start, __nospec_call_end); - nospec_return_revert(__nospec_return_start, __nospec_return_end); + nospec_revert(__nospec_call_start, __nospec_call_end); + nospec_revert(__nospec_return_start, __nospec_return_end); } #endif /* CONFIG_EXPOLINE */ -- cgit v1.2.3 From 4a51204f8b38e5533d0abf110dc922210b2b5f62 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:54 +0200 Subject: s390: report spectre mitigation via syslog [ Upstream commit bc035599718412cfba9249aa713f90ef13f13ee9 ] Add a boot message if either of the spectre defenses is active. The message is "Spectre V2 mitigation: execute trampolines." or "Spectre V2 mitigation: limited branch prediction." Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/nospec-branch.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index daa36a38a8b7..7387ebace891 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -33,6 +33,16 @@ static int __init nospec_setup_early(char *str) } early_param("nospec", nospec_setup_early); +static int __init nospec_report(void) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + pr_info("Spectre V2 mitigation: execute trampolines.\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + pr_info("Spectre V2 mitigation: limited branch prediction.\n"); + return 0; +} +arch_initcall(nospec_report); + #ifdef CONFIG_EXPOLINE int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); -- cgit v1.2.3 From fa90b9a2b63e2d84d169cb640f47fc6d9af17c6c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:55 +0200 Subject: s390: add sysfs attributes for spectre [ Upstream commit d424986f1d6b16079b3231db0314923f4f8deed1 ] Set CONFIG_GENERIC_CPU_VULNERABILITIES and provide the two functions cpu_show_spectre_v1 and cpu_show_spectre_v2 to report the spectre mitigations. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/Kconfig | 1 + arch/s390/kernel/nospec-branch.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index fbb8fca6f9c2..1c4a595e8224 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -118,6 +118,7 @@ config S390 select GENERIC_CLOCKEVENTS select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_DEVICES if !SMP + select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 7387ebace891..73c06d42792d 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include @@ -43,6 +44,24 @@ static int __init nospec_report(void) } arch_initcall(nospec_report); +#ifdef CONFIG_SYSFS +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + return sprintf(buf, "Mitigation: execute trampolines\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + return sprintf(buf, "Mitigation: limited branch prediction.\n"); + return sprintf(buf, "Vulnerable\n"); +} +#endif + #ifdef CONFIG_EXPOLINE int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); -- cgit v1.2.3 From 906cd975f6d4bcfdf26a12f4e13e826d089a42a3 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:56 +0200 Subject: s390: correct nospec auto detection init order [ Upstream commit 6a3d1e81a434fc311f224b8be77258bafc18ccc6 ] With CONFIG_EXPOLINE_AUTO=y the call of spectre_v2_auto_early() via early_initcall is done *after* the early_param functions. This overwrites any settings done with the nobp/no_spectre_v2/spectre_v2 parameters. The code patching for the kernel is done after the evaluation of the early parameters but before the early_initcall is done. The end result is a kernel image that is patched correctly but the kernel modules are not. Make sure that the nospec auto detection function is called before the early parameters are evaluated and before the code patching is done. Fixes: 6e179d64126b ("s390: add automatic detection of the spectre defense") Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/nospec-branch.h | 1 + arch/s390/kernel/nospec-branch.c | 8 ++------ arch/s390/kernel/setup.c | 3 +++ 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index 35bf28fe4c64..b4bd8c41e9d3 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -9,6 +9,7 @@ extern int nospec_disable; void nospec_init_branches(void); +void nospec_auto_detect(void); void nospec_revert(s32 *start, s32 *end); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 73c06d42792d..9f3b5b382743 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -73,7 +73,7 @@ static int __init nospectre_v2_setup_early(char *str) } early_param("nospectre_v2", nospectre_v2_setup_early); -static int __init spectre_v2_auto_early(void) +void __init nospec_auto_detect(void) { if (IS_ENABLED(CC_USING_EXPOLINE)) { /* @@ -88,11 +88,7 @@ static int __init spectre_v2_auto_early(void) * nobp setting decides what is done, this depends on the * CONFIG_KERNEL_NP option and the nobp/nospec parameters. */ - return 0; } -#ifdef CONFIG_EXPOLINE_AUTO -early_initcall(spectre_v2_auto_early); -#endif static int __init spectre_v2_setup_early(char *str) { @@ -103,7 +99,7 @@ static int __init spectre_v2_setup_early(char *str) if (str && !strncmp(str, "off", 3)) nospec_disable = 1; if (str && !strncmp(str, "auto", 4)) - spectre_v2_auto_early(); + nospec_auto_detect(); return 0; } early_param("spectre_v2", spectre_v2_setup_early); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index bc25366f0244..feb9d97a9d14 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -876,6 +876,9 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; + if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) + nospec_auto_detect(); + parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ -- cgit v1.2.3 From 037069f2c62510b23dbc67b6894a1c152562f7b5 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2018 07:36:57 +0200 Subject: s390: correct module section names for expoline code revert [ Upstream commit 6cf09958f32b9667bb3ebadf74367c791112771b ] The main linker script vmlinux.lds.S for the kernel image merges the expoline code patch tables into two section ".nospec_call_table" and ".nospec_return_table". This is *not* done for the modules, there the sections retain their original names as generated by gcc: ".s390_indirect_call", ".s390_return_mem" and ".s390_return_reg". The module_finalize code has to check for the compiler generated section names, otherwise no code patching is done. This slows down the module code in case of "spectre_v2=off". Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index f3f7321bef1a..64ccfdf96b32 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -477,11 +477,11 @@ int module_finalize(const Elf_Ehdr *hdr, apply_alternatives(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && - (!strcmp(".nospec_call_table", secname))) + (!strncmp(".s390_indirect", secname, 14))) nospec_revert(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && - (!strcmp(".nospec_return_table", secname))) + (!strncmp(".s390_return", secname, 12))) nospec_revert(aseg, aseg + s->sh_size); } -- cgit v1.2.3 From a43e7cba5444f062defc2145fe0b83b28ac35d5f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 22 Apr 2018 19:11:50 +0800 Subject: bonding: do not set slave_dev npinfo before slave_enable_netpoll in bond_enslave [ Upstream commit ddea788c63094f7c483783265563dd5b50052e28 ] After Commit 8a8efa22f51b ("bonding: sync netpoll code with bridge"), it would set slave_dev npinfo in slave_enable_netpoll when enslaving a dev if bond->dev->npinfo was set. However now slave_dev npinfo is set with bond->dev->npinfo before calling slave_enable_netpoll. With slave_dev npinfo set, __netpoll_setup called in slave_enable_netpoll will not call slave dev's .ndo_netpoll_setup(). It causes that the lower dev of this slave dev can't set its npinfo. One way to reproduce it: # modprobe bonding # brctl addbr br0 # brctl addif br0 eth1 # ifconfig bond0 192.168.122.1/24 up # ifenslave bond0 eth2 # systemctl restart netconsole # ifenslave bond0 br0 # ifconfig eth2 down # systemctl restart netconsole The netpoll won't really work. This patch is to remove that slave_dev npinfo setting in bond_enslave(). Fixes: 8a8efa22f51b ("bonding: sync netpoll code with bridge") Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 513457a2a7bf..13a015b8052b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1654,8 +1654,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } /* switch(bond_mode) */ #ifdef CONFIG_NET_POLL_CONTROLLER - slave_dev->npinfo = bond->dev->npinfo; - if (slave_dev->npinfo) { + if (bond->dev->npinfo) { if (slave_enable_netpoll(new_slave)) { netdev_info(bond_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n"); res = -EBUSY; -- cgit v1.2.3 From 06a02a81689074406d70979837518b1f747767af Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Apr 2018 12:07:06 -0700 Subject: KEYS: DNS: limit the length of option strings [ Upstream commit 9c438d7a3a52dcc2b9ed095cb87d3a5e83cf7e60 ] Adding a dns_resolver key whose payload contains a very long option name resulted in that string being printed in full. This hit the WARN_ONCE() in set_precision() during the printk(), because printk() only supports a precision of up to 32767 bytes: precision 1000000 too large WARNING: CPU: 0 PID: 752 at lib/vsprintf.c:2189 vsnprintf+0x4bc/0x5b0 Fix it by limiting option strings (combined name + value) to a much more reasonable 128 bytes. The exact limit is arbitrary, but currently the only recognized option is formatted as "dnserror=%lu" which fits well within this limit. Also ratelimit the printks. Reproducer: perl -e 'print "#", "A" x 1000000, "\x00"' | keyctl padd dns_resolver desc @s This bug was found using syzkaller. Reported-by: Mark Rutland Fixes: 4a2d789267e0 ("DNS: If the DNS server returns an error, allow that to be cached [ver #2]") Signed-off-by: Eric Biggers Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dns_resolver/dns_key.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index e1d4d898a007..f0252768ecf4 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -91,9 +92,9 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) next_opt = memchr(opt, '#', end - opt) ?: end; opt_len = next_opt - opt; - if (!opt_len) { - printk(KERN_WARNING - "Empty option to dns_resolver key\n"); + if (opt_len <= 0 || opt_len > 128) { + pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n", + opt_len); return -EINVAL; } @@ -127,10 +128,8 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) } bad_option_value: - printk(KERN_WARNING - "Option '%*.*s' to dns_resolver key:" - " bad/missing value\n", - opt_nlen, opt_nlen, opt); + pr_warn_ratelimited("Option '%*.*s' to dns_resolver key: bad/missing value\n", + opt_nlen, opt_nlen, opt); return -EINVAL; } while (opt = next_opt + 1, opt < end); } -- cgit v1.2.3 From e9c46600cdf19bd790a0a2e2aeb6675f61d3aed5 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 23 Apr 2018 16:15:14 +0200 Subject: l2tp: check sockaddr length in pppol2tp_connect() [ Upstream commit eb1c28c05894a4b1f6b56c5bf072205e64cfa280 ] Check sockaddr_len before dereferencing sp->sa_protocol, to ensure that it actually points to valid data. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Reported-by: syzbot+a70ac890b23b1bf29f5c@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ppp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 163f1fa53917..9b214f313cc0 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -590,6 +590,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); error = -EINVAL; + + if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6)) + goto end; + if (sp->sa_protocol != PX_PROTO_OL2TP) goto end; -- cgit v1.2.3 From 0e770d2927b7b5fed0859ac46861d00f762b2407 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Apr 2018 14:46:00 -0700 Subject: net: validate attribute sizes in neigh_dump_table() [ Upstream commit 7dd07c143a4b54d050e748bee4b4b9e94a7b1744 ] Since neigh_dump_table() calls nlmsg_parse() without giving policy constraints, attributes can have arbirary size that we must validate Reported by syzbot/KMSAN : BUG: KMSAN: uninit-value in neigh_master_filtered net/core/neighbour.c:2292 [inline] BUG: KMSAN: uninit-value in neigh_dump_table net/core/neighbour.c:2348 [inline] BUG: KMSAN: uninit-value in neigh_dump_info+0x1af0/0x2250 net/core/neighbour.c:2438 CPU: 1 PID: 3575 Comm: syzkaller268891 Not tainted 4.16.0+ #83 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 neigh_master_filtered net/core/neighbour.c:2292 [inline] neigh_dump_table net/core/neighbour.c:2348 [inline] neigh_dump_info+0x1af0/0x2250 net/core/neighbour.c:2438 netlink_dump+0x9ad/0x1540 net/netlink/af_netlink.c:2225 __netlink_dump_start+0x1167/0x12a0 net/netlink/af_netlink.c:2322 netlink_dump_start include/linux/netlink.h:214 [inline] rtnetlink_rcv_msg+0x1435/0x1560 net/core/rtnetlink.c:4598 netlink_rcv_skb+0x355/0x5f0 net/netlink/af_netlink.c:2447 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4653 netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline] netlink_unicast+0x1672/0x1750 net/netlink/af_netlink.c:1337 netlink_sendmsg+0x1048/0x1310 net/netlink/af_netlink.c:1900 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x43fed9 RSP: 002b:00007ffddbee2798 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fed9 RDX: 0000000000000000 RSI: 0000000020005000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 0000000000401800 R13: 0000000000401890 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1183 [inline] netlink_sendmsg+0x9a6/0x1310 net/netlink/af_netlink.c:1875 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 21fdd092acc7 ("net: Add support for filtering neigh dump by master device") Signed-off-by: Eric Dumazet Cc: David Ahern Reported-by: syzbot Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a426790b0688..ce5bbf8b1f67 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2279,12 +2279,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL); if (!err) { - if (tb[NDA_IFINDEX]) + if (tb[NDA_IFINDEX]) { + if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) + return -EINVAL; filter_idx = nla_get_u32(tb[NDA_IFINDEX]); - - if (tb[NDA_MASTER]) + } + if (tb[NDA_MASTER]) { + if (nla_len(tb[NDA_MASTER]) != sizeof(u32)) + return -EINVAL; filter_master_idx = nla_get_u32(tb[NDA_MASTER]); - + } if (filter_idx || filter_master_idx) flags |= NLM_F_DUMP_FILTERED; } -- cgit v1.2.3 From e202fa9ea5a33723a6dd048e5e272335bfdf8113 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 19 Apr 2018 12:25:38 -0700 Subject: llc: delete timers synchronously in llc_sk_free() [ Upstream commit b905ef9ab90115d001c1658259af4b1c65088779 ] The connection timers of an llc sock could be still flying after we delete them in llc_sk_free(), and even possibly after we free the sock. We could just wait synchronously here in case of troubles. Note, I leave other call paths as they are, since they may not have to wait, at least we can change them to synchronously when needed. Also, move the code to net/llc/llc_conn.c, which is apparently a better place. Reported-by: Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/llc_conn.h | 1 + net/llc/llc_c_ac.c | 9 +-------- net/llc/llc_conn.c | 22 +++++++++++++++++++++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index fe994d2e5286..ea985aa7a6c5 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -97,6 +97,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); +void llc_sk_stop_all_timers(struct sock *sk, bool sync); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index ea225bd2672c..f8d4ab8ca1a5 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1096,14 +1096,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb) { - struct llc_sock *llc = llc_sk(sk); - - del_timer(&llc->pf_cycle_timer.timer); - del_timer(&llc->ack_timer.timer); - del_timer(&llc->rej_sent_timer.timer); - del_timer(&llc->busy_state_timer.timer); - llc->ack_must_be_send = 0; - llc->ack_pf = 0; + llc_sk_stop_all_timers(sk, false); return 0; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 8bc5a1bd2d45..d861b74ad068 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -951,6 +951,26 @@ out: return sk; } +void llc_sk_stop_all_timers(struct sock *sk, bool sync) +{ + struct llc_sock *llc = llc_sk(sk); + + if (sync) { + del_timer_sync(&llc->pf_cycle_timer.timer); + del_timer_sync(&llc->ack_timer.timer); + del_timer_sync(&llc->rej_sent_timer.timer); + del_timer_sync(&llc->busy_state_timer.timer); + } else { + del_timer(&llc->pf_cycle_timer.timer); + del_timer(&llc->ack_timer.timer); + del_timer(&llc->rej_sent_timer.timer); + del_timer(&llc->busy_state_timer.timer); + } + + llc->ack_must_be_send = 0; + llc->ack_pf = 0; +} + /** * llc_sk_free - Frees a LLC socket * @sk - socket to free @@ -963,7 +983,7 @@ void llc_sk_free(struct sock *sk) llc->state = LLC_CONN_OUT_OF_SVC; /* Stop all (possibly) running timers */ - llc_conn_ac_stop_all_timers(sk, NULL); + llc_sk_stop_all_timers(sk, true); #ifdef DEBUG_LLC_CONN_ALLOC printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__, skb_queue_len(&llc->pdu_unack_q), -- cgit v1.2.3 From f838259677f3ce937756018ce999339e07f8c351 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 20 Apr 2018 15:57:30 +0200 Subject: tcp: don't read out-of-bounds opsize [ Upstream commit 7e5a206ab686f098367b61aca989f5cdfa8114a3 ] The old code reads the "opsize" variable from out-of-bounds memory (first byte behind the segment) if a broken TCP segment ends directly after an opcode that is neither EOL nor NOP. The result of the read isn't used for anything, so the worst thing that could theoretically happen is a pagefault; and since the physmap is usually mostly contiguous, even that seems pretty unlikely. The following C reproducer triggers the uninitialized read - however, you can't actually see anything happen unless you put something like a pr_warn() in tcp_parse_md5sig_option() to print the opsize. ==================================== #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void systemf(const char *command, ...) { char *full_command; va_list ap; va_start(ap, command); if (vasprintf(&full_command, command, ap) == -1) err(1, "vasprintf"); va_end(ap); printf("systemf: <<<%s>>>\n", full_command); system(full_command); } char *devname; int tun_alloc(char *name) { int fd = open("/dev/net/tun", O_RDWR); if (fd == -1) err(1, "open tun dev"); static struct ifreq req = { .ifr_flags = IFF_TUN|IFF_NO_PI }; strcpy(req.ifr_name, name); if (ioctl(fd, TUNSETIFF, &req)) err(1, "TUNSETIFF"); devname = req.ifr_name; printf("device name: %s\n", devname); return fd; } #define IPADDR(a,b,c,d) (((a)<<0)+((b)<<8)+((c)<<16)+((d)<<24)) void sum_accumulate(unsigned int *sum, void *data, int len) { assert((len&2)==0); for (int i=0; i> 16) + (sum & 0xffff); sum = (sum >> 16) + (sum & 0xffff); return htons(~sum); } void fix_ip_sum(struct iphdr *ip) { unsigned int sum = 0; sum_accumulate(&sum, ip, sizeof(*ip)); ip->check = sum_final(sum); } void fix_tcp_sum(struct iphdr *ip, struct tcphdr *tcp) { unsigned int sum = 0; struct { unsigned int saddr; unsigned int daddr; unsigned char pad; unsigned char proto_num; unsigned short tcp_len; } fakehdr = { .saddr = ip->saddr, .daddr = ip->daddr, .proto_num = ip->protocol, .tcp_len = htons(ntohs(ip->tot_len) - ip->ihl*4) }; sum_accumulate(&sum, &fakehdr, sizeof(fakehdr)); sum_accumulate(&sum, tcp, tcp->doff*4); tcp->check = sum_final(sum); } int main(void) { int tun_fd = tun_alloc("inject_dev%d"); systemf("ip link set %s up", devname); systemf("ip addr add 192.168.42.1/24 dev %s", devname); struct { struct iphdr ip; struct tcphdr tcp; unsigned char tcp_opts[20]; } __attribute__((packed)) syn_packet = { .ip = { .ihl = sizeof(struct iphdr)/4, .version = 4, .tot_len = htons(sizeof(syn_packet)), .ttl = 30, .protocol = IPPROTO_TCP, /* FIXUP check */ .saddr = IPADDR(192,168,42,2), .daddr = IPADDR(192,168,42,1) }, .tcp = { .source = htons(1), .dest = htons(1337), .seq = 0x12345678, .doff = (sizeof(syn_packet.tcp)+sizeof(syn_packet.tcp_opts))/4, .syn = 1, .window = htons(64), .check = 0 /*FIXUP*/ }, .tcp_opts = { /* INVALID: trailing MD5SIG opcode after NOPs */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 19 } }; fix_ip_sum(&syn_packet.ip); fix_tcp_sum(&syn_packet.ip, &syn_packet.tcp); while (1) { int write_res = write(tun_fd, &syn_packet, sizeof(syn_packet)); if (write_res != sizeof(syn_packet)) err(1, "packet write failed"); } } ==================================== Fixes: cfb6eeb4c860 ("[TCP]: MD5 Signature Option (RFC2385) support.") Signed-off-by: Jann Horn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb05ad940e37..52b0a84be765 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3943,11 +3943,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); - /* If the TCP option is too short, we can short cut */ - if (length < TCPOLEN_MD5SIG) - return NULL; - - while (length > 0) { + /* If not enough data remaining, we can short cut */ + while (length >= TCPOLEN_MD5SIG) { int opcode = *ptr++; int opsize; -- cgit v1.2.3 From 3626fb04d050139b1fe1249f67c7c3efbe73b0ad Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 Apr 2018 13:59:25 +0200 Subject: team: avoid adding twice the same option to the event list [ Upstream commit 4fb0534fb7bbc2346ba7d3a072b538007f4135a5 ] When parsing the options provided by the user space, team_nl_cmd_options_set() insert them in a temporary list to send multiple events with a single message. While each option's attribute is correctly validated, the code does not check for duplicate entries before inserting into the event list. Exploiting the above, the syzbot was able to trigger the following splat: kernel BUG at lib/list_debug.c:31! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4466 Comm: syzkaller556835 Not tainted 4.16.0+ #17 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__list_add_valid+0xaa/0xb0 lib/list_debug.c:29 RSP: 0018:ffff8801b04bf248 EFLAGS: 00010286 RAX: 0000000000000058 RBX: ffff8801c8fc7a90 RCX: 0000000000000000 RDX: 0000000000000058 RSI: ffffffff815fbf41 RDI: ffffed0036097e3f RBP: ffff8801b04bf260 R08: ffff8801b0b2a700 R09: ffffed003b604f90 R10: ffffed003b604f90 R11: ffff8801db027c87 R12: ffff8801c8fc7a90 R13: ffff8801c8fc7a90 R14: dffffc0000000000 R15: 0000000000000000 FS: 0000000000b98880(0000) GS:ffff8801db000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000043fc30 CR3: 00000001afe8e000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_add include/linux/list.h:60 [inline] list_add include/linux/list.h:79 [inline] team_nl_cmd_options_set+0x9ff/0x12b0 drivers/net/team/team.c:2571 genl_family_rcv_msg+0x889/0x1120 net/netlink/genetlink.c:599 genl_rcv_msg+0xc6/0x170 net/netlink/genetlink.c:624 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2448 genl_rcv+0x28/0x40 net/netlink/genetlink.c:635 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x58b/0x740 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x9f0/0xfa0 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:639 ___sys_sendmsg+0x805/0x940 net/socket.c:2117 __sys_sendmsg+0x115/0x270 net/socket.c:2155 SYSC_sendmsg net/socket.c:2164 [inline] SyS_sendmsg+0x29/0x30 net/socket.c:2162 do_syscall_64+0x29e/0x9d0 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x4458b9 RSP: 002b:00007ffd1d4a7278 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000000001b RCX: 00000000004458b9 RDX: 0000000000000010 RSI: 0000000020000d00 RDI: 0000000000000004 RBP: 00000000004a74ed R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000213 R12: 00007ffd1d4a7348 R13: 0000000000402a60 R14: 0000000000000000 R15: 0000000000000000 Code: 75 e8 eb a9 48 89 f7 48 89 75 e8 e8 d1 85 7b fe 48 8b 75 e8 eb bb 48 89 f2 48 89 d9 4c 89 e6 48 c7 c7 a0 84 d8 87 e8 ea 67 28 fe <0f> 0b 0f 1f 40 00 48 b8 00 00 00 00 00 fc ff df 55 48 89 e5 41 RIP: __list_add_valid+0xaa/0xb0 lib/list_debug.c:29 RSP: ffff8801b04bf248 This changeset addresses the avoiding list_add() if the current option is already present in the event list. Reported-and-tested-by: syzbot+4d4af685432dc0e56c91@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Fixes: 2fcdb2c9e659 ("team: allow to send multiple set events in one message") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 8673ef3c9cdc..e5941e86f64a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -261,6 +261,17 @@ static void __team_option_inst_mark_removed_port(struct team *team, } } +static bool __team_option_inst_tmp_find(const struct list_head *opts, + const struct team_option_inst *needle) +{ + struct team_option_inst *opt_inst; + + list_for_each_entry(opt_inst, opts, tmp_list) + if (opt_inst == needle) + return true; + return false; +} + static int __team_options_register(struct team *team, const struct team_option *option, size_t option_count) @@ -2569,6 +2580,14 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) if (err) goto team_put; opt_inst->changed = true; + + /* dumb/evil user-space can send us duplicate opt, + * keep only the last one + */ + if (__team_option_inst_tmp_find(&opt_inst_list, + opt_inst)) + continue; + list_add(&opt_inst->tmp_list, &opt_inst_list); } if (!opt_found) { -- cgit v1.2.3 From 8599a1fe10b8c3f6e17568a1c43bfaccfeaff75c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Apr 2018 14:33:37 +0800 Subject: team: fix netconsole setup over team MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9cf2f437ca5b39828984064fad213e68fc17ef11 ] The same fix in Commit dbe173079ab5 ("bridge: fix netconsole setup over bridge") is also needed for team driver. While at it, remove the unnecessary parameter *team from team_port_enable_netpoll(). v1->v2: - fix it in a better way, as does bridge. Fixes: 0fb52a27a04a ("team: cleanup netpoll clode") Reported-by: João Avelino Bellomo Filho Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e5941e86f64a..36963685d42a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1078,14 +1078,11 @@ static void team_port_leave(struct team *team, struct team_port *port) } #ifdef CONFIG_NET_POLL_CONTROLLER -static int team_port_enable_netpoll(struct team *team, struct team_port *port) +static int __team_port_enable_netpoll(struct team_port *port) { struct netpoll *np; int err; - if (!team->dev->npinfo) - return 0; - np = kzalloc(sizeof(*np), GFP_KERNEL); if (!np) return -ENOMEM; @@ -1099,6 +1096,14 @@ static int team_port_enable_netpoll(struct team *team, struct team_port *port) return err; } +static int team_port_enable_netpoll(struct team_port *port) +{ + if (!port->team->dev->npinfo) + return 0; + + return __team_port_enable_netpoll(port); +} + static void team_port_disable_netpoll(struct team_port *port) { struct netpoll *np = port->np; @@ -1113,7 +1118,7 @@ static void team_port_disable_netpoll(struct team_port *port) kfree(np); } #else -static int team_port_enable_netpoll(struct team *team, struct team_port *port) +static int team_port_enable_netpoll(struct team_port *port) { return 0; } @@ -1221,7 +1226,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_vids_add; } - err = team_port_enable_netpoll(team, port); + err = team_port_enable_netpoll(port); if (err) { netdev_err(dev, "Failed to enable netpoll on device %s\n", portname); @@ -1919,7 +1924,7 @@ static int team_netpoll_setup(struct net_device *dev, mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { - err = team_port_enable_netpoll(team, port); + err = __team_port_enable_netpoll(port); if (err) { __team_netpoll_cleanup(team); break; -- cgit v1.2.3 From b2a5207f8c45359d2404562e31fbca2b2fee6c56 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 23 Apr 2018 17:37:03 -0400 Subject: packet: fix bitfield update race [ Upstream commit a6361f0ca4b25460f2cdf3235ebe8115f622901e ] Updates to the bitfields in struct packet_sock are not atomic. Serialize these read-modify-write cycles. Move po->running into a separate variable. Its writes are protected by po->bind_lock (except for one startup case at packet_create). Also replace a textual precondition warning with lockdep annotation. All others are set only in packet_setsockopt. Serialize these updates by holding the socket lock. Analogous to other field updates, also hold the lock when testing whether a ring is active (pg_vec). Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg") Reported-by: DaeRyong Jeong Reported-by: Byoungyoung Lee Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 60 ++++++++++++++++++++++++++++++++++++-------------- net/packet/internal.h | 10 ++++----- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 267db0d603bc..759e005623aa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -333,11 +333,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) skb_set_queue_mapping(skb, queue_index); } -/* register_prot_hook must be invoked with the po->bind_lock held, +/* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ -static void register_prot_hook(struct sock *sk) +static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); @@ -352,8 +352,13 @@ static void register_prot_hook(struct sock *sk) } } -/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock - * held. If the sync parameter is true, we will temporarily drop +static void register_prot_hook(struct sock *sk) +{ + lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); + __register_prot_hook(sk); +} + +/* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the @@ -363,6 +368,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); + lockdep_assert_held_once(&po->bind_lock); + po->running = 0; if (po->fanout) @@ -3259,7 +3266,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (proto) { po->prot_hook.type = proto; - register_prot_hook(sk); + __register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); @@ -3735,12 +3742,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_loss = !!val; - return 0; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_loss = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_AUXDATA: { @@ -3751,7 +3764,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->auxdata = !!val; + release_sock(sk); return 0; } case PACKET_ORIGDEV: @@ -3763,7 +3778,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->origdev = !!val; + release_sock(sk); return 0; } case PACKET_VNET_HDR: @@ -3772,15 +3789,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (sock->type != SOCK_RAW) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->has_vnet_hdr = !!val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->has_vnet_hdr = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_TIMESTAMP: { @@ -3818,11 +3840,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tx_has_off = !!val; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_tx_has_off = !!val; + ret = 0; + } + release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: diff --git a/net/packet/internal.h b/net/packet/internal.h index d55bfc34d6b3..1309e2a7baad 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -109,10 +109,12 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, + unsigned int running; /* bind_lock must be held */ + unsigned int auxdata:1, /* writer must hold sock lock */ origdev:1, - has_vnet_hdr:1; + has_vnet_hdr:1, + tp_loss:1, + tp_tx_has_off:1; int pressure; int ifindex; /* bound device */ __be16 num; @@ -122,8 +124,6 @@ struct packet_sock { enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); -- cgit v1.2.3 From 0b6693b49e4d36846ab0d638ff062920d0ec10e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Apr 2018 08:29:42 -0700 Subject: tipc: add policy for TIPC_NLA_NET_ADDR [ Upstream commit ec518f21cb1a1b1f8a516499ea05c60299e04963 ] Before syzbot/KMSAN bites, add the missing policy for TIPC_NLA_NET_ADDR Fixes: 27c21416727a ("tipc: add net set to new netlink api") Signed-off-by: Eric Dumazet Cc: Jon Maloy Cc: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3200059d14b2..9ba3c462f86e 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -79,7 +79,8 @@ const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_NET_ID] = { .type = NLA_U32 } + [TIPC_NLA_NET_ID] = { .type = NLA_U32 }, + [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 }, }; const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { -- cgit v1.2.3 From ecaae08d72bd353c548545e521b68b00749f479a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 23 Apr 2018 16:38:27 +0200 Subject: pppoe: check sockaddr length in pppoe_connect() [ Upstream commit a49e2f5d5fb141884452ddb428f551b123d436b5 ] We must validate sockaddr_len, otherwise userspace can pass fewer data than we expect and we end up accessing invalid data. Fixes: 224cf5ad14c0 ("ppp: Move the PPP drivers") Reported-by: syzbot+4f03bdf92fdf9ef5ddab@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index dc36c2ec1d10..fa2c7bd638be 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -620,6 +620,10 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); error = -EINVAL; + + if (sockaddr_len != sizeof(struct sockaddr_pppox)) + goto end; + if (sp->sa_protocol != PX_PROTO_OE) goto end; -- cgit v1.2.3 From 9d33bfffd39bd1c2bf5d55f8b1f1e0cebf26f666 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 17 Apr 2018 18:46:14 +0900 Subject: vlan: Fix reading memory beyond skb->tail in skb_vlan_tagged_multi [ Upstream commit 7ce2367254e84753bceb07327aaf5c953cfce117 ] Syzkaller spotted an old bug which leads to reading skb beyond tail by 4 bytes on vlan tagged packets. This is caused because skb_vlan_tagged_multi() did not check skb_headlen. BUG: KMSAN: uninit-value in eth_type_vlan include/linux/if_vlan.h:283 [inline] BUG: KMSAN: uninit-value in skb_vlan_tagged_multi include/linux/if_vlan.h:656 [inline] BUG: KMSAN: uninit-value in vlan_features_check include/linux/if_vlan.h:672 [inline] BUG: KMSAN: uninit-value in dflt_features_check net/core/dev.c:2949 [inline] BUG: KMSAN: uninit-value in netif_skb_features+0xd1b/0xdc0 net/core/dev.c:3009 CPU: 1 PID: 3582 Comm: syzkaller435149 Not tainted 4.16.0+ #82 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 eth_type_vlan include/linux/if_vlan.h:283 [inline] skb_vlan_tagged_multi include/linux/if_vlan.h:656 [inline] vlan_features_check include/linux/if_vlan.h:672 [inline] dflt_features_check net/core/dev.c:2949 [inline] netif_skb_features+0xd1b/0xdc0 net/core/dev.c:3009 validate_xmit_skb+0x89/0x1320 net/core/dev.c:3084 __dev_queue_xmit+0x1cb2/0x2b60 net/core/dev.c:3549 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 packet_snd net/packet/af_packet.c:2944 [inline] packet_sendmsg+0x7c57/0x8a10 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] sock_write_iter+0x3b9/0x470 net/socket.c:909 do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776 do_iter_write+0x30d/0xd40 fs/read_write.c:932 vfs_writev fs/read_write.c:977 [inline] do_writev+0x3c9/0x830 fs/read_write.c:1012 SYSC_writev+0x9b/0xb0 fs/read_write.c:1085 SyS_writev+0x56/0x80 fs/read_write.c:1082 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x43ffa9 RSP: 002b:00007fff2cff3948 EFLAGS: 00000217 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043ffa9 RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006cb018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004018d0 R13: 0000000000401960 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 packet_alloc_skb net/packet/af_packet.c:2803 [inline] packet_snd net/packet/af_packet.c:2894 [inline] packet_sendmsg+0x6444/0x8a10 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] sock_write_iter+0x3b9/0x470 net/socket.c:909 do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776 do_iter_write+0x30d/0xd40 fs/read_write.c:932 vfs_writev fs/read_write.c:977 [inline] do_writev+0x3c9/0x830 fs/read_write.c:1012 SYSC_writev+0x9b/0xb0 fs/read_write.c:1085 SyS_writev+0x56/0x80 fs/read_write.c:1082 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.") Reported-and-tested-by: syzbot+0bbe42c764feafa82c5a@syzkaller.appspotmail.com Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/if_vlan.h | 7 +++++-- net/core/dev.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8feecd5345e7..7e39719e27cb 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -600,7 +600,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb) * Returns true if the skb is tagged with multiple vlan headers, regardless * of whether it is hardware accelerated or not. */ -static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) +static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) { __be16 protocol = skb->protocol; @@ -610,6 +610,9 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) if (likely(!eth_type_vlan(protocol))) return false; + if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) + return false; + veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } @@ -627,7 +630,7 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) * * Returns features without unsafe ones if the skb has multiple tags. */ -static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, +static inline netdev_features_t vlan_features_check(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tagged_multi(skb)) { diff --git a/net/core/dev.c b/net/core/dev.c index 3d9190c2940d..5407d5f7b2d0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2871,7 +2871,7 @@ netdev_features_t passthru_features_check(struct sk_buff *skb, } EXPORT_SYMBOL(passthru_features_check); -static netdev_features_t dflt_features_check(const struct sk_buff *skb, +static netdev_features_t dflt_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { -- cgit v1.2.3 From 2de74b91a85e4928600543841752b70af22d3d50 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 12 Apr 2018 14:24:31 +0800 Subject: sctp: do not check port in sctp_inet6_cmp_addr [ Upstream commit 1071ec9d453a38023579714b64a951a2fb982071 ] pf->cmp_addr() is called before binding a v6 address to the sock. It should not check ports, like in sctp_inet_cmp_addr. But sctp_inet6_cmp_addr checks the addr by invoking af(6)->cmp_addr, sctp_v6_cmp_addr where it also compares the ports. This would cause that setsockopt(SCTP_SOCKOPT_BINDX_ADD) could bind multiple duplicated IPv6 addresses after Commit 40b4f0fd74e4 ("sctp: lack the check for ports in sctp_v6_cmp_addr"). This patch is to remove af->cmp_addr called in sctp_inet6_cmp_addr, but do the proper check for both v6 addrs and v4mapped addrs. v1->v2: - define __sctp_v6_cmp_addr to do the common address comparison used for both pf and af v6 cmp_addr. Fixes: 40b4f0fd74e4 ("sctp: lack the check for ports in sctp_v6_cmp_addr") Reported-by: Jianwen Ji Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 60 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 355d95a7cd81..e031797ad311 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -521,46 +521,49 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, addr->v6.sin6_scope_id = 0; } -/* Compare addresses exactly. - * v4-mapped-v6 is also in consideration. - */ -static int sctp_v6_cmp_addr(const union sctp_addr *addr1, - const union sctp_addr *addr2) +static int __sctp_v6_cmp_addr(const union sctp_addr *addr1, + const union sctp_addr *addr2) { if (addr1->sa.sa_family != addr2->sa.sa_family) { if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr2->v6.sin6_addr)) { - if (addr2->v6.sin6_port == addr1->v4.sin_port && - addr2->v6.sin6_addr.s6_addr32[3] == - addr1->v4.sin_addr.s_addr) - return 1; - } + ipv6_addr_v4mapped(&addr2->v6.sin6_addr) && + addr2->v6.sin6_addr.s6_addr32[3] == + addr1->v4.sin_addr.s_addr) + return 1; + if (addr2->sa.sa_family == AF_INET && addr1->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr1->v6.sin6_addr)) { - if (addr1->v6.sin6_port == addr2->v4.sin_port && - addr1->v6.sin6_addr.s6_addr32[3] == - addr2->v4.sin_addr.s_addr) - return 1; - } + ipv6_addr_v4mapped(&addr1->v6.sin6_addr) && + addr1->v6.sin6_addr.s6_addr32[3] == + addr2->v4.sin_addr.s_addr) + return 1; + return 0; } - if (addr1->v6.sin6_port != addr2->v6.sin6_port) - return 0; + if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) return 0; + /* If this is a linklocal address, compare the scope_id. */ - if (ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { - if (addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id && - (addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id)) { - return 0; - } - } + if ((ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) && + addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id && + addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id) + return 0; return 1; } +/* Compare addresses exactly. + * v4-mapped-v6 is also in consideration. + */ +static int sctp_v6_cmp_addr(const union sctp_addr *addr1, + const union sctp_addr *addr2) +{ + return __sctp_v6_cmp_addr(addr1, addr2) && + addr1->v6.sin6_port == addr2->v6.sin6_port; +} + /* Initialize addr struct to INADDR_ANY. */ static void sctp_v6_inaddr_any(union sctp_addr *addr, __be16 port) { @@ -844,8 +847,8 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2, struct sctp_sock *opt) { - struct sctp_af *af1, *af2; struct sock *sk = sctp_opt2sk(opt); + struct sctp_af *af1, *af2; af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); @@ -861,10 +864,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; - if (addr1->sa.sa_family != addr2->sa.sa_family) - return 0; - - return af1->cmp_addr(addr1, addr2); + return __sctp_v6_cmp_addr(addr1, addr2); } /* Verify that the provided sockaddr looks bindable. Common verification, -- cgit v1.2.3 From e5d0f6a6935a6b58e2b343010a260b58594566bf Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 15:15:03 -0400 Subject: net: sched: ife: signal not finding metaid [ Upstream commit f6cd14537ff9919081be19b9c53b9b19c0d3ea97 ] We need to record stats for received metadata that we dont know how to process. Have find_decode_metaid() return -ENOENT to capture this. Signed-off-by: Alexander Aring Reviewed-by: Yotam Gigi Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ife.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 95c463cbb9a6..235db2c9bbbb 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -634,7 +634,7 @@ int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, } } - return 0; + return -ENOENT; } struct ifeheadr { -- cgit v1.2.3 From 90e19ec22499f6182134f5a8b8461de2a0abe987 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 18 Apr 2018 11:51:56 -0700 Subject: llc: hold llc_sap before release_sock() [ Upstream commit f7e43672683b097bb074a8fe7af9bc600a23f231 ] syzbot reported we still access llc->sap in llc_backlog_rcv() after it is freed in llc_sap_remove_socket(): Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 llc_conn_ac_send_sabme_cmd_p_set_x+0x3a8/0x460 net/llc/llc_c_ac.c:785 llc_exec_conn_trans_actions net/llc/llc_conn.c:475 [inline] llc_conn_service net/llc/llc_conn.c:400 [inline] llc_conn_state_process+0x4e1/0x13a0 net/llc/llc_conn.c:75 llc_backlog_rcv+0x195/0x1e0 net/llc/llc_conn.c:891 sk_backlog_rcv include/net/sock.h:909 [inline] __release_sock+0x12f/0x3a0 net/core/sock.c:2335 release_sock+0xa4/0x2b0 net/core/sock.c:2850 llc_ui_release+0xc8/0x220 net/llc/af_llc.c:204 llc->sap is refcount'ed and llc_sap_remove_socket() is paired with llc_sap_add_socket(). This can be amended by holding its refcount before llc_sap_remove_socket() and releasing it after release_sock(). Reported-by: Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/af_llc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f7caf0f5d9c8..dd4fcf0acea5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -187,6 +187,7 @@ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; + struct llc_sap *sap; if (unlikely(sk == NULL)) goto out; @@ -197,9 +198,15 @@ static int llc_ui_release(struct socket *sock) llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); + sap = llc->sap; + /* Hold this for release_sock(), so that llc_backlog_rcv() could still + * use it. + */ + llc_sap_hold(sap); if (!sock_flag(sk, SOCK_ZAPPED)) llc_sap_remove_socket(llc->sap, sk); release_sock(sk); + llc_sap_put(sap); if (llc->dev) dev_put(llc->dev); sock_put(sk); -- cgit v1.2.3 From 7b80c16871489c7aa4b283ebc12e226b27c4c73b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 19 Apr 2018 21:54:34 -0700 Subject: llc: fix NULL pointer deref for SOCK_ZAPPED [ Upstream commit 3a04ce7130a7e5dad4e78d45d50313747f8c830f ] For SOCK_ZAPPED socket, we don't need to care about llc->sap, so we should just skip these refcount functions in this case. Fixes: f7e43672683b ("llc: hold llc_sap before release_sock()") Reported-by: kernel test robot Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/af_llc.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index dd4fcf0acea5..d6bc5f2a1175 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -187,7 +187,6 @@ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; - struct llc_sap *sap; if (unlikely(sk == NULL)) goto out; @@ -198,15 +197,19 @@ static int llc_ui_release(struct socket *sock) llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); - sap = llc->sap; - /* Hold this for release_sock(), so that llc_backlog_rcv() could still - * use it. - */ - llc_sap_hold(sap); - if (!sock_flag(sk, SOCK_ZAPPED)) + if (!sock_flag(sk, SOCK_ZAPPED)) { + struct llc_sap *sap = llc->sap; + + /* Hold this for release_sock(), so that llc_backlog_rcv() + * could still use it. + */ + llc_sap_hold(sap); llc_sap_remove_socket(llc->sap, sk); - release_sock(sk); - llc_sap_put(sap); + release_sock(sk); + llc_sap_put(sap); + } else { + release_sock(sk); + } if (llc->dev) dev_put(llc->dev); sock_put(sk); -- cgit v1.2.3 From b031b84f3c39cfd0b69e7d40eebc20c3a6bd9193 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 19 Apr 2018 22:49:09 +0300 Subject: net: ethernet: ti: cpsw: fix tx vlan priority mapping [ Upstream commit 5e391dc5a8d801a2410d0032ad4a428d1d61800c ] The CPDMA_TX_PRIORITY_MAP in real is vlan pcp field priority mapping register and basically replaces vlan pcp field for tagged packets. So, set it to be 1:1 mapping. Otherwise, it will cause unexpected change of egress vlan tagged packets, like prio 2 -> prio 5. Fixes: e05107e6b747 ("net: ethernet: ti: cpsw: add multi queue support") Reviewed-by: Grygorii Strashko Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 552de9c490c6..de336897a28a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -124,7 +124,7 @@ do { \ #define RX_PRIORITY_MAPPING 0x76543210 #define TX_PRIORITY_MAPPING 0x33221100 -#define CPDMA_TX_PRIORITY_MAP 0x01234567 +#define CPDMA_TX_PRIORITY_MAP 0x76543210 #define CPSW_VLAN_AWARE BIT(1) #define CPSW_ALE_VLAN_AWARE 1 -- cgit v1.2.3 From 581cb195c59f0da31878c10e40893ec2d1d7b122 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Thu, 12 Apr 2018 10:46:55 +0200 Subject: net: fix deadlock while clearing neighbor proxy table [ Upstream commit 53b76cdf7e8fecec1d09e38aad2f8579882591a8 ] When coming from ndisc_netdev_event() in net/ipv6/ndisc.c, neigh_ifdown() is called with &nd_tbl, locking this while clearing the proxy neighbor entries when eg. deleting an interface. Calling the table's pndisc_destructor() with the lock still held, however, can cause a deadlock: When a multicast listener is available an IGMP packet of type ICMPV6_MGM_REDUCTION may be sent out. When reaching ip6_finish_output2(), if no neighbor entry for the target address is found, __neigh_create() is called with &nd_tbl, which it'll want to lock. Move the elements into their own list, then unlock the table and perform the destruction. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199289 Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().") Signed-off-by: Wolfgang Bumiller Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ce5bbf8b1f67..128c811dcb1a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -54,7 +54,8 @@ do { \ static void neigh_timer_handler(unsigned long arg); static void __neigh_notify(struct neighbour *n, int type, int flags); static void neigh_update_notify(struct neighbour *neigh); -static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); +static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, + struct net_device *dev); #ifdef CONFIG_PROC_FS static const struct file_operations neigh_stat_seq_fops; @@ -254,8 +255,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev); - pneigh_ifdown(tbl, dev); - write_unlock_bh(&tbl->lock); + pneigh_ifdown_and_unlock(tbl, dev); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); @@ -645,9 +645,10 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, return -ENOENT; } -static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, + struct net_device *dev) { - struct pneigh_entry *n, **np; + struct pneigh_entry *n, **np, *freelist = NULL; u32 h; for (h = 0; h <= PNEIGH_HASHMASK; h++) { @@ -655,16 +656,23 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) while ((n = *np) != NULL) { if (!dev || n->dev == dev) { *np = n->next; - if (tbl->pdestructor) - tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); - kfree(n); + n->next = freelist; + freelist = n; continue; } np = &n->next; } } + write_unlock_bh(&tbl->lock); + while ((n = freelist)) { + freelist = n->next; + n->next = NULL; + if (tbl->pdestructor) + tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); + kfree(n); + } return -ENOENT; } -- cgit v1.2.3 From 228ce13c3064fcc8b30d21f95a9c246a23196daa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Apr 2018 14:36:28 -0700 Subject: tcp: md5: reject TCP_MD5SIG or TCP_MD5SIG_EXT on established sockets [ Upstream commit 7212303268918b9a203aebeacfdbd83b5e87b20d ] syzbot/KMSAN reported an uninit-value in tcp_parse_options() [1] I believe this was caused by a TCP_MD5SIG being set on live flow. This is highly unexpected, since TCP option space is limited. For instance, presence of TCP MD5 option automatically disables TCP TimeStamp option at SYN/SYNACK time, which we can not do once flow has been established. Really, adding/deleting an MD5 key only makes sense on sockets in CLOSE or LISTEN state. [1] BUG: KMSAN: uninit-value in tcp_parse_options+0xd74/0x1a30 net/ipv4/tcp_input.c:3720 CPU: 1 PID: 6177 Comm: syzkaller192004 Not tainted 4.16.0+ #83 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 tcp_parse_options+0xd74/0x1a30 net/ipv4/tcp_input.c:3720 tcp_fast_parse_options net/ipv4/tcp_input.c:3858 [inline] tcp_validate_incoming+0x4f1/0x2790 net/ipv4/tcp_input.c:5184 tcp_rcv_established+0xf60/0x2bb0 net/ipv4/tcp_input.c:5453 tcp_v4_do_rcv+0x6cd/0xd90 net/ipv4/tcp_ipv4.c:1469 sk_backlog_rcv include/net/sock.h:908 [inline] __release_sock+0x2d6/0x680 net/core/sock.c:2271 release_sock+0x97/0x2a0 net/core/sock.c:2786 tcp_sendmsg+0xd6/0x100 net/ipv4/tcp.c:1464 inet_sendmsg+0x48d/0x740 net/ipv4/af_inet.c:764 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] SYSC_sendto+0x6c3/0x7e0 net/socket.c:1747 SyS_sendto+0x8a/0xb0 net/socket.c:1715 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x448fe9 RSP: 002b:00007fd472c64d38 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000006e5a30 RCX: 0000000000448fe9 RDX: 000000000000029f RSI: 0000000020a88f88 RDI: 0000000000000004 RBP: 00000000006e5a34 R08: 0000000020e68000 R09: 0000000000000010 R10: 00000000200007fd R11: 0000000000000216 R12: 0000000000000000 R13: 00007fff074899ef R14: 00007fd472c659c0 R15: 0000000000000009 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] tcp_send_ack+0x18c/0x910 net/ipv4/tcp_output.c:3624 __tcp_ack_snd_check net/ipv4/tcp_input.c:5040 [inline] tcp_ack_snd_check net/ipv4/tcp_input.c:5053 [inline] tcp_rcv_established+0x2103/0x2bb0 net/ipv4/tcp_input.c:5469 tcp_v4_do_rcv+0x6cd/0xd90 net/ipv4/tcp_ipv4.c:1469 sk_backlog_rcv include/net/sock.h:908 [inline] __release_sock+0x2d6/0x680 net/core/sock.c:2271 release_sock+0x97/0x2a0 net/core/sock.c:2786 tcp_sendmsg+0xd6/0x100 net/ipv4/tcp.c:1464 inet_sendmsg+0x48d/0x740 net/ipv4/af_inet.c:764 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] SYSC_sendto+0x6c3/0x7e0 net/socket.c:1747 SyS_sendto+0x8a/0xb0 net/socket.c:1715 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: cfb6eeb4c860 ("[TCP]: MD5 Signature Option (RFC2385) support.") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d1a767db1bb..0fc5dad02fe8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2662,8 +2662,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: - /* Read the IP->Key mappings from userspace */ - err = tp->af_specific->md5_parse(sk, optval, optlen); + if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) + err = tp->af_specific->md5_parse(sk, optval, optlen); + else + err = -EINVAL; break; #endif case TCP_USER_TIMEOUT: -- cgit v1.2.3 From 55ca7b1d43a9d9958ce8544dee938e6c62c263ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Apr 2018 17:52:04 -0700 Subject: net: af_packet: fix race in PACKET_{R|T}X_RING [ Upstream commit 5171b37d959641bbc619781caf62e61f7b940871 ] In order to remove the race caught by syzbot [1], we need to lock the socket before using po->tp_version as this could change under us otherwise. This means lock_sock() and release_sock() must be done by packet_set_ring() callers. [1] : BUG: KMSAN: uninit-value in packet_set_ring+0x1254/0x3870 net/packet/af_packet.c:4249 CPU: 0 PID: 20195 Comm: syzkaller707632 Not tainted 4.16.0+ #83 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 packet_set_ring+0x1254/0x3870 net/packet/af_packet.c:4249 packet_setsockopt+0x12c6/0x5a90 net/packet/af_packet.c:3662 SYSC_setsockopt+0x4b8/0x570 net/socket.c:1849 SyS_setsockopt+0x76/0xa0 net/socket.c:1828 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x449099 RSP: 002b:00007f42b5307ce8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 000000000070003c RCX: 0000000000449099 RDX: 0000000000000005 RSI: 0000000000000107 RDI: 0000000000000003 RBP: 0000000000700038 R08: 000000000000001c R09: 0000000000000000 R10: 00000000200000c0 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000080eecf R14: 00007f42b53089c0 R15: 0000000000000001 Local variable description: ----req_u@packet_setsockopt Variable was created at: packet_setsockopt+0x13f/0x5a90 net/packet/af_packet.c:3612 SYSC_setsockopt+0x4b8/0x570 net/socket.c:1849 Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 759e005623aa..a027f8c00944 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3024,6 +3024,7 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); + lock_sock(sk); if (po->rx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); @@ -3033,6 +3034,7 @@ static int packet_release(struct socket *sock) memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); } + release_sock(sk); f = fanout_release(sk); @@ -3661,6 +3663,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv union tpacket_req_u req_u; int len; + lock_sock(sk); switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: @@ -3671,12 +3674,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv len = sizeof(req_u.req3); break; } - if (optlen < len) - return -EINVAL; - if (copy_from_user(&req_u.req, optval, len)) - return -EFAULT; - return packet_set_ring(sk, &req_u, 0, - optname == PACKET_TX_RING); + if (optlen < len) { + ret = -EINVAL; + } else { + if (copy_from_user(&req_u.req, optval, len)) + ret = -EFAULT; + else + ret = packet_set_ring(sk, &req_u, 0, + optname == PACKET_TX_RING); + } + release_sock(sk); + return ret; } case PACKET_COPY_THRESH: { @@ -4247,7 +4255,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; - lock_sock(sk); /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { net_warn_ratelimited("Tx-ring is not supported.\n"); @@ -4383,7 +4390,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: - release_sock(sk); return err; } -- cgit v1.2.3 From e2956fc8356549c7072a6b420b500bb5fe32c4fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Apr 2018 18:29:23 -0700 Subject: ipv6: add RTA_TABLE and RTA_PREFSRC to rtm_ipv6_policy [ Upstream commit aa8f8778493c85fff480cdf8b349b1e1dcb5f243 ] KMSAN reported use of uninit-value that I tracked to lack of proper size check on RTA_TABLE attribute. I also believe RTA_PREFSRC lacks a similar check. Fixes: 86872cb57925 ("[IPv6] route: FIB6 configuration using struct fib6_config") Fixes: c3968a857a6b ("ipv6: RTA_PREFSRC support for ipv6 route source address selection") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d6a4b2c73a7c..f6ac472acd0f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2811,6 +2811,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, + [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, @@ -2820,6 +2821,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, -- cgit v1.2.3 From 2f7be1262873bb200ab18fe7627231c9da9f22b2 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Wed, 11 Apr 2018 15:05:16 -0700 Subject: strparser: Fix incorrect strp->need_bytes value. [ Upstream commit 9d0c75bf6e03d9bf80c55b0f677dc9b982958fd5 ] strp_data_ready resets strp->need_bytes to 0 if strp_peek_len indicates that the remainder of the message has been received. However, do_strp_work does not reset strp->need_bytes to 0. If do_strp_work completes a partial message, the value of strp->need_bytes will continue to reflect the needed bytes of the previous message, causing future invocations of strp_data_ready to return early if strp->need_bytes is less than strp_peek_len. Resetting strp->need_bytes to 0 in __strp_recv on handing a full message to the upper layer solves this problem. __strp_recv also calculates strp->need_bytes using stm->accum_len before stm->accum_len has been incremented by cand_len. This can cause strp->need_bytes to be equal to the full length of the message instead of the full length minus the accumulated length. This, in turn, causes strp_data_ready to return early, even when there is sufficient data to complete the partial message. Incrementing stm->accum_len before using it to calculate strp->need_bytes solves this problem. Found while testing net/tls_sw recv path. Fixes: 43a0c6751a322847 ("strparser: Stream parser for messages") Signed-off-by: Doron Roberts-Kedes Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/strparser/strparser.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 6cbc935ddd96..bbee334ab1b0 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -285,9 +285,9 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, strp_start_rx_timer(strp); } + rxm->accum_len += cand_len; strp->rx_need_bytes = rxm->strp.full_len - rxm->accum_len; - rxm->accum_len += cand_len; rxm->early_eaten = cand_len; STRP_STATS_ADD(strp->stats.rx_bytes, cand_len); desc->count = 0; /* Stop reading socket */ @@ -310,6 +310,7 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* Hurray, we have a new message! */ del_timer(&strp->rx_msg_timer); strp->rx_skb_head = NULL; + strp->rx_need_bytes = 0; STRP_STATS_INCR(strp->stats.rx_msgs); /* Give skb to upper layer */ @@ -374,9 +375,7 @@ void strp_data_ready(struct strparser *strp) return; if (strp->rx_need_bytes) { - if (strp_peek_len(strp) >= strp->rx_need_bytes) - strp->rx_need_bytes = 0; - else + if (strp_peek_len(strp) < strp->rx_need_bytes) return; } -- cgit v1.2.3 From 70f2351eca535c590dad1657a3761650c8515acb Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 18 Apr 2018 22:54:59 -0400 Subject: scsi: mptsas: Disable WRITE SAME commit 94e5395d2403c8bc2504a7cbe4c4caaacb7b8b84 upstream. First generation MPT Fusion controllers can not translate WRITE SAME when the attached device is a SATA drive. Disable WRITE SAME support. Reported-by: Nikola Ciprich Cc: Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/message/fusion/mptsas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 7ee1667acde4..00dff9b5a6c4 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1994,6 +1994,7 @@ static struct scsi_host_template mptsas_driver_template = { .cmd_per_lun = 7, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = mptscsih_host_attrs, + .no_write_same = 1, }; static int mptsas_get_linkerrors(struct sas_phy *phy) -- cgit v1.2.3 From 4bd744b86114a406efb563c8717e5bea7672d427 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 Apr 2018 12:51:31 +0300 Subject: cdrom: information leak in cdrom_ioctl_media_changed() commit 9de4ee40547fd315d4a0ed1dd15a2fa3559ad707 upstream. This cast is wrong. "cdi->capacity" is an int and "arg" is an unsigned long. The way the check is written now, if one of the high 32 bits is set then we could read outside the info->slots[] array. This bug is pretty old and it predates git. Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 5d475b3a0b2e..128ebd439221 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2368,7 +2368,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || arg == CDSL_CURRENT) return media_changed(cdi, 1); - if ((unsigned int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; info = kmalloc(sizeof(*info), GFP_KERNEL); -- cgit v1.2.3 From 04f87299884a26f64b643d810e68d56685d6ffc1 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 11 Apr 2018 11:21:17 +0200 Subject: s390/cio: update chpid descriptor after resource accessibility event commit af2e460ade0b0180d0f3812ca4f4f59cc9597f3e upstream. Channel path descriptors have been seen as something stable (as long as the chpid is configured). Recent tests have shown that the descriptor can also be altered when the link state of a channel path changes. Thus it is necessary to update the descriptor during handling of resource accessibility events. Cc: Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/chsc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 11674698b36d..67903c93328b 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -451,6 +451,7 @@ static void chsc_process_sei_link_incident(struct chsc_sei_nt0_area *sei_area) static void chsc_process_sei_res_acc(struct chsc_sei_nt0_area *sei_area) { + struct channel_path *chp; struct chp_link link; struct chp_id chpid; int status; @@ -463,10 +464,17 @@ static void chsc_process_sei_res_acc(struct chsc_sei_nt0_area *sei_area) chpid.id = sei_area->rsid; /* allocate a new channel path structure, if needed */ status = chp_get_status(chpid); - if (status < 0) - chp_new(chpid); - else if (!status) + if (!status) return; + + if (status < 0) { + chp_new(chpid); + } else { + chp = chpid_to_chp(chpid); + mutex_lock(&chp->lock); + chp_update_desc(chp); + mutex_unlock(&chp->lock); + } memset(&link, 0, sizeof(struct chp_link)); link.chpid = chpid; if ((sei_area->vf & 0xc0) != 0) { -- cgit v1.2.3 From a714a5f3afdd8de38985a88fc6d29f81873369d1 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 12 Apr 2018 13:38:22 +0200 Subject: s390/dasd: fix IO error for newly defined devices commit 5d27a2bf6e14f5c7d1033ad1e993fcd0eba43e83 upstream. When a new CKD storage volume is defined at the storage server, Linux may be relying on outdated information about that volume, which leads to the following errors: 1. Command Reject Errors for minidisk on z/VM: dasd-eckd.b3193d: 0.0.XXXX: An error occurred in the DASD device driver, reason=09 dasd(eckd): I/O status report for device 0.0.XXXX: dasd(eckd): in req: 00000000XXXXXXXX CC:00 FC:04 AC:00 SC:17 DS:02 CS:00 RC:0 dasd(eckd): device 0.0.2046: Failing CCW: 00000000XXXXXXXX dasd(eckd): Sense(hex) 0- 7: 80 00 00 00 00 00 00 00 dasd(eckd): Sense(hex) 8-15: 00 00 00 00 00 00 00 00 dasd(eckd): Sense(hex) 16-23: 00 00 00 00 e1 00 0f 00 dasd(eckd): Sense(hex) 24-31: 00 00 40 e2 00 00 00 00 dasd(eckd): 24 Byte: 0 MSG 0, no MSGb to SYSOP 2. Equipment Check errors on LPAR or for dedicated devices on z/VM: dasd(eckd): I/O status report for device 0.0.XXXX: dasd(eckd): in req: 00000000XXXXXXXX CC:00 FC:04 AC:00 SC:17 DS:0E CS:40 fcxs:01 schxs:00 RC:0 dasd(eckd): device 0.0.9713: Failing TCW: 00000000XXXXXXXX dasd(eckd): Sense(hex) 0- 7: 10 00 00 00 13 58 4d 0f dasd(eckd): Sense(hex) 8-15: 67 00 00 00 00 00 00 04 dasd(eckd): Sense(hex) 16-23: e5 18 05 33 97 01 0f 0f dasd(eckd): Sense(hex) 24-31: 00 00 40 e2 00 04 58 0d dasd(eckd): 24 Byte: 0 MSG f, no MSGb to SYSOP Fix this problem by using the up-to-date information provided during online processing via the device specific SNEQ to detect the case of outdated LCU data. If there is a difference, perform a re-read of that data. Cc: stable@vger.kernel.org Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd_alias.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 1e560188dd13..e453d2a7d7f9 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -591,13 +591,22 @@ static int _schedule_lcu_update(struct alias_lcu *lcu, int dasd_alias_add_device(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; - struct alias_lcu *lcu; + __u8 uaddr = private->uid.real_unit_addr; + struct alias_lcu *lcu = private->lcu; unsigned long flags; int rc; - lcu = private->lcu; rc = 0; spin_lock_irqsave(&lcu->lock, flags); + /* + * Check if device and lcu type differ. If so, the uac data may be + * outdated and needs to be updated. + */ + if (private->uid.type != lcu->uac->unit[uaddr].ua_type) { + lcu->flags |= UPDATE_PENDING; + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "uid type mismatch - trigger rescan"); + } if (!(lcu->flags & UPDATE_PENDING)) { rc = _add_device_to_lcu(lcu, device, device); if (rc) -- cgit v1.2.3 From bed2d7627a9d95ecf4662fbcd7dd30d762997b1a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Apr 2018 12:22:24 +0200 Subject: s390/uprobes: implement arch_uretprobe_is_alive() commit 783c3b53b9506db3e05daacfe34e0287eebb09d8 upstream. Implement s390 specific arch_uretprobe_is_alive() to avoid SIGSEGVs observed with uretprobes in combination with setjmp/longjmp. See commit 2dea1d9c38e4 ("powerpc/uprobes: Implement arch_uretprobe_is_alive()") for more details. With this implemented all test cases referenced in the above commit pass. Reported-by: Ziqian SUN Cc: # v4.3+ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/uprobes.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 66956c09d5bf..3d04dfdabc9f 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -147,6 +147,15 @@ unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, return orig; } +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) +{ + if (ctx == RP_CHECK_CHAIN_CALL) + return user_stack_pointer(regs) <= ret->stack; + else + return user_stack_pointer(regs) < ret->stack; +} + /* Instruction Emulation */ static void adjust_psw_addr(psw_t *psw, unsigned long len) -- cgit v1.2.3 From 4959a913ef6b274cedbc7fc06d5606aafd32ef80 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Apr 2018 18:23:50 +0200 Subject: ACPI / video: Only default only_lcd to true on Win8-ready _desktops_ commit 53fa1f6e8a5958da698a31edf366ffe90596b490 upstream. Commit 5928c281524f (ACPI / video: Default lcd_only to true on Win8-ready and newer machines) made only_lcd default to true on all machines where acpi_osi_is_win8() returns true, including laptops. The purpose of this is to avoid the bogus / non-working acpi backlight interface which many newer BIOS-es define on desktop machines. But this is causing a regression on some laptops, specifically on the Dell XPS 13 2013 model, which does not have the LCD flag set for its fully functional ACPI backlight interface. Rather then DMI quirking our way out of this, this commits changes the logic for setting only_lcd to true, to only do this on machines with a desktop (or server) dmi chassis-type. Note that we cannot simply only check the chassis-type and not register the backlight interface based on that as there are some laptops and tablets which have their chassis-type set to "3" aka desktop. Hopefully the combination of checking the LCD flag, but only on devices with a desktop(ish) chassis-type will avoid the needs for DMI quirks for this, or at least limit the amount of DMI quirks which we need to a minimum. Fixes: 5928c281524f (ACPI / video: Default lcd_only to true on Win8-ready and newer machines) Reported-and-tested-by: James Hogan Signed-off-by: Hans de Goede Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_video.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 94e04c9de12b..667dc5c86fef 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -2069,6 +2069,25 @@ static int __init intel_opregion_present(void) return opregion; } +static bool dmi_is_desktop(void) +{ + const char *chassis_type; + + chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE); + if (!chassis_type) + return false; + + if (!strcmp(chassis_type, "3") || /* 3: Desktop */ + !strcmp(chassis_type, "4") || /* 4: Low Profile Desktop */ + !strcmp(chassis_type, "5") || /* 5: Pizza Box */ + !strcmp(chassis_type, "6") || /* 6: Mini Tower */ + !strcmp(chassis_type, "7") || /* 7: Tower */ + !strcmp(chassis_type, "11")) /* 11: Main Server Chassis */ + return true; + + return false; +} + int acpi_video_register(void) { int ret = 0; @@ -2089,8 +2108,12 @@ int acpi_video_register(void) * win8 ready (where we also prefer the native backlight driver, so * normally the acpi_video code should not register there anyways). */ - if (only_lcd == -1) - only_lcd = acpi_osi_is_win8(); + if (only_lcd == -1) { + if (dmi_is_desktop() && acpi_osi_is_win8()) + only_lcd = true; + else + only_lcd = false; + } dmi_check_system(video_dmi_table); -- cgit v1.2.3 From ba3cd5796223e0971d30e910e0d5b953576f8629 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 29 Apr 2018 11:32:03 +0200 Subject: Linux 4.9.97 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 50ae573e8951..ee3e943c3bd9 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 96 +SUBLEVEL = 97 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3