From 36a2e5010fe877bc2f049682d3b9ede4357a3927 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Wed, 14 Aug 2013 13:28:28 +0100 Subject: ARM: 7815/1: kexec: offline non panic CPUs on Kdump panic commit 4f9b4fb7a2091eec339413a460b1665758401828 upstream. In case of normal kexec kernel load, all cpu's are offlined before calling machine_kexec().But in case crash panic cpus are relaxed in machine_crash_nonpanic_core() SMP function but not offlined. When crash kernel is loaded with kexec and on panic trigger machine_kexec() checks for number of cpus online. If more than one cpu is online machine_kexec() fails to load with below error kexec: error: multiple CPUs still online In machine_crash_nonpanic_core() SMP function, offline CPU before cpu_relax Signed-off-by: Vijaya Kumar K Acked-by: Stephen Warren Acked-by: Will Deacon Signed-off-by: Russell King Cc: l00221744 Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/machine_kexec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 4fb074c446bf..163b160c69e9 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -73,6 +73,7 @@ void machine_crash_nonpanic_core(void *unused) crash_save_cpu(®s, smp_processor_id()); flush_cache_all(); + set_cpu_online(smp_processor_id(), false); atomic_dec(&waiting_for_crash_ipi); while (1) cpu_relax(); -- cgit v1.2.3 From 77dbd11a3788e94c547c9f1329cb7388f825cacb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Oct 2013 18:25:12 -0300 Subject: perf scripting perl: Fix build error on Fedora 12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b16ff89676d9902dc39976aee3cb0314ee37d93 upstream. Cast __u64 to u64 to silence this warning on older distros, such as Fedora 12: CC /tmp/build/perf/util/scripting-engines/trace-event-perl.o cc1: warnings being treated as errors util/scripting-engines/trace-event-perl.c: In function ‘perl_process_tracepoint’: util/scripting-engines/trace-event-perl.c:285: error: format ‘%lu’ expects type ‘long unsigned int’, but argument 2 has type ‘__u64’ make[1]: *** [/tmp/build/perf/util/scripting-engines/trace-event-perl.o] Error 1 make: *** [install] Error 2 make: Leaving directory `/home/acme/git/linux/tools/perf' [acme@fedora12 linux]$ Reported-by: Waiman Long Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: Waiman Long Link: http://lkml.kernel.org/n/tip-nlxofdqcdjfm0w9o6bgq4kqv@git.kernel.org Link: http://lkml.kernel.org/r/1381265120-58532-1-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo Cc: Xie XiuQi Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/scripting-engines/trace-event-perl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index eacec859f299..b4741b027a8f 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -282,7 +282,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %" PRIu64, evsel->attr.config); + die("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); pid = raw_field_value(event, "common_pid", data); -- cgit v1.2.3 From 6826621350ecf4ec5f3508aa9e853e60e766e221 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 15 Jan 2014 15:57:29 +0100 Subject: perf/x86/amd/ibs: Fix waking up from S3 for AMD family 10h commit bee09ed91cacdbffdbcd3b05de8409c77ec9fcd6 upstream. On AMD family 10h we see following error messages while waking up from S3 for all non-boot CPUs leading to a failed IBS initialization: Enabling non-boot CPUs ... smpboot: Booting Node 0 Processor 1 APIC 0x1 [Firmware Bug]: cpu 1, try to use APIC500 (LVT offset 0) for vector 0x400, but the register is already in use for vector 0xf9 on another cpu perf: IBS APIC setup failed on cpu #1 process: Switch to broadcast mode on CPU1 CPU1 is up ... ACPI: Waking up from system sleep state S3 Reason for this is that during suspend the LVT offset for the IBS vector gets lost and needs to be reinialized while resuming. The offset is read from the IBSCTL msr. On family 10h the offset needs to be 1 as offset 0 is used for the MCE threshold interrupt, but firmware assings it for IBS to 0 too. The kernel needs to reprogram the vector. The msr is a readonly node msr, but a new value can be written via pci config space access. The reinitialization is implemented for family 10h in setup_ibs_ctl() which is forced during IBS setup. This patch fixes IBS setup after waking up from S3 by adding resume/supend hooks for the boot cpu which does the offset reinitialization. Marking it as stable to let distros pick up this fix. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1389797849-5565-1-git-send-email-rric.net@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_amd_ibs.c | 53 +++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 5f0581e713c2..b46601ada813 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -816,6 +817,18 @@ out: return ret; } +static void ibs_eilvt_setup(void) +{ + /* + * Force LVT offset assignment for family 10h: The offsets are + * not assigned by the BIOS for this family, so the OS is + * responsible for doing it. If the OS assignment fails, fall + * back to BIOS settings and try to setup this. + */ + if (boot_cpu_data.x86 == 0x10) + force_ibs_eilvt_setup(); +} + static inline int get_ibs_lvt_offset(void) { u64 val; @@ -851,6 +864,36 @@ static void clear_APIC_ibs(void *dummy) setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); } +#ifdef CONFIG_PM + +static int perf_ibs_suspend(void) +{ + clear_APIC_ibs(NULL); + return 0; +} + +static void perf_ibs_resume(void) +{ + ibs_eilvt_setup(); + setup_APIC_ibs(NULL); +} + +static struct syscore_ops perf_ibs_syscore_ops = { + .resume = perf_ibs_resume, + .suspend = perf_ibs_suspend, +}; + +static void perf_ibs_pm_init(void) +{ + register_syscore_ops(&perf_ibs_syscore_ops); +} + +#else + +static inline void perf_ibs_pm_init(void) { } + +#endif + static int __cpuinit perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -877,18 +920,12 @@ static __init int amd_ibs_init(void) if (!caps) return -ENODEV; /* ibs not supported by the cpu */ - /* - * Force LVT offset assignment for family 10h: The offsets are - * not assigned by the BIOS for this family, so the OS is - * responsible for doing it. If the OS assignment fails, fall - * back to BIOS settings and try to setup this. - */ - if (boot_cpu_data.x86 == 0x10) - force_ibs_eilvt_setup(); + ibs_eilvt_setup(); if (!ibs_eilvt_valid()) goto out; + perf_ibs_pm_init(); get_online_cpus(); ibs_caps = caps; /* make ibs_caps visible to other cpus: */ -- cgit v1.2.3 From 350f737ed039d2b36329ddcbcdf78e12c6d8c758 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 6 Jan 2014 17:16:01 -0500 Subject: GFS2: Increase i_writecount during gfs2_setattr_chown commit 62e96cf81988101fe9e086b2877307b6adda5197 upstream. This patch calls get_write_access in function gfs2_setattr_chown, which merely increases inode->i_writecount for the duration of the function. That will ensure that any file closes won't delete the inode's multi-block reservation while the function is running. It also ensures that a multi-block reservation exists when needed for quota change operations during the chown. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/inode.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 62b484e4a9e4..bc5dac400125 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1536,10 +1536,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - error = gfs2_quota_lock(ip, nuid, ngid); + error = get_write_access(inode); if (error) return error; + error = gfs2_rs_alloc(ip); + if (error) + goto out; + + error = gfs2_rindex_update(sdp); + if (error) + goto out; + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out; + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { error = gfs2_quota_check(ip, nuid, ngid); @@ -1566,6 +1578,8 @@ out_end_trans: gfs2_trans_end(sdp); out_gunlock_q: gfs2_quota_unlock(ip); +out: + put_write_access(inode); return error; } -- cgit v1.2.3 From deb19aafd901b60f7192b1e16073ec6673d5c98b Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 18 Dec 2013 17:08:54 -0800 Subject: mm/memory-failure.c: recheck PageHuge() after hugetlb page migrate successfully commit a49ecbcd7b0d5a1cda7d60e03df402dd0ef76ac8 upstream. After a successful hugetlb page migration by soft offline, the source page will either be freed into hugepage_freelists or buddy(over-commit page). If page is in buddy, page_hstate(page) will be NULL. It will hit a NULL pointer dereference in dequeue_hwpoisoned_huge_page(). BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [] dequeue_hwpoisoned_huge_page+0x131/0x1d0 PGD c23762067 PUD c24be2067 PMD 0 Oops: 0000 [#1] SMP So check PageHuge(page) after call migrate_pages() successfully. [wujg: backport to 3.10: - adjust context] Signed-off-by: Jianguo Wu Tested-by: Naoya Horiguchi Reviewed-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6a7f9cab4ddb..7e3601ce51c6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1499,10 +1499,16 @@ static int soft_offline_huge_page(struct page *page, int flags) pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); } else { - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_trans_order(hpage), - &num_poisoned_pages); + /* overcommit hugetlb page will be freed to buddy */ + if (PageHuge(page)) { + set_page_hwpoison_huge_page(hpage); + dequeue_hwpoisoned_huge_page(hpage); + atomic_long_add(1 << compound_order(hpage), + &num_poisoned_pages); + } else { + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); + } } /* keep elevated page count for bad page */ return ret; -- cgit v1.2.3 From d687814867c12165eadb676a93af6a1e201ed494 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 9 Dec 2013 16:06:41 -0700 Subject: staging: comedi: addi_apci_1032: fix subdevice type/flags bug commit 90daf69a7a3f1d1a41018c799968a0bb896d65e0 upstream. The SDF_CMD_READ should be one of the s->subdev_flags not part of the s->type. Signed-off-by: H Hartley Sweeten Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/addi_apci_1032.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/addi_apci_1032.c b/drivers/staging/comedi/drivers/addi_apci_1032.c index 3d4878facc26..d919a9863e01 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1032.c +++ b/drivers/staging/comedi/drivers/addi_apci_1032.c @@ -332,8 +332,8 @@ static int apci1032_auto_attach(struct comedi_device *dev, s = &dev->subdevices[1]; if (dev->irq) { dev->read_subdev = s; - s->type = COMEDI_SUBD_DI | SDF_CMD_READ; - s->subdev_flags = SDF_READABLE; + s->type = COMEDI_SUBD_DI; + s->subdev_flags = SDF_READABLE | SDF_CMD_READ; s->n_chan = 1; s->maxdata = 1; s->range_table = &range_digital; -- cgit v1.2.3 From e4832e92ecd957595937e15b0e4b98c8b2b68d1a Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Thu, 5 Dec 2013 13:43:28 -0700 Subject: staging: comedi: adl_pci9111: fix incorrect irq passed to request_irq() commit 48108fe3daa0d142f9b97178fdb23704ea3a407b upstream. The dev->irq passed to request_irq() will always be 0 when the auto_attach function is called. The pcidev->irq should be used instead to get the correct irq number. Signed-off-by: H Hartley Sweeten Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/adl_pci9111.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/adl_pci9111.c b/drivers/staging/comedi/drivers/adl_pci9111.c index 6247fdcedcbf..71043a1c8500 100644 --- a/drivers/staging/comedi/drivers/adl_pci9111.c +++ b/drivers/staging/comedi/drivers/adl_pci9111.c @@ -873,7 +873,7 @@ static int pci9111_auto_attach(struct comedi_device *dev, pci9111_reset(dev); if (pcidev->irq > 0) { - ret = request_irq(dev->irq, pci9111_interrupt, + ret = request_irq(pcidev->irq, pci9111_interrupt, IRQF_SHARED, dev->board_name, dev); if (ret) return ret; -- cgit v1.2.3 From 3717eee3c4d1d44ded3bd84bf5f6437ec2aa5496 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 Nov 2013 16:31:29 -0800 Subject: vfs: In d_path don't call d_dname on a mount point commit f48cfddc6729ef133933062320039808bafa6f45 upstream. Aditya Kali (adityakali@google.com) wrote: > Commit bf056bfa80596a5d14b26b17276a56a0dcb080e5: > "proc: Fix the namespace inode permission checks." converted > the namespace files into symlinks. The same commit changed > the way namespace bind mounts appear in /proc/mounts: > $ mount --bind /proc/self/ns/ipc /mnt/ipc > Originally: > $ cat /proc/mounts | grep ipc > proc /mnt/ipc proc rw,nosuid,nodev,noexec 0 0 > > After commit bf056bfa80596a5d14b26b17276a56a0dcb080e5: > $ cat /proc/mounts | grep ipc > proc ipc:[4026531839] proc rw,nosuid,nodev,noexec 0 0 > > This breaks userspace which expects the 2nd field in > /proc/mounts to be a valid path. The symlink /proc//ns/{ipc,mnt,net,pid,user,uts} point to dentries allocated with d_alloc_pseudo that we can mount, and that have interesting names printed out with d_dname. When these files are bind mounted /proc/mounts is not currently displaying the mount point correctly because d_dname is called instead of just displaying the path where the file is mounted. Solve this by adding an explicit check to distinguish mounted pseudo inodes and unmounted pseudo inodes. Unmounted pseudo inodes always use mount of their filesstem as the mnt_root in their path making these two cases easy to distinguish. Acked-by: Serge Hallyn Reported-by: Aditya Kali Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index da89cdfb21ab..9a59653d3449 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2686,8 +2686,13 @@ char *d_path(const struct path *path, char *buf, int buflen) * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. */ - if (path->dentry->d_op && path->dentry->d_op->d_dname) + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); -- cgit v1.2.3 From 02cb5b6b8b61c9e2e82cef752743168454cb227b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 14 Jan 2014 15:59:55 +0100 Subject: hwmon: (coretemp) Fix truncated name of alarm attributes commit 3f9aec7610b39521c7c69d754de7265f6994c194 upstream. When the core number exceeds 9, the size of the buffer storing the alarm attribute name is insufficient and the attribute name is truncated. This causes libsensors to skip these attributes as the truncated name is not recognized. Reported-by: Andreas Hollmann Signed-off-by: Jean Delvare Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/coretemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 658ce3a8717f..0cf25101b3c3 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -52,7 +52,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ #define NUM_REAL_CORES 32 /* Number of Real cores per cpu */ -#define CORETEMP_NAME_LENGTH 17 /* String Length of attrs */ +#define CORETEMP_NAME_LENGTH 19 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO) -- cgit v1.2.3 From cc46cb337cf614b652b0068e6e388102bf8a9b54 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 14 Dec 2013 04:21:26 +0800 Subject: writeback: Fix data corruption on NFS commit f9b0e058cbd04ada76b13afffa7e1df830543c24 upstream. Commit 4f8ad655dbc8 "writeback: Refactor writeback_single_inode()" added a condition to skip clean inode. However this is wrong in WB_SYNC_ALL mode because there we also want to wait for outstanding writeback on possibly clean inode. This was causing occasional data corruption issues on NFS because it uses sync_inode() to make sure all outstanding writes are flushed to the server before truncating the inode and with sync_inode() returning prematurely file was sometimes extended back by an outstanding write after it was truncated. So modify the test to also check for pages under writeback in WB_SYNC_ALL mode. Fixes: 4f8ad655dbc82cf05d2edc11e66b78a42d38bf93 Reported-and-tested-by: Dan Duval Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3be57189efd5..e3ab1e4dc442 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -505,13 +505,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, } WARN_ON(inode->i_state & I_SYNC); /* - * Skip inode if it is clean. We don't want to mess with writeback - * lists in this function since flusher thread may be doing for example - * sync in parallel and if we move the inode, it could get skipped. So - * here we make sure inode is on some writeback list and leave it there - * unless we have completely cleaned the inode. + * Skip inode if it is clean and we have no outstanding writeback in + * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this + * function since flusher thread may be doing for example sync in + * parallel and if we move the inode, it could get skipped. So here we + * make sure inode is on some writeback list and leave it there unless + * we have completely cleaned the inode. */ - if (!(inode->i_state & I_DIRTY)) + if (!(inode->i_state & I_DIRTY) && + (wbc->sync_mode != WB_SYNC_ALL || + !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) goto out; inode->i_state |= I_SYNC; spin_unlock(&inode->i_lock); -- cgit v1.2.3 From 057f2f7daf84356c8677f2458395d250c4c0674f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 9 Jan 2014 21:46:34 -0500 Subject: SELinux: Fix possible NULL pointer dereference in selinux_inode_permission() commit 3dc91d4338d698ce77832985f9cb183d8eeaf6be upstream. While running stress tests on adding and deleting ftrace instances I hit this bug: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: selinux_inode_permission+0x85/0x160 PGD 63681067 PUD 7ddbe067 PMD 0 Oops: 0000 [#1] PREEMPT CPU: 0 PID: 5634 Comm: ftrace-test-mki Not tainted 3.13.0-rc4-test-00033-gd2a6dde-dirty #20 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 task: ffff880078375800 ti: ffff88007ddb0000 task.ti: ffff88007ddb0000 RIP: 0010:[] [] selinux_inode_permission+0x85/0x160 RSP: 0018:ffff88007ddb1c48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000800000 RCX: ffff88006dd43840 RDX: 0000000000000001 RSI: 0000000000000081 RDI: ffff88006ee46000 RBP: ffff88007ddb1c88 R08: 0000000000000000 R09: ffff88007ddb1c54 R10: 6e6576652f6f6f66 R11: 0000000000000003 R12: 0000000000000000 R13: 0000000000000081 R14: ffff88006ee46000 R15: 0000000000000000 FS: 00007f217b5b6700(0000) GS:ffffffff81e21000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033^M CR2: 0000000000000020 CR3: 000000006a0fe000 CR4: 00000000000007f0 Call Trace: security_inode_permission+0x1c/0x30 __inode_permission+0x41/0xa0 inode_permission+0x18/0x50 link_path_walk+0x66/0x920 path_openat+0xa6/0x6c0 do_filp_open+0x43/0xa0 do_sys_open+0x146/0x240 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b Code: 84 a1 00 00 00 81 e3 00 20 00 00 89 d8 83 c8 02 40 f6 c6 04 0f 45 d8 40 f6 c6 08 74 71 80 cf 02 49 8b 46 38 4c 8d 4d cc 45 31 c0 <0f> b7 50 20 8b 70 1c 48 8b 41 70 89 d9 8b 78 04 e8 36 cf ff ff RIP selinux_inode_permission+0x85/0x160 CR2: 0000000000000020 Investigating, I found that the inode->i_security was NULL, and the dereference of it caused the oops. in selinux_inode_permission(): isec = inode->i_security; rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd); Note, the crash came from stressing the deletion and reading of debugfs files. I was not able to recreate this via normal files. But I'm not sure they are safe. It may just be that the race window is much harder to hit. What seems to have happened (and what I have traced), is the file is being opened at the same time the file or directory is being deleted. As the dentry and inode locks are not held during the path walk, nor is the inodes ref counts being incremented, there is nothing saving these structures from being discarded except for an rcu_read_lock(). The rcu_read_lock() protects against freeing of the inode, but it does not protect freeing of the inode_security_struct. Now if the freeing of the i_security happens with a call_rcu(), and the i_security field of the inode is not changed (it gets freed as the inode gets freed) then there will be no issue here. (Linus Torvalds suggested not setting the field to NULL such that we do not need to check if it is NULL in the permission check). Note, this is a hack, but it fixes the problem at hand. A real fix is to restructure the destroy_inode() to call all the destructor handlers from the RCU callback. But that is a major job to do, and requires a lot of work. For now, we just band-aid this bug with this fix (it works), and work on a more maintainable solution in the future. Link: http://lkml.kernel.org/r/20140109101932.0508dec7@gandalf.local.home Link: http://lkml.kernel.org/r/20140109182756.17abaaa8@gandalf.local.home Signed-off-by: Steven Rostedt Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 20 ++++++++++++++++++-- security/selinux/include/objsec.h | 5 ++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 57f14185cf18..a7096e130c04 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -219,6 +219,14 @@ static int inode_alloc_security(struct inode *inode) return 0; } +static void inode_free_rcu(struct rcu_head *head) +{ + struct inode_security_struct *isec; + + isec = container_of(head, struct inode_security_struct, rcu); + kmem_cache_free(sel_inode_cache, isec); +} + static void inode_free_security(struct inode *inode) { struct inode_security_struct *isec = inode->i_security; @@ -229,8 +237,16 @@ static void inode_free_security(struct inode *inode) list_del_init(&isec->list); spin_unlock(&sbsec->isec_lock); - inode->i_security = NULL; - kmem_cache_free(sel_inode_cache, isec); + /* + * The inode may still be referenced in a path walk and + * a call to selinux_inode_permission() can be made + * after inode_free_security() is called. Ideally, the VFS + * wouldn't do this, but fixing that is a much harder + * job. For now, simply free the i_security via RCU, and + * leave the current inode->i_security pointer intact. + * The inode will be freed after the RCU grace period too. + */ + call_rcu(&isec->rcu, inode_free_rcu); } static int file_alloc_security(struct file *file) diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index aa47bcabb5f6..6fd9dd256a62 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -38,7 +38,10 @@ struct task_security_struct { struct inode_security_struct { struct inode *inode; /* back pointer to inode object */ - struct list_head list; /* list of inode_security_struct */ + union { + struct list_head list; /* list of inode_security_struct */ + struct rcu_head rcu; /* for freeing the inode_security_struct */ + }; u32 task_sid; /* SID of creating task */ u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ -- cgit v1.2.3 From b2be7cd9d11dde01e7c8ae39fe428d8df0aa8c6c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 13 Nov 2013 15:20:04 -0500 Subject: ftrace/x86: Load ftrace_ops in parameter not the variable holding it commit 1739f09e33d8f66bf48ddbc3eca615574da6c4f6 upstream. Function tracing callbacks expect to have the ftrace_ops that registered it passed to them, not the address of the variable that holds the ftrace_ops that registered it. Use a mov instead of a lea to store the ftrace_ops into the parameter of the function tracing callback. Signed-off-by: Steven Rostedt Reviewed-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/20131113152004.459787f9@gandalf.local.home Signed-off-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_32.S | 4 ++-- arch/x86/kernel/entry_64.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 8f3e2dec1df3..94e52cf064b0 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1075,7 +1075,7 @@ ENTRY(ftrace_caller) pushl $0 /* Pass NULL as regs pointer */ movl 4*4(%esp), %eax movl 0x4(%ebp), %edx - leal function_trace_op, %ecx + movl function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call @@ -1133,7 +1133,7 @@ ENTRY(ftrace_regs_caller) movl 12*4(%esp), %eax /* Load ip (1st parameter) */ subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ - leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ pushl %esp /* Save pt_regs as 4th parameter */ GLOBAL(ftrace_regs_call) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 727208941030..7ac938a4bfab 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -88,7 +88,7 @@ END(function_hook) MCOUNT_SAVE_FRAME \skip /* Load the ftrace_ops into the 3rd parameter */ - leaq function_trace_op, %rdx + movq function_trace_op(%rip), %rdx /* Load ip into the first parameter */ movq RIP(%rsp), %rdi -- cgit v1.2.3 From 38f27e4d72717c4b25c90b303cdb06076c8c08c2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 12 Jan 2014 01:25:21 -0800 Subject: thp: fix copy_page_rep GPF by testing is_huge_zero_pmd once only commit eecc1e426d681351a6026a7d3e7d225f38955b6c upstream. We see General Protection Fault on RSI in copy_page_rep: that RSI is what you get from a NULL struct page pointer. RIP: 0010:[] [] copy_page_rep+0x5/0x10 RSP: 0000:ffff880136e15c00 EFLAGS: 00010286 RAX: ffff880000000000 RBX: ffff880136e14000 RCX: 0000000000000200 RDX: 6db6db6db6db6db7 RSI: db73880000000000 RDI: ffff880dd0c00000 RBP: ffff880136e15c18 R08: 0000000000000200 R09: 000000000005987c R10: 000000000005987c R11: 0000000000000200 R12: 0000000000000001 R13: ffffea00305aa000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f195752f700(0000) GS:ffff880c7fc20000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000093010000 CR3: 00000001458e1000 CR4: 00000000000027e0 Call Trace: copy_user_huge_page+0x93/0xab do_huge_pmd_wp_page+0x710/0x815 handle_mm_fault+0x15d8/0x1d70 __do_page_fault+0x14d/0x840 do_page_fault+0x2f/0x90 page_fault+0x22/0x30 do_huge_pmd_wp_page() tests is_huge_zero_pmd(orig_pmd) four times: but since shrink_huge_zero_page() can free the huge_zero_page, and we have no hold of our own on it here (except where the fourth test holds page_table_lock and has checked pmd_same), it's possible for it to answer yes the first time, but no to the second or third test. Change all those last three to tests for NULL page. (Note: this is not the same issue as trinity's DEBUG_PAGEALLOC BUG in copy_page_rep with RSI: ffff88009c422000, reported by Sasha Levin in https://lkml.org/lkml/2013/3/29/103. I believe that one is due to the source page being split, and a tail page freed, while copy is in progress; and not a problem without DEBUG_PAGEALLOC, since the pmd_same check will prevent a miscopy from being made visible.) Fixes: 97ae17497e99 ("thp: implement refcounting for huge zero page") Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6bd22902d289..eb00e81601a5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1166,7 +1166,7 @@ alloc: if (unlikely(!new_page)) { count_vm_event(THP_FAULT_FALLBACK); - if (is_huge_zero_pmd(orig_pmd)) { + if (!page) { ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, address, pmd, orig_pmd, haddr); } else { @@ -1190,7 +1190,7 @@ alloc: goto out; } - if (is_huge_zero_pmd(orig_pmd)) + if (!page) clear_huge_page(new_page, haddr, HPAGE_PMD_NR); else copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); @@ -1215,7 +1215,7 @@ alloc: page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); update_mmu_cache_pmd(vma, address, pmd); - if (is_huge_zero_pmd(orig_pmd)) { + if (!page) { add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); put_huge_zero_page(); } else { -- cgit v1.2.3 From 4cb1e59ffcbee368f32c37ca5872fd73163c7783 Mon Sep 17 00:00:00 2001 From: Andreas Rohner Date: Tue, 14 Jan 2014 17:56:36 -0800 Subject: nilfs2: fix segctor bug that causes file system corruption commit 70f2fe3a26248724d8a5019681a869abdaf3e89a upstream. There is a bug in the function nilfs_segctor_collect, which results in active data being written to a segment, that is marked as clean. It is possible, that this segment is selected for a later segment construction, whereby the old data is overwritten. The problem shows itself with the following kernel log message: nilfs_sufile_do_cancel_free: segment 6533 must be clean Usually a few hours later the file system gets corrupted: NILFS: bad btree node (blocknr=8748107): level = 0, flags = 0x0, nchildren = 0 NILFS error (device sdc1): nilfs_bmap_last_key: broken bmap (inode number=114660) The issue can be reproduced with a file system that is nearly full and with the cleaner running, while some IO intensive task is running. Although it is quite hard to reproduce. This is what happens: 1. The cleaner starts the segment construction 2. nilfs_segctor_collect is called 3. sc_stage is on NILFS_ST_SUFILE and segments are freed 4. sc_stage is on NILFS_ST_DAT current segment is full 5. nilfs_segctor_extend_segments is called, which allocates a new segment 6. The new segment is one of the segments freed in step 3 7. nilfs_sufile_cancel_freev is called and produces an error message 8. Loop around and the collection starts again 9. sc_stage is on NILFS_ST_SUFILE and segments are freed including the newly allocated segment, which will contain active data and can be allocated at a later time 10. A few hours later another segment construction allocates the segment and causes file system corruption This can be prevented by simply reordering the statements. If nilfs_sufile_cancel_freev is called before nilfs_segctor_extend_segments the freed segments are marked as dirty and cannot be allocated any more. Signed-off-by: Andreas Rohner Reviewed-by: Ryusuke Konishi Tested-by: Andreas Rohner Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segment.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index cbd66188a28b..958a5b57ed4a 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1440,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nilfs_clear_logs(&sci->sc_segbufs); - err = nilfs_segctor_extend_segments(sci, nilfs, nadd); - if (unlikely(err)) - return err; - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, NULL); WARN_ON(err); /* do not happen */ + sci->sc_stage.flags &= ~NILFS_CF_SUFREED; } + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } -- cgit v1.2.3 From ba8ee5985099dbd8f01ac5dcc7ad8ddf23056e87 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 8 Jan 2014 11:12:27 -0200 Subject: drm/i915: fix DDI PLLs HW state readout code commit 0882dae983707455e97479e5e904e37673517ebc upstream. Properly zero the refcounts and crtc->ddi_pll_set so the previous HW state doesn't affect the result of reading the current HW state. This fixes WARNs about WRPLL refcount if we have an HDMI monitor on HSW and then suspend/resume. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64379 Tested-by: Qingshuai Tian Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ddi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 16e674af4d57..7ce38344e484 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1193,12 +1193,18 @@ void intel_ddi_setup_hw_pll_state(struct drm_device *dev) enum pipe pipe; struct intel_crtc *intel_crtc; + dev_priv->ddi_plls.spll_refcount = 0; + dev_priv->ddi_plls.wrpll1_refcount = 0; + dev_priv->ddi_plls.wrpll2_refcount = 0; + for_each_pipe(pipe) { intel_crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - if (!intel_crtc->active) + if (!intel_crtc->active) { + intel_crtc->ddi_pll_sel = PORT_CLK_SEL_NONE; continue; + } intel_crtc->ddi_pll_sel = intel_ddi_get_crtc_pll(dev_priv, pipe); -- cgit v1.2.3 From bb4a65df3097524be403d455176710aee14d41f7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Dec 2013 10:13:33 +1100 Subject: md: fix problem when adding device to read-only array with bitmap. commit 8313b8e57f55b15e5b7f7fc5d1630bbf686a9a97 upstream. If an array is started degraded, and then the missing device is found it can be re-added and a minimal bitmap-based recovery will bring it fully up-to-date. If the array is read-only a recovery would not be allowed. But also if the array is read-only and the missing device was present very recently, then there could be no need for any recovery at all, so we simply include the device in the read-only array without any recovery. However... if the missing device was removed a little longer ago it could be missing some updates, but if a bitmap is present it will be conditionally accepted pending a bitmap-based update. We don't currently detect this case properly and will include that old device into the read-only array with no recovery even though it really needs a recovery. This patch keeps track of whether a bitmap-based-recovery is really needed or not in the new Bitmap_sync rdev flag. If that is set, then the device will not be added to a read-only array. Cc: Andrei Warkentin Fixes: d70ed2e4fafdbef0800e73942482bb075c21578b Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 18 +++++++++++++++--- drivers/md/md.h | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2290b95009de..a2dda416c9cb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1118,6 +1118,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) rdev->raid_disk = -1; clear_bit(Faulty, &rdev->flags); clear_bit(In_sync, &rdev->flags); + clear_bit(Bitmap_sync, &rdev->flags); clear_bit(WriteMostly, &rdev->flags); if (mddev->raid_disks == 0) { @@ -1196,6 +1197,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) */ if (ev1 < mddev->bitmap->events_cleared) return 0; + if (ev1 < mddev->events) + set_bit(Bitmap_sync, &rdev->flags); } else { if (ev1 < mddev->events) /* just a hot-add of a new device, leave raid_disk at -1 */ @@ -1604,6 +1607,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) rdev->raid_disk = -1; clear_bit(Faulty, &rdev->flags); clear_bit(In_sync, &rdev->flags); + clear_bit(Bitmap_sync, &rdev->flags); clear_bit(WriteMostly, &rdev->flags); if (mddev->raid_disks == 0) { @@ -1686,6 +1690,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) */ if (ev1 < mddev->bitmap->events_cleared) return 0; + if (ev1 < mddev->events) + set_bit(Bitmap_sync, &rdev->flags); } else { if (ev1 < mddev->events) /* just a hot-add of a new device, leave raid_disk at -1 */ @@ -2829,6 +2835,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) else rdev->saved_raid_disk = -1; clear_bit(In_sync, &rdev->flags); + clear_bit(Bitmap_sync, &rdev->flags); err = rdev->mddev->pers-> hot_add_disk(rdev->mddev, rdev); if (err) { @@ -5761,6 +5768,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) info->raid_disk < mddev->raid_disks) { rdev->raid_disk = info->raid_disk; set_bit(In_sync, &rdev->flags); + clear_bit(Bitmap_sync, &rdev->flags); } else rdev->raid_disk = -1; } else @@ -7694,7 +7702,8 @@ static int remove_and_add_spares(struct mddev *mddev, if (test_bit(Faulty, &rdev->flags)) continue; if (mddev->ro && - rdev->saved_raid_disk < 0) + ! (rdev->saved_raid_disk >= 0 && + !test_bit(Bitmap_sync, &rdev->flags))) continue; rdev->recovery_offset = 0; @@ -7775,9 +7784,12 @@ void md_check_recovery(struct mddev *mddev) * As we only add devices that are already in-sync, * we can activate the spares immediately. */ - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); remove_and_add_spares(mddev, NULL); - mddev->pers->spare_active(mddev); + /* There is no thread, but we need to call + * ->spare_active and clear saved_raid_disk + */ + md_reap_sync_thread(mddev); + clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); goto unlock; } diff --git a/drivers/md/md.h b/drivers/md/md.h index 653f992b687a..ebe748e57416 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -129,6 +129,9 @@ struct md_rdev { enum flag_bits { Faulty, /* device is known to have a fault */ In_sync, /* device is in_sync with rest of array */ + Bitmap_sync, /* ..actually, not quite In_sync. Need a + * bitmap-based recovery to get fully in sync + */ Unmerged, /* device is being added to array and should * be considerred for bvec_merge_fn but not * yet for actual IO -- cgit v1.2.3 From 16342c21c94760ac4d426371546d4f9e27758e1b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 6 Jan 2014 10:35:34 +1100 Subject: md/raid10: fix bug when raid10 recovery fails to recover a block. commit e8b849158508565e0cd6bc80061124afc5879160 upstream. commit e875ecea266a543e643b19e44cf472f1412708f9 md/raid10 record bad blocks as needed during recovery. added code to the "cannot recover this block" path to record a bad block rather than fail the whole recovery. Unfortunately this new case was placed *after* r10bio was freed rather than *before*, yet it still uses r10bio. This is will crash with a null dereference. So move the freeing of r10bio down where it is safe. Fixes: e875ecea266a543e643b19e44cf472f1412708f9 Reported-by: Damian Nowak URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181 Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0add86821755..ff3f8d057e0a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3198,10 +3198,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (j == conf->copies) { /* Cannot recover, so abort the recovery or * record a bad block */ - put_buf(r10_bio); - if (rb2) - atomic_dec(&rb2->remaining); - r10_bio = rb2; if (any_working) { /* problem is that there are bad blocks * on other device(s) @@ -3233,6 +3229,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, mirror->recovery_disabled = mddev->recovery_disabled; } + put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); + r10_bio = rb2; break; } } -- cgit v1.2.3 From ddd618aa9eee9f143d750d15497bf6f117874928 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 14 Jan 2014 10:38:09 +1100 Subject: md/raid10: fix two bugs in handling of known-bad-blocks. commit b50c259e25d9260b9108dc0c2964c26e5ecbe1c1 upstream. If we discover a bad block when reading we split the request and potentially read some of it from a different device. The code path of this has two bugs in RAID10. 1/ we get a spin_lock with _irq, but unlock without _irq!! 2/ The calculation of 'sectors_handled' is wrong, as can be clearly seen by comparison with raid1.c This leads to at least 2 warnings and a probable crash is a RAID10 ever had known bad blocks. Fixes: 856e08e23762dfb92ffc68fd0a8d228f9e152160 Reported-by: Damian Nowak URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181 Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ff3f8d057e0a..d2f8cd332b4a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1321,7 +1321,7 @@ read_again: /* Could not read all from this device, so we will * need another r10_bio. */ - sectors_handled = (r10_bio->sectors + max_sectors + sectors_handled = (r10_bio->sector + max_sectors - bio->bi_sector); r10_bio->sectors = max_sectors; spin_lock_irq(&conf->device_lock); @@ -1329,7 +1329,7 @@ read_again: bio->bi_phys_segments = 2; else bio->bi_phys_segments++; - spin_unlock(&conf->device_lock); + spin_unlock_irq(&conf->device_lock); /* Cannot call generic_make_request directly * as that will be queued in __generic_make_request * and subsequent mempool_alloc might block -- cgit v1.2.3 From c44bc4496d29b70f95f2197eb950581a455c92a8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 6 Jan 2014 13:19:42 +1100 Subject: md/raid5: Fix possible confusion when multiple write errors occur. commit 1cc03eb93245e63b0b7a7832165efdc52e25b4e6 upstream. commit 5d8c71f9e5fbdd95650be00294d238e27a363b5c md: raid5 crash during degradation Fixed a crash in an overly simplistic way which could leave R5_WriteError or R5_MadeGood set in the stripe cache for devices for which it is no longer relevant. When those devices are removed and spares added the flags are still set and can cause incorrect behaviour. commit 14a75d3e07c784c004b4b44b34af996b8e4ac453 md/raid5: preferentially read from replacement device if possible. Fixed the same bug if a more effective way, so we can now revert the original commit. Reported-and-tested-by: Alexander Lyakas Fixes: 5d8c71f9e5fbdd95650be00294d238e27a363b5c Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4bed5454b8dc..51422999fd08 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3391,7 +3391,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) */ set_bit(R5_Insync, &dev->flags); - if (rdev && test_bit(R5_WriteError, &dev->flags)) { + if (test_bit(R5_WriteError, &dev->flags)) { /* This flag does not apply to '.replacement' * only to .rdev, so make sure to check that*/ struct md_rdev *rdev2 = rcu_dereference( @@ -3404,7 +3404,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) } else clear_bit(R5_WriteError, &dev->flags); } - if (rdev && test_bit(R5_MadeGood, &dev->flags)) { + if (test_bit(R5_MadeGood, &dev->flags)) { /* This flag does not apply to '.replacement' * only to .rdev, so make sure to check that*/ struct md_rdev *rdev2 = rcu_dereference( -- cgit v1.2.3 From 2d36355274980e716b832710e8d18b6396594d4e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 21 Jan 2014 15:48:47 -0800 Subject: mm: Make {,set}page_address() static inline if WANT_PAGE_VIRTUAL commit f92f455f67fef27929e6043499414605b0c94872 upstream. {,set}page_address() are macros if WANT_PAGE_VIRTUAL. If !WANT_PAGE_VIRTUAL, they're plain C functions. If someone calls them with a void *, this pointer is auto-converted to struct page * if !WANT_PAGE_VIRTUAL, but causes a build failure on architectures using WANT_PAGE_VIRTUAL (arc, m68k and sparc64): drivers/md/bcache/bset.c: In function `__btree_sort': drivers/md/bcache/bset.c:1190: warning: dereferencing `void *' pointer drivers/md/bcache/bset.c:1190: error: request for member `virtual' in something not a structure or union Convert them to static inline functions to fix this. There are already plenty of users of struct page members inside , so there's no reason to keep them as macros. Signed-off-by: Geert Uytterhoeven Acked-by: Michael S. Tsirkin Tested-by: Guenter Roeck Tested-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e0c8528a41a4..3bf21c3502d0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -766,11 +766,14 @@ static __always_inline void *lowmem_page_address(const struct page *page) #endif #if defined(WANT_PAGE_VIRTUAL) -#define page_address(page) ((page)->virtual) -#define set_page_address(page, address) \ - do { \ - (page)->virtual = (address); \ - } while(0) +static inline void *page_address(const struct page *page) +{ + return page->virtual; +} +static inline void set_page_address(struct page *page, void *address) +{ + page->virtual = address; +} #define page_address_init() do { } while(0) #endif -- cgit v1.2.3 From 3bb0df1b71413ae789767d928d9126c5a5cb1e11 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Tue, 10 Dec 2013 10:18:58 +0000 Subject: serial: amba-pl011: use port lock to guard control register access commit fe43390702a1b5741fdf217063b05c7612b38303 upstream. When the pl011 is being used for a console, pl011_console_write forces the control register (CR) to enable the UART for transmission and then restores this to the original value afterwards. It does this while holding the port lock. Unfortunately, when the uart is started or shutdown - say in response to userland using the serial device for a terminal - then this updates the control register without any locking. This means we can have pl011_console_write Save CR pl011_startup Initialise CR, e.g. enable receive pl011_console_write Restore old CR with receive not enabled this result is a serial port which doesn't respond to any input. A similar race in reverse could happen when the device is shutdown. We can fix these problems by taking the port lock when updating CR. Signed-off-by: Jon Medhurst Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index e2774f9ecd59..7a55fe70434a 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1543,6 +1543,8 @@ static int pl011_startup(struct uart_port *port) /* * Provoke TX FIFO interrupt into asserting. */ + spin_lock_irq(&uap->port.lock); + cr = UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_LBE; writew(cr, uap->port.membase + UART011_CR); writew(0, uap->port.membase + UART011_FBRD); @@ -1567,6 +1569,8 @@ static int pl011_startup(struct uart_port *port) cr |= UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE; writew(cr, uap->port.membase + UART011_CR); + spin_unlock_irq(&uap->port.lock); + /* * initialise the old status of the modem signals */ @@ -1636,11 +1640,13 @@ static void pl011_shutdown(struct uart_port *port) * it during startup(). */ uap->autorts = false; + spin_lock_irq(&uap->port.lock); cr = readw(uap->port.membase + UART011_CR); uap->old_cr = cr; cr &= UART011_CR_RTS | UART011_CR_DTR; cr |= UART01x_CR_UARTEN | UART011_CR_TXE; writew(cr, uap->port.membase + UART011_CR); + spin_unlock_irq(&uap->port.lock); /* * disable break condition and fifos -- cgit v1.2.3 From 4b176ae176912fbf9e19ab6313ebf1a4b6bd9b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Jan 2014 16:15:36 +0200 Subject: drm/i915: Don't grab crtc mutexes in intel_modeset_gem_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7ad228b11ec26a820291c9f5a1168d6176580dc1 upstream. When the pipe A force quirk is applied the code will attempt to grab a crtc mutex during intel_modeset_setup_hw_state(). If we're already holding all crtc mutexes this will obviously deadlock every time. So instead of using drm_modeset_lock_all() just grab the mode_config.mutex. This is enough to avoid the unlocked mutex warnings from certain lower level functions. The regression was introduced in: commit 027476642811f8559cbe00ef6cc54db230e48a20 Author: Ville Syrjälä Date: Mon Dec 2 11:08:06 2013 +0200 drm/i915: Take modeset locks around intel_modeset_setup_hw_state() Signed-off-by: Ville Syrjälä [danvet: Add cc: stable since the offending commit has that, too.] Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 15358add4f70..54ae96f7bec6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9456,9 +9456,9 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_setup_overlay(dev); - drm_modeset_lock_all(dev); + mutex_lock(&dev->mode_config.mutex); intel_modeset_setup_hw_state(dev, false); - drm_modeset_unlock_all(dev); + mutex_unlock(&dev->mode_config.mutex); } void intel_modeset_cleanup(struct drm_device *dev) -- cgit v1.2.3 From 04545a3a85d0d1b7c3e09adcf5d0aff9d71072f3 Mon Sep 17 00:00:00 2001 From: Taras Kondratiuk Date: Fri, 10 Jan 2014 01:27:08 +0100 Subject: ARM: 7938/1: OMAP4/highbank: Flush L2 cache before disabling commit b25f3e1c358434bf850220e04f28eebfc45eb634 upstream. Kexec disables outer cache before jumping to reboot code, but it doesn't flush it explicitly. Flush is done implicitly inside of l2x0_disable(). But some SoC's override default .disable handler and don't flush cache. This may lead to a corrupted memory during Kexec reboot on these platforms. This patch adds cache flush inside of OMAP4 and Highbank outer_cache.disable() handlers to make it consistent with default l2x0_disable(). Acked-by: Rob Herring Acked-by: Santosh Shilimkar Acked-by: Tony Lindgren Signed-off-by: Taras Kondratiuk Signed-off-by: Russell King Cc: Wang Nan Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-highbank/highbank.c | 1 + arch/arm/mach-omap2/omap4-common.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index e7df2dd43a40..5ed19e88874b 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -68,6 +68,7 @@ void highbank_set_cpu_jump(int cpu, void *jump_addr) #ifdef CONFIG_CACHE_L2X0 static void highbank_l2x0_disable(void) { + outer_flush_all(); /* Disable PL310 L2 Cache controller */ highbank_smc1(0x102, 0x0); } diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 13b27ffaf45e..ab99ab8fce8a 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -162,6 +162,7 @@ void __iomem *omap4_get_l2cache_base(void) static void omap4_l2x0_disable(void) { + outer_flush_all(); /* Disable PL310 L2 Cache controller */ omap_smc1(0x102, 0x0); } -- cgit v1.2.3 From 020abbc91120ddf052e2c303a8c598c3be4dc459 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 25 Jan 2014 08:27:55 -0800 Subject: Linux 3.10.28 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 09675a57059c..addf1b007fe3 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 27 +SUBLEVEL = 28 EXTRAVERSION = NAME = TOSSUG Baby Fish -- cgit v1.2.3