From 1eb64c07aab86eca7cdaa05a59d8da5a3ba758f8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 20 Apr 2020 18:13:38 -0700 Subject: sh: fix build error in mm/init.c The closing parenthesis is missing. Fixes: bfeb022f8fe4 ("mm/memory_hotplug: add pgprot_t to mhp_params") Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Reviewed-by: Logan Gunthorpe Cc: Stephen Rothwell Cc: Guenter Roeck Cc: Yoshinori Sato Cc: Rich Felker Link: http://lkml.kernel.org/r/20200413014743.16353-1-masahiroy@kernel.org Signed-off-by: Linus Torvalds --- arch/sh/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index b9de2d4fa57e3..8d2a68aea1fcb 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -412,7 +412,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot) + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) return -EINVAL; /* We only have ZONE_NORMAL, so this is easy.. */ -- cgit v1.2.3 From 89b83f282d8ba380cf2124f88106c57df49c538c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 20 Apr 2020 18:13:42 -0700 Subject: slub: avoid redzone when choosing freepointer location Marco Elver reported system crashes when booting with "slub_debug=Z". The freepointer location (s->offset) was not taking into account that the "inuse" size that includes the redzone area should not be used by the freelist pointer. Change the calculation to save the area of the object that an inline freepointer may be written into. Fixes: 3202fa62fb43 ("slub: relocate freelist pointer to middle of object") Reported-by: Marco Elver Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Tested-by: Marco Elver Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/202004151054.BD695840@keescook Link: https://lore.kernel.org/linux-mm/20200415164726.GA234932@google.com Signed-off-by: Linus Torvalds --- mm/slub.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 332d4b459a907..9bf44955c4f1e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3533,6 +3533,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) { slab_flags_t flags = s->flags; unsigned int size = s->object_size; + unsigned int freepointer_area; unsigned int order; /* @@ -3541,6 +3542,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * the possible location of the free pointer. */ size = ALIGN(size, sizeof(void *)); + /* + * This is the area of the object where a freepointer can be + * safely written. If redzoning adds more to the inuse size, we + * can't use that portion for writing the freepointer, so + * s->offset must be limited within this for the general case. + */ + freepointer_area = size; #ifdef CONFIG_SLUB_DEBUG /* @@ -3582,13 +3590,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ s->offset = size; size += sizeof(void *); - } else if (size > sizeof(void *)) { + } else if (freepointer_area > sizeof(void *)) { /* * Store freelist pointer near middle of object to keep * it away from the edges of the object to avoid small * sized over/underflows from neighboring allocations. 
*/ - s->offset = ALIGN(size / 2, sizeof(void *)); + s->offset = ALIGN(freepointer_area / 2, sizeof(void *)); } #ifdef CONFIG_SLUB_DEBUG -- cgit v1.2.3 From b64d8d1e1ba5ba041e4407056a723c25a7052c49 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 20 Apr 2020 18:13:45 -0700 Subject: mm/userfaultfd: disable userfaultfd-wp on x86_32 Userfaultfd-wp is not yet working on 32bit hosts, but it's accidentally enabled previously. Disable it. Fixes: 5a281062af1d ("userfaultfd: wp: add WP pagetable tracking to x86") Reported-by: Naresh Kamboju Reported-by: kernel test robot Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Tested-by: Naresh Kamboju Reviewed-by: Andrea Arcangeli Cc: Hillf Danton Link: http://lkml.kernel.org/r/20200413141608.109211-1-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d6104ea8af03..1197b5596d5ad 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,7 +149,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 - select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD + select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_ASM_MODVERSIONS -- cgit v1.2.3 From 5701feb0a9f5b1c30222d77b12e15cc17c9cd4bf Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 20 Apr 2020 18:13:48 -0700 Subject: MAINTAINERS: add an entry for kfifo Kfifo has been written by Stefani Seibold and she's implicitly expected to Ack any changes to it. She's not however officially listed as kfifo maintainer which leads to delays in patch review. This patch proposes to add an explitic entry for kfifo to MAINTAINERS file. 
[akpm@linux-foundation.org: alphasort F: entries, per Joe] [akpm@linux-foundation.org: remove colon, per Bartosz] Link: http://lkml.kernel.org/r/20200124174533.21815-1-brgl@bgdev.pl Link: http://lkml.kernel.org/r/20200413104250.26683-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Walleij Cc: Andy Shevchenko Cc: Joe Perches Signed-off-by: Linus Torvalds --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b816a453b10eb..0e8093442a6ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9416,6 +9416,13 @@ F: include/linux/keyctl.h F: include/uapi/linux/keyctl.h F: security/keys/ +KFIFO +M: Stefani Seibold +S: Maintained +F: include/linux/kfifo.h +F: lib/kfifo.c +F: samples/kfifo/ + KGDB / KDB /debug_core M: Jason Wessel M: Daniel Thompson -- cgit v1.2.3 From 3c1d7e6ccb644d517a12f73a7ff200870926f865 Mon Sep 17 00:00:00 2001 From: Longpeng Date: Mon, 20 Apr 2020 18:13:51 -0700 Subject: mm/hugetlb: fix a addressing exception caused by huge_pte_offset Our machine encountered a panic(addressing exception) after run for a long time and the calltrace is: RIP: hugetlb_fault+0x307/0xbe0 RSP: 0018:ffff9567fc27f808 EFLAGS: 00010286 RAX: e800c03ff1258d48 RBX: ffffd3bb003b69c0 RCX: e800c03ff1258d48 RDX: 17ff3fc00eda72b7 RSI: 00003ffffffff000 RDI: e800c03ff1258d48 RBP: ffff9567fc27f8c8 R08: e800c03ff1258d48 R09: 0000000000000080 R10: ffffaba0704c22a8 R11: 0000000000000001 R12: ffff95c87b4b60d8 R13: 00005fff00000000 R14: 0000000000000000 R15: ffff9567face8074 FS: 00007fe2d9ffb700(0000) GS:ffff956900e40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffd3bb003b69c0 CR3: 000000be67374000 CR4: 00000000003627e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: follow_hugetlb_page+0x175/0x540 __get_user_pages+0x2a0/0x7e0 __get_user_pages_unlocked+0x15d/0x210 __gfn_to_pfn_memslot+0x3c5/0x460 [kvm] try_async_pf+0x6e/0x2a0 [kvm] tdp_page_fault+0x151/0x2d0 [kvm] ... kvm_arch_vcpu_ioctl_run+0x330/0x490 [kvm] kvm_vcpu_ioctl+0x309/0x6d0 [kvm] do_vfs_ioctl+0x3f0/0x540 SyS_ioctl+0xa1/0xc0 system_call_fastpath+0x22/0x27 For 1G hugepages, huge_pte_offset() wants to return NULL or pudp, but it may return a wrong 'pmdp' if there is a race. Please look at the following code snippet: ... pud = pud_offset(p4d, addr); if (sz != PUD_SIZE && pud_none(*pud)) return NULL; /* hugepage or swap? */ if (pud_huge(*pud) || !pud_present(*pud)) return (pte_t *)pud; pmd = pmd_offset(pud, addr); if (sz != PMD_SIZE && pmd_none(*pmd)) return NULL; /* hugepage or swap? */ if (pmd_huge(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; ... The following sequence would trigger this bug: - CPU0: sz = PUD_SIZE and *pud = 0 , continue - CPU0: "pud_huge(*pud)" is false - CPU1: calling hugetlb_no_page and set *pud to xxxx8e7(PRESENT) - CPU0: "!pud_present(*pud)" is false, continue - CPU0: pmd = pmd_offset(pud, addr) and maybe return a wrong pmdp However, we want CPU0 to return NULL or pudp in this case. We must make sure there is exactly one dereference of pud and pmd. 
Signed-off-by: Longpeng Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Reviewed-by: Jason Gunthorpe Cc: Matthew Wilcox Cc: Sean Christopherson Cc: Link: http://lkml.kernel.org/r/20200413010342.771-1-longpeng2@huawei.com Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cd459155d28a7..bcabbe02192b1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5365,8 +5365,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, { pgd_t *pgd; p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; + pud_t *pud, pud_entry; + pmd_t *pmd, pmd_entry; pgd = pgd_offset(mm, addr); if (!pgd_present(*pgd)) @@ -5376,17 +5376,19 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return NULL; pud = pud_offset(p4d, addr); - if (sz != PUD_SIZE && pud_none(*pud)) + pud_entry = READ_ONCE(*pud); + if (sz != PUD_SIZE && pud_none(pud_entry)) return NULL; /* hugepage or swap? */ - if (pud_huge(*pud) || !pud_present(*pud)) + if (pud_huge(pud_entry) || !pud_present(pud_entry)) return (pte_t *)pud; pmd = pmd_offset(pud, addr); - if (sz != PMD_SIZE && pmd_none(*pmd)) + pmd_entry = READ_ONCE(*pmd); + if (sz != PMD_SIZE && pmd_none(pmd_entry)) return NULL; /* hugepage or swap? */ - if (pmd_huge(*pmd) || !pmd_present(*pmd)) + if (pmd_huge(pmd_entry) || !pmd_present(pmd_entry)) return (pte_t *)pmd; return NULL; -- cgit v1.2.3 From d180870d83dbb076fabd6d469f2626c071f12255 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 20 Apr 2020 18:13:55 -0700 Subject: mm, gup: return EINTR when gup is interrupted by fatal signals EINTR is the usual error code which other killable interfaces return. This is the case for the other fatal_signal_pending break out from the same function. Make the code consistent. ERESTARTSYS is also quite confusing because the signal is fatal and so no restart will happen before returning to the userspace. Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Cc: Peter Xu Cc: Hillf Danton Link: http://lkml.kernel.org/r/20200409071133.31734-1-mhocko@kernel.org Signed-off-by: Linus Torvalds --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index 6076df8e04a4e..50681f0286ded 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1088,7 +1088,7 @@ retry: * potentially allocating memory. */ if (fatal_signal_pending(current)) { - ret = -ERESTARTSYS; + ret = -EINTR; goto out; } cond_resched(); -- cgit v1.2.3 From 461e1565366e84f7c9ccaf40730e4e892c70700f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 20 Apr 2020 18:13:58 -0700 Subject: checkpatch: fix a typo in the regex for $allocFunctions Here, we look for function such as 'netdev_alloc_skb_ip_align', so a '_' is missing in the regex. To make sure: grep -r --include=*.c skbip_a * | wc ==> 0 results grep -r --include=*.c skb_ip_a * | wc ==> 112 results Signed-off-by: Christophe JAILLET Signed-off-by: Andrew Morton Acked-by: Joe Perches Link: http://lkml.kernel.org/r/20200407190029.892-1-christophe.jaillet@wanadoo.fr Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d64c67b67e3c7..eac40f0abd56a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -479,7 +479,7 @@ our $allocFunctions = qr{(?x: (?:kv|k|v)[czm]alloc(?:_node|_array)? | kstrdup(?:_const)? | kmemdup(?:_nul)?) | - (?:\w+)?alloc_skb(?:ip_align)? | + (?:\w+)?alloc_skb(?:_ip_align)? 
| # dev_alloc_skb/netdev_alloc_skb, et al dma_alloc_coherent )}; -- cgit v1.2.3 From a21151b9d81a69ce334f409e7f98e50e3b8fa984 Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Mon, 20 Apr 2020 18:14:01 -0700 Subject: tools/build: tweak unused value workaround Clang has -Wself-assign enabled by default under -Wall, which always gets -Werror'ed on this file, causing sync-compare-and-swap to be disabled by default. The generally-accepted way to spell "this value is intentionally unused," is casting it to `void`. This is accepted by both GCC and Clang with -Wall enabled: https://godbolt.org/z/qqZ9r3 Signed-off-by: George Burgess IV Signed-off-by: Andrew Morton Cc: Nick Desaulniers Link: http://lkml.kernel.org/r/20200414195638.156123-1-gbiv@google.com Signed-off-by: Linus Torvalds --- tools/build/feature/test-sync-compare-and-swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/feature/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c index 1e38d1930a972..3bc6b0768a531 100644 --- a/tools/build/feature/test-sync-compare-and-swap.c +++ b/tools/build/feature/test-sync-compare-and-swap.c @@ -7,7 +7,7 @@ int main(int argc, char *argv[]) { uint64_t old, new = argc; - argv = argv; + (void)argv; do { old = __sync_val_compare_and_swap(&x, 0, 0); } while (!__sync_bool_compare_and_swap(&x, old, new)); -- cgit v1.2.3 From 56df70a63ed5d989c1d36deee94cae14342be6e9 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 20 Apr 2020 18:14:04 -0700 Subject: mm/ksm: fix NULL pointer dereference when KSM zero page is enabled find_mergeable_vma() can return NULL. In this case, it leads to a crash when we access vm_mm(its offset is 0x40) later in write_protect_page. And this case did happen on our server. The following call trace is captured in kernel 4.19 with the following patch applied and KSM zero page enabled on our server. commit e86c59b1b12d ("mm/ksm: improve deduplication of zero pages with colouring") So add a vma check to fix it. 
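The fault address in the oops below matches that 0x40 offset: dereferencing a member of a NULL struct pointer faults at the member's offset, and find_mergeable_vma() can return NULL when the address is no longer covered by a mergeable VMA. A small self-contained illustration of the offset and of the guard the patch adds (illustrative types and names, not the kernel's):

#include <stdio.h>
#include <stddef.h>

/* Illustrative layout: a pointer member placed at offset 0x40, like
 * vm_area_struct::vm_mm in the kernel that produced the oops below. */
struct vma_like {
	char pad[0x40];
	void *vm_mm;
};

/* Like find_mergeable_vma(): the lookup can legitimately come back empty. */
static struct vma_like *lookup_vma(int found)
{
	static struct vma_like v;
	return found ? &v : NULL;
}

int main(void)
{
	struct vma_like *vma = lookup_vma(0);

	printf("vm_mm lives at offset %#zx\n", offsetof(struct vma_like, vm_mm));

	/* Without this check, 'vma->vm_mm' dereferences address 0x40 and faults. */
	if (!vma) {
		puts("lookup failed, bail out instead of dereferencing NULL");
		return 0;
	}
	printf("mm=%p\n", vma->vm_mm);
	return 0;
}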
BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 Oops: 0000 [#1] SMP NOPTI CPU: 9 PID: 510 Comm: ksmd Kdump: loaded Tainted: G OE 4.19.36.bsk.9-amd64 #4.19.36.bsk.9 RIP: try_to_merge_one_page+0xc7/0x760 Code: 24 58 65 48 33 34 25 28 00 00 00 89 e8 0f 85 a3 06 00 00 48 83 c4 60 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 8b 46 08 a8 01 75 b8 <49> 8b 44 24 40 4c 8d 7c 24 20 b9 07 00 00 00 4c 89 e6 4c 89 ff 48 RSP: 0018:ffffadbdd9fffdb0 EFLAGS: 00010246 RAX: ffffda83ffd4be08 RBX: ffffda83ffd4be40 RCX: 0000002c6e800000 RDX: 0000000000000000 RSI: ffffda83ffd4be40 RDI: 0000000000000000 RBP: ffffa11939f02ec0 R08: 0000000094e1a447 R09: 00000000abe76577 R10: 0000000000000962 R11: 0000000000004e6a R12: 0000000000000000 R13: ffffda83b1e06380 R14: ffffa18f31f072c0 R15: ffffda83ffd4be40 FS: 0000000000000000(0000) GS:ffffa0da43b80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000040 CR3: 0000002c77c0a003 CR4: 00000000007626e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ksm_scan_thread+0x115e/0x1960 kthread+0xf5/0x130 ret_from_fork+0x1f/0x30 [songmuchun@bytedance.com: if the vma is out of date, just exit] Link: http://lkml.kernel.org/r/20200416025034.29780-1-songmuchun@bytedance.com [akpm@linux-foundation.org: add the conventional braces, replace /** with /*] Fixes: e86c59b1b12d ("mm/ksm: improve deduplication of zero pages with colouring") Co-developed-by: Xiongchun Duan Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Kirill Tkhai Cc: Hugh Dickins Cc: Yang Shi Cc: Claudio Imbrenda Cc: Markus Elfring Cc: Link: http://lkml.kernel.org/r/20200416025034.29780-1-songmuchun@bytedance.com Link: http://lkml.kernel.org/r/20200414132905.83819-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- mm/ksm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index a558da9e71770..281c00129a2ea 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2112,8 +2112,16 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) down_read(&mm->mmap_sem); vma = find_mergeable_vma(mm, rmap_item->address); - err = try_to_merge_one_page(vma, page, - ZERO_PAGE(rmap_item->address)); + if (vma) { + err = try_to_merge_one_page(vma, page, + ZERO_PAGE(rmap_item->address)); + } else { + /* + * If the vma is out of date, we do not need to + * continue. + */ + err = 0; + } up_read(&mm->mmap_sem); /* * In case of failure, the page was not really empty, so we -- cgit v1.2.3 From 0783ac95b4a09a18e702c8359053762649b34c94 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 20 Apr 2020 18:14:07 -0700 Subject: mm/shmem: fix build without THP Some optimizers don't notice that shmem_punch_compound() is always true (PageTransCompound() being false) without CONFIG_TRANSPARENT_HUGEPAGE==y. Use IS_ENABLED to help them to avoid the BUILD_BUG inside HPAGE_PMD_NR. 
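For context, IS_ENABLED(CONFIG_FOO) evaluates to a compile-time constant (1 when the option is built in or modular, 0 otherwise), so the guarded branch is still parsed and type-checked but is provably dead and gets discarded, and the BUILD_BUG() hidden inside HPAGE_PMD_NR is never emitted. A rough userspace analogue of the pattern (the macro and function names are made up for illustration; build with optimization enabled, as the kernel always does):

#include <stdio.h>

/* Illustrative config switch: 1 when the feature is compiled in, else 0.
 * Stands in for IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE). */
#define FEATURE_ENABLED 0

/* Analogue of BUILD_BUG(): this function is never defined anywhere, so the
 * program only links if every call to it is removed as dead code. */
extern int feature_only_constant(void);

int main(void)
{
	int pages = 1;

	/*
	 * The branch is still parsed and type-checked, but the condition is a
	 * compile-time constant 0, so the compiler drops the branch and the
	 * undefined reference never reaches the linker.
	 */
	if (FEATURE_ENABLED)
		pages = feature_only_constant();

	printf("pages per unit: %d\n", pages);
	return 0;
}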
Fixes: 71725ed10c40 ("mm: huge tmpfs: try to split_huge_page() when punching hole") Reported-by: Randy Dunlap Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Tested-by: Randy Dunlap Acked-by: Randy Dunlap Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2004142339170.10035@eggly.anvils Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index d722eb8303170..1d32420262198 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -952,7 +952,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, VM_BUG_ON_PAGE(PageWriteback(page), page); if (shmem_punch_compound(page, start, end)) truncate_inode_page(mapping, page); - else { + else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Wipe the page and don't get stuck */ clear_highpage(page); flush_dcache_page(page); -- cgit v1.2.3 From bdebd6a2831b6fab69eb85cee74a8ba77f1a1cc2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 20 Apr 2020 18:14:11 -0700 Subject: vmalloc: fix remap_vmalloc_range() bounds checks remap_vmalloc_range() has had various issues with the bounds checks it promises to perform ("This function checks that addr is a valid vmalloc'ed area, and that it is big enough to cover the vma") over time, e.g.: - not detecting pgoff< Signed-off-by: Andrew Morton Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: John Fastabend Cc: KP Singh Link: http://lkml.kernel.org/r/20200415222312.236431-1-jannh@google.com Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 5 +++-- include/linux/vmalloc.h | 2 +- mm/vmalloc.c | 16 +++++++++++++--- samples/vfio-mdev/mdpy.c | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 7dc800cce3543..c663202da8de7 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -266,7 +266,8 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst, if (start < offset + dump->size) { tsz = min(offset + (u64)dump->size - start, (u64)size); buf = dump->buf + start - offset; - if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) { + if (remap_vmalloc_range_partial(vma, dst, buf, 0, + tsz)) { ret = -EFAULT; goto out_unlock; } @@ -624,7 +625,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz; if (remap_vmalloc_range_partial(vma, vma->vm_start + len, - kaddr, tsz)) + kaddr, 0, tsz)) goto fail; size -= tsz; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0507a162ccd0e..a95d3cc74d79b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -137,7 +137,7 @@ extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, - unsigned long size); + unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 399f219544f74..9a8227afa0738 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -3054,6 +3055,7 @@ finished: * @vma: vma to cover * @uaddr: target user address to start at * @kaddr: virtual address of vmalloc kernel memory + * @pgoff: offset from @kaddr to start at * @size: size of map area * * Returns: 0 for 
success, -Exxx on failure @@ -3066,9 +3068,15 @@ finished: * Similar to remap_pfn_range() (see mm/memory.c) */ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, - void *kaddr, unsigned long size) + void *kaddr, unsigned long pgoff, + unsigned long size) { struct vm_struct *area; + unsigned long off; + unsigned long end_index; + + if (check_shl_overflow(pgoff, PAGE_SHIFT, &off)) + return -EINVAL; size = PAGE_ALIGN(size); @@ -3082,8 +3090,10 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) return -EINVAL; - if (kaddr + size > area->addr + get_vm_area_size(area)) + if (check_add_overflow(size, off, &end_index) || + end_index > get_vm_area_size(area)) return -EINVAL; + kaddr += off; do { struct page *page = vmalloc_to_page(kaddr); @@ -3122,7 +3132,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff) { return remap_vmalloc_range_partial(vma, vma->vm_start, - addr + (pgoff << PAGE_SHIFT), + addr, pgoff, vma->vm_end - vma->vm_start); } EXPORT_SYMBOL(remap_vmalloc_range); diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index cc86bf6566e42..9894693f3be17 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -418,7 +418,7 @@ static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) return -EINVAL; return remap_vmalloc_range_partial(vma, vma->vm_start, - mdev_state->memblk, + mdev_state->memblk, 0, vma->vm_end - vma->vm_start); } -- cgit v1.2.3 From ea0dfeb4209b4eab954d6e00ed136bc6b48b380d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 20 Apr 2020 18:14:14 -0700 Subject: shmem: fix possible deadlocks on shmlock_user_lock Recent commit 71725ed10c40 ("mm: huge tmpfs: try to split_huge_page() when punching hole") has allowed syzkaller to probe deeper, uncovering a long-standing lockdep issue between the irq-unsafe shmlock_user_lock, the irq-safe xa_lock on mapping->i_pages, and shmem inode's info->lock which nests inside xa_lock (or tree_lock) since 4.8's shmem_uncharge(). user_shm_lock(), servicing SysV shmctl(SHM_LOCK), wants shmlock_user_lock while its caller shmem_lock() holds info->lock with interrupts disabled; but hugetlbfs_file_setup() calls user_shm_lock() with interrupts enabled, and might be interrupted by a writeback endio wanting xa_lock on i_pages. This may not risk an actual deadlock, since shmem inodes do not take part in writeback accounting, but there are several easy ways to avoid it. Requiring interrupts disabled for shmlock_user_lock would be easy, but it's a high-level global lock for which that seems inappropriate. Instead, recall that the use of info->lock to guard info->flags in shmem_lock() dates from pre-3.1 days, when races with SHMEM_PAGEIN and SHMEM_TRUNCATE could occur: nowadays it serves no purpose, the only flag added or removed is VM_LOCKED itself, and calls to shmem_lock() an inode are already serialized by the caller. Take info->lock out of the chain and the possibility of deadlock or lockdep warning goes away. 
Fixes: 4595ef88d136 ("shmem: make shmem_inode_info::lock irq-safe") Reported-by: syzbot+c8a8197c8852f566b9d9@syzkaller.appspotmail.com Reported-by: syzbot+40b71e145e73f78f81ad@syzkaller.appspotmail.com Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Acked-by: Yang Shi Cc: Yang Shi Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2004161707410.16322@eggly.anvils Link: https://lore.kernel.org/lkml/000000000000e5838c05a3152f53@google.com/ Link: https://lore.kernel.org/lkml/0000000000003712b305a331d3b1@google.com/ Signed-off-by: Linus Torvalds --- mm/shmem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 1d32420262198..8b1560143c3b1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2179,7 +2179,11 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) struct shmem_inode_info *info = SHMEM_I(inode); int retval = -ENOMEM; - spin_lock_irq(&info->lock); + /* + * What serializes the accesses to info->flags? + * ipc_lock_object() when called from shmctl_do_lock(), + * no serialization needed when called from shm_destroy(). + */ if (lock && !(info->flags & VM_LOCKED)) { if (!user_shm_lock(inode->i_size, user)) goto out_nomem; @@ -2194,7 +2198,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) retval = 0; out_nomem: - spin_unlock_irq(&info->lock); return retval; } -- cgit v1.2.3 From 94b7cc01da5a3cc4f3da5e0ff492ef008bb555d6 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 20 Apr 2020 18:14:17 -0700 Subject: mm: shmem: disable interrupt when acquiring info->lock in userfaultfd_copy path Syzbot reported the below lockdep splat: WARNING: possible irq lock inversion dependency detected 5.6.0-rc7-syzkaller #0 Not tainted -------------------------------------------------------- syz-executor.0/10317 just changed the state of lock: ffff888021d16568 (&(&info->lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:338 [inline] ffff888021d16568 (&(&info->lock)->rlock){+.+.}, at: shmem_mfill_atomic_pte+0x1012/0x21c0 mm/shmem.c:2407 but this lock was taken by another, SOFTIRQ-safe lock in the past: (&(&xa->xa_lock)->rlock#5){..-.} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&info->lock)->rlock); local_irq_disable(); lock(&(&xa->xa_lock)->rlock#5); lock(&(&info->lock)->rlock); lock(&(&xa->xa_lock)->rlock#5); *** DEADLOCK *** The full report is quite lengthy, please see: https://lore.kernel.org/linux-mm/alpine.LSU.2.11.2004152007370.13597@eggly.anvils/T/#m813b412c5f78e25ca8c6c7734886ed4de43f241d It is because CPU 0 held info->lock with IRQ enabled in userfaultfd_copy path, then CPU 1 is splitting a THP which held xa_lock and info->lock in IRQ disabled context at the same time. If softirq comes in to acquire xa_lock, the deadlock would be triggered. The fix is to acquire/release info->lock with *_irq version instead of plain spin_{lock,unlock} to make it softirq safe. 
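spin_lock_irq() is spin_lock() plus local interrupt disabling, so nothing that runs in (soft)irq context on the same CPU can come in while the lock is held and spin on a lock chain that can no longer be released. A rough userspace analogue of that rule, using a signal handler in place of a softirq and signal blocking in place of disabling interrupts (all names are illustrative; this is not kernel code):

#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

/* A toy spinlock; lock-free atomics are async-signal-safe, unlike mutexes. */
static atomic_flag lock = ATOMIC_FLAG_INIT;
static volatile sig_atomic_t handled;

static void spin_lock_toy(void)
{
	while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire))
		;	/* spin */
}

static void spin_unlock_toy(void)
{
	atomic_flag_clear_explicit(&lock, memory_order_release);
}

/* Plays the role of softirq context: it also wants the lock. */
static void alarm_handler(int sig)
{
	(void)sig;
	spin_lock_toy();
	spin_unlock_toy();
	handled = 1;
}

int main(void)
{
	sigset_t block, old;

	signal(SIGALRM, alarm_handler);
	sigemptyset(&block);
	sigaddset(&block, SIGALRM);

	/*
	 * Analogue of spin_lock_irq(): mask the "interrupt" before taking the
	 * lock. If SIGALRM were delivered while the lock is held, the handler
	 * would spin on it forever and the lock could never be released, the
	 * single-CPU form of the inversion described above.
	 */
	sigprocmask(SIG_BLOCK, &block, &old);
	alarm(1);			/* the "interrupt" fires in one second */
	spin_lock_toy();
	/* ... critical section: update the inode accounting ... */
	spin_unlock_toy();

	/* Analogue of spin_unlock_irq(): let the pending "interrupt" run now. */
	while (!handled)
		sigsuspend(&old);

	puts("done without deadlocking");
	return 0;
}

In the report above the chain is longer, since the softirq wants xa_lock and info->lock nests inside xa_lock elsewhere, but the rule being applied is the same: once a lock takes part in a dependency chain reachable from (soft)irq context, every acquisition of it needs interrupts disabled.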
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Reported-by: syzbot+e27980339d305f2dbfd9@syzkaller.appspotmail.com Signed-off-by: Yang Shi Signed-off-by: Andrew Morton Tested-by: syzbot+e27980339d305f2dbfd9@syzkaller.appspotmail.com Acked-by: Hugh Dickins Cc: Andrea Arcangeli Link: http://lkml.kernel.org/r/1587061357-122619-1-git-send-email-yang.shi@linux.alibaba.com Signed-off-by: Linus Torvalds --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 8b1560143c3b1..bd8840082c941 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2402,11 +2402,11 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, lru_cache_add_anon(page); - spin_lock(&info->lock); + spin_lock_irq(&info->lock); info->alloced++; inode->i_blocks += BLOCKS_PER_PAGE; shmem_recalc_inode(inode); - spin_unlock(&info->lock); + spin_unlock_irq(&info->lock); inc_mm_counter(dst_mm, mm_counter_file(page)); page_add_file_rmap(page, false); -- cgit v1.2.3 From db973a7289dad24e6c017dcedc6aee886579dc3a Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 20 Apr 2020 18:14:20 -0700 Subject: coredump: fix null pointer dereference on coredump If the core_pattern is set to "|" and any process segfaults then we get a null pointer derefernce while trying to coredump. The call stack shows: RIP: do_coredump+0x628/0x11c0 When the core_pattern has only "|" there is no use of trying the coredump and we can check that while formating the corename and exit with an error. After this change I get: format_corename failed Aborting core Fixes: 315c69261dd3 ("coredump: split pipe command whitespace before expanding template") Reported-by: Matthew Ruffell Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Cc: Paul Wise Cc: Alexander Viro Cc: Neil Horman Cc: Link: http://lkml.kernel.org/r/20200416194612.21418-1-sudipm.mukherjee@gmail.com Signed-off-by: Linus Torvalds --- fs/coredump.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/coredump.c b/fs/coredump.c index f8296a82d01df..408418e6aa131 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -211,6 +211,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, return -ENOMEM; (*argv)[(*argc)++] = 0; ++pat_ptr; + if (!(*pat_ptr)) + return -ENOMEM; } /* Repeat as long as we have more pattern to process and more output -- cgit v1.2.3 From cf01699ee220c38099eb3e43ce3d10690c8b7060 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 20 Apr 2020 18:14:23 -0700 Subject: tools/vm: fix cross-compile build Commit 7ed1c1901fe5 ("tools: fix cross-compile var clobbering") moved the setup of the CC variable to tools/scripts/Makefile.include to make the behavior consistent across all the tools Makefiles. As the vm tools missed the include we end up with the wrong CC in a cross-compiling evironment. Fixes: 7ed1c1901fe5 (tools: fix cross-compile var clobbering) Signed-off-by: Lucas Stach Signed-off-by: Andrew Morton Cc: Martin Kelly Cc: Link: http://lkml.kernel.org/r/20200416104748.25243-1-l.stach@pengutronix.de Signed-off-by: Linus Torvalds --- tools/vm/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/vm/Makefile b/tools/vm/Makefile index 20f6cf04377f0..9860622cbb151 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm tools # +include ../scripts/Makefile.include + TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api -- cgit v1.2.3