From aec44e0f0213e36d4f0868a80cdc5097a510f79d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 4 May 2021 18:33:00 -0700 Subject: hugetlb: pass vma into huge_pte_alloc() and huge_pmd_share() Patch series "hugetlb: Disable huge pmd unshare for uffd-wp", v4. This series tries to disable huge pmd unshare of hugetlbfs backed memory for uffd-wp. Although uffd-wp of hugetlbfs is still during rfc stage, the idea of this series may be needed for multiple tasks (Axel's uffd minor fault series, and Mike's soft dirty series), so I picked it out from the larger series. This patch (of 4): It is a preparation work to be able to behave differently in the per architecture huge_pte_alloc() according to different VMA attributes. Pass it deeper into huge_pmd_share() so that we can avoid the find_vma() call. [peterx@redhat.com: build fix] Link: https://lkml.kernel.org/r/20210304164653.GB397383@xz-x1Link: https://lkml.kernel.org/r/20210218230633.15028-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210218230633.15028-2-peterx@redhat.com Signed-off-by: Peter Xu Suggested-by: Mike Kravetz Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Axel Rasmussen Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/hugetlbpage.c | 4 ++-- arch/ia64/mm/hugetlbpage.c | 3 ++- arch/mips/mm/hugetlbpage.c | 4 ++-- arch/parisc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 3 ++- arch/s390/mm/hugetlbpage.c | 2 +- arch/sh/mm/hugetlbpage.c | 2 +- arch/sparc/mm/hugetlbpage.c | 2 +- 8 files changed, 12 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 55ecf6de9ff7..6e3bcffe2837 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -252,7 +252,7 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { pgd_t *pgdp; @@ -286,7 +286,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } else if (sz == PMD_SIZE) { if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && pud_none(READ_ONCE(*pudp))) - ptep = huge_pmd_share(mm, addr, pudp); + ptep = huge_pmd_share(mm, vma, addr, pudp); else ptep = (pte_t *)pmd_alloc(mm, pudp, addr); } else if (sz == (CONT_PMD_SIZE)) { diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index b331f94d20ac..f993cb36c062 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -25,7 +25,8 @@ unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT; EXPORT_SYMBOL(hpage_shift); pte_t * -huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) { unsigned long taddr = htlbpage_to_page(addr); pgd_t *pgd; diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index b9f76f433617..7eaff5b07873 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -21,8 +21,8 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, - unsigned long sz) +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) { pgd_t *pgd; p4d_t *p4d; diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index 43652de5f139..d1d3990b83f6 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -44,7 +44,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { pgd_t *pgd; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index d142b76d507d..9a75ba078e1b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -106,7 +106,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. */ -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) { pgd_t *pg; p4d_t *p4; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 3b5a4d25ca9b..da36d13ffc16 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -189,7 +189,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return pte; } -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { pgd_t *pgdp; diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 220d7bc43d2b..999ab5916e69 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -21,7 +21,7 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { pgd_t *pgd; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index ad4b42f04988..04d8790f6c32 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -279,7 +279,7 @@ unsigned long pud_leaf_size(pud_t pud) { return 1UL << tte_to_shift(*(pte_t *)&p unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift(*(pte_t *)&pmd); } unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift(pte); } -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { pgd_t *pgd; -- cgit v1.2.3 From c1991e0705d143be773c984b006f2078aa9f2853 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 4 May 2021 18:33:04 -0700 Subject: hugetlb/userfaultfd: forbid huge pmd sharing when uffd enabled Huge pmd sharing could bring problem to userfaultfd. The thing is that userfaultfd is running its logic based on the special bits on page table entries, however the huge pmd sharing could potentially share page table entries for different address ranges. That could cause issues on either: - When sharing huge pmd page tables for an uffd write protected range, the newly mapped huge pmd range will also be write protected unexpectedly, or, - When we try to write protect a range of huge pmd shared range, we'll first do huge_pmd_unshare() in hugetlb_change_protection(), however that also means the UFFDIO_WRITEPROTECT could be silently skipped for the shared region, which could lead to data loss. While at it, a few other things are done altogether: - Move want_pmd_share() from mm/hugetlb.c into linux/hugetlb.h, because that's definitely something that arch code would like to use too - ARM64 currently directly check against CONFIG_ARCH_WANT_HUGE_PMD_SHARE when trying to share huge pmd. Switch to the want_pmd_share() helper. - Move vma_shareable() from huge_pmd_share() into want_pmd_share(). [peterx@redhat.com: fix build with !ARCH_WANT_HUGE_PMD_SHARE] Link: https://lkml.kernel.org/r/20210310185359.88297-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210218231202.15426-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: Axel Rasmussen Tested-by: Naresh Kamboju Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/hugetlbpage.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 6e3bcffe2837..58987a98e179 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -284,8 +284,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, */ ptep = pte_alloc_map(mm, pmdp, addr); } else if (sz == PMD_SIZE) { - if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && - pud_none(READ_ONCE(*pudp))) + if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp))) ptep = huge_pmd_share(mm, vma, addr, pudp); else ptep = (pte_t *)pmd_alloc(mm, pudp, addr); -- cgit v1.2.3 From 4bfb68a0858deae4c40ea585037a3261f0717b0a Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 4 May 2021 18:33:19 -0700 Subject: mm: generalize HUGETLB_PAGE_SIZE_VARIABLE HUGETLB_PAGE_SIZE_VARIABLE need not be defined for each individual platform subscribing it. Instead just make it generic. Link: https://lkml.kernel.org/r/1614914928-22039-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Michael Ellerman [powerpc] Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Christophe Leroy Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/Kconfig | 6 +----- arch/powerpc/Kconfig | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 81e2b893b1e7..fd2cfc65fdc9 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -32,6 +32,7 @@ config IA64 select TTY select HAVE_ARCH_TRACEHOOK select HAVE_VIRT_CPU_ACCOUNTING + select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP @@ -82,11 +83,6 @@ config STACKTRACE_SUPPORT config GENERIC_LOCKBREAK def_bool n -config HUGETLB_PAGE_SIZE_VARIABLE - bool - depends on HUGETLB_PAGE - default y - config GENERIC_CALIBRATE_DELAY bool default y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d060c002595..965278498dd7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -232,6 +232,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE select MMU_GATHER_RCU_TABLE_FREE select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API @@ -416,11 +417,6 @@ config HIGHMEM source "kernel/Kconfig.hz" -config HUGETLB_PAGE_SIZE_VARIABLE - bool - depends on HUGETLB_PAGE && PPC_BOOK3S_64 - default y - config MATH_EMULATION bool "Math emulation" depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE -- cgit v1.2.3 From 7677f7fd8be76659cd2d0db8ff4093bbb51c20e5 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 4 May 2021 18:35:36 -0700 Subject: userfaultfd: add minor fault registration mode Patch series "userfaultfd: add minor fault handling", v9. Overview ======== This series adds a new userfaultfd feature, UFFD_FEATURE_MINOR_HUGETLBFS. When enabled (via the UFFDIO_API ioctl), this feature means that any hugetlbfs VMAs registered with UFFDIO_REGISTER_MODE_MISSING will *also* get events for "minor" faults. By "minor" fault, I mean the following situation: Let there exist two mappings (i.e., VMAs) to the same page(s) (shared memory). One of the mappings is registered with userfaultfd (in minor mode), and the other is not. Via the non-UFFD mapping, the underlying pages have already been allocated & filled with some contents. The UFFD mapping has not yet been faulted in; when it is touched for the first time, this results in what I'm calling a "minor" fault. As a concrete example, when working with hugetlbfs, we have huge_pte_none(), but find_lock_page() finds an existing page. We also add a new ioctl to resolve such faults: UFFDIO_CONTINUE. The idea is, userspace resolves the fault by either a) doing nothing if the contents are already correct, or b) updating the underlying contents using the second, non-UFFD mapping (via memcpy/memset or similar, or something fancier like RDMA, or etc...). In either case, userspace issues UFFDIO_CONTINUE to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". Use Case ======== Consider the use case of VM live migration (e.g. under QEMU/KVM): 1. While a VM is still running, we copy the contents of its memory to a target machine. The pages are populated on the target by writing to the non-UFFD mapping, using the setup described above. The VM is still running (and therefore its memory is likely changing), so this may be repeated several times, until we decide the target is "up to date enough". 2. We pause the VM on the source, and start executing on the target machine. During this gap, the VM's user(s) will *see* a pause, so it is desirable to minimize this window. 3. Between the last time any page was copied from the source to the target, and when the VM was paused, the contents of that page may have changed - and therefore the copy we have on the target machine is out of date. Although we can keep track of which pages are out of date, for VMs with large amounts of memory, it is "slow" to transfer this information to the target machine. We want to resume execution before such a transfer would complete. 4. So, the guest begins executing on the target machine. The first time it touches its memory (via the UFFD-registered mapping), userspace wants to intercept this fault. Userspace checks whether or not the page is up to date, and if not, copies the updated page from the source machine, via the non-UFFD mapping. Finally, whether a copy was performed or not, userspace issues a UFFDIO_CONTINUE ioctl to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". We don't have to do all of the final updates on-demand. The userfaultfd manager can, in the background, also copy over updated pages once it receives the map of which pages are up-to-date or not. Interaction with Existing APIs ============================== Because this is a feature, a registered VMA could potentially receive both missing and minor faults. I spent some time thinking through how the existing API interacts with the new feature: UFFDIO_CONTINUE cannot be used to resolve non-minor faults, as it does not allocate a new page. If UFFDIO_CONTINUE is used on a non-minor fault: - For non-shared memory or shmem, -EINVAL is returned. - For hugetlb, -EFAULT is returned. UFFDIO_COPY and UFFDIO_ZEROPAGE cannot be used to resolve minor faults. Without modifications, the existing codepath assumes a new page needs to be allocated. This is okay, since userspace must have a second non-UFFD-registered mapping anyway, thus there isn't much reason to want to use these in any case (just memcpy or memset or similar). - If UFFDIO_COPY is used on a minor fault, -EEXIST is returned. - If UFFDIO_ZEROPAGE is used on a minor fault, -EEXIST is returned (or -EINVAL in the case of hugetlb, as UFFDIO_ZEROPAGE is unsupported in any case). - UFFDIO_WRITEPROTECT simply doesn't work with shared memory, and returns -ENOENT in that case (regardless of the kind of fault). Future Work =========== This series only supports hugetlbfs. I have a second series in flight to support shmem as well, extending the functionality. This series is more mature than the shmem support at this point, and the functionality works fully on hugetlbfs, so this series can be merged first and then shmem support will follow. This patch (of 6): This feature allows userspace to intercept "minor" faults. By "minor" faults, I mean the following situation: Let there exist two mappings (i.e., VMAs) to the same page(s). One of the mappings is registered with userfaultfd (in minor mode), and the other is not. Via the non-UFFD mapping, the underlying pages have already been allocated & filled with some contents. The UFFD mapping has not yet been faulted in; when it is touched for the first time, this results in what I'm calling a "minor" fault. As a concrete example, when working with hugetlbfs, we have huge_pte_none(), but find_lock_page() finds an existing page. This commit adds the new registration mode, and sets the relevant flag on the VMAs being registered. In the hugetlb fault path, if we find that we have huge_pte_none(), but find_lock_page() does indeed find an existing page, then we have a "minor" fault, and if the VMA has the userfaultfd registration flag, we call into userfaultfd to handle it. This is implemented as a new registration mode, instead of an API feature. This is because the alternative implementation has significant drawbacks [1]. However, doing it this was requires we allocate a VM_* flag for the new registration mode. On 32-bit systems, there are no unused bits, so this feature is only supported on architectures with CONFIG_ARCH_USES_HIGH_VMA_FLAGS. When attempting to register a VMA in MINOR mode on 32-bit architectures, we return -EINVAL. [1] https://lore.kernel.org/patchwork/patch/1380226/ [peterx@redhat.com: fix minor fault page leak] Link: https://lkml.kernel.org/r/20210322175132.36659-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210301222728.176417-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20210301222728.176417-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Chinwen Chang Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Peter Xu Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Rostedt Cc: Steven Price Cc: Vlastimil Babka Cc: Adam Ruprecht Cc: Axel Rasmussen Cc: Cannon Matthews Cc: "Dr . David Alan Gilbert" Cc: David Rientjes Cc: Mina Almasry Cc: Oliver Upton Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 1 + arch/x86/Kconfig | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7f2a80091337..04c69f606537 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -213,6 +213,7 @@ config ARM64 select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD help ARM 64-bit (AArch64) Linux support. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dac15f646f79..1c350e8782ed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,7 @@ config X86 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD + select HAVE_ARCH_USERFAULTFD_MINOR if X86_64 && USERFAULTFD select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_WITHIN_STACK_FRAMES -- cgit v1.2.3 From 575299ea18a8c0575d4c2ef6ad3fa4d41d529d1c Mon Sep 17 00:00:00 2001 From: Saravanan D Date: Tue, 4 May 2021 18:38:03 -0700 Subject: x86/mm: track linear mapping split events To help with debugging the sluggishness caused by TLB miss/reload, we introduce monotonic hugepage [direct mapped] split event counts since system state: SYSTEM_RUNNING to be displayed as part of /proc/vmstat in x86 servers The lifetime split event information will be displayed at the bottom of /proc/vmstat .... swap_ra 0 swap_ra_hit 0 direct_map_level2_splits 94 direct_map_level3_splits 4 nr_unstable 0 .... One of the many lasting sources of direct hugepage splits is kernel tracing (kprobes, tracepoints). Note that the kernel's code segment [512 MB] points to the same physical addresses that have been already mapped in the kernel's direct mapping range. Source : Documentation/x86/x86_64/mm.rst When we enable kernel tracing, the kernel has to modify attributes/permissions of the text segment hugepages that are direct mapped causing them to split. Kernel's direct mapped hugepages do not coalesce back after split and remain in place for the remainder of the lifetime. An instance of direct page splits when we turn on dynamic kernel tracing .... cat /proc/vmstat | grep -i direct_map_level direct_map_level2_splits 784 direct_map_level3_splits 12 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ [pid, comm] = count(); }' cat /proc/vmstat | grep -i direct_map_level direct_map_level2_splits 789 direct_map_level3_splits 12 .... Link: https://lkml.kernel.org/r/20210218235744.1040634-1-saravanand@fb.com Signed-off-by: Saravanan D Acked-by: Tejun Heo Acked-by: Johannes Weiner Acked-by: Dave Hansen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/pat/set_memory.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 427980617557..156cd235659f 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -91,6 +93,12 @@ static void split_page_count(int level) return; direct_pages_count[level]--; + if (system_state == SYSTEM_RUNNING) { + if (level == PG_LEVEL_2M) + count_vm_event(DIRECT_MAP_LEVEL2_SPLIT); + else if (level == PG_LEVEL_1G) + count_vm_event(DIRECT_MAP_LEVEL3_SPLIT); + } direct_pages_count[level - 1] += PTRS_PER_PTE; } -- cgit v1.2.3 From c2280be81de404e99f66c7249496b0355406ed94 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 4 May 2021 18:38:09 -0700 Subject: mm: generalize ARCH_HAS_CACHE_LINE_SIZE Patch series "mm: some config cleanups", v2. This series contains config cleanup patches which reduces code duplication across platforms and also improves maintainability. There is no functional change intended with this series. This patch (of 6): ARCH_HAS_CACHE_LINE_SIZE config has duplicate definitions on platforms that subscribe it. Instead, just make it a generic option which can be selected on applicable platforms. This change reduces code duplication and makes it cleaner. Link: https://lkml.kernel.org/r/1617259448-22529-1-git-send-email-anshuman.khandual@arm.com Link: https://lkml.kernel.org/r/1617259448-22529-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas [arm64] Acked-by: Vineet Gupta [arc] Cc: Will Deacon Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Albert Ou Cc: Alexander Viro Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/Kconfig | 4 +--- arch/arm64/Kconfig | 4 +--- arch/x86/Kconfig | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index bc8d6aecfbbd..fab05f7189c0 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -6,6 +6,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_PTE_SPECIAL @@ -48,9 +49,6 @@ config ARC select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select SET_FS -config ARCH_HAS_CACHE_LINE_SIZE - def_bool y - config TRACE_IRQFLAGS_SUPPORT def_bool y diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 04c69f606537..86dfaac79e49 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX select ARCH_BINFMT_ELF_STATE + select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT @@ -1074,9 +1075,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_HAS_CACHE_LINE_SIZE - def_bool y - config ARCH_HAS_FILTER_PGPROT def_bool y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1c350e8782ed..78c475d8c409 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -316,9 +317,6 @@ config GENERIC_CALIBRATE_DELAY config ARCH_HAS_CPU_RELAX def_bool y -config ARCH_HAS_CACHE_LINE_SIZE - def_bool y - config ARCH_HAS_FILTER_PGPROT def_bool y -- cgit v1.2.3 From 855f9a8e87fe3912a1c00eb63f36880d1ad32e40 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 4 May 2021 18:38:13 -0700 Subject: mm: generalize SYS_SUPPORTS_HUGETLBFS (rename as ARCH_SUPPORTS_HUGETLBFS) SYS_SUPPORTS_HUGETLBFS config has duplicate definitions on platforms that subscribe it. Instead, just make it a generic option which can be selected on applicable platforms. Also rename it as ARCH_SUPPORTS_HUGETLBFS instead. This reduces code duplication and makes it cleaner. Link: https://lkml.kernel.org/r/1617259448-22529-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas [arm64] Acked-by: Palmer Dabbelt [riscv] Acked-by: Michael Ellerman [powerpc] Cc: Russell King Cc: Will Deacon Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Albert Ou Cc: Yoshinori Sato Cc: Rich Felker Cc: Alexander Viro Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 5 +---- arch/arm64/Kconfig | 4 +--- arch/mips/Kconfig | 6 +----- arch/parisc/Kconfig | 5 +---- arch/powerpc/Kconfig | 3 --- arch/powerpc/platforms/Kconfig.cputype | 6 +++--- arch/riscv/Kconfig | 5 +---- arch/sh/Kconfig | 5 +---- 8 files changed, 9 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 085c830d344b..13fa7ecf195c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -31,6 +31,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_MEMTEST @@ -1511,10 +1512,6 @@ config HW_PERF_EVENTS def_bool y depends on ARM_PMU -config SYS_SUPPORTS_HUGETLBFS - def_bool y - depends on ARM_LPAE - config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y depends on ARM_LPAE diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 86dfaac79e49..0fd5d373d7cf 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -73,6 +73,7 @@ config ARM64 select ARCH_USE_QUEUED_SPINLOCKS select ARCH_USE_SYM_ANNOTATIONS select ARCH_SUPPORTS_DEBUG_PAGEALLOC + select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK select ARCH_SUPPORTS_LTO_CLANG if CPU_LITTLE_ENDIAN @@ -1072,9 +1073,6 @@ config HW_PERF_EVENTS def_bool y depends on ARM_PMU -config SYS_SUPPORTS_HUGETLBFS - def_bool y - config ARCH_HAS_FILTER_PGPROT def_bool y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 49a3c9cd1cb2..ed51970c08e7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -19,6 +19,7 @@ config MIPS select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_SUPPORTS_HUGETLBFS if CPU_SUPPORTS_HUGEPAGES select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_LD_ORPHAN_WARN @@ -1287,11 +1288,6 @@ config SYS_SUPPORTS_BIG_ENDIAN config SYS_SUPPORTS_LITTLE_ENDIAN bool -config SYS_SUPPORTS_HUGETLBFS - bool - depends on CPU_SUPPORTS_HUGEPAGES - default y - config MIPS_HUGE_TLB_SUPPORT def_bool HUGETLB_PAGE || TRANSPARENT_HUGEPAGE diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index afc3b8d03572..bde9907bc5b2 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -12,6 +12,7 @@ config PARISC select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_NO_SG_CHAIN + select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select DMA_OPS select RTC_CLASS @@ -138,10 +139,6 @@ config PGTABLE_LEVELS default 3 if 64BIT && PARISC_PAGE_SIZE_4KB default 2 -config SYS_SUPPORTS_HUGETLBFS - def_bool y if PA20 - - menu "Processor type and features" choice diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 965278498dd7..3a2b90e21e69 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -697,9 +697,6 @@ config ARCH_SPARSEMEM_DEFAULT def_bool y depends on PPC_BOOK3S_64 -config SYS_SUPPORTS_HUGETLBFS - bool - config ILLEGAL_POINTER_VALUE hex # This is roughly half way between the top of user space and the bottom diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3ce907523b1e..cec1017813f8 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -40,8 +40,8 @@ config PPC_85xx config PPC_8xx bool "Freescale 8xx" + select ARCH_SUPPORTS_HUGETLBFS select FSL_SOC - select SYS_SUPPORTS_HUGETLBFS select PPC_HAVE_KUEP select PPC_HAVE_KUAP select HAVE_ARCH_VMAP_STACK @@ -95,9 +95,9 @@ config PPC_BOOK3S_64 bool "Server processors" select PPC_FPU select PPC_HAVE_PMU_SUPPORT - select SYS_SUPPORTS_HUGETLBFS select HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE + select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK select PPC_MM_SLICES @@ -278,9 +278,9 @@ config FSL_BOOKE # this is for common code between PPC32 & PPC64 FSL BOOKE config PPC_FSL_BOOK3E bool + select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 select FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI - select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 select PPC_DOORBELL default y if FSL_BOOKE diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 4515a10c5d22..2a0fc12b8d45 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -30,6 +30,7 @@ config RISCV select ARCH_HAS_STRICT_KERNEL_RWX if MMU select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT @@ -165,10 +166,6 @@ config ARCH_WANT_GENERAL_HUGETLB config ARCH_SUPPORTS_UPROBES def_bool y -config SYS_SUPPORTS_HUGETLBFS - depends on MMU - def_bool y - config STACKTRACE_SUPPORT def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index e798e55915c2..a54b0c5de37b 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -101,9 +101,6 @@ config SYS_SUPPORTS_APM_EMULATION bool select ARCH_SUSPEND_POSSIBLE -config SYS_SUPPORTS_HUGETLBFS - bool - config SYS_SUPPORTS_SMP bool @@ -175,12 +172,12 @@ config CPU_SH3 config CPU_SH4 bool + select ARCH_SUPPORTS_HUGETLBFS if MMU select CPU_HAS_INTEVT select CPU_HAS_SR_RB select CPU_HAS_FPU if !CPU_SH4AL_DSP select SH_INTC select SYS_SUPPORTS_SH_TMU - select SYS_SUPPORTS_HUGETLBFS if MMU config CPU_SH4A bool -- cgit v1.2.3 From 91024b3ce247213ee43103dffd629623537a569e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 4 May 2021 18:38:17 -0700 Subject: mm: generalize ARCH_ENABLE_MEMORY_[HOTPLUG|HOTREMOVE] ARCH_ENABLE_MEMORY_[HOTPLUG|HOTREMOVE] configs have duplicate definitions on platforms that subscribe them. Instead, just make them generic options which can be selected on applicable platforms. Link: https://lkml.kernel.org/r/1617259448-22529-4-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas [arm64] Acked-by: Heiko Carstens [s390] Cc: Will Deacon Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Yoshinori Sato Cc: Rich Felker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Albert Ou Cc: Alexander Viro Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Russell King Cc: Thomas Bogendoerfer Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 8 ++------ arch/ia64/Kconfig | 8 ++------ arch/powerpc/Kconfig | 8 ++------ arch/s390/Kconfig | 8 ++------ arch/sh/Kconfig | 2 ++ arch/sh/mm/Kconfig | 8 -------- arch/x86/Kconfig | 10 ++-------- 7 files changed, 12 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0fd5d373d7cf..c0746b2d3fff 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,8 @@ config ARM64 select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX select ARCH_BINFMT_ELF_STATE + select ARCH_ENABLE_MEMORY_HOTPLUG + select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE @@ -311,12 +313,6 @@ config ZONE_DMA32 bool "Support DMA32 zone" if EXPERT default y -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - config SMP def_bool y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index fd2cfc65fdc9..279252e3e0f7 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -13,6 +13,8 @@ config IA64 select ARCH_MIGHT_HAVE_PC_SERIO select ACPI select ACPI_NUMA if NUMA + select ARCH_ENABLE_MEMORY_HOTPLUG + select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_SUPPORTS_ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI @@ -246,12 +248,6 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - config SCHED_SMT bool "SMT scheduler support" depends on SMP diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3a2b90e21e69..d4333049b813 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -118,6 +118,8 @@ config PPC # Please keep this list sorted alphabetically. # select ARCH_32BIT_OFF_T if PPC32 + select ARCH_ENABLE_MEMORY_HOTPLUG + select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE @@ -512,12 +514,6 @@ config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - config PPC64_SUPPORTS_MEMORY_FAILURE bool "Add support for memory hwpoison" depends on PPC_BOOK3S_64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c1ff874e6c2e..f8b356550daa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,6 +60,8 @@ config S390 imply IMA_SECURE_AND_OR_TRUSTED_BOOT select ARCH_32BIT_USTAT_F_TINODE select ARCH_BINFMT_ELF_STATE + select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM + select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -626,12 +628,6 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y if SPARSEMEM - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index a54b0c5de37b..68129537e350 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -2,6 +2,8 @@ config SUPERH def_bool y select ARCH_32BIT_OFF_T + select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM && MMU + select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index 77aa2f802d8d..d551a9cac41e 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -136,14 +136,6 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool y -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - depends on SPARSEMEM && MMU - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - depends on SPARSEMEM && MMU - config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 78c475d8c409..306f76fea837 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,8 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM) + select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VIRTUAL @@ -2427,14 +2429,6 @@ config ARCH_HAS_ADD_PAGES def_bool y depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - depends on X86_64 || (X86_32 && HIGHMEM) - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - depends on MEMORY_HOTPLUG - config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA -- cgit v1.2.3 From 1e866974a15be8921fb01f8c4efa93a5157ef690 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 4 May 2021 18:38:21 -0700 Subject: mm: drop redundant ARCH_ENABLE_[HUGEPAGE|THP]_MIGRATION ARCH_ENABLE_[HUGEPAGE|THP]_MIGRATION configs have duplicate definitions on platforms that subscribe them. Drop these reduntant definitions and instead just select them appropriately. [akpm@linux-foundation.org: s/x86_64/X86_64/, per Oscar] Link: https://lkml.kernel.org/r/1617259448-22529-5-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas [arm64] Cc: Will Deacon Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Albert Ou Cc: Alexander Viro Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 10 ++-------- arch/powerpc/platforms/Kconfig.cputype | 5 +---- arch/x86/Kconfig | 10 ++-------- 3 files changed, 5 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c0746b2d3fff..4297fbca0faa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,8 +11,10 @@ config ARM64 select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX select ARCH_BINFMT_ELF_STATE + select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE @@ -1916,14 +1918,6 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC -config ARCH_ENABLE_HUGEPAGE_MIGRATION - def_bool y - depends on HUGETLB_PAGE && MIGRATION - -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on TRANSPARENT_HUGEPAGE - menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index cec1017813f8..4465b71b2bff 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -96,6 +96,7 @@ config PPC_BOOK3S_64 select PPC_FPU select PPC_HAVE_PMU_SUPPORT select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING @@ -420,10 +421,6 @@ config PPC_PKEY depends on PPC_BOOK3S_64 depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP -config ARCH_ENABLE_HUGEPAGE_MIGRATION - def_bool y - depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION - config PPC_MMU_NOHASH def_bool y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 306f76fea837..ab581af756cd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,8 +60,10 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM) select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG + select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VIRTUAL @@ -2437,14 +2439,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y depends on X86_64 || X86_PAE -config ARCH_ENABLE_HUGEPAGE_MIGRATION - def_bool y - depends on X86_64 && HUGETLB_PAGE && MIGRATION - -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on X86_64 && TRANSPARENT_HUGEPAGE - menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER -- cgit v1.2.3 From 66f24fa766e3a5a194a85af98ff454d8d94b59cf Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 4 May 2021 18:38:25 -0700 Subject: mm: drop redundant ARCH_ENABLE_SPLIT_PMD_PTLOCK ARCH_ENABLE_SPLIT_PMD_PTLOCKS has duplicate definitions on platforms that subscribe it. Drop these redundant definitions and instead just select it on applicable platforms. Link: https://lkml.kernel.org/r/1617259448-22529-6-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas [arm64] Acked-by: Heiko Carstens [s390] Cc: Will Deacon Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Yoshinori Sato Cc: Rich Felker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Albert Ou Cc: Alexander Viro Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Russell King Cc: Thomas Bogendoerfer Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 4 +--- arch/powerpc/platforms/Kconfig.cputype | 5 +---- arch/s390/Kconfig | 4 +--- arch/x86/Kconfig | 5 +---- 4 files changed, 4 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4297fbca0faa..e12fad2e2a15 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -14,6 +14,7 @@ config ARM64 select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VIRTUAL @@ -1074,9 +1075,6 @@ config HW_PERF_EVENTS config ARCH_HAS_FILTER_PGPROT def_bool y -config ARCH_ENABLE_SPLIT_PMD_PTLOCK - def_bool y if PGTABLE_LEVELS > 2 - # Supported by clang >= 7.0 config CC_HAVE_SHADOW_CALL_STACK def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 4465b71b2bff..be0e29f18dd4 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -97,6 +97,7 @@ config PPC_BOOK3S_64 select PPC_HAVE_PMU_SUPPORT select HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION + select ARCH_ENABLE_PMD_SPLIT_PTLOCK select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING @@ -356,10 +357,6 @@ config SPE If in doubt, say Y here. -config ARCH_ENABLE_SPLIT_PMD_PTLOCK - def_bool y - depends on PPC_BOOK3S_64 - config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f8b356550daa..d72989591223 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -62,6 +62,7 @@ config S390 select ARCH_BINFMT_ELF_STATE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_SPLIT_PMD_PTLOCK select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -628,9 +629,6 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y -config ARCH_ENABLE_SPLIT_PMD_PTLOCK - def_bool y - config MAX_PHYSMEM_BITS int "Maximum size of supported physical memory in bits (42-53)" range 42 53 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ab581af756cd..0bbc157e4bc7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -63,6 +63,7 @@ config X86 select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM) select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG + select ARCH_ENABLE_SPLIT_PMD_PTLOCK if X86_64 || X86_PAE select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE @@ -2435,10 +2436,6 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA -config ARCH_ENABLE_SPLIT_PMD_PTLOCK - def_bool y - depends on X86_64 || X86_PAE - menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER -- cgit v1.2.3 From e8003bf66a7a66d8ae3db2c40b2dca180bf942bb Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 4 May 2021 18:38:29 -0700 Subject: mm: drop redundant HAVE_ARCH_TRANSPARENT_HUGEPAGE HAVE_ARCH_TRANSPARENT_HUGEPAGE has duplicate definitions on platforms that subscribe it. Drop these reduntant definitions and instead just select it on applicable platforms. Link: https://lkml.kernel.org/r/1617259448-22529-7-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Arnd Bergmann Acked-by: Vineet Gupta [arc] Cc: Russell King Cc: Albert Ou Cc: Alexander Viro Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Rich Felker Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/Kconfig | 5 +---- arch/arm/Kconfig | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fab05f7189c0..2d98501c0897 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -29,6 +29,7 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4 select HAVE_DEBUG_STACKOVERFLOW select HAVE_DEBUG_KMEMLEAK select HAVE_FUTEX_CMPXCHG if FUTEX @@ -84,10 +85,6 @@ config STACKTRACE_SUPPORT def_bool y select STACKTRACE -config HAVE_ARCH_TRANSPARENT_HUGEPAGE - def_bool y - depends on ARC_MMU_V4 - menu "ARC Architecture Configuration" menu "ARC Platform/SoC/Board" diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 13fa7ecf195c..24804f11302d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -78,6 +78,7 @@ config ARM select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE select HAVE_ARM_SMCCC if CPU_V7 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CONTEXT_TRACKING @@ -1512,10 +1513,6 @@ config HW_PERF_EVENTS def_bool y depends on ARM_PMU -config HAVE_ARCH_TRANSPARENT_HUGEPAGE - def_bool y - depends on ARM_LPAE - config ARCH_WANT_GENERAL_HUGETLB def_bool y -- cgit v1.2.3 From f91ef2223dc425e2e8759a625cffd48dce3503de Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 4 May 2021 18:39:51 -0700 Subject: x86/Kconfig: introduce ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE Enable x86_64 platform to use the MHP_MEMMAP_ON_MEMORY feature. Link: https://lkml.kernel.org/r/20210421102701.25051-8-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: David Hildenbrand Acked-by: Michal Hocko Cc: Anshuman Khandual Cc: Pavel Tatashin Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0bbc157e4bc7..0045e1b44190 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2432,6 +2432,9 @@ config ARCH_HAS_ADD_PAGES def_bool y depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG +config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE + def_bool y + config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA -- cgit v1.2.3 From ca6e51d592d20180374366e71bb0972de002d509 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 4 May 2021 18:39:54 -0700 Subject: arm64/Kconfig: introduce ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE Enable arm64 platform to use the MHP_MEMMAP_ON_MEMORY feature. Link: https://lkml.kernel.org/r/20210421102701.25051-9-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: David Hildenbrand Cc: Anshuman Khandual Cc: Michal Hocko Cc: Pavel Tatashin Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e12fad2e2a15..f0b17d758912 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -316,6 +316,9 @@ config ZONE_DMA32 bool "Support DMA32 zone" if EXPERT default y +config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE + def_bool y + config SMP def_bool y -- cgit v1.2.3