diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 1 | ||||
-rw-r--r-- | mm/fadvise.c | 10 | ||||
-rw-r--r-- | mm/huge_memory.c | 5 | ||||
-rw-r--r-- | mm/kasan/kasan.c | 2 | ||||
-rw-r--r-- | mm/khugepaged.c | 20 | ||||
-rw-r--r-- | mm/kmemleak.c | 12 | ||||
-rw-r--r-- | mm/ksm.c | 28 | ||||
-rw-r--r-- | mm/mempolicy.c | 36 | ||||
-rw-r--r-- | mm/swapfile.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 22 |
10 files changed, 116 insertions, 24 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 77e5862a1ed2..ef45be9d5f40 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -666,6 +666,7 @@ config DEFERRED_STRUCT_PAGE_INIT depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT depends on NO_BOOTMEM && MEMORY_HOTPLUG depends on !FLATMEM + depends on !NEED_PER_CPU_KM help Ordinarily all struct pages are initialised during early boot in a single thread. On very large machines this can take a considerable diff --git a/mm/fadvise.c b/mm/fadvise.c index 6c707bfe02fd..27fc9ad267ac 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -126,7 +126,15 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) */ start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; end_index = (endbyte >> PAGE_SHIFT); - if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) { + /* + * The page at end_index will be inclusively discarded + * by invalidate_mapping_pages(), so subtracting 1 from + * end_index means we will skip the last page. But if endbyte + * is page aligned or is at the end of file, we should not skip + * that page - discarding the last page is safe enough. 
+ */ + if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK && + endbyte != inode->i_size - 1) { /* First page is tricky as 0 - 1 = -1, but pgoff_t * is unsigned, so the end_index >= start_index * check below would be true and we'll discard the whole diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e2982ea26090..724372866e67 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -542,7 +542,8 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) { + if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg, + true)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1060,7 +1061,7 @@ alloc: } if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm, - huge_gfp, &memcg, true))) { + huge_gfp | __GFP_NORETRY, &memcg, true))) { put_page(new_page); split_huge_pmd(vma, fe->pmd, fe->address); if (page) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 0e9505f66ec1..73c258129257 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -800,5 +800,5 @@ static int __init kasan_memhotplug_init(void) return 0; } -module_init(kasan_memhotplug_init); +core_initcall(kasan_memhotplug_init); #endif diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 898eb26f5dc8..1df37ee996d5 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -963,7 +963,9 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + /* Do not oom kill for khugepaged charges */ + if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, + &memcg, true))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } @@ -1323,7 +1325,9 @@ static void collapse_shmem(struct mm_struct *mm, goto out; } - if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + /* Do not oom kill for khugepaged charges */ + if 
(unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, + &memcg, true))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out; } @@ -1678,10 +1682,14 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, spin_unlock(&khugepaged_mm_lock); mm = mm_slot->mm; - down_read(&mm->mmap_sem); - if (unlikely(khugepaged_test_exit(mm))) - vma = NULL; - else + /* + * Don't wait for semaphore (to avoid long wait times). Just move to + * the next mm on the list. + */ + vma = NULL; + if (unlikely(!down_read_trylock(&mm->mmap_sem))) + goto breakouterloop_mmap_sem; + if (likely(!khugepaged_test_exit(mm))) vma = find_vma(mm, khugepaged_scan.address); progress++; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 20cf3be9a5e8..9e66449ed91f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1577,8 +1577,7 @@ static void start_scan_thread(void) } /* - * Stop the automatic memory scanning thread. This function must be called - * with the scan_mutex held. + * Stop the automatic memory scanning thread. */ static void stop_scan_thread(void) { @@ -1841,12 +1840,15 @@ static void kmemleak_do_cleanup(struct work_struct *work) { stop_scan_thread(); + mutex_lock(&scan_mutex); /* - * Once the scan thread has stopped, it is safe to no longer track - * object freeing. Ordering of the scan thread stopping and the memory - * accesses below is guaranteed by the kthread_stop() function. + * Once it is made sure that kmemleak_scan has stopped, it is safe to no + * longer track object freeing. Ordering of the scan thread stopping and + * the memory accesses below is guaranteed by the kthread_stop() + * function. 
 */ kmemleak_free_enabled = 0; + mutex_unlock(&scan_mutex); if (!kmemleak_found_leaks) __kmemleak_do_cleanup(); diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1469,8 +1469,22 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) tree_rmap_item = unstable_tree_search_insert(rmap_item, page, &tree_page); if (tree_rmap_item) { + bool split; + kpage = try_to_merge_two_pages(rmap_item, page, tree_rmap_item, tree_page); + /* + * If both pages we tried to merge belong to the same compound + * page, then we actually ended up increasing the reference + * count of the same compound page twice, and split_huge_page + * failed. + * Here we set a flag if that happened, and we use it later to + * try split_huge_page again. Since we call put_page right + * afterwards, the reference count will be correct and + * split_huge_page should succeed. + */ + split = PageTransCompound(page) + && compound_head(page) == compound_head(tree_page); put_page(tree_page); if (kpage) { /* @@ -1495,6 +1509,20 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) break_cow(tree_rmap_item); break_cow(rmap_item); } + } else if (split) { + /* + * We are here if we tried to merge two pages and + * failed because they both belonged to the same + * compound page. We will split the page now, but no + * merging will take place. + * We do not want to add the cost of a full lock; if + * the page is locked, it is better to skip it and + * perhaps try again later. 
+ */ + if (!trylock_page(page)) + return; + split_huge_page(page); + unlock_page(page); } } } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a8ab5e73dc61..69c4a0c92ebb 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1264,6 +1264,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, unsigned long maxnode) { unsigned long k; + unsigned long t; unsigned long nlongs; unsigned long endmask; @@ -1280,13 +1281,19 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, else endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1; - /* When the user specified more nodes than supported just check - if the non supported part is all zero. */ + /* + * When the user specified more nodes than supported just check + * if the non supported part is all zero. + * + * If maxnode has more longs than MAX_NUMNODES, check + * the bits in that area first. And then go through to + * check the remaining bits which are equal to or bigger than MAX_NUMNODES. + * Otherwise, just check bits [MAX_NUMNODES, maxnode). 
+ */ if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) { if (nlongs > PAGE_SIZE/sizeof(long)) return -EINVAL; for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) { - unsigned long t; if (get_user(t, nmask + k)) return -EFAULT; if (k == nlongs - 1) { @@ -1299,6 +1306,16 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, endmask = ~0UL; } + if (maxnode > MAX_NUMNODES && MAX_NUMNODES % BITS_PER_LONG != 0) { + unsigned long valid_mask = endmask; + + valid_mask &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1); + if (get_user(t, nmask + nlongs - 1)) + return -EFAULT; + if (t & valid_mask) + return -EINVAL; + } + if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long))) return -EFAULT; nodes_addr(*nodes)[nlongs-1] &= endmask; @@ -1425,10 +1442,14 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, goto out_put; } - if (!nodes_subset(*new, node_states[N_MEMORY])) { - err = -EINVAL; + task_nodes = cpuset_mems_allowed(current); + nodes_and(*new, *new, task_nodes); + if (nodes_empty(*new)) + goto out_put; + + nodes_and(*new, *new, node_states[N_MEMORY]); + if (nodes_empty(*new)) goto out_put; - } err = security_task_movememory(task); if (err) @@ -2138,6 +2159,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) case MPOL_INTERLEAVE: return !!nodes_equal(a->v.nodes, b->v.nodes); case MPOL_PREFERRED: + /* a's ->flags is the same as b's */ + if (a->flags & MPOL_F_LOCAL) + return true; return a->v.preferred_node == b->v.preferred_node; default: BUG(); diff --git a/mm/swapfile.c b/mm/swapfile.c index d76b2a18f044..79c03ecd31c8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2271,6 +2271,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p, maxpages = swp_offset(pte_to_swp_entry( swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; last_page = swap_header->info.last_page; + if (!last_page) { + pr_warn("Empty swap-file\n"); + return 0; + } if (last_page > maxpages) { pr_warn("Truncating oversized 
swap area, only using %luk out of %luk\n", maxpages << (PAGE_SHIFT - 10), diff --git a/mm/vmscan.c b/mm/vmscan.c index 557ad1367595..2d4b6478237b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1374,6 +1374,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) if (PageDirty(page)) { struct address_space *mapping; + bool migrate_dirty; /* ISOLATE_CLEAN means only clean pages */ if (mode & ISOLATE_CLEAN) @@ -1382,10 +1383,19 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) /* * Only pages without mappings or that have a * ->migratepage callback are possible to migrate - * without blocking + * without blocking. However, we can be racing with + * truncation so it's necessary to lock the page + * to stabilise the mapping as truncation holds + * the page lock until after the page is removed + * from the page cache. */ + if (!trylock_page(page)) + return ret; + mapping = page_mapping(page); - if (mapping && !mapping->a_ops->migratepage) + migrate_dirty = mapping && mapping->a_ops->migratepage; + unlock_page(page); + if (!migrate_dirty) return ret; } } @@ -3847,7 +3857,13 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) */ int page_evictable(struct page *page) { - return !mapping_unevictable(page_mapping(page)) && !PageMlocked(page); + int ret; + + /* Prevent address_space of inode and swap cache from being freed */ + rcu_read_lock(); + ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page); + rcu_read_unlock(); + return ret; } #ifdef CONFIG_SHMEM |