about | summary | refs | log | tree | commit | diff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/Kconfig 1
-rw-r--r-- mm/fadvise.c 10
-rw-r--r-- mm/huge_memory.c 5
-rw-r--r-- mm/kasan/kasan.c 2
-rw-r--r-- mm/khugepaged.c 20
-rw-r--r-- mm/kmemleak.c 12
-rw-r--r-- mm/ksm.c 28
-rw-r--r-- mm/mempolicy.c 36
-rw-r--r-- mm/swapfile.c 4
-rw-r--r-- mm/vmscan.c 22
10 files changed, 116 insertions, 24 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 77e5862a1ed2..ef45be9d5f40 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -666,6 +666,7 @@ config DEFERRED_STRUCT_PAGE_INIT
depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
depends on NO_BOOTMEM && MEMORY_HOTPLUG
depends on !FLATMEM
+ depends on !NEED_PER_CPU_KM
help
Ordinarily all struct pages are initialised during early boot in a
single thread. On very large machines this can take a considerable
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 6c707bfe02fd..27fc9ad267ac 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -126,7 +126,15 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
*/
start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
end_index = (endbyte >> PAGE_SHIFT);
- if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
+ /*
+ * invalidate_mapping_pages() discards the page at end_index
+ * inclusively, so subtracting 1 from end_index means we will
+ * skip the last page. But if endbyte is the last byte of a page
+ * or the last byte of the file, we should not skip that page -
+ * discarding the last page is safe enough.
+ */
+ if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
+ endbyte != inode->i_size - 1) {
/* First page is tricky as 0 - 1 = -1, but pgoff_t
* is unsigned, so the end_index >= start_index
* check below would be true and we'll discard the whole
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e2982ea26090..724372866e67 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -542,7 +542,8 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
VM_BUG_ON_PAGE(!PageCompound(page), page);
- if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+ if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+ true)) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1060,7 +1061,7 @@ alloc:
}
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
- huge_gfp, &memcg, true))) {
+ huge_gfp | __GFP_NORETRY, &memcg, true))) {
put_page(new_page);
split_huge_pmd(vma, fe->pmd, fe->address);
if (page)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 0e9505f66ec1..73c258129257 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -800,5 +800,5 @@ static int __init kasan_memhotplug_init(void)
return 0;
}
-module_init(kasan_memhotplug_init);
+core_initcall(kasan_memhotplug_init);
#endif
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 898eb26f5dc8..1df37ee996d5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -963,7 +963,9 @@ static void collapse_huge_page(struct mm_struct *mm,
goto out_nolock;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out_nolock;
}
@@ -1323,7 +1325,9 @@ static void collapse_shmem(struct mm_struct *mm,
goto out;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out;
}
@@ -1678,10 +1682,14 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
spin_unlock(&khugepaged_mm_lock);
mm = mm_slot->mm;
- down_read(&mm->mmap_sem);
- if (unlikely(khugepaged_test_exit(mm)))
- vma = NULL;
- else
+ /*
+ * Don't wait for semaphore (to avoid long wait times). Just move to
+ * the next mm on the list.
+ */
+ vma = NULL;
+ if (unlikely(!down_read_trylock(&mm->mmap_sem)))
+ goto breakouterloop_mmap_sem;
+ if (likely(!khugepaged_test_exit(mm)))
vma = find_vma(mm, khugepaged_scan.address);
progress++;
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 20cf3be9a5e8..9e66449ed91f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1577,8 +1577,7 @@ static void start_scan_thread(void)
}
/*
- * Stop the automatic memory scanning thread. This function must be called
- * with the scan_mutex held.
+ * Stop the automatic memory scanning thread.
*/
static void stop_scan_thread(void)
{
@@ -1841,12 +1840,15 @@ static void kmemleak_do_cleanup(struct work_struct *work)
{
stop_scan_thread();
+ mutex_lock(&scan_mutex);
/*
- * Once the scan thread has stopped, it is safe to no longer track
- * object freeing. Ordering of the scan thread stopping and the memory
- * accesses below is guaranteed by the kthread_stop() function.
+ * Once it is made sure that kmemleak_scan has stopped, it is safe to no
+ * longer track object freeing. Ordering of the scan thread stopping and
+ * the memory accesses below is guaranteed by the kthread_stop()
+ * function.
*/
kmemleak_free_enabled = 0;
+ mutex_unlock(&scan_mutex);
if (!kmemleak_found_leaks)
__kmemleak_do_cleanup();
diff --git a/mm/ksm.c b/mm/ksm.c
index caa54a55a357..614b2cce9ad7 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1469,8 +1469,22 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
tree_rmap_item =
unstable_tree_search_insert(rmap_item, page, &tree_page);
if (tree_rmap_item) {
+ bool split;
+
kpage = try_to_merge_two_pages(rmap_item, page,
tree_rmap_item, tree_page);
+ /*
+ * If both pages we tried to merge belong to the same compound
+ * page, then we actually ended up increasing the reference
+ * count of the same compound page twice, and split_huge_page
+ * failed.
+ * Here we set a flag if that happened, and we use it later to
+ * try split_huge_page again. Since we call put_page right
+ * afterwards, the reference count will be correct and
+ * split_huge_page should succeed.
+ */
+ split = PageTransCompound(page)
+ && compound_head(page) == compound_head(tree_page);
put_page(tree_page);
if (kpage) {
/*
@@ -1495,6 +1509,20 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
break_cow(tree_rmap_item);
break_cow(rmap_item);
}
+ } else if (split) {
+ /*
+ * We are here if we tried to merge two pages and
+ * failed because they both belonged to the same
+ * compound page. We will split the page now, but no
+ * merging will take place.
+ * We do not want to add the cost of a full lock; if
+ * the page is locked, it is better to skip it and
+ * perhaps try again later.
+ */
+ if (!trylock_page(page))
+ return;
+ split_huge_page(page);
+ unlock_page(page);
}
}
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a8ab5e73dc61..69c4a0c92ebb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1264,6 +1264,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
unsigned long maxnode)
{
unsigned long k;
+ unsigned long t;
unsigned long nlongs;
unsigned long endmask;
@@ -1280,13 +1281,19 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
else
endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;
- /* When the user specified more nodes than supported just check
- if the non supported part is all zero. */
+ /*
+ * When the user specified more nodes than supported just check
+ * if the non supported part is all zero.
+ *
+ * If maxnode spans more longs than MAX_NUMNODES does, check
+ * the bits in those extra longs first, then go on to check
+ * the remaining bits at or above MAX_NUMNODES.
+ * Otherwise, just check bits [MAX_NUMNODES, maxnode).
+ */
if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
if (nlongs > PAGE_SIZE/sizeof(long))
return -EINVAL;
for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
- unsigned long t;
if (get_user(t, nmask + k))
return -EFAULT;
if (k == nlongs - 1) {
@@ -1299,6 +1306,16 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
endmask = ~0UL;
}
+ if (maxnode > MAX_NUMNODES && MAX_NUMNODES % BITS_PER_LONG != 0) {
+ unsigned long valid_mask = endmask;
+
+ valid_mask &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1);
+ if (get_user(t, nmask + nlongs - 1))
+ return -EFAULT;
+ if (t & valid_mask)
+ return -EINVAL;
+ }
+
if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
return -EFAULT;
nodes_addr(*nodes)[nlongs-1] &= endmask;
@@ -1425,10 +1442,14 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
goto out_put;
}
- if (!nodes_subset(*new, node_states[N_MEMORY])) {
- err = -EINVAL;
+ task_nodes = cpuset_mems_allowed(current);
+ nodes_and(*new, *new, task_nodes);
+ if (nodes_empty(*new))
+ goto out_put;
+
+ nodes_and(*new, *new, node_states[N_MEMORY]);
+ if (nodes_empty(*new))
goto out_put;
- }
err = security_task_movememory(task);
if (err)
@@ -2138,6 +2159,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
case MPOL_INTERLEAVE:
return !!nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
+ /* a's ->flags is the same as b's */
+ if (a->flags & MPOL_F_LOCAL)
+ return true;
return a->v.preferred_node == b->v.preferred_node;
default:
BUG();
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d76b2a18f044..79c03ecd31c8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2271,6 +2271,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
maxpages = swp_offset(pte_to_swp_entry(
swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
last_page = swap_header->info.last_page;
+ if (!last_page) {
+ pr_warn("Empty swap-file\n");
+ return 0;
+ }
if (last_page > maxpages) {
pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
maxpages << (PAGE_SHIFT - 10),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 557ad1367595..2d4b6478237b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1374,6 +1374,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
if (PageDirty(page)) {
struct address_space *mapping;
+ bool migrate_dirty;
/* ISOLATE_CLEAN means only clean pages */
if (mode & ISOLATE_CLEAN)
@@ -1382,10 +1383,19 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
/*
* Only pages without mappings or that have a
* ->migratepage callback are possible to migrate
- * without blocking
+ * without blocking. However, we can be racing with
+ * truncation so it's necessary to lock the page
+ * to stabilise the mapping as truncation holds
+ * the page lock until after the page is removed
+ * from the page cache.
*/
+ if (!trylock_page(page))
+ return ret;
+
mapping = page_mapping(page);
- if (mapping && !mapping->a_ops->migratepage)
+ migrate_dirty = mapping && mapping->a_ops->migratepage;
+ unlock_page(page);
+ if (!migrate_dirty)
return ret;
}
}
@@ -3847,7 +3857,13 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
*/
int page_evictable(struct page *page)
{
- return !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
+ int ret;
+
+ /* Prevent address_space of inode and swap cache from being freed */
+ rcu_read_lock();
+ ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
+ rcu_read_unlock();
+ return ret;
}
#ifdef CONFIG_SHMEM