summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c2
-rw-r--r--mm/balloon_compaction.c4
-rw-r--r--mm/compaction.c10
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/memcontrol.c81
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/memory.c54
-rw-r--r--mm/migrate.c10
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mmap.c90
-rw-r--r--mm/page-writeback.c6
-rw-r--r--mm/page_alloc.c48
-rw-r--r--mm/page_isolation.c8
-rw-r--r--mm/pgtable-generic.c4
-rw-r--r--mm/process_vm_access.c2
-rw-r--r--mm/shmem.c9
-rw-r--r--mm/slub.c22
-rw-r--r--mm/vmscan.c45
-rw-r--r--mm/zsmalloc.c14
-rw-r--r--mm/zswap.c8
20 files changed, 264 insertions, 163 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7340353f8aea..cbe6f0b96f29 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -989,7 +989,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
* here rather than calling cond_resched().
*/
if (current->flags & PF_WQ_WORKER)
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
else
cond_resched();
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index d3116be5a00f..300117f1a08f 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -61,6 +61,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
bool dequeued_page;
dequeued_page = false;
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) {
/*
* Block others from accessing the 'page' while we get around
@@ -75,15 +76,14 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
continue;
}
#endif
- spin_lock_irqsave(&b_dev_info->pages_lock, flags);
balloon_page_delete(page);
__count_vm_event(BALLOON_DEFLATE);
- spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
unlock_page(page);
dequeued_page = true;
break;
}
}
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
if (!dequeued_page) {
/*
diff --git a/mm/compaction.c b/mm/compaction.c
index de3e1e71cd9f..7881e072dc33 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -880,16 +880,8 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
ISOLATE_UNEVICTABLE);
- /*
- * In case of fatal failure, release everything that might
- * have been isolated in the previous iteration, and signal
- * the failure back to caller.
- */
- if (!pfn) {
- putback_movable_pages(&cc->migratepages);
- cc->nr_migratepages = 0;
+ if (!pfn)
break;
- }
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
break;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 62fe06bb7d04..530e6427f823 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2134,10 +2134,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
* page fault if needed.
*/
return 0;
- if (vma->vm_ops)
+ if (vma->vm_ops || (vm_flags & VM_NO_THP))
/* khugepaged not yet working on file or special mappings */
return 0;
- VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;
if (hstart < hend)
@@ -2498,8 +2497,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
return false;
if (is_vma_temporary_stack(vma))
return false;
- VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
- return true;
+ return !(vma->vm_flags & VM_NO_THP);
}
static void collapse_huge_page(struct mm_struct *mm,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5c714f33f494..b7d8315541e3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -196,6 +196,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
spinlock_t lock; /* for from, to */
+ struct mm_struct *mm;
struct mem_cgroup *from;
struct mem_cgroup *to;
unsigned long flags;
@@ -1332,7 +1333,7 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
return limit;
}
-static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
int order)
{
struct oom_control oc = {
@@ -1410,6 +1411,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
}
unlock:
mutex_unlock(&oom_lock);
+ return chosen;
}
#if MAX_NUMNODES > 1
@@ -4799,6 +4801,8 @@ static void __mem_cgroup_clear_mc(void)
static void mem_cgroup_clear_mc(void)
{
+ struct mm_struct *mm = mc.mm;
+
/*
* we must clear moving_task before waking up waiters at the end of
* task migration.
@@ -4808,7 +4812,10 @@ static void mem_cgroup_clear_mc(void)
spin_lock(&mc.lock);
mc.from = NULL;
mc.to = NULL;
+ mc.mm = NULL;
spin_unlock(&mc.lock);
+
+ mmput(mm);
}
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4865,6 +4872,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
VM_BUG_ON(mc.moved_swap);
spin_lock(&mc.lock);
+ mc.mm = mm;
mc.from = from;
mc.to = memcg;
mc.flags = move_flags;
@@ -4874,8 +4882,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
ret = mem_cgroup_precharge_mc(mm);
if (ret)
mem_cgroup_clear_mc();
+ } else {
+ mmput(mm);
}
- mmput(mm);
return ret;
}
@@ -4989,11 +4998,11 @@ put: /* get_mctgt_type() gets the page */
return ret;
}
-static void mem_cgroup_move_charge(struct mm_struct *mm)
+static void mem_cgroup_move_charge(void)
{
struct mm_walk mem_cgroup_move_charge_walk = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
- .mm = mm,
+ .mm = mc.mm,
};
lru_add_drain_all();
@@ -5005,7 +5014,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
atomic_inc(&mc.from->moving_account);
synchronize_rcu();
retry:
- if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+ if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
/*
* Someone who are holding the mmap_sem might be waiting in
* waitq. So we cancel all extra charges, wake up all waiters,
@@ -5022,23 +5031,16 @@ retry:
* additional charge, the page walk just aborts.
*/
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
- up_read(&mm->mmap_sem);
+ up_read(&mc.mm->mmap_sem);
atomic_dec(&mc.from->moving_account);
}
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
{
- struct cgroup_subsys_state *css;
- struct task_struct *p = cgroup_taskset_first(tset, &css);
- struct mm_struct *mm = get_task_mm(p);
-
- if (mm) {
- if (mc.to)
- mem_cgroup_move_charge(mm);
- mmput(mm);
- }
- if (mc.to)
+ if (mc.to) {
+ mem_cgroup_move_charge();
mem_cgroup_clear_mc();
+ }
}
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -5052,7 +5054,7 @@ static int mem_cgroup_allow_attach(struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
{
}
#endif
@@ -5130,6 +5132,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long nr_pages;
unsigned long high;
int err;
@@ -5140,6 +5143,11 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
memcg->high = high;
+ nr_pages = page_counter_read(&memcg->memory);
+ if (nr_pages > high)
+ try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
+ GFP_KERNEL, true);
+
memcg_wb_domain_size_changed(memcg);
return nbytes;
}
@@ -5161,6 +5169,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned int nr_reclaims = MEM_CGROUP_RECLAIM_RETRIES;
+ bool drained = false;
unsigned long max;
int err;
@@ -5169,9 +5179,36 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (err)
return err;
- err = mem_cgroup_resize_limit(memcg, max);
- if (err)
- return err;
+ xchg(&memcg->memory.limit, max);
+
+ for (;;) {
+ unsigned long nr_pages = page_counter_read(&memcg->memory);
+
+ if (nr_pages <= max)
+ break;
+
+ if (signal_pending(current)) {
+ err = -EINTR;
+ break;
+ }
+
+ if (!drained) {
+ drain_all_stock(memcg);
+ drained = true;
+ continue;
+ }
+
+ if (nr_reclaims) {
+ if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
+ GFP_KERNEL, true))
+ nr_reclaims--;
+ continue;
+ }
+
+ mem_cgroup_events(memcg, MEMCG_OOM, 1);
+ if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
+ break;
+ }
memcg_wb_domain_size_changed(memcg);
return nbytes;
@@ -5231,7 +5268,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
- .attach = mem_cgroup_move_task,
+ .post_attach = mem_cgroup_move_task,
.allow_attach = mem_cgroup_allow_attach,
.bind = mem_cgroup_bind,
.dfl_cftypes = memory_files,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8424b64711ac..750b7893ee3a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1572,7 +1572,7 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
* Did it turn free?
*/
ret = __get_any_page(page, pfn, 0);
- if (!PageLRU(page)) {
+ if (ret == 1 && !PageLRU(page)) {
/* Drop page reference which is from __get_any_page() */
put_hwpoison_page(page);
pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
diff --git a/mm/memory.c b/mm/memory.c
index c387430f06c3..76dcee317714 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -797,6 +797,46 @@ out:
return pfn_to_page(pfn);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t pmd)
+{
+ unsigned long pfn = pmd_pfn(pmd);
+
+ /*
+ * There is no pmd_special() but there may be special pmds, e.g.
+ * in a direct-access (dax) mapping, so let's just replicate the
+ * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+ */
+ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+ if (vma->vm_flags & VM_MIXEDMAP) {
+ if (!pfn_valid(pfn))
+ return NULL;
+ goto out;
+ } else {
+ unsigned long off;
+ off = (addr - vma->vm_start) >> PAGE_SHIFT;
+ if (pfn == vma->vm_pgoff + off)
+ return NULL;
+ if (!is_cow_mapping(vma->vm_flags))
+ return NULL;
+ }
+ }
+
+ if (is_zero_pfn(pfn))
+ return NULL;
+ if (unlikely(pfn > highest_memmap_pfn))
+ return NULL;
+
+ /*
+ * NOTE! We still have PageReserved() pages in the page tables.
+ * eg. VDSO mappings can cause them to exist.
+ */
+out:
+ return pfn_to_page(pfn);
+}
+#endif
+
/*
* copy one vm_area from one task to the other. Assumes the page tables
* already present in the new task to be cleared in the whole range
@@ -3399,8 +3439,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(pmd_none(*pmd)) &&
unlikely(__pte_alloc(mm, vma, pmd, address)))
return VM_FAULT_OOM;
- /* if an huge pmd materialized from under us just retry later */
- if (unlikely(pmd_trans_huge(*pmd)))
+ /*
+ * If a huge pmd materialized under us just retry later. Use
+ * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+ * didn't become pmd_trans_huge under us and then back to pmd_none, as
+ * a result of MADV_DONTNEED running immediately after a huge pmd fault
+ * in a different thread of this mm, in turn leading to a misleading
+ * pmd_trans_huge() retval. All we have to ensure is that it is a
+ * regular pmd that we can walk with pte_offset_map() and we can do that
+ * through an atomic read in C, which is what pmd_trans_unstable()
+ * provides.
+ */
+ if (unlikely(pmd_trans_unstable(pmd)))
return 0;
/*
* A regular pmd is established and it can't morph into a huge pmd
diff --git a/mm/migrate.c b/mm/migrate.c
index 7890d0bb5e23..bbeb0b71fcf4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -963,7 +963,13 @@ out:
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
/* Soft-offlined page shouldn't go through lru cache list */
- if (reason == MR_MEMORY_FAILURE) {
+ if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+ /*
+ * With this release, we free successfully migrated
+ * page and set PG_HWPoison on just freed page
+ * intentionally. Although it's rather weird, it's how
+ * HWPoison flag works at the moment.
+ */
put_page(page);
if (!test_set_page_hwpoison(page))
num_poisoned_pages_inc();
@@ -1578,7 +1584,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
(GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE | __GFP_NOMEMALLOC |
__GFP_NORETRY | __GFP_NOWARN) &
- ~(__GFP_IO | __GFP_FS), 0);
+ ~__GFP_RECLAIM, 0);
return newpage;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index a6617735f2f4..d843bc9d32dd 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -172,7 +172,7 @@ static void __munlock_isolation_failed(struct page *page)
*/
unsigned int munlock_vma_page(struct page *page)
{
- unsigned int nr_pages;
+ int nr_pages;
struct zone *zone = page_zone(page);
/* For try_to_munlock() and to serialize with page migration */
diff --git a/mm/mmap.c b/mm/mmap.c
index 2b1143008793..a089cca8d79a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -453,12 +453,16 @@ static void validate_mm(struct mm_struct *mm)
struct vm_area_struct *vma = mm->mmap;
while (vma) {
+ struct anon_vma *anon_vma = vma->anon_vma;
struct anon_vma_chain *avc;
- vma_lock_anon_vma(vma);
- list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
- anon_vma_interval_tree_verify(avc);
- vma_unlock_anon_vma(vma);
+ if (anon_vma) {
+ anon_vma_lock_read(anon_vma);
+ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+ anon_vma_interval_tree_verify(avc);
+ anon_vma_unlock_read(anon_vma);
+ }
+
highest_address = vma->vm_end;
vma = vma->vm_next;
i++;
@@ -2168,32 +2172,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
- int error;
+ int error = 0;
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
- /*
- * We must make sure the anon_vma is allocated
- * so that the anon_vma locking is not a noop.
- */
+ /* Guard against wrapping around to address 0. */
+ if (address < PAGE_ALIGN(address+4))
+ address = PAGE_ALIGN(address+4);
+ else
+ return -ENOMEM;
+
+ /* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
return -ENOMEM;
- vma_lock_anon_vma(vma);
/*
* vma->vm_start/vm_end cannot change under us because the caller
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
- * Also guard against wrapping around to address 0.
*/
- if (address < PAGE_ALIGN(address+4))
- address = PAGE_ALIGN(address+4);
- else {
- vma_unlock_anon_vma(vma);
- return -ENOMEM;
- }
- error = 0;
+ anon_vma_lock_write(vma->anon_vma);
/* Somebody else might have raced and expanded it already */
if (address > vma->vm_end) {
@@ -2211,7 +2210,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
* updates, but we only hold a shared mmap_sem
* lock here, so we need to protect against
* concurrent vma expansions.
- * vma_lock_anon_vma() doesn't help here, as
+ * anon_vma_lock_write() doesn't help here, as
* we don't guarantee that all growable vmas
* in a mm share the same root anon vma.
* So, we reuse mm->page_table_lock to guard
@@ -2235,7 +2234,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
}
}
- vma_unlock_anon_vma(vma);
+ anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(mm);
return error;
@@ -2251,25 +2250,21 @@ int expand_downwards(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
int error;
- /*
- * We must make sure the anon_vma is allocated
- * so that the anon_vma locking is not a noop.
- */
- if (unlikely(anon_vma_prepare(vma)))
- return -ENOMEM;
-
address &= PAGE_MASK;
error = security_mmap_addr(address);
if (error)
return error;
- vma_lock_anon_vma(vma);
+ /* We must make sure the anon_vma is allocated. */
+ if (unlikely(anon_vma_prepare(vma)))
+ return -ENOMEM;
/*
* vma->vm_start/vm_end cannot change under us because the caller
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
+ anon_vma_lock_write(vma->anon_vma);
/* Somebody else might have raced and expanded it already */
if (address < vma->vm_start) {
@@ -2287,7 +2282,7 @@ int expand_downwards(struct vm_area_struct *vma,
* updates, but we only hold a shared mmap_sem
* lock here, so we need to protect against
* concurrent vma expansions.
- * vma_lock_anon_vma() doesn't help here, as
+ * anon_vma_lock_write() doesn't help here, as
* we don't guarantee that all growable vmas
* in a mm share the same root anon vma.
* So, we reuse mm->page_table_lock to guard
@@ -2309,7 +2304,7 @@ int expand_downwards(struct vm_area_struct *vma,
}
}
}
- vma_unlock_anon_vma(vma);
+ anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(mm);
return error;
@@ -2639,6 +2634,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
return 0;
}
+EXPORT_SYMBOL(do_munmap);
int vm_munmap(unsigned long start, size_t len)
{
@@ -2694,12 +2690,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (!vma || !(vma->vm_flags & VM_SHARED))
goto out;
- if (start < vma->vm_start || start + size > vma->vm_end)
+ if (start < vma->vm_start)
goto out;
- if (pgoff == linear_page_index(vma, start)) {
- ret = 0;
- goto out;
+ if (start + size > vma->vm_end) {
+ struct vm_area_struct *next;
+
+ for (next = vma->vm_next; next; next = next->vm_next) {
+ /* hole between vmas ? */
+ if (next->vm_start != next->vm_prev->vm_end)
+ goto out;
+
+ if (next->vm_file != vma->vm_file)
+ goto out;
+
+ if (next->vm_flags != vma->vm_flags)
+ goto out;
+
+ if (start + size <= next->vm_end)
+ break;
+ }
+
+ if (!next)
+ goto out;
}
prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
@@ -2709,9 +2722,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
flags &= MAP_NONBLOCK;
flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
if (vma->vm_flags & VM_LOCKED) {
+ struct vm_area_struct *tmp;
flags |= MAP_LOCKED;
+
/* drop PG_Mlocked flag for over-mapped range */
- munlock_vma_pages_range(vma, start, start + size);
+ for (tmp = vma; tmp->vm_start >= start + size;
+ tmp = tmp->vm_next) {
+ munlock_vma_pages_range(tmp,
+ max(tmp->vm_start, start),
+ min(tmp->vm_end, start + size));
+ }
}
file = get_file(vma->vm_file);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d15d88c8efa1..e40c9364582d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1899,7 +1899,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
if (gdtc->dirty > gdtc->bg_thresh)
return true;
- if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc))
+ if (wb_stat(wb, WB_RECLAIMABLE) >
+ wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
return true;
if (mdtc) {
@@ -1913,7 +1914,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
if (mdtc->dirty > mdtc->bg_thresh)
return true;
- if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc))
+ if (wb_stat(wb, WB_RECLAIMABLE) >
+ wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
return true;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 89ef8347900c..755f29fac22f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -674,34 +674,28 @@ static inline void __free_one_page(struct page *page,
unsigned long combined_idx;
unsigned long uninitialized_var(buddy_idx);
struct page *buddy;
- unsigned int max_order = MAX_ORDER;
+ unsigned int max_order;
+
+ max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
VM_BUG_ON(!zone_is_initialized(zone));
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
VM_BUG_ON(migratetype == -1);
- if (is_migrate_isolate(migratetype)) {
- /*
- * We restrict max order of merging to prevent merge
- * between freepages on isolate pageblock and normal
- * pageblock. Without this, pageblock isolation
- * could cause incorrect freepage accounting.
- */
- max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
- } else {
+ if (likely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
- }
- page_idx = pfn & ((1 << max_order) - 1);
+ page_idx = pfn & ((1 << MAX_ORDER) - 1);
VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);
+continue_merging:
while (order < max_order - 1) {
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
if (!page_is_buddy(page, buddy, order))
- break;
+ goto done_merging;
/*
* Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
* merge with it and move up one order.
@@ -718,6 +712,32 @@ static inline void __free_one_page(struct page *page,
page_idx = combined_idx;
order++;
}
+ if (max_order < MAX_ORDER) {
+ /* If we are here, it means order is >= pageblock_order.
+ * We want to prevent merge between freepages on isolate
+ * pageblock and normal pageblock. Without this, pageblock
+ * isolation could cause incorrect freepage or CMA accounting.
+ *
+ * We don't want to hit this code for the more frequent
+ * low-order merging.
+ */
+ if (unlikely(has_isolate_pageblock(zone))) {
+ int buddy_mt;
+
+ buddy_idx = __find_buddy_index(page_idx, order);
+ buddy = page + (buddy_idx - page_idx);
+ buddy_mt = get_pageblock_migratetype(buddy);
+
+ if (migratetype != buddy_mt
+ && (is_migrate_isolate(migratetype) ||
+ is_migrate_isolate(buddy_mt)))
+ goto done_merging;
+ }
+ max_order++;
+ goto continue_merging;
+ }
+
+done_merging:
set_page_order(page, order);
/*
@@ -6191,7 +6211,7 @@ int __meminit init_per_zone_wmark_min(void)
setup_per_zone_inactive_ratio();
return 0;
}
-module_init(init_per_zone_wmark_min)
+core_initcall(init_per_zone_wmark_min)
/*
* min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 4568fd58f70a..00c96462cc36 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -283,11 +283,11 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
* now as a simple work-around, we use the next node for destination.
*/
if (PageHuge(page)) {
- nodemask_t src = nodemask_of_node(page_to_nid(page));
- nodemask_t dst;
- nodes_complement(dst, src);
+ int node = next_online_node(page_to_nid(page));
+ if (node == MAX_NUMNODES)
+ node = first_online_node;
return alloc_huge_page_node(page_hstate(compound_head(page)),
- next_node(page_to_nid(page), dst));
+ node);
}
if (PageHighMem(page))
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 7d3db0247983..1ba58213ad65 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -210,7 +210,9 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(pmd_trans_huge(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
- flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+ /* collapse entails shooting down ptes not pmd */
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
}
#endif
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index e88d071648c2..5d453e58ddbf 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -194,7 +194,7 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter,
goto free_proc_pages;
}
- mm = mm_access(task, PTRACE_MODE_ATTACH);
+ mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS);
if (!mm || IS_ERR(mm)) {
rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 40d1d3b0ea2f..b82e56ebabdd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -620,8 +620,7 @@ static void shmem_evict_inode(struct inode *inode)
list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
- } else
- kfree(info->symlink);
+ }
simple_xattrs_free(&info->xattrs);
WARN_ON(inode->i_blocks);
@@ -2462,13 +2461,12 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
info = SHMEM_I(inode);
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
- info->symlink = kmemdup(symname, len, GFP_KERNEL);
- if (!info->symlink) {
+ inode->i_link = kmemdup(symname, len, GFP_KERNEL);
+ if (!inode->i_link) {
iput(inode);
return -ENOMEM;
}
inode->i_op = &shmem_short_symlink_operations;
- inode->i_link = info->symlink;
} else {
error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
if (error) {
@@ -3083,6 +3081,7 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
static void shmem_destroy_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
+ kfree(inode->i_link);
kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
diff --git a/mm/slub.c b/mm/slub.c
index 46997517406e..65d5f92d51d2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2819,6 +2819,7 @@ struct detached_freelist {
void *tail;
void *freelist;
int cnt;
+ struct kmem_cache *s;
};
/*
@@ -2833,8 +2834,9 @@ struct detached_freelist {
* synchronization primitive. Look ahead in the array is limited due
* to performance reasons.
*/
-static int build_detached_freelist(struct kmem_cache *s, size_t size,
- void **p, struct detached_freelist *df)
+static inline
+int build_detached_freelist(struct kmem_cache *s, size_t size,
+ void **p, struct detached_freelist *df)
{
size_t first_skipped_index = 0;
int lookahead = 3;
@@ -2850,8 +2852,11 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!object)
return 0;
+ /* Support for memcg, compiler can optimize this out */
+ df->s = cache_from_obj(s, object);
+
/* Start new detached freelist */
- set_freepointer(s, object, NULL);
+ set_freepointer(df->s, object, NULL);
df->page = virt_to_head_page(object);
df->tail = object;
df->freelist = object;
@@ -2866,7 +2871,7 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
/* df->page is always set at this point */
if (df->page == virt_to_head_page(object)) {
/* Opportunity build freelist */
- set_freepointer(s, object, df->freelist);
+ set_freepointer(df->s, object, df->freelist);
df->freelist = object;
df->cnt++;
p[size] = NULL; /* mark object processed */
@@ -2885,25 +2890,20 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
return first_skipped_index;
}
-
/* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
{
if (WARN_ON(!size))
return;
do {
struct detached_freelist df;
- struct kmem_cache *s;
-
- /* Support for memcg */
- s = cache_from_obj(orig_s, p[size - 1]);
size = build_detached_freelist(s, size, p, &df);
if (unlikely(!df.page))
continue;
- slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+ slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0c56f6a812bc..0c114e2b01d3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,7 +46,6 @@
#include <linux/oom.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
-#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -221,39 +220,6 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
}
-struct dentry *debug_file;
-
-static int debug_shrinker_show(struct seq_file *s, void *unused)
-{
- struct shrinker *shrinker;
- struct shrink_control sc;
-
- sc.gfp_mask = -1;
- sc.nr_to_scan = 0;
-
- down_read(&shrinker_rwsem);
- list_for_each_entry(shrinker, &shrinker_list, list) {
- int num_objs;
-
- num_objs = shrinker->count_objects(shrinker, &sc);
- seq_printf(s, "%pf %d\n", shrinker->scan_objects, num_objs);
- }
- up_read(&shrinker_rwsem);
- return 0;
-}
-
-static int debug_shrinker_open(struct inode *inode, struct file *file)
-{
- return single_open(file, debug_shrinker_show, inode->i_private);
-}
-
-static const struct file_operations debug_shrinker_fops = {
- .open = debug_shrinker_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* Add a shrinker callback to be called from the vm.
*/
@@ -283,15 +249,6 @@ int register_shrinker(struct shrinker *shrinker)
}
EXPORT_SYMBOL(register_shrinker);
-static int __init add_shrinker_debug(void)
-{
- debugfs_create_file("shrinker", 0644, NULL, NULL,
- &debug_shrinker_fops);
- return 0;
-}
-
-late_initcall(add_shrinker_debug);
-
/*
* Remove one
*/
@@ -2577,7 +2534,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
sc->gfp_mask |= __GFP_HIGHMEM;
for_each_zone_zonelist_nodemask(zone, z, zonelist,
- requested_highidx, sc->nodemask) {
+ gfp_zone(sc->gfp_mask), sc->nodemask) {
enum zone_type classzone_idx;
if (!populated_zone(zone))
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 9f15bdd9163c..fc083996e40a 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -309,7 +309,12 @@ static void free_handle(struct zs_pool *pool, unsigned long handle)
static void record_obj(unsigned long handle, unsigned long obj)
{
- *(unsigned long *)handle = obj;
+ /*
+ * lsb of @obj represents handle lock while other bits
+ * represent object value the handle is pointing so
+ * updating shouldn't do store tearing.
+ */
+ WRITE_ONCE(*(unsigned long *)handle, obj);
}
/* zpool driver */
@@ -1635,6 +1640,13 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
free_obj = obj_malloc(d_page, class, handle);
zs_object_copy(free_obj, used_obj, class);
index++;
+ /*
+ * record_obj updates handle's value to free_obj and it will
+ * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which
+ * breaks synchronization using pin_tag(e,g, zs_free) so
+ * let's keep the lock bit.
+ */
+ free_obj |= BIT(HANDLE_PIN_BIT);
record_obj(handle, free_obj);
unpin_tag(handle);
obj_free(pool, class, used_obj);
diff --git a/mm/zswap.c b/mm/zswap.c
index bf14508afd64..340261946fda 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -170,6 +170,8 @@ static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
+/* pool counter to provide unique names to zpool */
+static atomic_t zswap_pools_count = ATOMIC_INIT(0);
/* used by param callback function */
static bool zswap_init_started;
@@ -565,6 +567,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
struct zswap_pool *pool;
+ char name[38]; /* 'zswap' + 32 char (max) num + \0 */
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
pool = kzalloc(sizeof(*pool), GFP_KERNEL);
@@ -573,7 +576,10 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
return NULL;
}
- pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
+ /* unique name for each pool specifically required by zsmalloc */
+ snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
+
+ pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
if (!pool->zpool) {
pr_err("%s zpool not available\n", type);
goto error;