author     Alex Shi <alex.shi@linaro.org>  2017-11-20 12:03:48 +0800
committer  Alex Shi <alex.shi@linaro.org>  2017-11-20 12:03:48 +0800
commit     382ac60c32998318f133b8b84030aa83a8f600fe (patch)
tree       92781c459b3e6ab6848d4c1d08a677d8c651e81d /mm
parent     652dd9c8611986c5c99b41f2862b93a9fcc92e9c (diff)
parent     d0bcb84cbec27d7eb876b6f1d1fffdb65ccefcd4 (diff)
Merge remote-tracking branch 'rt-stable/v4.9-rt' into linux-linaro-lsk-v4.9-rt
Diffstat (limited to 'mm')
-rw-r--r--  mm/madvise.c         7
-rw-r--r--  mm/memcontrol.c      7
-rw-r--r--  mm/memory.c         10
-rw-r--r--  mm/memory_hotplug.c  5
-rw-r--r--  mm/oom_kill.c       16
-rw-r--r--  mm/page_alloc.c     24
-rw-r--r--  mm/slab_common.c     5
-rw-r--r--  mm/sparse.c          2
8 files changed, 59 insertions, 17 deletions
diff --git a/mm/madvise.c b/mm/madvise.c
index 63a12162f4c6..55f30ec32e5b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -533,6 +533,8 @@ static long madvise_remove(struct vm_area_struct *vma,
static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
{
struct page *p;
+ struct zone *zone;
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
for (; start < end; start += PAGE_SIZE <<
@@ -561,6 +563,11 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
if (ret)
return ret;
}
+
+ /* Ensure that all poisoned pages are removed from per-cpu lists */
+ for_each_populated_zone(zone)
+ drain_all_pages(zone);
+
return 0;
}
#endif
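
For context, a minimal user-space sketch (not part of this patch) of how the madvise(MADV_HWPOISON) path hardened above is normally exercised by hwpoison tests; it needs CAP_SYS_ADMIN, and the fallback #define is only for older libc headers:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_HWPOISON
#define MADV_HWPOISON 100	/* value from asm-generic/mman-common.h */
#endif

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *buf = mmap(NULL, page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Touch the page so it is actually backed by memory. */
	*(volatile char *)buf = 1;

	/* Ask the kernel to treat this page as hardware-poisoned. */
	if (madvise(buf, page, MADV_HWPOISON))
		perror("madvise(MADV_HWPOISON)");	/* EPERM without CAP_SYS_ADMIN */

	return 0;
}

With the hunk above, poisoned pages are also flushed out of the per-cpu free lists before madvise_hwpoison() returns.
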
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1f2ebc924916..12b94909ba7b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -465,6 +465,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
struct mem_cgroup_tree_per_node *mctz;
mctz = soft_limit_tree_from_page(page);
+ if (!mctz)
+ return;
/*
* Necessary to update all ancestors when hierarchy is used.
* because their event counter is not touched.
@@ -502,7 +504,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
for_each_node(nid) {
mz = mem_cgroup_nodeinfo(memcg, nid);
mctz = soft_limit_tree_node(nid);
- mem_cgroup_remove_exceeded(mz, mctz);
+ if (mctz)
+ mem_cgroup_remove_exceeded(mz, mctz);
}
}
@@ -2569,7 +2572,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
* is empty. Do it lockless to prevent lock bouncing. Races
* are acceptable as soft limit is best effort anyway.
*/
- if (RB_EMPTY_ROOT(&mctz->rb_root))
+ if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
return 0;
/*
diff --git a/mm/memory.c b/mm/memory.c
index d064caff9d7d..1aa63e7dd790 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3596,6 +3596,11 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
+ if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+ flags & FAULT_FLAG_INSTRUCTION,
+ flags & FAULT_FLAG_REMOTE))
+ return VM_FAULT_SIGSEGV;
+
/*
* Enable the memcg OOM handling for faults triggered in user
* space. Kernel faults are handled more gracefully.
@@ -3603,11 +3608,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
if (flags & FAULT_FLAG_USER)
mem_cgroup_oom_enable();
- if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
- flags & FAULT_FLAG_INSTRUCTION,
- flags & FAULT_FLAG_REMOTE))
- return VM_FAULT_SIGSEGV;
-
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
else
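
The hunk above reorders handle_mm_fault() so that the arch_vma_access_permitted() check (e.g. protection keys) runs before mem_cgroup_oom_enable(); presumably this keeps the enable/disable pair balanced, since an early VM_FAULT_SIGSEGV return would otherwise skip the matching mem_cgroup_oom_disable(). A standalone sketch of that ordering concern, with illustrative stubs rather than the real kernel APIs:

#include <stdbool.h>
#include <stdio.h>

static bool oom_enabled;	/* stand-in for the per-task memcg OOM flag */

static void mem_cgroup_oom_enable(void)  { oom_enabled = true;  }
static void mem_cgroup_oom_disable(void) { oom_enabled = false; }

/* Pretend the architecture rejects the access (e.g. a pkey mismatch). */
static bool arch_vma_access_permitted(void) { return false; }

static int handle_fault(void)
{
	/* Check access rights first: returning early here is now harmless. */
	if (!arch_vma_access_permitted())
		return -1;		/* VM_FAULT_SIGSEGV in the kernel */

	mem_cgroup_oom_enable();
	/* ... the real fault handling would run here ... */
	mem_cgroup_oom_disable();
	return 0;
}

int main(void)
{
	handle_fault();
	printf("oom enable left dangling: %s\n", oom_enabled ? "yes" : "no");
	return 0;
}
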
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ede137345a99..c9f715b2917f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -179,7 +179,7 @@ static void release_memory_resource(struct resource *res)
void get_page_bootmem(unsigned long info, struct page *page,
unsigned long type)
{
- page->lru.next = (struct list_head *) type;
+ page->freelist = (void *)type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
@@ -189,11 +189,12 @@ void put_page_bootmem(struct page *page)
{
unsigned long type;
- type = (unsigned long) page->lru.next;
+ type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
+ page->freelist = NULL;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ec9f11d4f094..d631d251c150 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -37,6 +37,7 @@
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/init.h>
+#include <linux/mmu_notifier.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -491,6 +492,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
}
/*
+ * If the mm has notifiers then we would need to invalidate them around
+ * unmap_page_range, and that is risky because notifiers can sleep and
+ * what they do is basically nondeterministic. So let's have a short
+ * sleep to give the oom victim some more time.
+ * TODO: we really want to get rid of this ugly hack and make sure that
+ * notifiers cannot block for an unbounded amount of time and add
+ * mmu_notifier_invalidate_range_{start,end} around unmap_page_range
+ */
+ if (mm_has_notifiers(mm)) {
+ up_read(&mm->mmap_sem);
+ schedule_timeout_idle(HZ);
+ goto unlock_oom;
+ }
+
+ /*
* increase mm_users only after we know we will reap something so
* that the mmput_async is called only when we have reaped something
* and delayed __mmput doesn't matter that much
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a63aa41760af..6a187c2adcc9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2139,13 +2139,25 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
continue;
/*
- * It should never happen but changes to locking could
- * inadvertently allow a per-cpu drain to add pages
- * to MIGRATE_HIGHATOMIC while unreserving so be safe
- * and watch for underflows.
+ * In the page freeing path the migratetype change is racy, so we
+ * can encounter several free pages in a pageblock in this loop
+ * although we already changed the pageblock type from highatomic
+ * to ac->migratetype. So we should adjust the count only once
+ * per pageblock.
*/
- zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
- zone->nr_reserved_highatomic);
+ if (get_pageblock_migratetype(page) ==
+ MIGRATE_HIGHATOMIC) {
+ /*
+ * It should never happen but changes to
+ * locking could inadvertently allow a per-cpu
+ * drain to add pages to MIGRATE_HIGHATOMIC
+ * while unreserving so be safe and watch for
+ * underflows.
+ */
+ zone->nr_reserved_highatomic -= min(
+ pageblock_nr_pages,
+ zone->nr_reserved_highatomic);
+ }
/*
* Convert to ac->migratetype and avoid the normal
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 5d2f24fbafc5..622f6b6ae844 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -255,7 +255,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
{
struct kmem_cache *s;
- if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
+ if (slab_nomerge)
return NULL;
if (ctor)
@@ -266,6 +266,9 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
size = ALIGN(size, align);
flags = kmem_cache_flags(size, flags, name, NULL);
+ if (flags & SLAB_NEVER_MERGE)
+ return NULL;
+
list_for_each_entry_reverse(s, &slab_caches, list) {
if (slab_unmergeable(s))
continue;
diff --git a/mm/sparse.c b/mm/sparse.c
index 1e168bf2779a..8c4c82e358e6 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
- magic = (unsigned long) page->lru.next;
+ magic = (unsigned long) page->freelist;
BUG_ON(magic == NODE_INFO);