From c32e6e80900a80fbaec23119f854d5e14a47e80b Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Date: Sat, 15 Jul 2017 11:41:14 +1000
Subject: mm: skip HWPoisoned pages when onlining pages

b023f46813cd ("memory-hotplug: skip HWPoisoned page when offlining pages")
skipped the HWPoisoned pages when offlining pages, but they should be
skipped when onlining the pages too.

n-horiguchi@ah.jp.nec.com said:

: If I read correctly, to "skip HWPoisoned page" in commit b023f46813cd
: means that we skip the page status check for hwpoisoned pages *not* to
: prevent memory offlining for memblocks with hwpoisoned pages.  That
: means that hwpoisoned pages can be offlined.
:
: And another reason to clear PageReserved is that we could reuse the
: hwpoisoned page after onlining back with replacing the broken DIMM.  In
: this usecase, we first do unpoisoning to clear PageHWPoison, but it
: doesn't work if PageReserved is set.  My simple testing shows the BUG
: below in unpoisoning (without the ClearPageReserved):
:
:   Unpoison: Software-unpoisoned page 0x18000
:   BUG: Bad page state in process page-types  pfn:18000
:   page:ffffda5440600000 count:0 mapcount:0 mapping:          (null) index:0x70006b599
:   flags: 0x1fffc00004081a(error|uptodate|dirty|reserved|swapbacked)
:   raw: 001fffc00004081a 0000000000000000 000000070006b599 00000000ffffffff
:   raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000
:   page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
:   bad because of flags: 0x800(reserved)

Link: http://lkml.kernel.org/r/1493130472-22843-3-git-send-email-ldufour@linux.vnet.ibm.com
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Glauber Costa <glommer@openvz.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory_hotplug.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8dccc317aac2..d620d0427b6b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -689,6 +689,10 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
 	if (PageReserved(pfn_to_page(start_pfn)))
 		for (i = 0; i < nr_pages; i++) {
 			page = pfn_to_page(start_pfn + i);
+			if (PageHWPoison(page)) {
+				ClearPageReserved(page);
+				continue;
+			}
 			(*online_page_callback)(page);
 			onlined_pages++;
 		}
-- 
cgit v1.2.3


From 82494ce0a87a3f8d9a3d77e1fdaa9f34a5128633 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 15 Jul 2017 11:41:15 +1000
Subject: arm: arch/arm/include/asm/page.h needs personality.h

VM_DATA_DEFAULT_FLAGS uses READ_IMPLIES_EXEC, so page.h should include
personality.h to provide this.

This fixes no known bugs and can be safely ignored ;)

Cc: Russell King <linux@arm.linux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm/include/asm/page.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 4355f0ec44d6..f98baaec0a15 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -17,6 +17,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/personality.h>	/* For READ_IMPLIES_EXEC */
+
 #ifndef CONFIG_MMU
 
 #include <asm/page-nommu.h>
-- 
cgit v1.2.3


From 947e780b273e35dac08166122ca91fea6f6d4d96 Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Sat, 15 Jul 2017 11:41:15 +1000
Subject: ocfs2: remove ocfs2_is_o2cb_active()

Remove ocfs2_is_o2cb_active().  We have similar functions to identify
which cluster stack is being used via osb->osb_cluster_stack.

Secondly, the current implementation of ocfs2_is_o2cb_active() is not
totally safe.  Based on the design of stackglue, we need to get
ocfs2_stack_lock before using ocfs2_stack related data structures, and
that active_stack pointer can be NULL in the case of mount failure.

Link: http://lkml.kernel.org/r/1495441079-11708-1-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Reviewed-by: Eric Ren <zren@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/dlmglue.c   | 2 +-
 fs/ocfs2/stackglue.c | 6 ------
 fs/ocfs2/stackglue.h | 3 ---
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4689940a953c..703d8acc5efb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3413,7 +3413,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 	 * we can recover correctly from node failure. Otherwise, we may get
 	 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
 	 */
-	if (!ocfs2_is_o2cb_active() &&
+	if (ocfs2_userspace_stack(osb) &&
 	    lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
 		lvb = 1;
 
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index d6c350ba25b9..c4b029c43464 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
  */
 static struct ocfs2_stack_plugin *active_stack;
 
-inline int ocfs2_is_o2cb_active(void)
-{
-	return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
-}
-EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
-
 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
 {
 	struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index e3036e1790e8..f2dce10fae54 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
 
-/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
-int ocfs2_is_o2cb_active(void);
-
 extern struct kset *ocfs2_kset;
 
 #endif  /* STACKGLUE_H */
-- 
cgit v1.2.3


From 555c056945f7221518a2faf3627e50ecc4b78412 Mon Sep 17 00:00:00 2001
From: Guozhonghua <guozhonghua@h3c.com>
Date: Sat, 15 Jul 2017 11:41:15 +1000
Subject: ocfs2: old mle put and release after the function
 dlm_add_migration_mle called

If an old mle is found after dlm_add_migration_mle() is called, it should
be put once.  If the return value is not -EEXIST and its type is BLOCK,
it should be put again to release it and avoid a memory leak, because it
has been unhashed from the map.

Link: http://lkml.kernel.org/r/71604351584F6A4EBAE558C676F37CA4A3D4B7FE@H3CMLB12-EX.srv.huawei-3com.com
Signed-off-by: Guozhonghua <guozhonghua@h3c.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c | 62 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3e04279446e8..4438671c4ac3 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2612,20 +2612,45 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	spin_lock(&dlm->master_lock);
 	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
 				    namelen, target, dlm->node_num);
+	if (ret == -EEXIST) {
+		if(oldmle)
+			__dlm_put_mle(oldmle);
+
+		spin_unlock(&dlm->master_lock);
+		spin_unlock(&dlm->spinlock);
+		mlog(0, "another process is already migrating it\n");
+		goto fail;
+	}
+
+	/* If an old one mle found, it should be put. if its type is BLOCK,
+	 * it should be put again. Because it had been unhasded from the map
+	 * in the function dlm_add_migration_mle.
+	 * otherwise the memory will be leaked. It will not found it again from
+	 * the hash map.
+	 */
+	if (oldmle) {
+		/* master is known, detach if not already detached */
+		__dlm_mle_detach_hb_events(dlm, oldmle);
+		__dlm_put_mle(oldmle);
+
+		/* if the type of the mle is BLOCK, should put it once for release.
+		 * otherwise memory leak may be caused because oldmle had been unhashed
+		 * from the hash map, it will not be found anymore.
+		 */
+		if (oldmle->type == DLM_MLE_BLOCK)
+			__dlm_put_mle(oldmle);
+       }
+
 	/* get an extra reference on the mle.
 	 * otherwise the assert_master from the new
 	 * master will destroy this.
 	 */
 	dlm_get_mle_inuse(mle);
+	mle_added = 1;
+
 	spin_unlock(&dlm->master_lock);
 	spin_unlock(&dlm->spinlock);
 
-	if (ret == -EEXIST) {
-		mlog(0, "another process is already migrating it\n");
-		goto fail;
-	}
-	mle_added = 1;
-
 	/*
 	 * set the MIGRATING flag and flush asts
 	 * if we fail after this we need to re-dirty the lockres
@@ -2642,12 +2667,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	}
 
 fail:
-	if (ret != -EEXIST && oldmle) {
-		/* master is known, detach if not already detached */
-		dlm_mle_detach_hb_events(dlm, oldmle);
-		dlm_put_mle(oldmle);
-	}
-
 	if (ret < 0) {
 		if (mle_added) {
 			dlm_mle_detach_hb_events(dlm, mle);
@@ -3182,16 +3201,23 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
 	if (ret < 0)
 		kmem_cache_free(dlm_mle_cache, mle);
 
+	/* If an old one mle found, it should be put. if its type is BLOCK,
+	 * it should be put again. Because it had been unhasded from the map
+	 * in the function dlm_add_migration_mle.
+	 * otherwise the memory will be leaked. It will not found it again from
+	 * the hash map.
+	 */
+	if (oldmle) {
+		__dlm_mle_detach_hb_events(dlm, oldmle);
+		__dlm_put_mle(oldmle);
+		if (ret >= 0 && oldmle->type == DLM_MLE_BLOCK)
+			__dlm_put_mle(oldmle);
+	}
+
 	spin_unlock(&dlm->master_lock);
 unlock:
 	spin_unlock(&dlm->spinlock);
 
-	if (oldmle) {
-		/* master is known, detach if not already detached */
-		dlm_mle_detach_hb_events(dlm, oldmle);
-		dlm_put_mle(oldmle);
-	}
-
 	if (res)
 		dlm_lockres_put(res);
 leave:
-- 
cgit v1.2.3


From 2d060f378b31d7ea521df9b652650fc7996ef302 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 15 Jul 2017 11:41:15 +1000
Subject: 
 ocfs2-old-mle-put-and-release-after-the-function-dlm_add_migration_mle-called-fix

fix coding style, comments

Cc: Guozhonghua <guozhonghua@h3c.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4438671c4ac3..f0072145eead 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2613,7 +2613,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
 				    namelen, target, dlm->node_num);
 	if (ret == -EEXIST) {
-		if(oldmle)
+		if (oldmle)
 			__dlm_put_mle(oldmle);
 
 		spin_unlock(&dlm->master_lock);
@@ -2622,10 +2622,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 		goto fail;
 	}
 
-	/* If an old one mle found, it should be put. if its type is BLOCK,
-	 * it should be put again. Because it had been unhasded from the map
+	/*
+	 * If an old mle is found, it should be put.  If its type is BLOCK,
+	 * it should be put again.  Because it has been unhasded from the map
 	 * in the function dlm_add_migration_mle.
-	 * otherwise the memory will be leaked. It will not found it again from
+	 * Otherwise the memory will be leaked.  It will not be found again from
 	 * the hash map.
 	 */
 	if (oldmle) {
@@ -2633,9 +2634,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 		__dlm_mle_detach_hb_events(dlm, oldmle);
 		__dlm_put_mle(oldmle);
 
-		/* if the type of the mle is BLOCK, should put it once for release.
-		 * otherwise memory leak may be caused because oldmle had been unhashed
-		 * from the hash map, it will not be found anymore.
+		/*
+		 * If the type of the mle is BLOCK, it should be put once for
+		 * release.  Otherwise a memory leak may be caused because
+		 * oldmle has been unhashed from the hash map and it will not
+		 * be found any more.
 		 */
 		if (oldmle->type == DLM_MLE_BLOCK)
 			__dlm_put_mle(oldmle);
@@ -3201,10 +3204,11 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
 	if (ret < 0)
 		kmem_cache_free(dlm_mle_cache, mle);
 
-	/* If an old one mle found, it should be put. if its type is BLOCK,
-	 * it should be put again. Because it had been unhasded from the map
-	 * in the function dlm_add_migration_mle.
-	 * otherwise the memory will be leaked. It will not found it again from
+	/*
+	 * If an old mle is found, it should be put.  If its type is BLOCK,
+	 * it should be put again because it has been unhashed from the map
+	 * in the dlm_add_migration_mle().
+	 * Otherwise the memory will be leaked.  It will not be found again from
 	 * the hash map.
 	 */
 	if (oldmle) {
-- 
cgit v1.2.3


From d82442ed243cfff2860ce6938067cb8f0a83bc32 Mon Sep 17 00:00:00 2001
From: Guozhonghua <guozhonghua@h3c.com>
Date: Sat, 15 Jul 2017 11:41:15 +1000
Subject: ocfs2/dlm: optimize freeing of dead node locks

The three per-queue loops can be merged into a single loop over the queues
with one inner loop, so less code does the same work.

Link: http://lkml.kernel.org/r/71604351584F6A4EBAE558C676F37CA4C4AF898E@H3CMLB12-EX.srv.huawei-3com.com
Signed-off-by: Guozhonghua <guozhonghua@h3c.com>
Reviewed-by: Eric Ren <zren@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmrecovery.c | 39 ++++++++++++++-------------------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 74407c6dd592..4c4b18e612c5 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2268,6 +2268,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 {
 	struct dlm_lock *lock, *next;
 	unsigned int freed = 0;
+       struct list_head *queue = NULL;
+       int i;
 
 	/* this node is the lockres master:
 	 * 1) remove any stale locks for the dead node
@@ -2280,31 +2282,18 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 	 * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
 
 	/* TODO: check pending_asts, pending_basts here */
-	list_for_each_entry_safe(lock, next, &res->granted, list) {
-		if (lock->ml.node == dead_node) {
-			list_del_init(&lock->list);
-			dlm_lock_put(lock);
-			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
-			dlm_lock_put(lock);
-			freed++;
-		}
-	}
-	list_for_each_entry_safe(lock, next, &res->converting, list) {
-		if (lock->ml.node == dead_node) {
-			list_del_init(&lock->list);
-			dlm_lock_put(lock);
-			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
-			dlm_lock_put(lock);
-			freed++;
-		}
-	}
-	list_for_each_entry_safe(lock, next, &res->blocked, list) {
-		if (lock->ml.node == dead_node) {
-			list_del_init(&lock->list);
-			dlm_lock_put(lock);
-			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
-			dlm_lock_put(lock);
-			freed++;
+       for (i = DLM_GRANTED_LIST; i <= DLM_BLOCKED_LIST; i++) {
+               queue = dlm_list_idx_to_ptr(res, i);
+               list_for_each_entry_safe(lock, next, queue, list) {
+                       if (lock->ml.node == dead_node) {
+                               list_del_init(&lock->list);
+                               dlm_lock_put(lock);
+                               /* Can't schedule DLM_UNLOCK_FREE_LOCK
+                                * do manually
+                                */
+                               dlm_lock_put(lock);
+                               freed++;
+                       }
 		}
 	}
 
-- 
cgit v1.2.3


From 341d2d6f0f30256cff2c323b3e0848e4f0f10755 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 15 Jul 2017 11:41:16 +1000
Subject: 
 ocfs2-dlm-optimization-of-code-while-free-dead-node-locks-checkpatch-fixes

WARNING: please, no spaces at the start of a line
#26: FILE: fs/ocfs2/dlm/dlmrecovery.c:2271:
+       struct list_head *queue = NULL;$

WARNING: please, no spaces at the start of a line
#27: FILE: fs/ocfs2/dlm/dlmrecovery.c:2272:
+       int i;$

WARNING: please, no spaces at the start of a line
#60: FILE: fs/ocfs2/dlm/dlmrecovery.c:2285:
+       for (i = DLM_GRANTED_LIST; i <= DLM_BLOCKED_LIST; i++) {$

WARNING: suspect code indent for conditional statements (7, 15)
#60: FILE: fs/ocfs2/dlm/dlmrecovery.c:2285:
+       for (i = DLM_GRANTED_LIST; i <= DLM_BLOCKED_LIST; i++) {
+               queue = dlm_list_idx_to_ptr(res, i);

ERROR: code indent should use tabs where possible
#61: FILE: fs/ocfs2/dlm/dlmrecovery.c:2286:
+               queue = dlm_list_idx_to_ptr(res, i);$

WARNING: please, no spaces at the start of a line
#61: FILE: fs/ocfs2/dlm/dlmrecovery.c:2286:
+               queue = dlm_list_idx_to_ptr(res, i);$

ERROR: code indent should use tabs where possible
#62: FILE: fs/ocfs2/dlm/dlmrecovery.c:2287:
+               list_for_each_entry_safe(lock, next, queue, list) {$

WARNING: please, no spaces at the start of a line
#62: FILE: fs/ocfs2/dlm/dlmrecovery.c:2287:
+               list_for_each_entry_safe(lock, next, queue, list) {$

WARNING: suspect code indent for conditional statements (15, 23)
#62: FILE: fs/ocfs2/dlm/dlmrecovery.c:2287:
+               list_for_each_entry_safe(lock, next, queue, list) {
+                       if (lock->ml.node == dead_node) {

ERROR: code indent should use tabs where possible
#63: FILE: fs/ocfs2/dlm/dlmrecovery.c:2288:
+                       if (lock->ml.node == dead_node) {$

WARNING: please, no spaces at the start of a line
#63: FILE: fs/ocfs2/dlm/dlmrecovery.c:2288:
+                       if (lock->ml.node == dead_node) {$

WARNING: suspect code indent for conditional statements (23, 31)
#63: FILE: fs/ocfs2/dlm/dlmrecovery.c:2288:
+                       if (lock->ml.node == dead_node) {
+                               list_del_init(&lock->list);

ERROR: code indent should use tabs where possible
#64: FILE: fs/ocfs2/dlm/dlmrecovery.c:2289:
+                               list_del_init(&lock->list);$

WARNING: please, no spaces at the start of a line
#64: FILE: fs/ocfs2/dlm/dlmrecovery.c:2289:
+                               list_del_init(&lock->list);$

ERROR: code indent should use tabs where possible
#65: FILE: fs/ocfs2/dlm/dlmrecovery.c:2290:
+                               dlm_lock_put(lock);$

WARNING: please, no spaces at the start of a line
#65: FILE: fs/ocfs2/dlm/dlmrecovery.c:2290:
+                               dlm_lock_put(lock);$

ERROR: code indent should use tabs where possible
#66: FILE: fs/ocfs2/dlm/dlmrecovery.c:2291:
+                               /* Can't schedule DLM_UNLOCK_FREE_LOCK$

ERROR: code indent should use tabs where possible
#67: FILE: fs/ocfs2/dlm/dlmrecovery.c:2292:
+                                * do manually$

ERROR: code indent should use tabs where possible
#68: FILE: fs/ocfs2/dlm/dlmrecovery.c:2293:
+                                */$

ERROR: code indent should use tabs where possible
#69: FILE: fs/ocfs2/dlm/dlmrecovery.c:2294:
+                               dlm_lock_put(lock);$

WARNING: please, no spaces at the start of a line
#69: FILE: fs/ocfs2/dlm/dlmrecovery.c:2294:
+                               dlm_lock_put(lock);$

ERROR: code indent should use tabs where possible
#70: FILE: fs/ocfs2/dlm/dlmrecovery.c:2295:
+                               freed++;$

WARNING: please, no spaces at the start of a line
#70: FILE: fs/ocfs2/dlm/dlmrecovery.c:2295:
+                               freed++;$

ERROR: code indent should use tabs where possible
#71: FILE: fs/ocfs2/dlm/dlmrecovery.c:2296:
+                       }$

WARNING: please, no spaces at the start of a line
#71: FILE: fs/ocfs2/dlm/dlmrecovery.c:2296:
+                       }$

total: 11 errors, 14 warnings, 51 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
      mechanically convert to the typical style using --fix or --fix-inplace.

NOTE: Whitespace errors detected.
      You may wish to use scripts/cleanpatch or scripts/cleanfile

./patches/ocfs2-dlm-optimization-of-code-while-free-dead-node-locks.patch has style problems, please review.

NOTE: If any of the errors are false positives, please report
      them to the maintainer, see CHECKPATCH in MAINTAINERS.

Please run checkpatch prior to sending patches

Cc: Guozhonghua <guozhonghua@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmrecovery.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 4c4b18e612c5..908b05942282 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2268,8 +2268,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 {
 	struct dlm_lock *lock, *next;
 	unsigned int freed = 0;
-       struct list_head *queue = NULL;
-       int i;
+	struct list_head *queue = NULL;
+	int i;
 
 	/* this node is the lockres master:
 	 * 1) remove any stale locks for the dead node
@@ -2282,18 +2282,19 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 	 * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
 
 	/* TODO: check pending_asts, pending_basts here */
-       for (i = DLM_GRANTED_LIST; i <= DLM_BLOCKED_LIST; i++) {
-               queue = dlm_list_idx_to_ptr(res, i);
-               list_for_each_entry_safe(lock, next, queue, list) {
-                       if (lock->ml.node == dead_node) {
-                               list_del_init(&lock->list);
-                               dlm_lock_put(lock);
-                               /* Can't schedule DLM_UNLOCK_FREE_LOCK
-                                * do manually
-                                */
-                               dlm_lock_put(lock);
-                               freed++;
-                       }
+	for (i = DLM_GRANTED_LIST; i <= DLM_BLOCKED_LIST; i++) {
+		queue = dlm_list_idx_to_ptr(res, i);
+		list_for_each_entry_safe(lock, next, queue, list) {
+			if (lock->ml.node == dead_node) {
+				list_del_init(&lock->list);
+				dlm_lock_put(lock);
+				/*
+				 * Can't schedule DLM_UNLOCK_FREE_LOCK: do
+				 * manually
+				 */
+				dlm_lock_put(lock);
+				freed++;
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 32d70be6fe37b958f83e4f5f153b2fc696c6092c Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Sat, 15 Jul 2017 11:41:16 +1000
Subject: ocfs2: give an obvious tip for mismatched cluster names

Add an obvious error message for the case where the cluster name on disk
does not match the name of the currently running cluster.  We can hit this
case during OCFS2 cluster migration.

If we can give the user an obvious tip for why they can not mount the file
system after migration, they can quickly fix this mismatch problem.

Second, also move printing ocfs2_fill_super() errno to the front of
ocfs2_dismount_volume(), since ocfs2_dismount_volume() will also print
its own message.

I looked through all the code of OCFS2 (including o2cb); there is no
place which returns this error.  In fact, the function calling path
ocfs2_fill_super -> ocfs2_mount_volume -> ocfs2_dlm_init ->
dlm_new_lockspace is a very specific one.  We can use this errno to give
the user a clearer tip, since this case is fairly common during cluster
migration, and the customer can quickly find the failure cause if there
is an error printed.  Also, I do not think this errno will be added in
the o2cb path during ocfs2_dlm_init(), since the o2cb code has been
stable for a long time.

We only print this error tip when the user uses the pcmk stack, since with
the o2cb stack the user will not meet this error.

Link: http://lkml.kernel.org/r/1495089336-19312-1-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Reviewed-by: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/super.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 83005f486451..24d885bf14bc 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1208,14 +1208,15 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 read_super_error:
 	brelse(bh);
 
+	if (status)
+		mlog_errno(status);
+
 	if (osb) {
 		atomic_set(&osb->vol_state, VOLUME_DISABLED);
 		wake_up(&osb->osb_mount_event);
 		ocfs2_dismount_volume(sb, 1);
 	}
 
-	if (status)
-		mlog_errno(status);
 	return status;
 }
 
@@ -1843,6 +1844,9 @@ static int ocfs2_mount_volume(struct super_block *sb)
 	status = ocfs2_dlm_init(osb);
 	if (status < 0) {
 		mlog_errno(status);
+		if (status == -EBADR)
+			mlog(ML_ERROR, "couldn't mount because cluster name on"
+			" disk does not match the running cluster name.\n");
 		goto leave;
 	}
 
-- 
cgit v1.2.3


From 7845af6f476326109bde39a77fd819930faac7e2 Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Sat, 15 Jul 2017 11:41:16 +1000
Subject: ocfs2-give-an-obvious-tip-for-dismatch-cluster-names-v2

Compared with the initial version, we only print this error tip when the
user uses the pcmk stack, since with the o2cb stack the user will not meet
this error.

Link: http://lkml.kernel.org/r/1495419305-3780-1-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 24d885bf14bc..f03fdd0fc72c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1844,7 +1844,7 @@ static int ocfs2_mount_volume(struct super_block *sb)
 	status = ocfs2_dlm_init(osb);
 	if (status < 0) {
 		mlog_errno(status);
-		if (status == -EBADR)
+		if (status == -EBADR && ocfs2_userspace_stack(osb))
 			mlog(ML_ERROR, "couldn't mount because cluster name on"
 			" disk does not match the running cluster name.\n");
 		goto leave;
-- 
cgit v1.2.3


From 9e6f2ab0e30d48c84d92d496ba9512f9f670741a Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Sat, 15 Jul 2017 11:41:16 +1000
Subject: ocfs2: move some definitions to header file

Patch series "ocfs2: use kobject for online file check", v3.

Use the embedded kobject mechanism for the online file check feature;
this avoids using a global list to save/search per-device online file
check related data.  The changed code is based on Goldwyn Rodrigues's
patches and the ext4 fs code.  No new features are added, apart from some
very small fixes made during this code refactoring.  Also, the code change
does not affect the underlying file check code.  Many thanks to Goldwyn.

Compared with the second version, more comments were added to the patch
descriptions to make sure each modification is mentioned.  Compared with
the first version, the code change was split into four patches, making
sure that no patch introduces compile errors in the ocfs2 kernel modules.

This patch (of 3):

Move some definitions to the header file; they will be referenced by other
source files when the kobject mechanism is introduced.

Link: http://lkml.kernel.org/r/1495611866-27360-2-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/filecheck.c | 27 ---------------------------
 fs/ocfs2/filecheck.h | 27 +++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 2cabbcf2f28e..cc7b595e264d 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -56,33 +56,6 @@ static const char * const ocfs2_filecheck_errs[] = {
 static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock);
 static LIST_HEAD(ocfs2_filecheck_sysfs_list);
 
-struct ocfs2_filecheck {
-	struct list_head fc_head;	/* File check entry list head */
-	spinlock_t fc_lock;
-	unsigned int fc_max;	/* Maximum number of entry in list */
-	unsigned int fc_size;	/* Current entry count in list */
-	unsigned int fc_done;	/* Finished entry count in list */
-};
-
-struct ocfs2_filecheck_sysfs_entry {	/* sysfs entry per mounting */
-	struct list_head fs_list;
-	atomic_t fs_count;
-	struct super_block *fs_sb;
-	struct kset *fs_devicekset;
-	struct kset *fs_fcheckkset;
-	struct ocfs2_filecheck *fs_fcheck;
-};
-
-#define OCFS2_FILECHECK_MAXSIZE		100
-#define OCFS2_FILECHECK_MINSIZE		10
-
-/* File check operation type */
-enum {
-	OCFS2_FILECHECK_TYPE_CHK = 0,	/* Check a file(inode) */
-	OCFS2_FILECHECK_TYPE_FIX,	/* Fix a file(inode) */
-	OCFS2_FILECHECK_TYPE_SET = 100	/* Set entry list maximum size */
-};
-
 struct ocfs2_filecheck_entry {
 	struct list_head fe_list;
 	unsigned long fe_ino;
diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h
index e5cd002a2c09..af1678b620a4 100644
--- a/fs/ocfs2/filecheck.h
+++ b/fs/ocfs2/filecheck.h
@@ -43,6 +43,33 @@ enum {
 #define OCFS2_FILECHECK_ERR_START	OCFS2_FILECHECK_ERR_FAILED
 #define OCFS2_FILECHECK_ERR_END		OCFS2_FILECHECK_ERR_UNSUPPORTED
 
+struct ocfs2_filecheck {
+	struct list_head fc_head;	/* File check entry list head */
+	spinlock_t fc_lock;
+	unsigned int fc_max;	/* Maximum number of entry in list */
+	unsigned int fc_size;	/* Current entry count in list */
+	unsigned int fc_done;	/* Finished entry count in list */
+};
+
+struct ocfs2_filecheck_sysfs_entry {	/* sysfs entry per mounting */
+	struct list_head fs_list;
+	atomic_t fs_count;
+	struct super_block *fs_sb;
+	struct kset *fs_devicekset;
+	struct kset *fs_fcheckkset;
+	struct ocfs2_filecheck *fs_fcheck;
+};
+
+#define OCFS2_FILECHECK_MAXSIZE		100
+#define OCFS2_FILECHECK_MINSIZE		10
+
+/* File check operation type */
+enum {
+	OCFS2_FILECHECK_TYPE_CHK = 0,	/* Check a file(inode) */
+	OCFS2_FILECHECK_TYPE_FIX,	/* Fix a file(inode) */
+	OCFS2_FILECHECK_TYPE_SET = 100	/* Set entry list maximum size */
+};
+
 int ocfs2_filecheck_create_sysfs(struct super_block *sb);
 int ocfs2_filecheck_remove_sysfs(struct super_block *sb);
 
-- 
cgit v1.2.3


From a92260d973ae172e58d6408a90bceb9501dc64c3 Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Sat, 15 Jul 2017 11:41:16 +1000
Subject: ocfs2: fix some small problems

First, set fe_done = 1 while holding the spin lock, to avoid any potential
race condition.

Second, change the mlog message level from ERROR to NOTICE, since these
messages are not really errors.

Third, change the errno to -EAGAIN when the file check queue is full; this
errno is more appropriate in that case.

Link: http://lkml.kernel.org/r/1495611866-27360-3-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/filecheck.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index cc7b595e264d..43477272ab96 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -288,7 +288,7 @@ ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent,
 
 	spin_lock(&ent->fs_fcheck->fc_lock);
 	if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) {
-		mlog(ML_ERROR,
+		mlog(ML_NOTICE,
 		"Cannot set online file check maximum entry number "
 		"to %u due to too many pending entries(%u)\n",
 		len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done);
@@ -462,8 +462,8 @@ static void
 ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent,
 			   struct ocfs2_filecheck_entry *entry)
 {
-	entry->fe_done = 1;
 	spin_lock(&ent->fs_fcheck->fc_lock);
+	entry->fe_done = 1;
 	ent->fs_fcheck->fc_done++;
 	spin_unlock(&ent->fs_fcheck->fc_lock);
 }
@@ -545,11 +545,11 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
 	spin_lock(&ent->fs_fcheck->fc_lock);
 	if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
 	    (ent->fs_fcheck->fc_done == 0)) {
-		mlog(ML_ERROR,
+		mlog(ML_NOTICE,
 		"Cannot do more file check "
 		"since file check queue(%u) is full now\n",
 		ent->fs_fcheck->fc_max);
-		ret = -EBUSY;
+		ret = -EAGAIN;
 		kfree(entry);
 	} else {
 		if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
-- 
cgit v1.2.3


From ef4d7619dbc2fd2cba76db700e1958d5a2e3ae1c Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Sat, 15 Jul 2017 11:41:16 +1000
Subject: ocfs2: add kobject for online file check

Use the embedded kobject mechanism for the online file check feature.  This
avoids using a global list to save/search per-device online file check
related data, reduces the number of code lines and makes the code logic
clearer.  The changed code is based on Goldwyn Rodrigues's patches and the
ext4 fs code.

Link: http://lkml.kernel.org/r/1495611866-27360-4-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/filecheck.c | 301 +++++++++++++++++++--------------------------------
 fs/ocfs2/filecheck.h |  20 ++--
 fs/ocfs2/ocfs2.h     |   8 ++
 fs/ocfs2/super.c     |  27 ++++-
 4 files changed, 152 insertions(+), 204 deletions(-)

diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 43477272ab96..a94c5310a59a 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -53,9 +53,6 @@ static const char * const ocfs2_filecheck_errs[] = {
 	"UNSUPPORTED"
 };
 
-static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock);
-static LIST_HEAD(ocfs2_filecheck_sysfs_list);
-
 struct ocfs2_filecheck_entry {
 	struct list_head fe_list;
 	unsigned long fe_ino;
@@ -83,40 +80,84 @@ ocfs2_filecheck_error(int errno)
 	return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1];
 }
 
-static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
-				    struct kobj_attribute *attr,
-				    char *buf);
-static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
-				     struct kobj_attribute *attr,
-				     const char *buf, size_t count);
-static struct kobj_attribute ocfs2_attr_filecheck_chk =
+static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					char *buf);
+static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count);
+static struct kobj_attribute ocfs2_filecheck_attr_chk =
 					__ATTR(check, S_IRUSR | S_IWUSR,
-					ocfs2_filecheck_show,
-					ocfs2_filecheck_store);
-static struct kobj_attribute ocfs2_attr_filecheck_fix =
+					ocfs2_filecheck_attr_show,
+					ocfs2_filecheck_attr_store);
+static struct kobj_attribute ocfs2_filecheck_attr_fix =
 					__ATTR(fix, S_IRUSR | S_IWUSR,
-					ocfs2_filecheck_show,
-					ocfs2_filecheck_store);
-static struct kobj_attribute ocfs2_attr_filecheck_set =
+					ocfs2_filecheck_attr_show,
+					ocfs2_filecheck_attr_store);
+static struct kobj_attribute ocfs2_filecheck_attr_set =
 					__ATTR(set, S_IRUSR | S_IWUSR,
-					ocfs2_filecheck_show,
-					ocfs2_filecheck_store);
+					ocfs2_filecheck_attr_show,
+					ocfs2_filecheck_attr_store);
+static struct attribute *ocfs2_filecheck_attrs[] = {
+	&ocfs2_filecheck_attr_chk.attr,
+	&ocfs2_filecheck_attr_fix.attr,
+	&ocfs2_filecheck_attr_set.attr,
+	NULL
+};
 
-static int ocfs2_filecheck_sysfs_wait(atomic_t *p)
+static void ocfs2_filecheck_release(struct kobject *kobj)
 {
-	schedule();
-	return 0;
+	struct ocfs2_filecheck_sysfs_entry *entry = container_of(kobj,
+				struct ocfs2_filecheck_sysfs_entry, fs_kobj);
+
+	complete(&entry->fs_kobj_unregister);
+}
+
+static ssize_t
+ocfs2_filecheck_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	ssize_t ret = -EIO;
+	struct kobj_attribute *kattr = container_of(attr,
+					struct kobj_attribute, attr);
+
+	kobject_get(kobj);
+	if (kattr->show)
+		ret = kattr->show(kobj, kattr, buf);
+	kobject_put(kobj);
+	return ret;
+}
+
+static ssize_t
+ocfs2_filecheck_store(struct kobject *kobj, struct attribute *attr,
+			const char *buf, size_t count)
+{
+	ssize_t ret = -EIO;
+	struct kobj_attribute *kattr = container_of(attr,
+					struct kobj_attribute, attr);
+
+	kobject_get(kobj);
+	if (kattr->store)
+		ret = kattr->store(kobj, kattr, buf, count);
+	kobject_put(kobj);
+	return ret;
 }
 
+static const struct sysfs_ops ocfs2_filecheck_ops = {
+	.show = ocfs2_filecheck_show,
+	.store = ocfs2_filecheck_store,
+};
+
+static struct kobj_type ocfs2_ktype_filecheck = {
+	.default_attrs = ocfs2_filecheck_attrs,
+	.sysfs_ops = &ocfs2_filecheck_ops,
+	.release = ocfs2_filecheck_release,
+};
+
 static void
 ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
 {
 	struct ocfs2_filecheck_entry *p;
 
-	if (!atomic_dec_and_test(&entry->fs_count))
-		wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait,
-				 TASK_UNINTERRUPTIBLE);
-
 	spin_lock(&entry->fs_fcheck->fc_lock);
 	while (!list_empty(&entry->fs_fcheck->fc_head)) {
 		p = list_first_entry(&entry->fs_fcheck->fc_head,
@@ -127,151 +168,48 @@ ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
 	}
 	spin_unlock(&entry->fs_fcheck->fc_lock);
 
-	kset_unregister(entry->fs_fcheckkset);
-	kset_unregister(entry->fs_devicekset);
 	kfree(entry->fs_fcheck);
-	kfree(entry);
-}
-
-static void
-ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry)
-{
-	spin_lock(&ocfs2_filecheck_sysfs_lock);
-	list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list);
-	spin_unlock(&ocfs2_filecheck_sysfs_lock);
-}
-
-static int ocfs2_filecheck_sysfs_del(const char *devname)
-{
-	struct ocfs2_filecheck_sysfs_entry *p;
-
-	spin_lock(&ocfs2_filecheck_sysfs_lock);
-	list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
-		if (!strcmp(p->fs_sb->s_id, devname)) {
-			list_del(&p->fs_list);
-			spin_unlock(&ocfs2_filecheck_sysfs_lock);
-			ocfs2_filecheck_sysfs_free(p);
-			return 0;
-		}
-	}
-	spin_unlock(&ocfs2_filecheck_sysfs_lock);
-	return 1;
-}
-
-static void
-ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
-{
-	if (atomic_dec_and_test(&entry->fs_count))
-		wake_up_atomic_t(&entry->fs_count);
+	entry->fs_fcheck = NULL;
 }
 
-static struct ocfs2_filecheck_sysfs_entry *
-ocfs2_filecheck_sysfs_get(const char *devname)
+int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb)
 {
-	struct ocfs2_filecheck_sysfs_entry *p = NULL;
-
-	spin_lock(&ocfs2_filecheck_sysfs_lock);
-	list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
-		if (!strcmp(p->fs_sb->s_id, devname)) {
-			atomic_inc(&p->fs_count);
-			spin_unlock(&ocfs2_filecheck_sysfs_lock);
-			return p;
-		}
-	}
-	spin_unlock(&ocfs2_filecheck_sysfs_lock);
-	return NULL;
-}
-
-int ocfs2_filecheck_create_sysfs(struct super_block *sb)
-{
-	int ret = 0;
-	struct kset *device_kset = NULL;
-	struct kset *fcheck_kset = NULL;
-	struct ocfs2_filecheck *fcheck = NULL;
-	struct ocfs2_filecheck_sysfs_entry *entry = NULL;
-	struct attribute **attrs = NULL;
-	struct attribute_group attrgp;
-
-	if (!ocfs2_kset)
-		return -ENOMEM;
-
-	attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS);
-	if (!attrs) {
-		ret = -ENOMEM;
-		goto error;
-	} else {
-		attrs[0] = &ocfs2_attr_filecheck_chk.attr;
-		attrs[1] = &ocfs2_attr_filecheck_fix.attr;
-		attrs[2] = &ocfs2_attr_filecheck_set.attr;
-		attrs[3] = NULL;
-		memset(&attrgp, 0, sizeof(attrgp));
-		attrgp.attrs = attrs;
-	}
+	int ret;
+	struct ocfs2_filecheck *fcheck;
+	struct ocfs2_filecheck_sysfs_entry *entry = &osb->osb_fc_ent;
 
 	fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS);
-	if (!fcheck) {
-		ret = -ENOMEM;
-		goto error;
-	} else {
-		INIT_LIST_HEAD(&fcheck->fc_head);
-		spin_lock_init(&fcheck->fc_lock);
-		fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
-		fcheck->fc_size = 0;
-		fcheck->fc_done = 0;
-	}
-
-	if (strlen(sb->s_id) <= 0) {
-		mlog(ML_ERROR,
-		"Cannot get device basename when create filecheck sysfs\n");
-		ret = -ENODEV;
-		goto error;
-	}
-
-	device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj);
-	if (!device_kset) {
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	fcheck_kset = kset_create_and_add("filecheck", NULL,
-					  &device_kset->kobj);
-	if (!fcheck_kset) {
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp);
-	if (ret)
-		goto error;
+	if (!fcheck)
+		return -ENOMEM;
 
-	entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS);
-	if (!entry) {
-		ret = -ENOMEM;
-		goto error;
-	} else {
-		atomic_set(&entry->fs_count, 1);
-		entry->fs_sb = sb;
-		entry->fs_devicekset = device_kset;
-		entry->fs_fcheckkset = fcheck_kset;
-		entry->fs_fcheck = fcheck;
-		ocfs2_filecheck_sysfs_add(entry);
+	INIT_LIST_HEAD(&fcheck->fc_head);
+	spin_lock_init(&fcheck->fc_lock);
+	fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
+	fcheck->fc_size = 0;
+	fcheck->fc_done = 0;
+
+	entry->fs_kobj.kset = osb->osb_dev_kset;
+	init_completion(&entry->fs_kobj_unregister);
+	ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck,
+					NULL, "filecheck");
+	if (ret) {
+		kfree(fcheck);
+		return ret;
 	}
 
-	kfree(attrs);
+	entry->fs_fcheck = fcheck;
 	return 0;
-
-error:
-	kfree(attrs);
-	kfree(entry);
-	kfree(fcheck);
-	kset_unregister(fcheck_kset);
-	kset_unregister(device_kset);
-	return ret;
 }
 
-int ocfs2_filecheck_remove_sysfs(struct super_block *sb)
+void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb)
 {
-	return ocfs2_filecheck_sysfs_del(sb->s_id);
+	if (!osb->osb_fc_ent.fs_fcheck)
+		return;
+
+	kobject_del(&osb->osb_fc_ent.fs_kobj);
+	kobject_put(&osb->osb_fc_ent.fs_kobj);
+	wait_for_completion(&osb->osb_fc_ent.fs_kobj_unregister);
+	ocfs2_filecheck_sysfs_free(&osb->osb_fc_ent);
 }
 
 static int
@@ -365,7 +303,7 @@ ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count,
 	return 0;
 }
 
-static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
 				    struct kobj_attribute *attr,
 				    char *buf)
 {
@@ -373,19 +311,12 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
 	ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
 	unsigned int type;
 	struct ocfs2_filecheck_entry *p;
-	struct ocfs2_filecheck_sysfs_entry *ent;
+	struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj,
+				struct ocfs2_filecheck_sysfs_entry, fs_kobj);
 
 	if (ocfs2_filecheck_type_parse(attr->attr.name, &type))
 		return -EINVAL;
 
-	ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
-	if (!ent) {
-		mlog(ML_ERROR,
-		"Cannot get the corresponding entry via device basename %s\n",
-		kobj->name);
-		return -ENODEV;
-	}
-
 	if (type == OCFS2_FILECHECK_TYPE_SET) {
 		spin_lock(&ent->fs_fcheck->fc_lock);
 		total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max);
@@ -419,11 +350,10 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
 	spin_unlock(&ent->fs_fcheck->fc_lock);
 
 exit:
-	ocfs2_filecheck_sysfs_put(ent);
 	return total;
 }
 
-static int
+static inline int
 ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent)
 {
 	struct ocfs2_filecheck_entry *p;
@@ -469,14 +399,14 @@ ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent,
 }
 
 static unsigned int
-ocfs2_filecheck_handle(struct super_block *sb,
+ocfs2_filecheck_handle(struct ocfs2_super *osb,
 		       unsigned long ino, unsigned int flags)
 {
 	unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS;
 	struct inode *inode = NULL;
 	int rc;
 
-	inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0);
+	inode = ocfs2_iget(osb, ino, flags, 0);
 	if (IS_ERR(inode)) {
 		rc = (int)(-(long)inode);
 		if (rc >= OCFS2_FILECHECK_ERR_START &&
@@ -494,11 +424,14 @@ static void
 ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
 			     struct ocfs2_filecheck_entry *entry)
 {
+	struct ocfs2_super *osb = container_of(ent, struct ocfs2_super,
+						osb_fc_ent);
+
 	if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK)
-		entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+		entry->fe_status = ocfs2_filecheck_handle(osb,
 				entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK);
 	else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX)
-		entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+		entry->fe_status = ocfs2_filecheck_handle(osb,
 				entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX);
 	else
 		entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED;
@@ -506,30 +439,21 @@ ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
 	ocfs2_filecheck_done_entry(ent, entry);
 }
 
-static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
 				     struct kobj_attribute *attr,
 				     const char *buf, size_t count)
 {
+	ssize_t ret = 0;
 	struct ocfs2_filecheck_args args;
 	struct ocfs2_filecheck_entry *entry;
-	struct ocfs2_filecheck_sysfs_entry *ent;
-	ssize_t ret = 0;
+	struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj,
+				struct ocfs2_filecheck_sysfs_entry, fs_kobj);
 
 	if (count == 0)
 		return count;
 
-	if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) {
-		mlog(ML_ERROR, "Invalid arguments for online file check\n");
+	if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args))
 		return -EINVAL;
-	}
-
-	ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
-	if (!ent) {
-		mlog(ML_ERROR,
-		"Cannot get the corresponding entry via device basename %s\n",
-		kobj->parent->name);
-		return -ENODEV;
-	}
 
 	if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) {
 		ret = ocfs2_filecheck_adjust_max(ent, args.fa_len);
@@ -544,7 +468,7 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
 
 	spin_lock(&ent->fs_fcheck->fc_lock);
 	if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
-	    (ent->fs_fcheck->fc_done == 0)) {
+		(ent->fs_fcheck->fc_done == 0)) {
 		mlog(ML_NOTICE,
 		"Cannot do more file check "
 		"since file check queue(%u) is full now\n",
@@ -574,6 +498,5 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
 		ocfs2_filecheck_handle_entry(ent, entry);
 
 exit:
-	ocfs2_filecheck_sysfs_put(ent);
 	return (!ret ? count : ret);
 }
diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h
index af1678b620a4..6a22ee79e8d0 100644
--- a/fs/ocfs2/filecheck.h
+++ b/fs/ocfs2/filecheck.h
@@ -51,15 +51,6 @@ struct ocfs2_filecheck {
 	unsigned int fc_done;	/* Finished entry count in list */
 };
 
-struct ocfs2_filecheck_sysfs_entry {	/* sysfs entry per mounting */
-	struct list_head fs_list;
-	atomic_t fs_count;
-	struct super_block *fs_sb;
-	struct kset *fs_devicekset;
-	struct kset *fs_fcheckkset;
-	struct ocfs2_filecheck *fs_fcheck;
-};
-
 #define OCFS2_FILECHECK_MAXSIZE		100
 #define OCFS2_FILECHECK_MINSIZE		10
 
@@ -70,7 +61,14 @@ enum {
 	OCFS2_FILECHECK_TYPE_SET = 100	/* Set entry list maximum size */
 };
 
-int ocfs2_filecheck_create_sysfs(struct super_block *sb);
-int ocfs2_filecheck_remove_sysfs(struct super_block *sb);
+struct ocfs2_filecheck_sysfs_entry {	/* sysfs entry per partition */
+	struct kobject fs_kobj;
+	struct completion fs_kobj_unregister;
+	struct ocfs2_filecheck *fs_fcheck;
+};
+
+
+int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb);
+void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb);
 
 #endif  /* FILECHECK_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 0c39d71c67a1..3bb169157a01 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -50,6 +50,8 @@
 
 #include "reservations.h"
 
+#include "filecheck.h"
+
 /* Caching of metadata buffers */
 
 /* Most user visible OCFS2 inodes will have very few pieces of
@@ -473,6 +475,12 @@ struct ocfs2_super
 	 * workqueue and schedule on our own.
 	 */
 	struct workqueue_struct *ocfs2_wq;
+
+	/* sysfs directory per partition */
+	struct kset *osb_dev_kset;
+
+	/* file check related stuff */
+	struct ocfs2_filecheck_sysfs_entry osb_fc_ent;
 };
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f03fdd0fc72c..3aa927c681d2 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1162,6 +1162,23 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 
 	ocfs2_complete_mount_recovery(osb);
 
+	osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
+						&ocfs2_kset->kobj);
+	if (!osb->osb_dev_kset) {
+		status = -ENOMEM;
+		mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id);
+		goto read_super_error;
+	}
+
+	/* Create filecheck sysfs related directories/files at
+	 * /sys/fs/ocfs2/<devname>/filecheck */
+	if (ocfs2_filecheck_create_sysfs(osb)) {
+		status = -ENOMEM;
+		mlog(ML_ERROR, "Unable to create filecheck sysfs directory at "
+			"/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id);
+		goto read_super_error;
+	}
+
 	if (ocfs2_mount_local(osb))
 		snprintf(nodestr, sizeof(nodestr), "local");
 	else
@@ -1200,9 +1217,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	/* Start this when the mount is almost sure of being successful */
 	ocfs2_orphan_scan_start(osb);
 
-	/* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */
-	ocfs2_filecheck_create_sysfs(sb);
-
 	return status;
 
 read_super_error:
@@ -1654,7 +1668,6 @@ static void ocfs2_put_super(struct super_block *sb)
 
 	ocfs2_sync_blockdev(sb);
 	ocfs2_dismount_volume(sb, 0);
-	ocfs2_filecheck_remove_sysfs(sb);
 }
 
 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1900,6 +1913,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 	osb = OCFS2_SB(sb);
 	BUG_ON(!osb);
 
+	/* Remove file check sysfs related directores/files,
+	 * and wait for the pending file check operations */
+	ocfs2_filecheck_remove_sysfs(osb);
+
+	kset_unregister(osb->osb_dev_kset);
+
 	debugfs_remove(osb->osb_ctxt);
 
 	/* Orphan scan should be stopped as early as possible */
-- 
cgit v1.2.3


From e2d18e1b55f66e82de016205fb7f22ef59a7a1bf Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Sat, 15 Jul 2017 11:41:16 +1000
Subject: ocfs2: add duplicated ino number check

Add a duplicated ino number check, to avoid adding a file to the file
check list while that file is already being checked.

Link: http://lkml.kernel.org/r/1495611866-27360-5-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/filecheck.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index a94c5310a59a..f65f2b2f594d 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -353,6 +353,22 @@ exit:
 	return total;
 }
 
+static inline int
+ocfs2_filecheck_is_dup_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+				unsigned long ino)
+{
+	struct ocfs2_filecheck_entry *p;
+
+	list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+		if (!p->fe_done) {
+			if (p->fe_ino == ino)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
 static inline int
 ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent)
 {
@@ -467,7 +483,10 @@ static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
 	}
 
 	spin_lock(&ent->fs_fcheck->fc_lock);
-	if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+	if (ocfs2_filecheck_is_dup_entry(ent, args.fa_ino)) {
+		ret = -EEXIST;
+		kfree(entry);
+	} else if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
 		(ent->fs_fcheck->fc_done == 0)) {
 		mlog(ML_NOTICE,
 		"Cannot do more file check "
-- 
cgit v1.2.3


From e81d9956cd86e4df47ec222b3b8001b14f827311 Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Sat, 15 Jul 2017 11:41:17 +1000
Subject: block: restore /proc/partitions to not display non-partitionable
 removable devices

We found with newer kernels we started seeing the cdrom device showing
up in /proc/partitions, but it was not there before.

Looking into this I found that commit d27769ec ("block: add
GENHD_FL_NO_PART_SCAN") introduces this change in behavior.  It's not
clear to me from the commit's changelog if this change was intentional or
not.  This comment still remains: /* Don't show non-partitionable
removeable devices or empty devices */ so I've decided to send a patch to
restore the behavior of not printing unpartitionable removable devices.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 block/genhd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/genhd.c b/block/genhd.c
index 7f520fa25d16..9dc591678627 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -890,7 +890,7 @@ static int show_partition(struct seq_file *seqf, void *v)
 	char buf[BDEVNAME_SIZE];
 
 	/* Don't show non-partitionable removeable devices or empty devices */
-	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+	if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
 				   (sgp->flags & GENHD_FL_REMOVABLE)))
 		return 0;
 	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
-- 
cgit v1.2.3


From 1c68a50d0904a1b2cf399a866b762dca298da5cf Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Sat, 15 Jul 2017 11:41:17 +1000
Subject: slub: tidy up initialization ordering

- free_kmem_cache_nodes() frees the cache node before nulling out a
  reference to it

- init_kmem_cache_nodes() publishes the cache node before initializing it

Neither of these matter at runtime because the cache nodes cannot be
looked up by any other thread.  But it's neater and more consistent to
reorder these.

Link: http://lkml.kernel.org/r/20170707083408.40410-1-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/slub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 1d3f9835f4ea..5ac0b5ad029b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3358,8 +3358,8 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
 	struct kmem_cache_node *n;
 
 	for_each_kmem_cache_node(s, node, n) {
-		kmem_cache_free(kmem_cache_node, n);
 		s->node[node] = NULL;
+		kmem_cache_free(kmem_cache_node, n);
 	}
 }
 
@@ -3389,8 +3389,8 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
 			return 0;
 		}
 
-		s->node[node] = n;
 		init_kmem_cache_node(n);
+		s->node[node] = n;
 	}
 	return 1;
 }
-- 
cgit v1.2.3


From 6379d071d06ef44e0e07c054680a8cdd87cb797e Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Sat, 15 Jul 2017 11:41:17 +1000
Subject: mm/memory_hotplug: just build zonelist for newly added node

9adb62a5df9c0fbef7 ("mm/hotplug: correctly setup fallback zonelists when
creating new pgdat") tries to build the correct zonelist for a newly added
node, while it is not necessary to rebuild it for already existing nodes.

In build_zonelists(), it will iterate on nodes with memory.  A newly
added node will not have memory until node_states_set_node() is called in
online_pages().

This patch avoids rebuilding the zonelists for already existing nodes.

build_zonelists_node() uses managed_zone(zone) checks, so it should not
include empty zones anyway.  So effectively we avoid some pointless
work under stop_machine().

Link: http://lkml.kernel.org/r/20170626035822.50155-1-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d30e914afb6..7392e4ff628f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5213,15 +5213,17 @@ static int __build_all_zonelists(void *data)
 	memset(node_load, 0, sizeof(node_load));
 #endif
 
-	if (self && !node_online(self->node_id)) {
+	/* This node is hotadded and no memory preset yet.
+	 * So just build zonelists is fine, no need to touch other nodes.
+	 */
+	if (self && !node_online(self->node_id))
 		build_zonelists(self);
-	}
-
-	for_each_online_node(nid) {
-		pg_data_t *pgdat = NODE_DATA(nid);
+	else
+		for_each_online_node(nid) {
+			pg_data_t *pgdat = NODE_DATA(nid);
 
-		build_zonelists(pgdat);
-	}
+			build_zonelists(pgdat);
+		}
 
 	/*
 	 * Initialize the boot_pagesets that are going to be used
-- 
cgit v1.2.3


From 5c865b2316e6ec459c5170a1b9e7fa77b79b8b9b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 15 Jul 2017 11:41:17 +1000
Subject: mm-memory_hotplug-just-build-zonelist-for-new-added-node-fix

tweak comment text

Cc: Jiang Liu <liuj97@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7392e4ff628f..bb2d08167bbf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5213,8 +5213,9 @@ static int __build_all_zonelists(void *data)
 	memset(node_load, 0, sizeof(node_load));
 #endif
 
-	/* This node is hotadded and no memory preset yet.
-	 * So just build zonelists is fine, no need to touch other nodes.
+	/*
+	 * This node is hotadded and no memory is yet present.   So just
+	 * building zonelists is fine - no need to touch other nodes.
 	 */
 	if (self && !node_online(self->node_id))
 		build_zonelists(self);
-- 
cgit v1.2.3


From 5ee2dcffffc03fcc7056b7d5defc4f97ced4bc9a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 15 Jul 2017 11:41:17 +1000
Subject: mm-memory_hotplug-just-build-zonelist-for-new-added-node-fix-fix

coding-style tweak, per Vlastimil

Cc: Jiang Liu <liuj97@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bb2d08167bbf..f75c18307beb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5217,14 +5217,15 @@ static int __build_all_zonelists(void *data)
 	 * This node is hotadded and no memory is yet present.   So just
 	 * building zonelists is fine - no need to touch other nodes.
 	 */
-	if (self && !node_online(self->node_id))
+	if (self && !node_online(self->node_id)) {
 		build_zonelists(self);
-	else
+	} else {
 		for_each_online_node(nid) {
 			pg_data_t *pgdat = NODE_DATA(nid);
 
 			build_zonelists(pgdat);
 		}
+	}
 
 	/*
 	 * Initialize the boot_pagesets that are going to be used
-- 
cgit v1.2.3


From 3301171f809f27b7ad49a6f35cae4b501e8803d0 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Sat, 15 Jul 2017 11:41:17 +1000
Subject: mm/page_alloc: return 0 in case this node has no page within the zone

The whole memory space is divided into several zones and nodes may have no
page in some zones.  In this case, the __absent_pages_in_range() would
return 0, since the range it is searching for is an empty range.

Also this happens more often to those nodes with higher memory range when
there are more nodes, which is a trend for future architectures.

This patch checks the zone range after clamp and adjustment, and returns 0
if the range is an empty range.

Link: http://lkml.kernel.org/r/20170206154314.15705-1-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f75c18307beb..80e4adb4c360 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5866,6 +5866,11 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	adjust_zone_range_for_zone_movable(nid, zone_type,
 			node_start_pfn, node_end_pfn,
 			&zone_start_pfn, &zone_end_pfn);
+
+	/* If this node has no page within this zone, return 0. */
+	if (zone_start_pfn == zone_end_pfn)
+		return 0;
+
 	nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 
 	/*
-- 
cgit v1.2.3


From 187e364b7dfa13b18a13f250707f32ae13f40891 Mon Sep 17 00:00:00 2001
From: Vinayak Menon <vinmenon@codeaurora.org>
Date: Sat, 15 Jul 2017 11:41:17 +1000
Subject: mm: vmscan: do not pass reclaimed slab to vmpressure

During global reclaim, the nr_reclaimed passed to vmpressure includes
the pages reclaimed from slab.  But the corresponding scanned slab
pages are not passed.  There is an impact on the vmpressure values
because of this.  While moving from kernel version 3.18 to 4.4, a
difference is seen in the vmpressure values for the same workload
resulting in a different behaviour of the vmpressure consumer.  One
such case is of a vmpressure based lowmemorykiller.  It is observed
that the vmpressure events are received late and are fewer in number,
resulting in tasks not being killed at the right time.  In this use
case, the number of critical vmpressure events received is around 50%
less on 4.4 than 3.18.  The following numbers show the impact on
reclaim activity due to the change in behaviour of lowmemorykiller on a
4GB device.  The test launches a number of apps in sequence and repeats
it multiple times.  The difference in reclaim behaviour is because of
the lower number of kills and kills happening late, resulting in more
swapping and page cache reclaim.

                      v4.4           v3.18
pgpgin                163016456      145617236
pgpgout               4366220        4188004
workingset_refault    29857868       26781854
workingset_activate   6293946        5634625
pswpin                1327601        1133912
pswpout               3593842        3229602
pgalloc_dma           99520618       94402970
pgalloc_normal        104046854      98124798
pgfree                203772640      192600737
pgmajfault            2126962        1851836
pgsteal_kswapd_dma    19732899       18039462
pgsteal_kswapd_normal 19945336       17977706
pgsteal_direct_dma    206757         131376
pgsteal_direct_normal 236783         138247
pageoutrun            116622         108370
allocstall            7220           4684
compact_stall         931            856

The lowmemorykiller example above is just for indicating the difference
in vmpressure events between 4.4 and 3.18.

Do not consider reclaimed slab pages for vmpressure calculation.  The
reclaimed pages from slab can be excluded because the freeing of a page
by slab shrinking depends on each slab's object population, making the
cost model (i.e.  scan:free) different from that of LRU.  Also, not
every shrinker accounts the pages it reclaims.  Ideally the pages
reclaimed from slab should be passed to vmpressure, otherwise higher
vmpressure levels can be triggered even when there is a reclaim
progress.  But accounting only the reclaimed slab pages without the
scanned, and adding something which does not fit into the cost model
just adds noise to the vmpressure values.

Fixes: 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from shrink_zone()")
Link: http://lkml.kernel.org/r/1486641577-11685-2-git-send-email-vinmenon@codeaurora.org
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Shiraz Hashim <shashim@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1af041930a6..efc9da21c5e6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2632,16 +2632,23 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 				    sc->nr_scanned - nr_scanned,
 				    node_lru_pages);
 
+		/*
+		 * Record the subtree's reclaim efficiency. The reclaimed
+		 * pages from slab is excluded here because the corresponding
+		 * scanned pages is not accounted. Moreover, freeing a page
+		 * by slab shrinking depends on each slab's object population,
+		 * making the cost model (i.e. scan:free) different from that
+		 * of LRU.
+		 */
+		vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+			   sc->nr_scanned - nr_scanned,
+			   sc->nr_reclaimed - nr_reclaimed);
+
 		if (reclaim_state) {
 			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 			reclaim_state->reclaimed_slab = 0;
 		}
 
-		/* Record the subtree's reclaim efficiency */
-		vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
-			   sc->nr_scanned - nr_scanned,
-			   sc->nr_reclaimed - nr_reclaimed);
-
 		if (sc->nr_reclaimed - nr_reclaimed)
 			reclaimable = true;
 
-- 
cgit v1.2.3


From dc52c5c660fd03d25f9f859d60e3a7685e437fa1 Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Sat, 15 Jul 2017 11:41:18 +1000
Subject: mm/page_owner: align with pageblock_nr pages

When pfn_valid(pfn) returns false, pfn should be aligned to
pageblock_nr_pages rather than MAX_ORDER_NR_PAGES in init_pages_in_zone,
because aligning to MAX_ORDER_NR_PAGES can skip 2M blocks that contain
valid pfns, making the early allocated count inaccurate.

Link: http://lkml.kernel.org/r/1468938136-24228-1-git-send-email-zhongjiang@huawei.com
Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_owner.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_owner.c b/mm/page_owner.c
index 0fd9dcf2c5dc..356828237581 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -531,7 +531,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
 	 */
 	for (; pfn < end_pfn; ) {
 		if (!pfn_valid(pfn)) {
-			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+			pfn = ALIGN(pfn + 1, pageblock_nr_pages);
 			continue;
 		}
 
-- 
cgit v1.2.3


From dcb37f7e16fec90e4e92608c7053324f75b2a501 Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Sat, 15 Jul 2017 11:41:18 +1000
Subject: mm/vmstat.c: walk the zone in pageblock_nr_pages steps

When walking the zone, we can run into holes.  We should not align to
MAX_ORDER_NR_PAGES when skipping them, because that can skip normal memory.

In addition, pagetypeinfo_showmixedcount_print reflects fragmentation,
and we want it to report more accurate data.  Therefore, fix it.

Link: http://lkml.kernel.org/r/1469502526-24486-2-git-send-email-zhongjiang@huawei.com
Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_owner.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_owner.c b/mm/page_owner.c
index 356828237581..401feb070335 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -261,7 +261,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 	 */
 	for (; pfn < end_pfn; ) {
 		if (!pfn_valid(pfn)) {
-			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+			pfn = ALIGN(pfn + 1, pageblock_nr_pages);
 			continue;
 		}
 
-- 
cgit v1.2.3


From 6b0f6a1f8c6eeca5d6d6e35751bfd40f63195b38 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 15 Jul 2017 11:41:18 +1000
Subject: fs/seq_file.c: delete small-value optimization

num_to_str() optimizes printing small integers [0..9], so the same check
higher in callchain is unnecessary.

Link: http://lkml.kernel.org/r/20170516204246.GA18123@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/seq_file.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index dc7c2be963ed..13e8c092d4d2 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -694,11 +694,6 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 	if (m->count + 1 >= m->size)
 		goto overflow;
 
-	if (num < 10) {
-		m->buf[m->count++] = num + '0';
-		return;
-	}
-
 	len = num_to_str(m->buf + m->count, m->size - m->count, num);
 	if (!len)
 		goto overflow;
@@ -733,11 +728,6 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 		num = -num;
 	}
 
-	if (num < 10) {
-		m->buf[m->count++] = num + '0';
-		return;
-	}
-
 	len = num_to_str(m->buf + m->count, m->size - m->count, num);
 	if (!len)
 		goto overflow;
-- 
cgit v1.2.3


From 3beb8be37f7e1347b18d4c43997b18a09fcc6c85 Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Date: Sat, 15 Jul 2017 11:41:18 +1000
Subject: kdump, vmcoreinfo: report actual value of phys_base

Currently, VMCOREINFO note information reports the virtual address of
phys_base that is assigned to symbol phys_base.  But this doesn't make
sense: reading phys_base through that virtual address requires the
value of phys_base itself, which is exactly what we are trying to obtain.

Userland tools related to kdump, such as makedumpfile and the crash
utility, have so far made some effort to calculate phys_base on crash
dump formats generated by mechanisms running outside the Linux kernel,
such as virtual machine hypervisors (e.g. qemu dump, which ordinary users
use via virsh dump) or ones implemented in vendor-specific firmware.

That is, they find kernel data whose virtual and physical addresses are
available via its note information and calculate phys_base from it.
However, such a data structure is not one prepared for the purpose of
calculating phys_base.  There's no guarantee that other crash dump
mechanisms include information that can be used to calculate phys_base
similarly.

VMCOREINFO consists of simple strings, so it is easy to extract from a
vmcore using the strings and grep commands, like this:

$ strings vmcore-3.10.0-121.el7.x86_64 | grep -E ".*VMCOREINFO.*" -A 100
VMCOREINFO
OSRELEASE=3.10.0-121.el7.x86_64
PAGESIZE=4096
...

This is also useful for getting the value of phys_base of a kdump 2nd
kernel contained in a vmcore generated by the above-mentioned external
crash dump mechanisms; the kdump 2nd kernel is inherently a relocated
kernel.

This commit doesn't remove the VMCOREINFO_SYMBOL(phys_base) line because
makedumpfile refers to it; if it were removed, old versions of
makedumpfile would stop working properly.

Signed-off-by: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Cc: Dave Anderson <anderson@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kernel/machine_kexec_64.c | 1 +
 include/linux/crash_core.h         | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cb0a30473c23..df2ac62298c0 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -356,6 +356,7 @@ void arch_crash_save_vmcoreinfo(void)
 	vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
 			      kaslr_offset());
 	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
+	VMCOREINFO_PHYS_BASE(phys_base);
 }
 
 /* arch-dependent functionality related to kexec file-based syscall */
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 2df2118fbe13..fffe674cad27 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -56,6 +56,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+#define VMCOREINFO_PHYS_BASE(value) \
+	vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
 
 extern u32 *vmcoreinfo_note;
 
-- 
cgit v1.2.3


From 46f7f69c3e1a838c07725b7aa1ef547c5ad232b0 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Sat, 15 Jul 2017 11:41:19 +1000
Subject: uapi: fix linux/sysctl.h userspace compilation errors

Include <stddef.h> (guarded by #ifndef __KERNEL__) to fix the following
linux/sysctl.h userspace compilation errors:

/usr/include/linux/sysctl.h:38:2: error: unknown type name 'size_t'
  size_t *oldlenp;
/usr/include/linux/sysctl.h:40:2: error: unknown type name 'size_t'
  size_t newlen;

This also fixes userspace compilation of uapi headers that include
linux/sysctl.h, e.g. linux/netfilter.h.

Link: http://lkml.kernel.org/r/20170222230652.GA14373@altlinux.org
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/linux/sysctl.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index e13d48058b8d..c6602bdd2a10 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -26,6 +26,10 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
+#ifndef __KERNEL__
+#include <stddef.h>		/* For size_t. */
+#endif
+
 #define CTL_MAXNAME 10		/* how many path components do we allow in a
 				   call to sysctl?   In other words, what is
 				   the largest acceptable value for the nlen
-- 
cgit v1.2.3


From 86119af309c6b8e74b6cb22d7bf388cff3ca072b Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sat, 15 Jul 2017 11:41:19 +1000
Subject: kernel/reboot.c: add devm_register_reboot_notifier()

Add devm_* wrapper around register_reboot_notifier to simplify device
specific reboot notifier registration/unregistration.

Link: http://lkml.kernel.org/r/20170320171753.1705-1-andrew.smirnov@gmail.com
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/reboot.h |  3 +++
 kernel/reboot.c        | 27 +++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index a7ff409f386d..0ca25413ad2d 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -38,6 +38,9 @@ extern int reboot_force;
 extern int register_reboot_notifier(struct notifier_block *);
 extern int unregister_reboot_notifier(struct notifier_block *);
 
+struct device;
+extern int devm_register_reboot_notifier(struct device *, struct notifier_block *);
+
 extern int register_restart_handler(struct notifier_block *);
 extern int unregister_restart_handler(struct notifier_block *);
 extern void do_kernel_restart(char *cmd);
diff --git a/kernel/reboot.c b/kernel/reboot.c
index bd30a973fe94..e4ced883d8de 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -104,6 +104,33 @@ int unregister_reboot_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
+static void devm_unregister_reboot_notifier(struct device *dev, void *res)
+{
+	WARN_ON(unregister_reboot_notifier(*(struct notifier_block **)res));
+}
+
+int devm_register_reboot_notifier(struct device *dev, struct notifier_block *nb)
+{
+	struct notifier_block **rcnb;
+	int ret;
+
+	rcnb = devres_alloc(devm_unregister_reboot_notifier,
+			    sizeof(*rcnb), GFP_KERNEL);
+	if (!rcnb)
+		return -ENOMEM;
+
+	ret = register_reboot_notifier(nb);
+	if (!ret) {
+		*rcnb = nb;
+		devres_add(dev, rcnb);
+	} else {
+		devres_free(rcnb);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(devm_register_reboot_notifier);
+
 /*
  *	Notifier list for kernel code which wants to be called
  *	to restart the system.
-- 
cgit v1.2.3


From 7f004d4884d4b78c6ac68cc6cd6a105f21ed047b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sat, 15 Jul 2017 11:41:19 +1000
Subject: kernel-reboot-add-devm_register_reboot_notifier-fix

move `struct device' forward declaration to top-of-file

Cc: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/reboot.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 0ca25413ad2d..ecbf7b56b9db 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -5,6 +5,8 @@
 #include <linux/notifier.h>
 #include <uapi/linux/reboot.h>
 
+struct device;
+
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
 #define SYS_HALT	0x0002	/* Notify of system halt */
@@ -38,7 +40,6 @@ extern int reboot_force;
 extern int register_reboot_notifier(struct notifier_block *);
 extern int unregister_reboot_notifier(struct notifier_block *);
 
-struct device;
 extern int devm_register_reboot_notifier(struct device *, struct notifier_block *);
 
 extern int register_restart_handler(struct notifier_block *);
-- 
cgit v1.2.3