From 778d3b0ff0654ad7092bf823fd32010066b12365 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 26 Jul 2011 16:08:30 -0700 Subject: cpusets: randomize node rotor used in cpuset_mem_spread_node() [ This patch has already been accepted as commit 0ac0c0d0f837 but later reverted (commit 35926ff5fba8) because it itroduced arch specific __node_random which was defined only for x86 code so it broke other archs. This is a followup without any arch specific code. Other than that there are no functional changes.] Some workloads that create a large number of small files tend to assign too many pages to node 0 (multi-node systems). Part of the reason is that the rotor (in cpuset_mem_spread_node()) used to assign nodes starts at node 0 for newly created tasks. This patch changes the rotor to be initialized to a random node number of the cpuset. [akpm@linux-foundation.org: fix layout] [Lee.Schermerhorn@hp.com: Define stub numa_random() for !NUMA configuration] [mhocko@suse.cz: Make it arch independent] [akpm@linux-foundation.org: fix CONFIG_NUMA=y, MAX_NUMNODES>1 build] Signed-off-by: Jack Steiner Signed-off-by: Lee Schermerhorn Signed-off-by: Michal Hocko Reviewed-by: KOSAKI Motohiro Cc: Christoph Lameter Cc: Pekka Enberg Cc: Paul Menage Cc: Jack Steiner Cc: Robin Holt Cc: David Rientjes Cc: Christoph Lameter Cc: David Rientjes Cc: Jack Steiner Cc: KOSAKI Motohiro Cc: Lee Schermerhorn Cc: Michal Hocko Cc: Paul Menage Cc: Pekka Enberg Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 3f3b68199d7..37ef4b04879 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -756,7 +756,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) * * The bit positions 0 through @bits are valid positions in @buf. */ -static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) +int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) { int pos = 0; -- cgit v1.2.3 From 95918f4a72ce1dbbd2ce67a7c3bfff31335f4219 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 26 Jul 2011 16:08:44 -0700 Subject: cpumask: convert for_each_cpumask() with for_each_cpu() Adapt new API fashion. Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/cpumask.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/cpumask.c b/lib/cpumask.c index 05d6aca7fc1..4f6425d7723 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -30,7 +30,7 @@ int __any_online_cpu(const cpumask_t *mask) { int cpu; - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { if (cpu_online(cpu)) break; } -- cgit v1.2.3 From 37e7b5f1535dddaace865b25df34a2d049a3a66f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 26 Jul 2011 16:08:44 -0700 Subject: cpumask: alloc_cpumask_var() use NUMA_NO_NODE NUMA_NO_NODE and numa_node_id() have different meanings. NUMA_NO_NODE is obviously the recommended fallback. Signed-off-by: KOSAKI Motohiro Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/cpumask.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/cpumask.c b/lib/cpumask.c index 4f6425d7723..af3e5817de9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -131,7 +131,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var_node); */ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { - return alloc_cpumask_var_node(mask, flags, numa_node_id()); + return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); } EXPORT_SYMBOL(alloc_cpumask_var); -- cgit v1.2.3 From 35f46764f0101f1aca60c26de96e0c4e8e3d011f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 26 Jul 2011 16:09:00 -0700 Subject: fault-injection: do not include unneeded header No need to include linux/kallsyms.h. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/fault-inject.c | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 7e65af70635..e6430e1ce18 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -8,7 +8,6 @@ #include #include #include -#include #include /* -- cgit v1.2.3 From 8307fc257cf3931d87e172bd8663e80c3d1e56a3 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 26 Jul 2011 16:09:01 -0700 Subject: fault-injection: cleanup simple attribute of stacktrace_depth Minor cosmetic changes for simple attribute of stacktrace_depth: - use min_t() - reduce #ifdef by moving a function - do not use partly capitalized function name Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/fault-inject.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/fault-inject.c b/lib/fault-inject.c index e6430e1ce18..882fd3b6a6a 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -139,16 +139,6 @@ static int debugfs_ul_set(void *data, u64 val) return 0; } -#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER -static int debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val) -{ - *(unsigned long *)data = - val < MAX_STACK_TRACE_DEPTH ? - val : MAX_STACK_TRACE_DEPTH; - return 0; -} -#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - static int debugfs_ul_get(void *data, u64 *val) { *val = *(unsigned long *)data; @@ -164,16 +154,26 @@ static struct dentry *debugfs_create_ul(const char *name, mode_t mode, } #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER -DEFINE_SIMPLE_ATTRIBUTE(fops_ul_MAX_STACK_TRACE_DEPTH, debugfs_ul_get, - debugfs_ul_set_MAX_STACK_TRACE_DEPTH, "%llu\n"); -static struct dentry *debugfs_create_ul_MAX_STACK_TRACE_DEPTH( +static int debugfs_stacktrace_depth_set(void *data, u64 val) +{ + *(unsigned long *)data = + min_t(unsigned long, val, MAX_STACK_TRACE_DEPTH); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_stacktrace_depth, debugfs_ul_get, + debugfs_stacktrace_depth_set, "%llu\n"); + +static struct dentry *debugfs_create_stacktrace_depth( const char *name, mode_t mode, struct dentry *parent, unsigned long *value) { return debugfs_create_file(name, mode, parent, value, - &fops_ul_MAX_STACK_TRACE_DEPTH); + &fops_stacktrace_depth); } + #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ static int debugfs_atomic_t_set(void *data, u64 val) @@ -281,7 +281,7 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER attr->dentries.stacktrace_depth_file = - debugfs_create_ul_MAX_STACK_TRACE_DEPTH( + debugfs_create_stacktrace_depth( "stacktrace-depth", mode, dir, &attr->stacktrace_depth); attr->dentries.require_start_file = -- cgit v1.2.3 From 7f5ddcc8d3eaccd5e169fda738530f937509645e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 26 Jul 2011 16:09:02 -0700 Subject: fault-injection: use debugfs_remove_recursive Use debugfs_remove_recursive() to simplify initialization and deinitialization of fault injection debugfs files. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/fault-inject.c | 115 +++++++++++++---------------------------------------- 1 file changed, 27 insertions(+), 88 deletions(-) (limited to 'lib') diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 882fd3b6a6a..2577b121c7c 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -199,48 +199,7 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, void cleanup_fault_attr_dentries(struct fault_attr *attr) { - debugfs_remove(attr->dentries.probability_file); - attr->dentries.probability_file = NULL; - - debugfs_remove(attr->dentries.interval_file); - attr->dentries.interval_file = NULL; - - debugfs_remove(attr->dentries.times_file); - attr->dentries.times_file = NULL; - - debugfs_remove(attr->dentries.space_file); - attr->dentries.space_file = NULL; - - debugfs_remove(attr->dentries.verbose_file); - attr->dentries.verbose_file = NULL; - - debugfs_remove(attr->dentries.task_filter_file); - attr->dentries.task_filter_file = NULL; - -#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER - - debugfs_remove(attr->dentries.stacktrace_depth_file); - attr->dentries.stacktrace_depth_file = NULL; - - debugfs_remove(attr->dentries.require_start_file); - attr->dentries.require_start_file = NULL; - - debugfs_remove(attr->dentries.require_end_file); - attr->dentries.require_end_file = NULL; - - debugfs_remove(attr->dentries.reject_start_file); - attr->dentries.reject_start_file = NULL; - - debugfs_remove(attr->dentries.reject_end_file); - attr->dentries.reject_end_file = NULL; - -#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - - if (attr->dentries.dir) - WARN_ON(!simple_empty(attr->dentries.dir)); - - debugfs_remove(attr->dentries.dir); - attr->dentries.dir = NULL; + debugfs_remove_recursive(attr->dir); } int init_fault_attr_dentries(struct fault_attr *attr, const char *name) @@ -248,66 +207,46 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - memset(&attr->dentries, 0, sizeof(attr->dentries)); - dir = debugfs_create_dir(name, NULL); if (!dir) - goto fail; - attr->dentries.dir = dir; - - attr->dentries.probability_file = - debugfs_create_ul("probability", mode, dir, &attr->probability); + return -ENOMEM; - attr->dentries.interval_file = - debugfs_create_ul("interval", mode, dir, &attr->interval); + attr->dir = dir; - attr->dentries.times_file = - debugfs_create_atomic_t("times", mode, dir, &attr->times); - - attr->dentries.space_file = - debugfs_create_atomic_t("space", mode, dir, &attr->space); - - attr->dentries.verbose_file = - debugfs_create_ul("verbose", mode, dir, &attr->verbose); - - attr->dentries.task_filter_file = debugfs_create_bool("task-filter", - mode, dir, &attr->task_filter); - - if (!attr->dentries.probability_file || !attr->dentries.interval_file || - !attr->dentries.times_file || !attr->dentries.space_file || - !attr->dentries.verbose_file || !attr->dentries.task_filter_file) + if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) + goto fail; + if (!debugfs_create_ul("interval", mode, dir, &attr->interval)) + goto fail; + if (!debugfs_create_atomic_t("times", mode, dir, &attr->times)) + goto fail; + if (!debugfs_create_atomic_t("space", mode, dir, &attr->space)) + goto fail; + if (!debugfs_create_ul("verbose", mode, dir, &attr->verbose)) + goto fail; + if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter)) goto fail; #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER - attr->dentries.stacktrace_depth_file = - debugfs_create_stacktrace_depth( - "stacktrace-depth", mode, dir, &attr->stacktrace_depth); - - attr->dentries.require_start_file = - debugfs_create_ul("require-start", mode, dir, &attr->require_start); - - attr->dentries.require_end_file = - debugfs_create_ul("require-end", mode, dir, &attr->require_end); - - attr->dentries.reject_start_file = - debugfs_create_ul("reject-start", mode, dir, &attr->reject_start); - - attr->dentries.reject_end_file = - debugfs_create_ul("reject-end", mode, dir, &attr->reject_end); - - if (!attr->dentries.stacktrace_depth_file || - !attr->dentries.require_start_file || - !attr->dentries.require_end_file || - !attr->dentries.reject_start_file || - !attr->dentries.reject_end_file) + if (!debugfs_create_stacktrace_depth("stacktrace-depth", mode, dir, + &attr->stacktrace_depth)) + goto fail; + if (!debugfs_create_ul("require-start", mode, dir, + &attr->require_start)) + goto fail; + if (!debugfs_create_ul("require-end", mode, dir, &attr->require_end)) + goto fail; + if (!debugfs_create_ul("reject-start", mode, dir, &attr->reject_start)) + goto fail; + if (!debugfs_create_ul("reject-end", mode, dir, &attr->reject_end)) goto fail; #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ return 0; fail: - cleanup_fault_attr_dentries(attr); + debugfs_remove_recursive(attr->dir); + return -ENOMEM; } -- cgit v1.2.3 From 60063497a95e716c9a689af3be2687d261f115b4 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Tue, 26 Jul 2011 16:09:06 -0700 Subject: atomic: use This allows us to move duplicated code in (atomic_inc_not_zero() for now) to Signed-off-by: Arun Sharma Reviewed-by: Eric Dumazet Cc: Ingo Molnar Cc: David Miller Cc: Eric Dumazet Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/atomic64.c | 2 +- lib/atomic64_test.c | 2 +- lib/crc32.c | 2 +- lib/dec_and_lock.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/atomic64.c b/lib/atomic64.c index a21c12bc727..e12ae0dd08a 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* * We use a hashed array of spinlocks to provide exclusive access diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 44524cc8c32..0c33cde2a1e 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -10,7 +10,7 @@ */ #include #include -#include +#include #define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) static __init int test_atomic64(void) diff --git a/lib/crc32.c b/lib/crc32.c index 4855995fcde..a6e633a48ce 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include "crc32defs.h" #if CRC_LE_BITS == 8 # define tole(x) __constant_cpu_to_le32(x) diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index e73822aa6e9..b5257725daa 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -1,6 +1,6 @@ #include #include -#include +#include /* * This is an implementation of the notion of "decrement a -- cgit v1.2.3 From f49f23abf3dd786ddcac1c1e7db3c2013b07413f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:23 +0800 Subject: lib, Add lock-less NULL terminated single list Cmpxchg is used to implement adding new entry to the list, deleting all entries from the list, deleting first entry of the list and some other operations. Because this is a single list, so the tail can not be accessed in O(1). If there are multiple producers and multiple consumers, llist_add can be used in producers and llist_del_all can be used in consumers. They can work simultaneously without lock. But llist_del_first can not be used here. Because llist_del_first depends on list->first->next does not changed if list->first is not changed during its operation, but llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, llist_add) sequence in another consumer may violate that. If there are multiple producers and one consumer, llist_add can be used in producers and llist_del_all or llist_del_first can be used in the consumer. This can be summarized as follow: | add | del_first | del_all add | - | - | - del_first | | L | L del_all | | | - Where "-" stands for no lock is needed, while "L" stands for lock is needed. The list entries deleted via llist_del_all can be traversed with traversing function such as llist_for_each etc. But the list entries can not be traversed safely before deleted from the list. The order of deleted entries is from the newest to the oldest added one. If you want to traverse from the oldest to the newest, you must reverse the order by yourself before traversing. The basic atomic operation of this list is cmpxchg on long. On architectures that don't have NMI-safe cmpxchg implementation, the list can NOT be used in NMI handler. So code uses the list in NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Mathieu Desnoyers Cc: Andrew Morton Signed-off-by: Len Brown --- lib/Kconfig | 3 ++ lib/Makefile | 2 + lib/llist.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 lib/llist.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 830181cc7a8..25c19678a30 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -262,4 +262,7 @@ config AVERAGE If unsure, say N. +config LLIST + bool + endmenu diff --git a/lib/Makefile b/lib/Makefile index 6b597fdb189..d770b817202 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -112,6 +112,8 @@ obj-$(CONFIG_AVERAGE) += average.o obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o +obj-$(CONFIG_LLIST) += llist.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/llist.c b/lib/llist.c new file mode 100644 index 00000000000..da445724fa1 --- /dev/null +++ b/lib/llist.c @@ -0,0 +1,129 @@ +/* + * Lock-less NULL terminated single linked list + * + * The basic atomic operation of this list is cmpxchg on long. On + * architectures that don't have NMI-safe cmpxchg implementation, the + * list can NOT be used in NMI handler. So code uses the list in NMI + * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +#include + +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + */ +void llist_add(struct llist_node *new, struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add); + +/** + * llist_add_batch - add several linked entries in batch + * @new_first: first entry in batch to be added + * @new_last: last entry in batch to be added + * @head: the head for your lock-less list + */ +void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, + struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new_last->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add_batch); + +/** + * llist_del_first - delete the first entry of lock-less list + * @head: the head for your lock-less list + * + * If list is empty, return NULL, otherwise, return the first entry + * deleted, this is the newest added one. + * + * Only one llist_del_first user can be used simultaneously with + * multiple llist_add users without lock. Because otherwise + * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, + * llist_add) sequence in another user may change @head->first->next, + * but keep @head->first. If multiple consumers are needed, please + * use llist_del_all or use lock between consumers. + */ +struct llist_node *llist_del_first(struct llist_head *head) +{ + struct llist_node *entry, *old_entry, *next; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + if (entry == NULL) + return NULL; + old_entry = entry; + next = entry->next; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); + + return entry; +} +EXPORT_SYMBOL_GPL(llist_del_first); + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. + */ +struct llist_node *llist_del_all(struct llist_head *head) +{ +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + return xchg(&head->first, NULL); +} +EXPORT_SYMBOL_GPL(llist_del_all); -- cgit v1.2.3 From 7f184275aa306046fe7edcbef3229754f0d97402 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:24 +0800 Subject: lib, Make gen_pool memory allocator lockless This version of the gen_pool memory allocator supports lockless operation. This makes it safe to use in NMI handlers and other special unblockable contexts that could otherwise deadlock on locks. This is implemented by using atomic operations and retries on any conflicts. The disadvantage is that there may be livelocks in extreme cases. For better scalability, one gen_pool allocator can be used for each CPU. The lockless operation only works if there is enough memory available. If new memory is added to the pool a lock has to be still taken. So any user relying on locklessness has to ensure that sufficient memory is preallocated. The basic atomic operation of this allocator is cmpxchg on long. On architectures that don't have NMI-safe cmpxchg implementation, the allocator can NOT be used in NMI handler. So code uses the allocator in NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Mathieu Desnoyers Cc: Andrew Morton Signed-off-by: Len Brown --- lib/bitmap.c | 2 - lib/genalloc.c | 300 ++++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 243 insertions(+), 59 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 3f3b68199d7..e3c9e999501 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_weight); -#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) - void bitmap_set(unsigned long *map, int start, int nr) { unsigned long *p = map + BIT_WORD(start); diff --git a/lib/genalloc.c b/lib/genalloc.c index 577ddf80597..f352cc42f4f 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -1,8 +1,26 @@ /* - * Basic general purpose allocator for managing special purpose memory - * not managed by the regular kmalloc/kfree interface. - * Uses for this includes on-device special memory, uncached memory - * etc. + * Basic general purpose allocator for managing special purpose + * memory, for example, memory that is not managed by the regular + * kmalloc/kfree interface. Uses for this includes on-device special + * memory, uncached memory etc. + * + * It is safe to use the allocator in NMI handlers and other special + * unblockable contexts that could otherwise deadlock on locks. This + * is implemented by using atomic operations and retries on any + * conflicts. The disadvantage is that there may be livelocks in + * extreme cases. For better scalability, one allocator can be used + * for each CPU. + * + * The lockless operation only works if there is enough memory + * available. If new memory is added to the pool a lock has to be + * still taken. So any user relying on locklessness has to ensure + * that sufficient memory is preallocated. + * + * The basic atomic operation of this allocator is cmpxchg on long. + * On architectures that don't have NMI-safe cmpxchg implementation, + * the allocator can NOT be used in NMI handler. So code uses the + * allocator in NMI handler should depend on + * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2005 (C) Jes Sorensen * @@ -13,8 +31,109 @@ #include #include #include +#include +#include #include +static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if (val & mask_to_set) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); + + return 0; +} + +static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if ((val & mask_to_clear) != mask_to_clear) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); + + return 0; +} + +/* + * bitmap_set_ll - set the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to set + * + * Set @nr bits start from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without lock. If two + * users set the same bit, one user will return remain bits, otherwise + * return 0. + */ +static int bitmap_set_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_set >= 0) { + if (set_bits_ll(p, mask_to_set)) + return nr; + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + if (set_bits_ll(p, mask_to_set)) + return nr; + } + + return 0; +} + +/* + * bitmap_clear_ll - clear the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to set + * + * Clear @nr bits start from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without lock. If two + * users clear the same bit, one user will return remain bits, + * otherwise return 0. + */ +static int bitmap_clear_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_clear >= 0) { + if (clear_bits_ll(p, mask_to_clear)) + return nr; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + if (clear_bits_ll(p, mask_to_clear)) + return nr; + } + + return 0; +} /** * gen_pool_create - create a new special memory pool @@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); if (pool != NULL) { - rwlock_init(&pool->lock); + spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->chunks); pool->min_alloc_order = min_alloc_order; } @@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy if (unlikely(chunk == NULL)) return -ENOMEM; - spin_lock_init(&chunk->lock); chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size; + atomic_set(&chunk->avail, size); - write_lock(&pool->lock); - list_add(&chunk->next_chunk, &pool->chunks); - write_unlock(&pool->lock); + spin_lock(&pool->lock); + list_add_rcu(&chunk->next_chunk, &pool->chunks); + spin_unlock(&pool->lock); return 0; } @@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt); */ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; + phys_addr_t paddr = -1; - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); - - if (addr >= chunk->start_addr && addr < chunk->end_addr) - return chunk->phys_addr + addr - chunk->start_addr; + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + paddr = chunk->phys_addr + (addr - chunk->start_addr); + break; + } } - read_unlock(&pool->lock); + rcu_read_unlock(); - return -1; + return paddr; } EXPORT_SYMBOL(gen_pool_virt_to_phys); @@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool) int order = pool->min_alloc_order; int bit, end_bit; - list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); list_del(&chunk->next_chunk); @@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy); * @size: number of bytes to allocate from the pool * * Allocate the requested number of bytes from the specified pool. - * Uses a first-fit algorithm. + * Uses a first-fit algorithm. Can not be used in NMI handler on + * architectures without NMI-safe cmpxchg implementation. */ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long addr, flags; + unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit, end_bit; + int nbits, start_bit = 0, end_bit, remain; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif if (size == 0) return 0; nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (size > atomic_read(&chunk->avail)) + continue; end_bit = (chunk->end_addr - chunk->start_addr) >> order; - - spin_lock_irqsave(&chunk->lock, flags); - start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, - nbits, 0); - if (start_bit >= end_bit) { - spin_unlock_irqrestore(&chunk->lock, flags); +retry: + start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, + start_bit, nbits, 0); + if (start_bit >= end_bit) continue; + remain = bitmap_set_ll(chunk->bits, start_bit, nbits); + if (remain) { + remain = bitmap_clear_ll(chunk->bits, start_bit, + nbits - remain); + BUG_ON(remain); + goto retry; } addr = chunk->start_addr + ((unsigned long)start_bit << order); - - bitmap_set(chunk->bits, start_bit, nbits); - spin_unlock_irqrestore(&chunk->lock, flags); - read_unlock(&pool->lock); - return addr; + size = nbits << order; + atomic_sub(size, &chunk->avail); + break; } - read_unlock(&pool->lock); - return 0; + rcu_read_unlock(); + return addr; } EXPORT_SYMBOL(gen_pool_alloc); @@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc); * @addr: starting address of memory to free back to pool * @size: size in bytes of memory to free * - * Free previously allocated special memory back to the specified pool. + * Free previously allocated special memory back to the specified + * pool. Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. */ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long flags; int order = pool->min_alloc_order; - int bit, nbits; + int start_bit, nbits, remain; - nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + nbits = (size + (1UL << order) - 1) >> order; + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { if (addr >= chunk->start_addr && addr < chunk->end_addr) { BUG_ON(addr + size > chunk->end_addr); - spin_lock_irqsave(&chunk->lock, flags); - bit = (addr - chunk->start_addr) >> order; - while (nbits--) - __clear_bit(bit++, chunk->bits); - spin_unlock_irqrestore(&chunk->lock, flags); - break; + start_bit = (addr - chunk->start_addr) >> order; + remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); + BUG_ON(remain); + size = nbits << order; + atomic_add(size, &chunk->avail); + rcu_read_unlock(); + return; } } - BUG_ON(nbits > 0); - read_unlock(&pool->lock); + rcu_read_unlock(); + BUG(); } EXPORT_SYMBOL(gen_pool_free); + +/** + * gen_pool_for_each_chunk - call func for every chunk of generic memory pool + * @pool: the generic memory pool + * @func: func to call + * @data: additional data used by @func + * + * Call @func for every chunk of generic memory pool. The @func is + * called with rcu_read_lock held. + */ +void gen_pool_for_each_chunk(struct gen_pool *pool, + void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data), + void *data) +{ + struct gen_pool_chunk *chunk; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) + func(pool, chunk, data); + rcu_read_unlock(); +} +EXPORT_SYMBOL(gen_pool_for_each_chunk); + +/** + * gen_pool_avail - get available free space of the pool + * @pool: pool to get available free space + * + * Return available free space of the specified pool. + */ +size_t gen_pool_avail(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t avail = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + avail += atomic_read(&chunk->avail); + rcu_read_unlock(); + return avail; +} +EXPORT_SYMBOL_GPL(gen_pool_avail); + +/** + * gen_pool_size - get size in bytes of memory managed by the pool + * @pool: pool to get size + * + * Return size in bytes of memory managed by the pool. + */ +size_t gen_pool_size(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t size = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + size += chunk->end_addr - chunk->start_addr; + rcu_read_unlock(); + return size; +} +EXPORT_SYMBOL_GPL(gen_pool_size); -- cgit v1.2.3 From dd48c085c1cdf9446f92826f1fd451167fb6c2fd Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 3 Aug 2011 16:21:01 -0700 Subject: fault-injection: add ability to export fault_attr in arbitrary directory init_fault_attr_dentries() is used to export fault_attr via debugfs. But it can only export it in debugfs root directory. Per Forlin is working on mmc_fail_request which adds support to inject data errors after a completed host transfer in MMC subsystem. The fault_attr for mmc_fail_request should be defined per mmc host and export it in debugfs directory per mmc host like /sys/kernel/debug/mmc0/mmc_fail_request. init_fault_attr_dentries() doesn't help for mmc_fail_request. So this introduces fault_create_debugfs_attr() which is able to create a directory in the arbitrary directory and replace init_fault_attr_dentries(). [akpm@linux-foundation.org: extraneous semicolon, per Randy] Signed-off-by: Akinobu Mita Tested-by: Per Forlin Cc: Jens Axboe Cc: Christoph Lameter Cc: Pekka Enberg Cc: Matt Mackall Cc: Randy Dunlap Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/fault-inject.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 2577b121c7c..f193b779644 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); } -void cleanup_fault_attr_dentries(struct fault_attr *attr) -{ - debugfs_remove_recursive(attr->dir); -} - -int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - dir = debugfs_create_dir(name, NULL); + dir = debugfs_create_dir(name, parent); if (!dir) - return -ENOMEM; - - attr->dir = dir; + return ERR_PTR(-ENOMEM); if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) goto fail; @@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - return 0; + return dir; fail: - debugfs_remove_recursive(attr->dir); + debugfs_remove_recursive(dir); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ -- cgit v1.2.3 From 88eca0207cf1574328c3ce8c3be537a9317261bb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 3 Aug 2011 16:21:06 -0700 Subject: ida: simplified functions for id allocation The current hyper-optimized functions are overkill if you simply want to allocate an id for a device. Create versions which use an internal lock. In followup patches, numerous drivers are converted to use this interface. Thanks to Tejun for feedback. Signed-off-by: Rusty Russell Acked-by: Tejun Heo Acked-by: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index e15502e8b21..db040ce3fa7 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -34,8 +34,10 @@ #include #include #include +#include static struct kmem_cache *idr_layer_cache; +static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) { @@ -925,6 +927,71 @@ void ida_destroy(struct ida *ida) } EXPORT_SYMBOL(ida_destroy); +/** + * ida_simple_get - get a new id. + * @ida: the (initialized) ida. + * @start: the minimum id (inclusive, < 0x8000000) + * @end: the maximum id (exclusive, < 0x8000000 or 0) + * @gfp_mask: memory allocation flags + * + * Allocates an id in the range start <= id < end, or returns -ENOSPC. + * On memory allocation failure, returns -ENOMEM. + * + * Use ida_simple_remove() to get rid of an id. + */ +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask) +{ + int ret, id; + unsigned int max; + + BUG_ON((int)start < 0); + BUG_ON((int)end < 0); + + if (end == 0) + max = 0x80000000; + else { + BUG_ON(end < start); + max = end - 1; + } + +again: + if (!ida_pre_get(ida, gfp_mask)) + return -ENOMEM; + + spin_lock(&simple_ida_lock); + ret = ida_get_new_above(ida, start, &id); + if (!ret) { + if (id > max) { + ida_remove(ida, id); + ret = -ENOSPC; + } else { + ret = id; + } + } + spin_unlock(&simple_ida_lock); + + if (unlikely(ret == -EAGAIN)) + goto again; + + return ret; +} +EXPORT_SYMBOL(ida_simple_get); + +/** + * ida_simple_remove - remove an allocated id. + * @ida: the (initialized) ida. + * @id: the id returned by ida_simple_get. + */ +void ida_simple_remove(struct ida *ida, unsigned int id) +{ + BUG_ON((int)id < 0); + spin_lock(&simple_ida_lock); + ida_remove(ida, id); + spin_unlock(&simple_ida_lock); +} +EXPORT_SYMBOL(ida_simple_remove); + /** * ida_init - initialize ida handle * @ida: ida handle -- cgit v1.2.3 From 6328650bb4d854a7dc1498d1c0048b838b0d340c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:18 -0700 Subject: radix_tree: exceptional entries and indices A patchset to extend tmpfs to MAX_LFS_FILESIZE by abandoning its peculiar swap vector, instead keeping a file's swap entries in the same radix tree as its struct page pointers: thus saving memory, and simplifying its code and locking. This patch: The radix_tree is used by several subsystems for different purposes. A major use is to store the struct page pointers of a file's pagecache for memory management. But what if mm wanted to store something other than page pointers there too? The low bit of a radix_tree entry is already used to denote an indirect pointer, for internal use, and the unlikely radix_tree_deref_retry() case. Define the next bit as denoting an exceptional entry, and supply inline functions radix_tree_exception() to return non-0 in either unlikely case, and radix_tree_exceptional_entry() to return non-0 in the second case. If a subsystem already uses radix_tree with that bit set, no problem: it does not affect internal workings at all, but is defined for the convenience of those storing well-aligned pointers in the radix_tree. The radix_tree_gang_lookups have an implicit assumption that the caller can deduce the offset of each entry returned e.g. by the page->index of a struct page. But that may not be feasible for some kinds of item to be stored there. radix_tree_gang_lookup_slot() allow for an optional indices argument, output array in which to return those offsets. The same could be added to other radix_tree_gang_lookups, but for now keep it to the only one for which we need it. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7ea2e033d71..348eaefbed7 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_prev_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, + unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; if (slot->slots[i]) { - results[nr_found++] = &(slot->slots[i]); - if (nr_found == max_items) + results[nr_found] = &(slot->slots[i]); + if (indices) + indices[nr_found] = index; + if (++nr_found == max_items) { + index++; goto out; + } } + index++; } out: *next_index = index; @@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, if (cur_index > max_index) break; - slots_found = __lookup(node, (void ***)results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, (void ***)results + ret, NULL, + cur_index, max_items - ret, &next_index); nr_found = 0; for (i = 0; i < slots_found; i++) { struct radix_tree_node *slot; @@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * @root: radix tree root * @results: where the results of the lookup are placed + * @indices: where their indices should be placed (but usually NULL) * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. */ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { unsigned long max_index; @@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (first_index > 0) return 0; results[0] = (void **)&root->rnode; + if (indices) + indices[0] = 0; return 1; } node = indirect_to_ptr(node); @@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (cur_index > max_index) break; - slots_found = __lookup(node, results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, results + ret, + indices ? indices + ret : NULL, + cur_index, max_items - ret, &next_index); ret += slots_found; if (next_index == 0) break; -- cgit v1.2.3 From e504f3fdd63d486d45b18009e5a65f2e329acb0a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:27 -0700 Subject: tmpfs radix_tree: locate_item to speed up swapoff We have already acknowledged that swapoff of a tmpfs file is slower than it was before conversion to the generic radix_tree: a little slower there will be acceptable, if the hotter paths are faster. But it was a shock to find swapoff of a 500MB file 20 times slower on my laptop, taking 10 minutes; and at that rate it significantly slows down my testing. Now, most of that turned out to be overhead from PROVE_LOCKING and PROVE_RCU: without those it was only 4 times slower than before; and more realistic tests on other machines don't fare as badly. I've tried a number of things to improve it, including tagging the swap entries, then doing lookup by tag: I'd expected that to halve the time, but in practice it's erratic, and often counter-productive. The only change I've so far found to make a consistent improvement, is to short-circuit the way we go back and forth, gang lookup packing entries into the array supplied, then shmem scanning that array for the target entry. Scanning in place doubles the speed, so it's now only twice as slow as before (or three times slower when the PROVEs are on). So, add radix_tree_locate_item() as an expedient, once-off, single-caller hack to do the lookup directly in place. #ifdef it on CONFIG_SHMEM and CONFIG_SWAP, as much to document its limited applicability as save space in other configurations. And, sadly, #include sched.h for cond_resched(). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 348eaefbed7..a2f9da59c19 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1203,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); +#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) +#include /* for cond_resched() */ + +/* + * This linear search is at present only useful to shmem_unuse_inode(). + */ +static unsigned long __locate(struct radix_tree_node *slot, void *item, + unsigned long index, unsigned long *found_index) +{ + unsigned int shift, height; + unsigned long i; + + height = slot->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: check items */ + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + if (slot->slots[i] == item) { + *found_index = index + i; + index = 0; + goto out; + } + } + index += RADIX_TREE_MAP_SIZE; +out: + return index; +} + +/** + * radix_tree_locate_item - search through radix tree for item + * @root: radix tree root + * @item: item to be found + * + * Returns index where item was found, or -1 if not found. + * Caller must hold no lock (since this time-consuming function needs + * to be preemptible), and must check afterwards if item is still there. + */ +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = 0; + unsigned long found_index = -1; + + do { + rcu_read_lock(); + node = rcu_dereference_raw(root->rnode); + if (!radix_tree_is_indirect_ptr(node)) { + rcu_read_unlock(); + if (node == item) + found_index = 0; + break; + } + + node = indirect_to_ptr(node); + max_index = radix_tree_maxindex(node->height); + if (cur_index > max_index) + break; + + cur_index = __locate(node, item, cur_index, &found_index); + rcu_read_unlock(); + cond_resched(); + } while (cur_index != 0 && cur_index <= max_index); + + return found_index; +} +#else +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + return -1; +} +#endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** * radix_tree_shrink - shrink height of a radix tree to minimal -- cgit v1.2.3 From 1eb19a12bd2214cdcad5273d472b062a4ba97fa1 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Fri, 5 Aug 2011 18:46:27 -0700 Subject: lib/sha1: use the git implementation of SHA-1 For ChromiumOS, we use SHA-1 to verify the integrity of the root filesystem. The speed of the kernel sha-1 implementation has a major impact on our boot performance. To improve boot performance, we investigated using the heavily optimized sha-1 implementation used in git. With the git sha-1 implementation, we see a 11.7% improvement in boot time. 10 reboots, remove slowest/fastest. Before: Mean: 6.58 seconds Stdev: 0.14 After (with git sha-1, this patch): Mean: 5.89 seconds Stdev: 0.07 The other cool thing about the git SHA-1 implementation is that it only needs 64 bytes of stack for the workspace while the original kernel implementation needed 320 bytes. Signed-off-by: Mandeep Singh Baines Cc: Ramsay Jones Cc: Nicolas Pitre Cc: Herbert Xu Cc: David S. Miller Cc: linux-crypto@vger.kernel.org Signed-off-by: Linus Torvalds --- lib/sha1.c | 212 +++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 158 insertions(+), 54 deletions(-) (limited to 'lib') diff --git a/lib/sha1.c b/lib/sha1.c index 4c45fd50e91..f33271dd00c 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -1,31 +1,72 @@ /* - * SHA transform algorithm, originally taken from code written by - * Peter Gutmann, and placed in the public domain. + * SHA1 routine optimized to do word accesses rather than byte accesses, + * and to avoid unnecessary copies into the context array. + * + * This was based on the git SHA1 implementation. */ #include #include -#include +#include +#include -/* The SHA f()-functions. */ +/* + * If you have 32 registers or more, the compiler can (and should) + * try to change the array[] accesses into registers. However, on + * machines with less than ~25 registers, that won't really work, + * and at least gcc will make an unholy mess of it. + * + * So to avoid that mess which just slows things down, we force + * the stores to memory to actually happen (we might be better off + * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as + * suggested by Artur Skawina - that will also make gcc unable to + * try to do the silly "optimize away loads" part because it won't + * see what the value will be). + * + * Ben Herrenschmidt reports that on PPC, the C version comes close + * to the optimized asm with this (ie on PPC you don't want that + * 'volatile', since there are lots of registers). + * + * On ARM we get the best code generation by forcing a full memory barrier + * between each SHA_ROUND, otherwise gcc happily get wild with spilling and + * the stack frame size simply explode and performance goes down the drain. + */ -#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */ -#define f2(x,y,z) (x ^ y ^ z) /* XOR */ -#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */ +#ifdef CONFIG_X86 + #define setW(x, val) (*(volatile __u32 *)&W(x) = (val)) +#elif defined(CONFIG_ARM) + #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0) +#else + #define setW(x, val) (W(x) = (val)) +#endif -/* The SHA Mysterious Constants */ +/* This "rolls" over the 512-bit array */ +#define W(x) (array[(x)&15]) -#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */ -#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */ -#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */ -#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */ +/* + * Where do we get the source from? The first 16 iterations get it from + * the input data, the next mix it from the 512-bit array. + */ +#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t) +#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1) + +#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ + __u32 TEMP = input(t); setW(t, TEMP); \ + E += TEMP + rol32(A,5) + (fn) + (constant); \ + B = ror32(B, 2); } while (0) + +#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E ) +#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E ) +#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E ) /** * sha_transform - single block SHA1 transform * * @digest: 160 bit digest to update * @data: 512 bits of data to hash - * @W: 80 words of workspace (see note) + * @array: 16 words of workspace (see note) * * This function generates a SHA1 digest for a single 512-bit block. * Be warned, it does not handle padding and message digest, do not @@ -36,47 +77,111 @@ * to clear the workspace. This is left to the caller to avoid * unnecessary clears between chained hashing operations. */ -void sha_transform(__u32 *digest, const char *in, __u32 *W) +void sha_transform(__u32 *digest, const char *data, __u32 *array) { - __u32 a, b, c, d, e, t, i; - - for (i = 0; i < 16; i++) - W[i] = be32_to_cpu(((const __be32 *)in)[i]); - - for (i = 0; i < 64; i++) - W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - for (i = 0; i < 20; i++) { - t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 40; i ++) { - t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 60; i ++) { - t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 80; i ++) { - t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; + __u32 A, B, C, D, E; + + A = digest[0]; + B = digest[1]; + C = digest[2]; + D = digest[3]; + E = digest[4]; + + /* Round 1 - iterations 0-16 take their input from 'data' */ + T_0_15( 0, A, B, C, D, E); + T_0_15( 1, E, A, B, C, D); + T_0_15( 2, D, E, A, B, C); + T_0_15( 3, C, D, E, A, B); + T_0_15( 4, B, C, D, E, A); + T_0_15( 5, A, B, C, D, E); + T_0_15( 6, E, A, B, C, D); + T_0_15( 7, D, E, A, B, C); + T_0_15( 8, C, D, E, A, B); + T_0_15( 9, B, C, D, E, A); + T_0_15(10, A, B, C, D, E); + T_0_15(11, E, A, B, C, D); + T_0_15(12, D, E, A, B, C); + T_0_15(13, C, D, E, A, B); + T_0_15(14, B, C, D, E, A); + T_0_15(15, A, B, C, D, E); + + /* Round 1 - tail. Input from 512-bit mixing array */ + T_16_19(16, E, A, B, C, D); + T_16_19(17, D, E, A, B, C); + T_16_19(18, C, D, E, A, B); + T_16_19(19, B, C, D, E, A); + + /* Round 2 */ + T_20_39(20, A, B, C, D, E); + T_20_39(21, E, A, B, C, D); + T_20_39(22, D, E, A, B, C); + T_20_39(23, C, D, E, A, B); + T_20_39(24, B, C, D, E, A); + T_20_39(25, A, B, C, D, E); + T_20_39(26, E, A, B, C, D); + T_20_39(27, D, E, A, B, C); + T_20_39(28, C, D, E, A, B); + T_20_39(29, B, C, D, E, A); + T_20_39(30, A, B, C, D, E); + T_20_39(31, E, A, B, C, D); + T_20_39(32, D, E, A, B, C); + T_20_39(33, C, D, E, A, B); + T_20_39(34, B, C, D, E, A); + T_20_39(35, A, B, C, D, E); + T_20_39(36, E, A, B, C, D); + T_20_39(37, D, E, A, B, C); + T_20_39(38, C, D, E, A, B); + T_20_39(39, B, C, D, E, A); + + /* Round 3 */ + T_40_59(40, A, B, C, D, E); + T_40_59(41, E, A, B, C, D); + T_40_59(42, D, E, A, B, C); + T_40_59(43, C, D, E, A, B); + T_40_59(44, B, C, D, E, A); + T_40_59(45, A, B, C, D, E); + T_40_59(46, E, A, B, C, D); + T_40_59(47, D, E, A, B, C); + T_40_59(48, C, D, E, A, B); + T_40_59(49, B, C, D, E, A); + T_40_59(50, A, B, C, D, E); + T_40_59(51, E, A, B, C, D); + T_40_59(52, D, E, A, B, C); + T_40_59(53, C, D, E, A, B); + T_40_59(54, B, C, D, E, A); + T_40_59(55, A, B, C, D, E); + T_40_59(56, E, A, B, C, D); + T_40_59(57, D, E, A, B, C); + T_40_59(58, C, D, E, A, B); + T_40_59(59, B, C, D, E, A); + + /* Round 4 */ + T_60_79(60, A, B, C, D, E); + T_60_79(61, E, A, B, C, D); + T_60_79(62, D, E, A, B, C); + T_60_79(63, C, D, E, A, B); + T_60_79(64, B, C, D, E, A); + T_60_79(65, A, B, C, D, E); + T_60_79(66, E, A, B, C, D); + T_60_79(67, D, E, A, B, C); + T_60_79(68, C, D, E, A, B); + T_60_79(69, B, C, D, E, A); + T_60_79(70, A, B, C, D, E); + T_60_79(71, E, A, B, C, D); + T_60_79(72, D, E, A, B, C); + T_60_79(73, C, D, E, A, B); + T_60_79(74, B, C, D, E, A); + T_60_79(75, A, B, C, D, E); + T_60_79(76, E, A, B, C, D); + T_60_79(77, D, E, A, B, C); + T_60_79(78, C, D, E, A, B); + T_60_79(79, B, C, D, E, A); + + digest[0] += A; + digest[1] += B; + digest[2] += C; + digest[3] += D; + digest[4] += E; } EXPORT_SYMBOL(sha_transform); @@ -92,4 +197,3 @@ void sha_init(__u32 *buf) buf[3] = 0x10325476; buf[4] = 0xc3d2e1f0; } - -- cgit v1.2.3 From bc0b96b54a21246e377122d54569eef71cec535f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Aug 2011 19:45:10 -0700 Subject: crypto: Move md5_transform to lib/md5.c We are going to use this for TCP/IP sequence number and fragment ID generation. Signed-off-by: David S. Miller --- lib/Makefile | 2 +- lib/md5.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 lib/md5.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 6457af4a7ca..d5d175c8a6c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,7 +10,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o prio_tree.o \ - sha1.o irq_regs.o reciprocal_div.o argv_split.o \ + sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o find_next_bit.o diff --git a/lib/md5.c b/lib/md5.c new file mode 100644 index 00000000000..c777180e1f2 --- /dev/null +++ b/lib/md5.c @@ -0,0 +1,95 @@ +#include +#include +#include + +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +#define MD5STEP(f, w, x, y, z, in, s) \ + (w += f(x, y, z) + in, w = (w<>(32-s)) + x) + +void md5_transform(__u32 *hash, __u32 const *in) +{ + u32 a, b, c, d; + + a = hash[0]; + b = hash[1]; + c = hash[2]; + d = hash[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + hash[0] += a; + hash[1] += b; + hash[2] += c; + hash[3] += d; +} +EXPORT_SYMBOL(md5_transform); -- cgit v1.2.3 From bd823821e68e5de6d680cbbf1c8654c9c36674e1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Aug 2011 15:17:03 +0200 Subject: bitops: Move find_next_bit.o from lib-y to obj-y If there are no builtin users of find_next_bit_le() and find_next_zero_bit_le(), these functions are not present in the kernel image, causing m68k allmodconfig to fail with: ERROR: "find_next_zero_bit_le" [fs/ufs/ufs.ko] undefined! ERROR: "find_next_bit_le" [fs/udf/udf.ko] undefined! ... This started to happen after commit 171d809df189 ("m68k: merge mmu and non-mmu bitops.h"), as m68k had its own inline versions before. commit 63e424c84429 ("arch: remove CONFIG_GENERIC_FIND_{NEXT_BIT, BIT_LE, LAST_BIT}") added find_last_bit.o to obj-y (so it's always included), but find_next_bit.o to lib-y (so it gets removed by the linker if there are no builtin users). Signed-off-by: Geert Uytterhoeven Cc: Akinobu Mita Cc: Greg Ungerer Cc: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index d5d175c8a6c..3f5bc6d903e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o find_next_bit.o + is_single_threaded.o plist.o decompress.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -22,7 +22,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ - bsearch.o find_last_bit.o + bsearch.o find_last_bit.o find_next_bit.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o -- cgit v1.2.3 From 003f6c9df54970d8b19578d195b3e2b398cdbde2 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 9 Sep 2011 11:30:27 -0700 Subject: lib/sha1.c: quiet sparse noise about symbol not declared Include to pickup the declarations for sha_transform and sha_init to quite the sparse noise: warning: symbol 'sha_transform' was not declared. Should it be static? warning: symbol 'sha_init' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Acked-by: Mandeep Singh Baines Signed-off-by: Linus Torvalds --- lib/sha1.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/sha1.c b/lib/sha1.c index f33271dd00c..1de509a159c 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -8,6 +8,7 @@ #include #include #include +#include #include /* -- cgit v1.2.3