diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-07-24 13:13:01 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-07-24 13:13:01 +1000 |
commit | b21a87c5b7539d9647f26085df3d3380f599643a (patch) | |
tree | c641ffd6e96d333e48ca21bb7413387b31fe3aa6 | |
parent | 991f8a280cd2f18a9e7357affb8cbeb0384bd166 (diff) | |
parent | 9c01516278ef872190fdda95aafaa8effdf6649a (diff) |
Merge remote-tracking branch 'percpu/for-next'
-rw-r--r-- | mm/percpu-internal.h | 1 | ||||
-rw-r--r-- | mm/percpu-stats.c | 11 | ||||
-rw-r--r-- | mm/percpu.c | 60 |
3 files changed, 40 insertions, 32 deletions
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index cd2442e13d8f..c9158a48ca05 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -36,6 +36,7 @@ extern spinlock_t pcpu_lock; extern struct list_head *pcpu_slot; extern int pcpu_nr_slots; +extern int pcpu_nr_empty_pop_pages; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 03524a56eeff..44e561d3ab47 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -18,7 +18,7 @@ #include "percpu-internal.h" #define P(X, Y) \ - seq_printf(m, " %-24s: %8lld\n", X, (long long int)Y) + seq_printf(m, " %-20s: %12lld\n", X, (long long int)Y) struct percpu_stats pcpu_stats; struct pcpu_alloc_info pcpu_stats_ai; @@ -49,7 +49,7 @@ static int find_max_map_used(void) * the beginning of the chunk to the last allocation. */ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, - void *buffer) + int *buffer) { int i, s_index, last_alloc, alloc_sign, as_len; int *alloc_sizes, *p; @@ -113,7 +113,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) { struct pcpu_chunk *chunk; int slot, max_map_used; - void *buffer; + int *buffer; alloc_buffer: spin_lock_irq(&pcpu_lock); @@ -134,7 +134,7 @@ alloc_buffer: } #define PL(X) \ - seq_printf(m, " %-24s: %8lld\n", #X, (long long int)pcpu_stats_ai.X) + seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X) seq_printf(m, "Percpu Memory Statistics\n" @@ -151,7 +151,7 @@ alloc_buffer: #undef PL #define PU(X) \ - seq_printf(m, " %-18s: %14llu\n", #X, (unsigned long long)pcpu_stats.X) + seq_printf(m, " %-20s: %12llu\n", #X, (unsigned long long)pcpu_stats.X) seq_printf(m, "Global Stats:\n" @@ -164,6 +164,7 @@ alloc_buffer: PU(nr_max_chunks); PU(min_alloc_size); PU(max_alloc_size); + P("empty_pop_pages", pcpu_nr_empty_pop_pages); seq_putc(m, '\n'); #undef PU diff --git a/mm/percpu.c b/mm/percpu.c index bd4130a69bbc..29244fb076ba 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -4,36 +4,35 @@ * Copyright (C) 2009 SUSE Linux Products GmbH * Copyright (C) 2009 Tejun Heo <tj@kernel.org> * - * This file is released under the GPLv2. + * This file is released under the GPLv2 license. * - * This is percpu allocator which can handle both static and dynamic - * areas. Percpu areas are allocated in chunks. Each chunk is - * consisted of boot-time determined number of units and the first - * chunk is used for static percpu variables in the kernel image - * (special boot time alloc/init handling necessary as these areas - * need to be brought up before allocation services are running). - * Unit grows as necessary and all units grow or shrink in unison. - * When a chunk is filled up, another chunk is allocated. + * The percpu allocator handles both static and dynamic areas. Percpu + * areas are allocated in chunks which are divided into units. There is + * a 1-to-1 mapping for units to possible cpus. These units are grouped + * based on NUMA properties of the machine. * * c0 c1 c2 * ------------------- ------------------- ------------ * | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u * ------------------- ...... ------------------- .... ------------ * - * Allocation is done in offset-size areas of single unit space. Ie, - * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, - * c1:u1, c1:u2 and c1:u3. On UMA, units corresponds directly to - * cpus. On NUMA, the mapping can be non-linear and even sparse. - * Percpu access can be done by configuring percpu base registers - * according to cpu to unit mapping and pcpu_unit_size. - * - * There are usually many small percpu allocations many of them being - * as small as 4 bytes. The allocator organizes chunks into lists - * according to free size and tries to allocate from the fullest one. - * Each chunk keeps the maximum contiguous area size hint which is - * guaranteed to be equal to or larger than the maximum contiguous - * area in the chunk. This helps the allocator not to iterate the - * chunk maps unnecessarily. + * Allocation is done by offsets into a unit's address space. Ie., an + * area of 512 bytes at 6k in c1 occupies 512 bytes at 6k in c1:u0, + * c1:u1, c1:u2, etc. On NUMA machines, the mapping may be non-linear + * and even sparse. Access is handled by configuring percpu base + * registers according to the cpu to unit mappings and offsetting the + * base address using pcpu_unit_size. + * + * There is special consideration for the first chunk which must handle + * the static percpu variables in the kernel image as allocation services + * are not online yet. In short, the first chunk is structure like so: + * + * <Static | [Reserved] | Dynamic> + * + * The static data is copied from the original section managed by the + * linker. The reserved section, if non-zero, primarily manages static + * percpu variables from kernel modules. Finally, the dynamic section + * takes care of normal allocations. * * Allocation state in each chunk is kept using an array of integers * on chunk->map. A positive value in the map represents a free @@ -43,6 +42,12 @@ * Chunks can be determined from the address using the index field * in the page struct. The index field contains a pointer to the chunk. * + * These chunks are organized into lists according to free_size and + * tries to allocate from the fullest chunk first. Each chunk maintains + * a maximum contiguous area size hint which is guaranteed to be equal + * to or larger than the maximum contiguous area in the chunk. This + * helps prevent the allocator from iterating over chunks unnecessarily. + * * To use this allocator, arch code should do the following: * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate @@ -160,7 +165,7 @@ static LIST_HEAD(pcpu_map_extend_chunks); * The number of empty populated pages, protected by pcpu_lock. The * reserved chunk doesn't contribute to the count. */ -static int pcpu_nr_empty_pop_pages; +int pcpu_nr_empty_pop_pages; /* * Balance work is used to populate or destroy chunks asynchronously. We @@ -1842,6 +1847,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( */ min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + /* determine the maximum # of units that can fit in an allocation */ alloc_size = roundup(min_unit_size, atom_size); upa = alloc_size / min_unit_size; while (alloc_size % upa || (offset_in_page(alloc_size / upa))) @@ -1868,9 +1874,9 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( } /* - * Expand unit size until address space usage goes over 75% - * and then as much as possible without using more address - * space. + * Wasted space is caused by a ratio imbalance of upa to group_cnt. + * Expand the unit_size until we use >= 75% of the units allocated. + * Related to atom_size, which could be much larger than the unit_size. */ last_allocs = INT_MAX; for (upa = max_upa; upa; upa--) { |