aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2017-07-24 13:13:01 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2017-07-24 13:13:01 +1000
commitb21a87c5b7539d9647f26085df3d3380f599643a (patch)
treec641ffd6e96d333e48ca21bb7413387b31fe3aa6
parent991f8a280cd2f18a9e7357affb8cbeb0384bd166 (diff)
parent9c01516278ef872190fdda95aafaa8effdf6649a (diff)
Merge remote-tracking branch 'percpu/for-next'
-rw-r--r--mm/percpu-internal.h1
-rw-r--r--mm/percpu-stats.c11
-rw-r--r--mm/percpu.c60
3 files changed, 40 insertions, 32 deletions
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index cd2442e13d8f..c9158a48ca05 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -36,6 +36,7 @@ extern spinlock_t pcpu_lock;
extern struct list_head *pcpu_slot;
extern int pcpu_nr_slots;
+extern int pcpu_nr_empty_pop_pages;
extern struct pcpu_chunk *pcpu_first_chunk;
extern struct pcpu_chunk *pcpu_reserved_chunk;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 03524a56eeff..44e561d3ab47 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -18,7 +18,7 @@
#include "percpu-internal.h"
#define P(X, Y) \
- seq_printf(m, " %-24s: %8lld\n", X, (long long int)Y)
+ seq_printf(m, " %-20s: %12lld\n", X, (long long int)Y)
struct percpu_stats pcpu_stats;
struct pcpu_alloc_info pcpu_stats_ai;
@@ -49,7 +49,7 @@ static int find_max_map_used(void)
* the beginning of the chunk to the last allocation.
*/
static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
- void *buffer)
+ int *buffer)
{
int i, s_index, last_alloc, alloc_sign, as_len;
int *alloc_sizes, *p;
@@ -113,7 +113,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
{
struct pcpu_chunk *chunk;
int slot, max_map_used;
- void *buffer;
+ int *buffer;
alloc_buffer:
spin_lock_irq(&pcpu_lock);
@@ -134,7 +134,7 @@ alloc_buffer:
}
#define PL(X) \
- seq_printf(m, " %-24s: %8lld\n", #X, (long long int)pcpu_stats_ai.X)
+ seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
seq_printf(m,
"Percpu Memory Statistics\n"
@@ -151,7 +151,7 @@ alloc_buffer:
#undef PL
#define PU(X) \
- seq_printf(m, " %-18s: %14llu\n", #X, (unsigned long long)pcpu_stats.X)
+ seq_printf(m, " %-20s: %12llu\n", #X, (unsigned long long)pcpu_stats.X)
seq_printf(m,
"Global Stats:\n"
@@ -164,6 +164,7 @@ alloc_buffer:
PU(nr_max_chunks);
PU(min_alloc_size);
PU(max_alloc_size);
+ P("empty_pop_pages", pcpu_nr_empty_pop_pages);
seq_putc(m, '\n');
#undef PU
diff --git a/mm/percpu.c b/mm/percpu.c
index bd4130a69bbc..29244fb076ba 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -4,36 +4,35 @@
* Copyright (C) 2009 SUSE Linux Products GmbH
* Copyright (C) 2009 Tejun Heo <tj@kernel.org>
*
- * This file is released under the GPLv2.
+ * This file is released under the GPLv2 license.
*
- * This is percpu allocator which can handle both static and dynamic
- * areas. Percpu areas are allocated in chunks. Each chunk is
- * consisted of boot-time determined number of units and the first
- * chunk is used for static percpu variables in the kernel image
- * (special boot time alloc/init handling necessary as these areas
- * need to be brought up before allocation services are running).
- * Unit grows as necessary and all units grow or shrink in unison.
- * When a chunk is filled up, another chunk is allocated.
+ * The percpu allocator handles both static and dynamic areas. Percpu
+ * areas are allocated in chunks which are divided into units. There is
+ * a 1-to-1 mapping for units to possible cpus. These units are grouped
+ * based on NUMA properties of the machine.
*
* c0 c1 c2
* ------------------- ------------------- ------------
* | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u
* ------------------- ...... ------------------- .... ------------
*
- * Allocation is done in offset-size areas of single unit space. Ie,
- * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
- * c1:u1, c1:u2 and c1:u3. On UMA, units corresponds directly to
- * cpus. On NUMA, the mapping can be non-linear and even sparse.
- * Percpu access can be done by configuring percpu base registers
- * according to cpu to unit mapping and pcpu_unit_size.
- *
- * There are usually many small percpu allocations many of them being
- * as small as 4 bytes. The allocator organizes chunks into lists
- * according to free size and tries to allocate from the fullest one.
- * Each chunk keeps the maximum contiguous area size hint which is
- * guaranteed to be equal to or larger than the maximum contiguous
- * area in the chunk. This helps the allocator not to iterate the
- * chunk maps unnecessarily.
+ * Allocation is done by offsets into a unit's address space. I.e., an
+ * area of 512 bytes at 6k in c1 occupies 512 bytes at 6k in c1:u0,
+ * c1:u1, c1:u2, etc. On NUMA machines, the mapping may be non-linear
+ * and even sparse. Access is handled by configuring percpu base
+ * registers according to the cpu to unit mappings and offsetting the
+ * base address using pcpu_unit_size.
+ *
+ * There is special consideration for the first chunk which must handle
+ * the static percpu variables in the kernel image as allocation services
+ * are not online yet. In short, the first chunk is structured like so:
+ *
+ * <Static | [Reserved] | Dynamic>
+ *
+ * The static data is copied from the original section managed by the
+ * linker. The reserved section, if non-zero, primarily manages static
+ * percpu variables from kernel modules. Finally, the dynamic section
+ * takes care of normal allocations.
*
* Allocation state in each chunk is kept using an array of integers
* on chunk->map. A positive value in the map represents a free
@@ -43,6 +42,12 @@
* Chunks can be determined from the address using the index field
* in the page struct. The index field contains a pointer to the chunk.
*
+ * These chunks are organized into lists according to free_size and the
+ * allocator tries to allocate from the fullest chunk first. Each chunk maintains
+ * a maximum contiguous area size hint which is guaranteed to be equal
+ * to or larger than the maximum contiguous area in the chunk. This
+ * helps prevent the allocator from iterating over chunks unnecessarily.
+ *
* To use this allocator, arch code should do the following:
*
* - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
@@ -160,7 +165,7 @@ static LIST_HEAD(pcpu_map_extend_chunks);
* The number of empty populated pages, protected by pcpu_lock. The
* reserved chunk doesn't contribute to the count.
*/
-static int pcpu_nr_empty_pop_pages;
+int pcpu_nr_empty_pop_pages;
/*
* Balance work is used to populate or destroy chunks asynchronously. We
@@ -1842,6 +1847,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
*/
min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+ /* determine the maximum # of units that can fit in an allocation */
alloc_size = roundup(min_unit_size, atom_size);
upa = alloc_size / min_unit_size;
while (alloc_size % upa || (offset_in_page(alloc_size / upa)))
@@ -1868,9 +1874,9 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
}
/*
- * Expand unit size until address space usage goes over 75%
- * and then as much as possible without using more address
- * space.
+ * Wasted space is caused by a ratio imbalance of upa to group_cnt.
+ * Expand the unit_size until we use >= 75% of the units allocated.
+ * This is related to atom_size, which could be much larger than the unit_size.
*/
last_allocs = INT_MAX;
for (upa = max_upa; upa; upa--) {