author     Linus Torvalds <torvalds@linux-foundation.org>  2012-12-18 10:56:07 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-12-18 10:56:07 -0800
commit     ae664dba2724e59ddd66291b895f7370e28b9a7a (patch)
tree       d6e214bdc9999bcb8b0a067053aa6934cfd9d60e /mm/slub.c
parent     a2faf2fc534f57ba26bc4d613795236ed4f5fb1c (diff)
parent     08afe22c68d8c07e8e31ee6491c37f36199ba14b (diff)
Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull SLAB changes from Pekka Enberg:
 "This contains preparational work from Christoph Lameter and Glauber
  Costa for SLAB memcg and cleanups and improvements from Ezequiel
  Garcia and Joonsoo Kim.

  Please note that the SLOB cleanup commit from Arnd Bergmann already
  appears in your tree but I had also merged it myself which is why it
  shows up in the shortlog."

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  mm/sl[aou]b: Common alignment code
  slab: Use the new create_boot_cache function to simplify bootstrap
  slub: Use statically allocated kmem_cache boot structure for bootstrap
  mm, sl[au]b: create common functions for boot slab creation
  slab: Simplify bootstrap
  slub: Use correct cpu_slab on dead cpu
  mm: fix slab.c kernel-doc warnings
  mm/slob: use min_t() to compare ARCH_SLAB_MINALIGN
  slab: Ignore internal flags in cache creation
  mm/slob: Use free_page instead of put_page for page-size kmalloc allocations
  mm/sl[aou]b: Move common kmem_cache_size() to slab.h
  mm/slob: Use object_size field in kmem_cache_size()
  mm/slob: Drop usage of page->private for storing page-sized allocations
  slub: Commonize slab_cache field in struct page
  sl[au]b: Process slabinfo_show in common code
  mm/sl[au]b: Move print_slabinfo_header to slab_common.c
  mm/sl[au]b: Move slabinfo processing to slab_common.c
  slub: remove one code path and reduce lock contention in __slab_free()
Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c | 301
1 file changed, 70 insertions(+), 231 deletions(-)
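
Before the diff itself, a note on the bootstrap change ("slub: Use statically allocated kmem_cache boot structure for bootstrap"): the first two cache descriptors now live in static __initdata storage and are later copied into properly allocated objects by bootstrap(), which also repoints any back-references. The following is a minimal, hedged userspace sketch of that copy-then-fix-up pattern only; the struct, field, and function names here (cache, owner, boot_cache) are hypothetical and stand in for the kernel's kmem_cache, page->slab_cache and boot_kmem_cache.

/*
 * Simplified illustration of the copy-then-fix-up bootstrap pattern.
 * Not kernel code: plain C, hypothetical names.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cache {
	const char *name;
	size_t object_size;
	struct cache *owner;	/* stands in for page->slab_cache back-pointers */
};

/* The "static __initdata" boot structure: usable before the allocator is up. */
static struct cache boot_cache = { "kmem_cache", sizeof(struct cache), NULL };

/* Copy the static boot structure into a real allocation and repoint users. */
static struct cache *bootstrap(struct cache *static_cache)
{
	struct cache *s = malloc(sizeof(*s));

	memcpy(s, static_cache, sizeof(*s));
	s->owner = s;	/* fix up pointers that still reference the static copy */
	return s;
}

int main(void)
{
	struct cache *kmem_cache = bootstrap(&boot_cache);

	printf("%s: %zu bytes\n", kmem_cache->name, kmem_cache->object_size);
	free(kmem_cache);
	return 0;
}

In the real diff below, bootstrap() additionally walks each node's partial (and full) lists to rewrite page->slab_cache, because slab pages allocated from the static descriptor still point at it.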
diff --git a/mm/slub.c b/mm/slub.c
index 487f0bdd53c..87f9f32bf0c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -112,9 +112,6 @@
* the fast path and disables lockless freelists.
*/
-#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
- SLAB_TRACE | SLAB_DEBUG_FREE)
-
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
@@ -179,8 +176,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
#define __OBJECT_POISON 0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */
-static int kmem_size = sizeof(struct kmem_cache);
-
#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif
@@ -1092,11 +1087,11 @@ static noinline struct kmem_cache_node *free_debug_processing(
if (!check_object(s, page, object, SLUB_RED_ACTIVE))
goto out;
- if (unlikely(s != page->slab)) {
+ if (unlikely(s != page->slab_cache)) {
if (!PageSlab(page)) {
slab_err(s, page, "Attempt to free object(0x%p) "
"outside of slab", object);
- } else if (!page->slab) {
+ } else if (!page->slab_cache) {
printk(KERN_ERR
"SLUB <none>: no slab for object 0x%p.\n",
object);
@@ -1357,7 +1352,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
goto out;
inc_slabs_node(s, page_to_nid(page), page->objects);
- page->slab = s;
+ page->slab_cache = s;
__SetPageSlab(page);
if (page->pfmemalloc)
SetPageSlabPfmemalloc(page);
@@ -1424,7 +1419,7 @@ static void rcu_free_slab(struct rcu_head *h)
else
page = container_of((struct list_head *)h, struct page, lru);
- __free_slab(page->slab, page);
+ __free_slab(page->slab_cache, page);
}
static void free_slab(struct kmem_cache *s, struct page *page)
@@ -1872,12 +1867,14 @@ redo:
/*
* Unfreeze all the cpu partial slabs.
*
- * This function must be called with interrupt disabled.
+ * This function must be called with interrupts disabled
+ * for the cpu using c (or some other guarantee must be there
+ * to guarantee no concurrent accesses).
*/
-static void unfreeze_partials(struct kmem_cache *s)
+static void unfreeze_partials(struct kmem_cache *s,
+ struct kmem_cache_cpu *c)
{
struct kmem_cache_node *n = NULL, *n2 = NULL;
- struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
struct page *page, *discard_page = NULL;
while ((page = c->partial)) {
@@ -1963,7 +1960,7 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
* set to the per node partial list.
*/
local_irq_save(flags);
- unfreeze_partials(s);
+ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
local_irq_restore(flags);
oldpage = NULL;
pobjects = 0;
@@ -2006,7 +2003,7 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
if (c->page)
flush_slab(s, c);
- unfreeze_partials(s);
+ unfreeze_partials(s, c);
}
}
@@ -2459,7 +2456,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
void *prior;
void **object = (void *)x;
int was_frozen;
- int inuse;
struct page new;
unsigned long counters;
struct kmem_cache_node *n = NULL;
@@ -2472,13 +2468,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
return;
do {
+ if (unlikely(n)) {
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ n = NULL;
+ }
prior = page->freelist;
counters = page->counters;
set_freepointer(s, object, prior);
new.counters = counters;
was_frozen = new.frozen;
new.inuse--;
- if ((!new.inuse || !prior) && !was_frozen && !n) {
+ if ((!new.inuse || !prior) && !was_frozen) {
if (!kmem_cache_debug(s) && !prior)
@@ -2503,7 +2503,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
}
}
- inuse = new.inuse;
} while (!cmpxchg_double_slab(s, page,
prior, counters,
@@ -2529,25 +2528,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
return;
}
+ if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
+ goto slab_empty;
+
/*
- * was_frozen may have been set after we acquired the list_lock in
- * an earlier loop. So we need to check it here again.
+ * Objects left in the slab. If it was not on the partial list before
+ * then add it.
*/
- if (was_frozen)
- stat(s, FREE_FROZEN);
- else {
- if (unlikely(!inuse && n->nr_partial > s->min_partial))
- goto slab_empty;
-
- /*
- * Objects left in the slab. If it was not on the partial list before
- * then add it.
- */
- if (unlikely(!prior)) {
- remove_full(s, page);
- add_partial(n, page, DEACTIVATE_TO_TAIL);
- stat(s, FREE_ADD_PARTIAL);
- }
+ if (kmem_cache_debug(s) && unlikely(!prior)) {
+ remove_full(s, page);
+ add_partial(n, page, DEACTIVATE_TO_TAIL);
+ stat(s, FREE_ADD_PARTIAL);
}
spin_unlock_irqrestore(&n->list_lock, flags);
return;
@@ -2623,9 +2614,9 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
page = virt_to_head_page(x);
- if (kmem_cache_debug(s) && page->slab != s) {
+ if (kmem_cache_debug(s) && page->slab_cache != s) {
pr_err("kmem_cache_free: Wrong slab cache. %s but object"
- " is from %s\n", page->slab->name, s->name);
+ " is from %s\n", page->slab_cache->name, s->name);
WARN_ON_ONCE(1);
return;
}
@@ -2769,32 +2760,6 @@ static inline int calculate_order(int size, int reserved)
return -ENOSYS;
}
-/*
- * Figure out what the alignment of the objects will be.
- */
-static unsigned long calculate_alignment(unsigned long flags,
- unsigned long align, unsigned long size)
-{
- /*
- * If the user wants hardware cache aligned objects then follow that
- * suggestion if the object is sufficiently large.
- *
- * The hardware cache alignment cannot override the specified
- * alignment though. If that is greater then use it.
- */
- if (flags & SLAB_HWCACHE_ALIGN) {
- unsigned long ralign = cache_line_size();
- while (size <= ralign / 2)
- ralign /= 2;
- align = max(align, ralign);
- }
-
- if (align < ARCH_SLAB_MINALIGN)
- align = ARCH_SLAB_MINALIGN;
-
- return ALIGN(align, sizeof(void *));
-}
-
static void
init_kmem_cache_node(struct kmem_cache_node *n)
{
@@ -2928,7 +2893,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
unsigned long flags = s->flags;
unsigned long size = s->object_size;
- unsigned long align = s->align;
int order;
/*
@@ -3000,19 +2964,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
#endif
/*
- * Determine the alignment based on various parameters that the
- * user specified and the dynamic determination of cache line size
- * on bootup.
- */
- align = calculate_alignment(flags, align, s->object_size);
- s->align = align;
-
- /*
* SLUB stores one object immediately after another beginning from
* offset 0. In order to align the objects we have to simply size
* each object to conform to the alignment.
*/
- size = ALIGN(size, align);
+ size = ALIGN(size, s->align);
s->size = size;
if (forced_order >= 0)
order = forced_order;
@@ -3041,7 +2997,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
s->max = s->oo;
return !!oo_objects(s->oo);
-
}
static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
@@ -3127,15 +3082,6 @@ error:
return -EINVAL;
}
-/*
- * Determine the size of a slab object
- */
-unsigned int kmem_cache_size(struct kmem_cache *s)
-{
- return s->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
static void list_slab_objects(struct kmem_cache *s, struct page *page,
const char *text)
{
@@ -3261,32 +3207,6 @@ static int __init setup_slub_nomerge(char *str)
__setup("slub_nomerge", setup_slub_nomerge);
-static struct kmem_cache *__init create_kmalloc_cache(const char *name,
- int size, unsigned int flags)
-{
- struct kmem_cache *s;
-
- s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-
- s->name = name;
- s->size = s->object_size = size;
- s->align = ARCH_KMALLOC_MINALIGN;
-
- /*
- * This function is called with IRQs disabled during early-boot on
- * single CPU so there's no need to take slab_mutex here.
- */
- if (kmem_cache_open(s, flags))
- goto panic;
-
- list_add(&s->list, &slab_caches);
- return s;
-
-panic:
- panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
- return NULL;
-}
-
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -3424,7 +3344,7 @@ size_t ksize(const void *object)
return PAGE_SIZE << compound_order(page);
}
- return slab_ksize(page->slab);
+ return slab_ksize(page->slab_cache);
}
EXPORT_SYMBOL(ksize);
@@ -3449,8 +3369,8 @@ bool verify_mem_not_deleted(const void *x)
}
slab_lock(page);
- if (on_freelist(page->slab, page, object)) {
- object_err(page->slab, page, object, "Object is on free-list");
+ if (on_freelist(page->slab_cache, page, object)) {
+ object_err(page->slab_cache, page, object, "Object is on free-list");
rv = false;
} else {
rv = true;
@@ -3481,7 +3401,7 @@ void kfree(const void *x)
__free_pages(page, compound_order(page));
return;
}
- slab_free(page->slab, page, object, _RET_IP_);
+ slab_free(page->slab_cache, page, object, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
@@ -3676,15 +3596,16 @@ static int slab_memory_callback(struct notifier_block *self,
/*
* Used for early kmem_cache structures that were allocated using
- * the page allocator
+ * the page allocator. Allocate them properly then fix up the pointers
+ * that may be pointing to the wrong kmem_cache structure.
*/
-static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
+static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
{
int node;
+ struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
- list_add(&s->list, &slab_caches);
- s->refcount = -1;
+ memcpy(s, static_cache, kmem_cache->object_size);
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node);
@@ -3692,78 +3613,52 @@ static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
if (n) {
list_for_each_entry(p, &n->partial, lru)
- p->slab = s;
+ p->slab_cache = s;
#ifdef CONFIG_SLUB_DEBUG
list_for_each_entry(p, &n->full, lru)
- p->slab = s;
+ p->slab_cache = s;
#endif
}
}
+ list_add(&s->list, &slab_caches);
+ return s;
}
void __init kmem_cache_init(void)
{
+ static __initdata struct kmem_cache boot_kmem_cache,
+ boot_kmem_cache_node;
int i;
- int caches = 0;
- struct kmem_cache *temp_kmem_cache;
- int order;
- struct kmem_cache *temp_kmem_cache_node;
- unsigned long kmalloc_size;
+ int caches = 2;
if (debug_guardpage_minorder())
slub_max_order = 0;
- kmem_size = offsetof(struct kmem_cache, node) +
- nr_node_ids * sizeof(struct kmem_cache_node *);
-
- /* Allocate two kmem_caches from the page allocator */
- kmalloc_size = ALIGN(kmem_size, cache_line_size());
- order = get_order(2 * kmalloc_size);
- kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order);
-
- /*
- * Must first have the slab cache available for the allocations of the
- * struct kmem_cache_node's. There is special bootstrap code in
- * kmem_cache_open for slab_state == DOWN.
- */
- kmem_cache_node = (void *)kmem_cache + kmalloc_size;
+ kmem_cache_node = &boot_kmem_cache_node;
+ kmem_cache = &boot_kmem_cache;
- kmem_cache_node->name = "kmem_cache_node";
- kmem_cache_node->size = kmem_cache_node->object_size =
- sizeof(struct kmem_cache_node);
- kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+ create_boot_cache(kmem_cache_node, "kmem_cache_node",
+ sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
/* Able to allocate the per node structures */
slab_state = PARTIAL;
- temp_kmem_cache = kmem_cache;
- kmem_cache->name = "kmem_cache";
- kmem_cache->size = kmem_cache->object_size = kmem_size;
- kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+ create_boot_cache(kmem_cache, "kmem_cache",
+ offsetof(struct kmem_cache, node) +
+ nr_node_ids * sizeof(struct kmem_cache_node *),
+ SLAB_HWCACHE_ALIGN);
- kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
- memcpy(kmem_cache, temp_kmem_cache, kmem_size);
+ kmem_cache = bootstrap(&boot_kmem_cache);
/*
* Allocate kmem_cache_node properly from the kmem_cache slab.
* kmem_cache_node is separately allocated so no need to
* update any list pointers.
*/
- temp_kmem_cache_node = kmem_cache_node;
-
- kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
- memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
-
- kmem_cache_bootstrap_fixup(kmem_cache_node);
-
- caches++;
- kmem_cache_bootstrap_fixup(kmem_cache);
- caches++;
- /* Free temporary boot structure */
- free_pages((unsigned long)temp_kmem_cache, order);
+ kmem_cache_node = bootstrap(&boot_kmem_cache_node);
/* Now we can use the kmem_cache to allocate kmalloc slabs */
@@ -3964,6 +3859,10 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
if (err)
return err;
+ /* Mutex is not taken during early boot */
+ if (slab_state <= UP)
+ return 0;
+
mutex_unlock(&slab_mutex);
err = sysfs_slab_add(s);
mutex_lock(&slab_mutex);
@@ -5265,13 +5164,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
{
int err;
const char *name;
- int unmergeable;
+ int unmergeable = slab_unmergeable(s);
- if (slab_state < FULL)
- /* Defer until later */
- return 0;
-
- unmergeable = slab_unmergeable(s);
if (unmergeable) {
/*
* Slabcache can never be merged so we can use the name proper.
@@ -5405,49 +5299,14 @@ __initcall(slab_sysfs_init);
* The /proc/slabinfo ABI
*/
#ifdef CONFIG_SLABINFO
-static void print_slabinfo_header(struct seq_file *m)
-{
- seq_puts(m, "slabinfo - version: 2.1\n");
- seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
- "<objperslab> <pagesperslab>");
- seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
- seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
- seq_putc(m, '\n');
-}
-
-static void *s_start(struct seq_file *m, loff_t *pos)
-{
- loff_t n = *pos;
-
- mutex_lock(&slab_mutex);
- if (!n)
- print_slabinfo_header(m);
-
- return seq_list_start(&slab_caches, *pos);
-}
-
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
- return seq_list_next(p, &slab_caches, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
-{
- mutex_unlock(&slab_mutex);
-}
-
-static int s_show(struct seq_file *m, void *p)
+void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
unsigned long nr_partials = 0;
unsigned long nr_slabs = 0;
- unsigned long nr_inuse = 0;
unsigned long nr_objs = 0;
unsigned long nr_free = 0;
- struct kmem_cache *s;
int node;
- s = list_entry(p, struct kmem_cache, list);
-
for_each_online_node(node) {
struct kmem_cache_node *n = get_node(s, node);
@@ -5460,41 +5319,21 @@ static int s_show(struct seq_file *m, void *p)
nr_free += count_partial(n, count_free);
}
- nr_inuse = nr_objs - nr_free;
-
- seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
- nr_objs, s->size, oo_objects(s->oo),
- (1 << oo_order(s->oo)));
- seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
- seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
- 0UL);
- seq_putc(m, '\n');
- return 0;
+ sinfo->active_objs = nr_objs - nr_free;
+ sinfo->num_objs = nr_objs;
+ sinfo->active_slabs = nr_slabs;
+ sinfo->num_slabs = nr_slabs;
+ sinfo->objects_per_slab = oo_objects(s->oo);
+ sinfo->cache_order = oo_order(s->oo);
}
-static const struct seq_operations slabinfo_op = {
- .start = s_start,
- .next = s_next,
- .stop = s_stop,
- .show = s_show,
-};
-
-static int slabinfo_open(struct inode *inode, struct file *file)
+void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
- return seq_open(file, &slabinfo_op);
}
-static const struct file_operations proc_slabinfo_operations = {
- .open = slabinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int __init slab_proc_init(void)
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
{
- proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
- return 0;
+ return -EIO;
}
-module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */
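
The last hunks replace SLUB's private /proc/slabinfo seq_file code with get_slabinfo()/slabinfo_show_stats()/slabinfo_write(), leaving the formatting to common code in mm/slab_common.c. The sketch below is a hedged, self-contained illustration of that split: the allocator side fills a statistics structure, and a shared routine prints the line. The struct here is hypothetical (the name and obj_size fields are added for self-containment; in the kernel they come from the kmem_cache itself), and the format string mirrors the one removed from s_show() above.

/* Userspace sketch of the allocator/common split for /proc/slabinfo. */
#include <stdio.h>

struct slabinfo {
	unsigned long active_objs;
	unsigned long num_objs;
	unsigned long active_slabs;
	unsigned long num_slabs;
	unsigned int objects_per_slab;
	unsigned int cache_order;
	unsigned int obj_size;	/* added for this sketch */
	const char *name;	/* added for this sketch */
};

/* Roughly what the common show routine would print for one cache. */
static void show_slabinfo_line(const struct slabinfo *si)
{
	printf("%-17s %6lu %6lu %6u %4u %4d", si->name, si->active_objs,
	       si->num_objs, si->obj_size, si->objects_per_slab,
	       1 << si->cache_order);
	printf(" : tunables %4u %4u %4u", 0u, 0u, 0u);
	printf(" : slabdata %6lu %6lu %6lu\n", si->active_slabs,
	       si->num_slabs, 0UL);
}

int main(void)
{
	/* Example values only; a real caller would fill this via get_slabinfo(). */
	struct slabinfo si = {
		.name = "kmalloc-64", .active_objs = 640, .num_objs = 1024,
		.active_slabs = 16, .num_slabs = 16,
		.objects_per_slab = 64, .cache_order = 0, .obj_size = 64,
	};

	show_slabinfo_line(&si);
	return 0;
}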