Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	227
1 file changed, 169 insertions, 58 deletions
diff --git a/mm/slab.c b/mm/slab.c
index bd88411595b9..1e9330f43f46 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -116,6 +116,7 @@
 #include	<linux/kmemcheck.h>
 #include	<linux/memory.h>
 #include	<linux/prefetch.h>
+#include	<linux/locallock.h>
 
 #include	<net/sock.h>
 
@@ -633,12 +634,78 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
 #endif
 
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
+static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
+
+#ifndef CONFIG_PREEMPT_RT_BASE
+# define slab_on_each_cpu(func, cp)	on_each_cpu(func, cp, 1)
+#else
+/*
+ * execute func() for all CPUs. On PREEMPT_RT we dont actually have
+ * to run on the remote CPUs - we only have to take their CPU-locks.
+ * (This is a rare operation, so cacheline bouncing is not an issue.)
+ */
+static void
+slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
+{
+	unsigned int i;
+
+	get_cpu_light();
+	for_each_online_cpu(i)
+		func(arg, i);
+	put_cpu_light();
+}
+
+static void lock_slab_on(unsigned int cpu)
+{
+	local_lock_irq_on(slab_lock, cpu);
+}
+
+static void unlock_slab_on(unsigned int cpu)
+{
+	local_unlock_irq_on(slab_lock, cpu);
+}
+#endif
+
+static void free_delayed(struct list_head *h)
+{
+	while(!list_empty(h)) {
+		struct page *page = list_first_entry(h, struct page, lru);
+
+		list_del(&page->lru);
+		__free_pages(page, page->index);
+	}
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
+{
+	LIST_HEAD(tmp);
+
+	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+	local_spin_unlock_irq(slab_lock, list_lock);
+	free_delayed(&tmp);
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags)
+{
+	LIST_HEAD(tmp);
+
+	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+	local_unlock_irqrestore(slab_lock, flags);
+	free_delayed(&tmp);
+}
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
 	return cachep->array[smp_processor_id()];
 }
 
+static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
+						       int cpu)
+{
+	return cachep->array[cpu];
+}
+
 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 {
 	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
@@ -1073,9 +1140,10 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
 	if (n->alien) {
 		struct array_cache *ac = n->alien[node];
 
-		if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
+		if (ac && ac->avail &&
+		    local_spin_trylock_irq(slab_lock, &ac->lock)) {
 			__drain_alien_cache(cachep, ac, node);
-			spin_unlock_irq(&ac->lock);
+			local_spin_unlock_irq(slab_lock, &ac->lock);
 		}
 	}
 }
@@ -1090,9 +1158,9 @@ static void drain_alien_cache(struct kmem_cache *cachep,
 	for_each_online_node(i) {
 		ac = alien[i];
 		if (ac) {
-			spin_lock_irqsave(&ac->lock, flags);
+			local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
 			__drain_alien_cache(cachep, ac, i);
-			spin_unlock_irqrestore(&ac->lock, flags);
+			local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
 		}
 	}
 }
@@ -1171,11 +1239,11 @@ static int init_cache_node_node(int node)
 			cachep->node[node] = n;
 		}
 
-		spin_lock_irq(&cachep->node[node]->list_lock);
+		local_spin_lock_irq(slab_lock, &cachep->node[node]->list_lock);
 		cachep->node[node]->free_limit =
 			(1 + nr_cpus_node(node)) *
 			cachep->batchcount + cachep->num;
-		spin_unlock_irq(&cachep->node[node]->list_lock);
+		local_spin_unlock_irq(slab_lock, &cachep->node[node]->list_lock);
 	}
 	return 0;
 }
@@ -1200,7 +1268,7 @@ static void __cpuinit cpuup_canceled(long cpu)
 		if (!n)
 			goto free_array_cache;
 
-		spin_lock_irq(&n->list_lock);
+		local_spin_lock_irq(slab_lock, &n->list_lock);
 
 		/* Free limit for this kmem_cache_node */
 		n->free_limit -= cachep->batchcount;
@@ -1208,7 +1276,7 @@ static void __cpuinit cpuup_canceled(long cpu)
 			free_block(cachep, nc->entry, nc->avail, node);
 
 		if (!cpumask_empty(mask)) {
-			spin_unlock_irq(&n->list_lock);
+			unlock_l3_and_free_delayed(&n->list_lock);
 			goto free_array_cache;
 		}
 
@@ -1222,7 +1290,7 @@ static void __cpuinit cpuup_canceled(long cpu)
 		alien = n->alien;
 		n->alien = NULL;
 
-		spin_unlock_irq(&n->list_lock);
+		unlock_l3_and_free_delayed(&n->list_lock);
 
 		kfree(shared);
 		if (alien) {
@@ -1296,7 +1364,7 @@ static int __cpuinit cpuup_prepare(long cpu)
 		n = cachep->node[node];
 		BUG_ON(!n);
 
-		spin_lock_irq(&n->list_lock);
+		local_spin_lock_irq(slab_lock, &n->list_lock);
 		if (!n->shared) {
 			/*
 			 * We are serialised from CPU_DEAD or
@@ -1311,7 +1379,7 @@ static int __cpuinit cpuup_prepare(long cpu)
 			alien = NULL;
 		}
 #endif
-		spin_unlock_irq(&n->list_lock);
+		local_spin_unlock_irq(slab_lock, &n->list_lock);
 		kfree(shared);
 		free_alien_cache(alien);
 		if (cachep->flags & SLAB_DEBUG_OBJECTS)
@@ -1512,6 +1580,10 @@ void __init kmem_cache_init(void)
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
 
+	local_irq_lock_init(slab_lock);
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
+
 	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_cache_node_init(&init_kmem_cache_node[i]);
 
@@ -1789,12 +1861,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 /*
  * Interface to system's page release.
  */
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
 {
 	unsigned long i = (1 << cachep->gfporder);
-	struct page *page = virt_to_page(addr);
+	struct page *page, *basepage = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
+	page = basepage;
+
 	kmemcheck_free_shadow(page, cachep->gfporder);
 
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1813,7 +1887,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+	if (!delayed) {
+		free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+	} else {
+		basepage->index = cachep->gfporder;
+		list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+	}
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -1821,7 +1900,7 @@ static void kmem_rcu_free(struct rcu_head *head)
 	struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
 	struct kmem_cache *cachep = slab_rcu->cachep;
 
-	kmem_freepages(cachep, slab_rcu->addr);
+	kmem_freepages(cachep, slab_rcu->addr, false);
 	if (OFF_SLAB(cachep))
 		kmem_cache_free(cachep->slabp_cache, slab_rcu);
 }
@@ -2038,7 +2117,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
  * Before calling the slab must have been unlinked from the cache. The
  * cache-lock is not held/needed.
  */
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+			 bool delayed)
 {
 	void *addr = slabp->s_mem - slabp->colouroff;
 
@@ -2051,7 +2131,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 		slab_rcu->addr = addr;
 		call_rcu(&slab_rcu->head, kmem_rcu_free);
 	} else {
-		kmem_freepages(cachep, addr);
+		kmem_freepages(cachep, addr, delayed);
 		if (OFF_SLAB(cachep))
 			kmem_cache_free(cachep->slabp_cache, slabp);
 	}
@@ -2408,7 +2488,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 #if DEBUG
 static void check_irq_off(void)
 {
-	BUG_ON(!irqs_disabled());
+	BUG_ON_NONRT(!irqs_disabled());
 }
 
 static void check_irq_on(void)
@@ -2443,26 +2523,43 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
 			struct array_cache *ac,
 			int force, int node);
 
-static void do_drain(void *arg)
+static void __do_drain(void *arg, unsigned int cpu)
 {
 	struct kmem_cache *cachep = arg;
 	struct array_cache *ac;
-	int node = numa_mem_id();
+	int node = cpu_to_mem(cpu);
 
-	check_irq_off();
-	ac = cpu_cache_get(cachep);
+	ac = cpu_cache_get_on_cpu(cachep, cpu);
 	spin_lock(&cachep->node[node]->list_lock);
 	free_block(cachep, ac->entry, ac->avail, node);
 	spin_unlock(&cachep->node[node]->list_lock);
 	ac->avail = 0;
 }
 
+#ifndef CONFIG_PREEMPT_RT_BASE
+static void do_drain(void *arg)
+{
+	__do_drain(arg, smp_processor_id());
+}
+#else
+static void do_drain(void *arg, int cpu)
+{
+	LIST_HEAD(tmp);
+
+	lock_slab_on(cpu);
+	__do_drain(arg, cpu);
+	list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
+	unlock_slab_on(cpu);
+	free_delayed(&tmp);
+}
+#endif
+
 static void drain_cpu_caches(struct kmem_cache *cachep)
 {
 	struct kmem_cache_node *n;
 	int node;
 
-	on_each_cpu(do_drain, cachep, 1);
+	slab_on_each_cpu(do_drain, cachep);
 	check_irq_on();
 	for_each_online_node(node) {
 		n = cachep->node[node];
@@ -2493,10 +2590,10 @@ static int drain_freelist(struct kmem_cache *cache,
 	nr_freed = 0;
 	while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
 
-		spin_lock_irq(&n->list_lock);
+		local_spin_lock_irq(slab_lock, &n->list_lock);
 		p = n->slabs_free.prev;
 		if (p == &n->slabs_free) {
-			spin_unlock_irq(&n->list_lock);
+			local_spin_unlock_irq(slab_lock, &n->list_lock);
 			goto out;
 		}
 
@@ -2510,8 +2607,8 @@ static int drain_freelist(struct kmem_cache *cache,
 		 * to the cache.
 		 */
 		n->free_objects -= cache->num;
-		spin_unlock_irq(&n->list_lock);
-		slab_destroy(cache, slabp);
+		local_spin_unlock_irq(slab_lock, &n->list_lock);
+		slab_destroy(cache, slabp, false);
 		nr_freed++;
 	}
 out:
@@ -2785,7 +2882,7 @@ static int cache_grow(struct kmem_cache *cachep,
 	offset *= cachep->colour_off;
 
 	if (local_flags & __GFP_WAIT)
-		local_irq_enable();
+		local_unlock_irq(slab_lock);
 
 	/*
 	 * The test for missing atomic flag is performed here, rather than
@@ -2815,7 +2912,7 @@ static int cache_grow(struct kmem_cache *cachep,
 	cache_init_objs(cachep, slabp);
 
 	if (local_flags & __GFP_WAIT)
-		local_irq_disable();
+		local_lock_irq(slab_lock);
 	check_irq_off();
 	spin_lock(&n->list_lock);
 
@@ -2826,10 +2923,10 @@ static int cache_grow(struct kmem_cache *cachep,
 	spin_unlock(&n->list_lock);
 	return 1;
 opps1:
-	kmem_freepages(cachep, objp);
+	kmem_freepages(cachep, objp, false);
 failed:
 	if (local_flags & __GFP_WAIT)
-		local_irq_disable();
+		local_lock_irq(slab_lock);
 	return 0;
 }
 
@@ -3243,11 +3340,11 @@ retry:
 		 * set and go into memory reserves if necessary.
 		 */
 		if (local_flags & __GFP_WAIT)
-			local_irq_enable();
+			local_unlock_irq(slab_lock);
 		kmem_flagcheck(cache, flags);
 		obj = kmem_getpages(cache, local_flags, numa_mem_id());
 		if (local_flags & __GFP_WAIT)
-			local_irq_disable();
+			local_lock_irq(slab_lock);
 		if (obj) {
 			/*
 			 * Insert into the appropriate per node queues
@@ -3368,7 +3465,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	cachep = memcg_kmem_get_cache(cachep, flags);
 
 	cache_alloc_debugcheck_before(cachep, flags);
-	local_irq_save(save_flags);
+	local_lock_irqsave(slab_lock, save_flags);
 	if (nodeid == NUMA_NO_NODE)
 		nodeid = slab_node;
 
@@ -3393,7 +3490,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	/* ___cache_alloc_node can fall back to other nodes */
 	ptr = ____cache_alloc_node(cachep, flags, nodeid);
   out:
-	local_irq_restore(save_flags);
+	local_unlock_irqrestore(slab_lock, save_flags);
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
 	kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
 				 flags);
@@ -3455,9 +3552,9 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 	cachep = memcg_kmem_get_cache(cachep, flags);
 
 	cache_alloc_debugcheck_before(cachep, flags);
-	local_irq_save(save_flags);
+	local_lock_irqsave(slab_lock, save_flags);
 	objp = __do_cache_alloc(cachep, flags);
-	local_irq_restore(save_flags);
+	local_unlock_irqrestore(slab_lock, save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
 	kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
 				 flags);
@@ -3508,7 +3605,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 				 * a different cache, refer to comments before
 				 * alloc_slabmgmt.
 				 */
-				slab_destroy(cachep, slabp);
+				slab_destroy(cachep, slabp, true);
 			} else {
 				list_add(&slabp->list, &n->slabs_free);
 			}
@@ -3771,12 +3868,12 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 	if (!cachep)
 		return;
 
-	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, cachep->object_size);
 	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(objp, cachep->object_size);
+	local_lock_irqsave(slab_lock, flags);
 	__cache_free(cachep, objp, _RET_IP_);
-	local_irq_restore(flags);
+	unlock_slab_and_free_delayed(flags);
 
 	trace_kmem_cache_free(_RET_IP_, objp);
 }
@@ -3800,14 +3897,14 @@ void kfree(const void *objp)
 
 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
 		return;
-	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
 	debug_check_no_locks_freed(objp, c->object_size);
 
 	debug_check_no_obj_freed(objp, c->object_size);
+	local_lock_irqsave(slab_lock, flags);
 	__cache_free(c, (void *)objp, _RET_IP_);
-	local_irq_restore(flags);
+	unlock_slab_and_free_delayed(flags);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3844,7 +3941,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
 		if (n) {
 			struct array_cache *shared = n->shared;
 
-			spin_lock_irq(&n->list_lock);
+			local_spin_lock_irq(slab_lock, &n->list_lock);
 
 			if (shared)
 				free_block(cachep, shared->entry,
@@ -3857,7 +3954,8 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
 			}
 			n->free_limit = (1 + nr_cpus_node(node)) *
 					cachep->batchcount + cachep->num;
-			spin_unlock_irq(&n->list_lock);
+			unlock_l3_and_free_delayed(&n->list_lock);
+
 			kfree(shared);
 			free_alien_cache(new_alien);
 			continue;
@@ -3904,18 +4002,29 @@ struct ccupdate_struct {
 	struct array_cache *new[0];
 };
 
-static void do_ccupdate_local(void *info)
+static void __do_ccupdate_local(void *info, int cpu)
 {
 	struct ccupdate_struct *new = info;
 	struct array_cache *old;
 
-	check_irq_off();
-	old = cpu_cache_get(new->cachep);
+	old = cpu_cache_get_on_cpu(new->cachep, cpu);
 
-	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
-	new->new[smp_processor_id()] = old;
+	new->cachep->array[cpu] = new->new[cpu];
+	new->new[cpu] = old;
 }
 
+#ifndef CONFIG_PREEMPT_RT_BASE
+static void do_ccupdate_local(void *info)
+{
+	__do_ccupdate_local(info, smp_processor_id());
+}
+#else
+static void do_ccupdate_local(void *info, int cpu)
+{
+	__do_ccupdate_local(info, cpu);
+}
+#endif
+
 /* Always called with the slab_mutex held */
 static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
@@ -3940,7 +4049,7 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	}
 	new->cachep = cachep;
 
-	on_each_cpu(do_ccupdate_local, (void *)new, 1);
+	slab_on_each_cpu(do_ccupdate_local, (void *)new);
 	check_irq_on();
 
 	cachep->batchcount = batchcount;
@@ -3951,9 +4060,11 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 		struct array_cache *ccold = new->new[i];
 		if (!ccold)
 			continue;
-		spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+		local_spin_lock_irq(slab_lock,
+				    &cachep->node[cpu_to_mem(i)]->list_lock);
 		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
-		spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+		unlock_l3_and_free_delayed(&cachep->node[cpu_to_mem(i)]->list_lock);
 		kfree(ccold);
 	}
 	kfree(new);
@@ -4068,7 +4179,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
 	if (ac->touched && !force) {
 		ac->touched = 0;
 	} else {
-		spin_lock_irq(&n->list_lock);
+		local_spin_lock_irq(slab_lock, &n->list_lock);
 		if (ac->avail) {
 			tofree = force ? ac->avail : (ac->limit + 4) / 5;
 			if (tofree > ac->avail)
@@ -4078,7 +4189,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
 			memmove(ac->entry, &(ac->entry[tofree]),
 				sizeof(void *) * ac->avail);
 		}
-		spin_unlock_irq(&n->list_lock);
+		local_spin_unlock_irq(slab_lock, &n->list_lock);
 	}
 }
 
@@ -4171,7 +4282,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 			continue;
 
 		check_irq_on();
-		spin_lock_irq(&n->list_lock);
+		local_spin_lock_irq(slab_lock, &n->list_lock);
 
 		list_for_each_entry(slabp, &n->slabs_full, list) {
 			if (slabp->inuse != cachep->num && !error)
@@ -4196,7 +4307,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 		if (n->shared)
 			shared_avail += n->shared->avail;
 
-		spin_unlock_irq(&n->list_lock);
+		local_spin_unlock_irq(slab_lock, &n->list_lock);
 	}
 	num_slabs += active_slabs;
 	num_objs = num_slabs * cachep->num;
@@ -4396,13 +4507,13 @@ static int leaks_show(struct seq_file *m, void *p)
 			continue;
 
 		check_irq_on();
-		spin_lock_irq(&n->list_lock);
+		local_spin_lock_irq(slab_lock, &n->list_lock);
 
 		list_for_each_entry(slabp, &n->slabs_full, list)
 			handle_slab(x, cachep, slabp);
 		list_for_each_entry(slabp, &n->slabs_partial, list)
 			handle_slab(x, cachep, slabp);
-		spin_unlock_irq(&n->list_lock);
+		local_spin_unlock_irq(slab_lock, &n->list_lock);
 	}
 	name = cachep->name;
 	if (x[0] == x[1]) {
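
A note on the pattern the patch introduces: pages that become free while slab_lock (or a node list_lock nested inside it) is held are not returned to the page allocator immediately. kmem_freepages(..., delayed = true) parks them on the per-CPU slab_free_list, and the unlock_l3_and_free_delayed() / unlock_slab_and_free_delayed() helpers splice that list onto the stack, drop the lock, and only then call __free_pages(). This keeps the page allocator, whose locks may themselves sleep on PREEMPT_RT, out of the slab critical section. Below is a minimal stand-alone user-space C sketch of the same splice-unlock-free idea; the pthread mutex standing in for the local lock and the names (struct pending, defer_free, unlock_and_free_deferred) are illustrative assumptions, not kernel API.

/* Build with: gcc -pthread sketch.c */
#include <pthread.h>
#include <stdlib.h>

struct pending {
	struct pending *next;
	void *obj;
};

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ slab_lock */
static __thread struct pending *deferred; /* per-thread, ~ per-CPU slab_free_list */

/* Called with big_lock held: queue the object instead of freeing it now. */
static void defer_free(void *obj)
{
	struct pending *p = malloc(sizeof(*p));

	if (!p)
		abort();
	p->obj = obj;
	p->next = deferred;
	deferred = p;
}

/*
 * Mirrors unlock_slab_and_free_delayed(): detach the pending list while
 * the lock is still held, drop the lock, then do the potentially slow
 * frees outside the critical section.
 */
static void unlock_and_free_deferred(void)
{
	struct pending *p = deferred;

	deferred = NULL;
	pthread_mutex_unlock(&big_lock);
	while (p) {
		struct pending *next = p->next;

		free(p->obj);
		free(p);
		p = next;
	}
}

int main(void)
{
	pthread_mutex_lock(&big_lock);
	defer_free(malloc(64));		/* "page" released under the lock */
	unlock_and_free_deferred();	/* actual free happens after unlock */
	return 0;
}

The ordering is the point of the design: the list is detached before the unlock, so no other context can observe a half-consumed list, while the frees themselves run after the unlock, keeping lock hold times short and the allocator out of the locked region.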