aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CREDITS3
-rw-r--r--MAINTAINERS5
-rw-r--r--arch/x86/kernel/cpu/perf_event.c10
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c10
-rw-r--r--drivers/ata/sata_promise.c2
-rw-r--r--drivers/block/cciss.c1
-rw-r--r--drivers/block/cpqarray.c1
-rw-r--r--drivers/gpu/drm/i2c/tda998x_drv.c3
-rw-r--r--drivers/md/bcache/bcache.h7
-rw-r--r--drivers/md/bcache/bset.c39
-rw-r--r--drivers/md/bcache/btree.c4
-rw-r--r--drivers/md/bcache/journal.c33
-rw-r--r--drivers/md/bcache/request.c15
-rw-r--r--drivers/md/bcache/sysfs.c9
-rw-r--r--drivers/md/bcache/util.c11
-rw-r--r--drivers/md/bcache/util.h12
-rw-r--r--drivers/md/bcache/writeback.c42
-rw-r--r--fs/bio.c4
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--include/linux/memcontrol.h55
-rw-r--r--include/linux/smp.h6
-rw-r--r--include/uapi/linux/perf_event.h15
-rw-r--r--ipc/msg.c19
-rw-r--r--ipc/sem.c34
-rw-r--r--ipc/shm.c17
-rw-r--r--ipc/util.c32
-rw-r--r--ipc/util.h10
-rw-r--r--kernel/audit.c5
-rw-r--r--kernel/events/core.c21
-rw-r--r--kernel/reboot.c9
-rw-r--r--kernel/sched/fair.c9
-rw-r--r--kernel/watchdog.c60
-rw-r--r--mm/memcontrol.c560
-rw-r--r--mm/mlock.c1
-rw-r--r--mm/vmscan.c83
-rwxr-xr-xscripts/checkpatch.pl4
-rw-r--r--tools/lib/lk/debugfs.c1
-rw-r--r--tools/perf/arch/x86/util/tsc.c6
-rw-r--r--tools/perf/builtin-inject.c2
-rw-r--r--tools/perf/builtin-report.c5
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/builtin-trace.c17
-rw-r--r--tools/perf/config/Makefile3
-rw-r--r--tools/perf/config/feature-tests.mak10
-rw-r--r--tools/perf/util/annotate.c2
-rw-r--r--tools/perf/util/dwarf-aux.c19
-rw-r--r--tools/perf/util/dwarf-aux.h3
-rw-r--r--tools/perf/util/header.c41
-rw-r--r--tools/perf/util/hist.c2
-rw-r--r--tools/perf/util/probe-finder.c12
-rw-r--r--tools/perf/util/session.c9
-rw-r--r--tools/perf/util/session.h4
-rw-r--r--tools/perf/util/symbol-elf.c16
-rw-r--r--tools/perf/util/trace-event-parse.c2
55 files changed, 875 insertions, 435 deletions
diff --git a/CREDITS b/CREDITS
index 9416a9a8b95..0640e165048 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2808,8 +2808,7 @@ S: Ottawa, Ontario
S: Canada K2P 0X8
N: Mikael Pettersson
-E: mikpe@it.uu.se
-W: http://user.it.uu.se/~mikpe/linux/
+E: mikpelinux@gmail.com
D: Miscellaneous fixes
N: Reed H. Petty
diff --git a/MAINTAINERS b/MAINTAINERS
index e61c2e83fc2..c53fe955964 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1812,7 +1812,8 @@ S: Supported
F: drivers/net/ethernet/broadcom/bnx2x/
BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE
-M: Christian Daudt <csd@broadcom.com>
+M: Christian Daudt <bcm@fixthebug.org>
+L: bcm-kernel-feedback-list@broadcom.com
T: git git://git.github.com/broadcom/bcm11351
S: Maintained
F: arch/arm/mach-bcm/
@@ -6595,7 +6596,7 @@ S: Obsolete
F: drivers/net/wireless/prism54/
PROMISE SATA TX2/TX4 CONTROLLER LIBATA DRIVER
-M: Mikael Pettersson <mikpe@it.uu.se>
+M: Mikael Pettersson <mikpelinux@gmail.com>
L: linux-ide@vger.kernel.org
S: Maintained
F: drivers/ata/sata_promise.*
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8355c84b972..a9c606bb494 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1883,9 +1883,9 @@ static struct pmu pmu = {
void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
- userpg->cap_usr_time = 0;
- userpg->cap_usr_time_zero = 0;
- userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
userpg->pmc_width = x86_pmu.cntval_bits;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
return;
- userpg->cap_usr_time = 1;
+ userpg->cap_user_time = 1;
userpg->time_mult = this_cpu_read(cyc2ns);
userpg->time_shift = CYC2NS_SCALE_FACTOR;
userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
if (sched_clock_stable && !check_tsc_disabled()) {
- userpg->cap_usr_time_zero = 1;
+ userpg->cap_user_time_zero = 1;
userpg->time_zero = this_cpu_read(cyc2ns_offset);
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9db76c31b3c..f31a1655d1f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void)
break;
case 55: /* Atom 22nm "Silvermont" */
+ case 77: /* Avoton "Silvermont" */
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 8ed44589b0e..4118f9f6831 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
struct intel_uncore_box *box;
int i, size;
size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
- box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+ box = kzalloc_node(size, GFP_KERNEL, node);
if (!box)
return NULL;
@@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
struct intel_uncore_box *fake_box;
int ret = -EINVAL, n;
- fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+ fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
if (!fake_box)
return -ENOMEM;
@@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
}
type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
- box = uncore_alloc_box(type, 0);
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
if (!box)
return -ENOMEM;
@@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
if (pmu->func_id < 0)
pmu->func_id = j;
- box = uncore_alloc_box(type, cpu);
+ box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
return -ENOMEM;
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 958ba2a420c..97f4acb54ad 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -2,7 +2,7 @@
* sata_promise.c - Promise SATA
*
* Maintained by: Tejun Heo <tj@kernel.org>
- * Mikael Pettersson <mikpe@it.uu.se>
+ * Mikael Pettersson
* Please ALWAYS copy linux-ide@vger.kernel.org
* on emails.
*
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2d95ff5353..edfa2515bc8 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1189,6 +1189,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
int err;
u32 cp;
+ memset(&arg64, 0, sizeof(arg64));
err = 0;
err |=
copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 639d26b90b9..2b944038453 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -1193,6 +1193,7 @@ out_passthru:
ida_pci_info_struct pciinfo;
if (!arg) return -EINVAL;
+ memset(&pciinfo, 0, sizeof(pciinfo));
pciinfo.bus = host->pci_dev->bus->number;
pciinfo.dev_fn = host->pci_dev->devfn;
pciinfo.board_id = host->board_id;
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index b1f8fc69023..60e84043aa3 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -707,8 +707,7 @@ tda998x_encoder_dpms(struct drm_encoder *encoder, int mode)
reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2);
break;
case DRM_MODE_DPMS_OFF:
- /* disable audio and video ports */
- reg_write(encoder, REG_ENA_AP, 0x00);
+ /* disable video ports */
reg_write(encoder, REG_ENA_VP_0, 0x00);
reg_write(encoder, REG_ENA_VP_1, 0x00);
reg_write(encoder, REG_ENA_VP_2, 0x00);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b39f6f0b45f..0f12382aa35 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
*/
atomic_t has_dirty;
- struct ratelimit writeback_rate;
+ struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
/*
@@ -507,10 +507,9 @@ struct cached_dev {
*/
sector_t last_read;
- /* Number of writeback bios in flight */
- atomic_t in_flight;
+ /* Limit number of writeback bios in flight */
+ struct semaphore in_flight;
struct closure_with_timer writeback;
- struct closure_waitlist writeback_wait;
struct keybuf writeback_keys;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8010eed06a5..22d1ae72c28 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
/* Mergesort */
+static void sort_key_next(struct btree_iter *iter,
+ struct btree_iter_set *i)
+{
+ i->k = bkey_next(i->k);
+
+ if (i->k == i->end)
+ *i = iter->data[--iter->used];
+}
+
static void btree_sort_fixup(struct btree_iter *iter)
{
while (iter->used > 1) {
struct btree_iter_set *top = iter->data, *i = top + 1;
- struct bkey *k;
if (iter->used > 2 &&
btree_iter_cmp(i[0], i[1]))
i++;
- for (k = i->k;
- k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
- k = bkey_next(k))
- if (top->k > i->k)
- __bch_cut_front(top->k, k);
- else if (KEY_SIZE(k))
- bch_cut_back(&START_KEY(k), top->k);
-
- if (top->k < i->k || k == i->k)
+ if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
break;
- heap_sift(iter, i - top, btree_iter_cmp);
+ if (!KEY_SIZE(i->k)) {
+ sort_key_next(iter, i);
+ heap_sift(iter, i - top, btree_iter_cmp);
+ continue;
+ }
+
+ if (top->k > i->k) {
+ if (bkey_cmp(top->k, i->k) >= 0)
+ sort_key_next(iter, i);
+ else
+ bch_cut_front(top->k, i->k);
+
+ heap_sift(iter, i - top, btree_iter_cmp);
+ } else {
+ /* can't happen because of comparison func */
+ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+ bch_cut_back(&START_KEY(i->k), top->k);
+ }
}
}
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index f9764e61978..f42fc7ed9cd 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
return;
err:
- bch_cache_set_error(b->c, "io error reading bucket %lu",
+ bch_cache_set_error(b->c, "io error reading bucket %zu",
PTR_BUCKET_NR(b->c, &b->key, 0));
}
@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
return SHRINK_STOP;
/* Return -1 if we can't do anything right now */
- if (sc->gfp_mask & __GFP_WAIT)
+ if (sc->gfp_mask & __GFP_IO)
mutex_lock(&c->bucket_lock);
else if (!mutex_trylock(&c->bucket_lock))
return -1;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ba95ab84b2b..8435f81e5d8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
pr_debug("%u journal buckets", ca->sb.njournal_buckets);
- /* Read journal buckets ordered by golden ratio hash to quickly
+ /*
+ * Read journal buckets ordered by golden ratio hash to quickly
* find a sequence of buckets with valid journal entries
*/
for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
goto bsearch;
}
- /* If that fails, check all the buckets we haven't checked
+ /*
+ * If that fails, check all the buckets we haven't checked
* already
*/
pr_debug("falling back to linear search");
- for (l = 0; l < ca->sb.njournal_buckets; l++) {
- if (test_bit(l, bitmap))
- continue;
-
+ for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
+ l < ca->sb.njournal_buckets;
+ l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
if (read_bucket(l))
goto bsearch;
- }
+
+ if (list_empty(list))
+ continue;
bsearch:
/* Binary search */
m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ bsearch:
r = m;
}
- /* Read buckets in reverse order until we stop finding more
+ /*
+ * Read buckets in reverse order until we stop finding more
* journal entries
*/
- pr_debug("finishing up");
+ pr_debug("finishing up: m %u njournal_buckets %u",
+ m, ca->sb.njournal_buckets);
l = m;
while (1) {
@@ -228,9 +233,10 @@ bsearch:
}
}
- c->journal.seq = list_entry(list->prev,
- struct journal_replay,
- list)->j.seq;
+ if (!list_empty(list))
+ c->journal.seq = list_entry(list->prev,
+ struct journal_replay,
+ list)->j.seq;
return 0;
#undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
return;
}
- switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) {
+ switch (atomic_read(&ja->discard_in_flight)) {
case DISCARD_IN_FLIGHT:
return;
@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
if (cl)
BUG_ON(!closure_wait(&w->wait, cl));
+ closure_flush(&c->journal.io);
__journal_try_write(c, true);
}
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 786a1a4f74d..71eb233b9ac 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -997,14 +997,17 @@ static void request_write(struct cached_dev *dc, struct search *s)
} else {
bch_writeback_add(dc);
- if (s->op.flush_journal) {
+ if (bio->bi_rw & REQ_FLUSH) {
/* Also need to send a flush to the backing device */
- s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
- dc->disk.bio_split);
+ struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
+ dc->disk.bio_split);
- bio->bi_size = 0;
- bio->bi_vcnt = 0;
- closure_bio_submit(bio, cl, s->d);
+ flush->bi_rw = WRITE_FLUSH;
+ flush->bi_bdev = bio->bi_bdev;
+ flush->bi_end_io = request_endio;
+ flush->bi_private = cl;
+
+ closure_bio_submit(flush, cl, s->d);
} else {
s->op.cache_bio = bio;
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4fe6ab2fbe2..924dcfdae11 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -223,8 +223,13 @@ STORE(__cached_dev)
}
if (attr == &sysfs_label) {
- /* note: endlines are preserved */
- memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
+ if (size > SB_LABEL_SIZE)
+ return -EINVAL;
+ memcpy(dc->sb.label, buf, size);
+ if (size < SB_LABEL_SIZE)
+ dc->sb.label[size] = '\0';
+ if (size && dc->sb.label[size - 1] == '\n')
+ dc->sb.label[size - 1] = '\0';
bch_write_bdev_super(dc, NULL);
if (dc->disk.c) {
memcpy(dc->disk.c->uuids[dc->disk.id].label,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 98eb81159a2..420dad545c7 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
stats->last = now ?: 1;
}
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1ae2a73ad85..ea345c6896f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
(ewma) >> factor; \
})
-struct ratelimit {
+struct bch_ratelimit {
+ /* Next time we want to do some work, in nanoseconds */
uint64_t next;
+
+ /*
+ * Rate at which we want to do work, in units per nanosecond
+ * The units here correspond to the units passed to bch_next_delay()
+ */
unsigned rate;
};
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
{
d->next = local_clock();
}
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
#define __DIV_SAFE(n, d, zero) \
({ \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff55162..ba3ee48320f 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
{
+ uint64_t ret;
+
if (atomic_read(&dc->disk.detaching) ||
!dc->writeback_percent)
return 0;
- return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+ ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+
+ return min_t(uint64_t, ret, HZ);
}
/* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
up_write(&dc->writeback_lock);
- ratelimit_reset(&dc->writeback_rate);
+ bch_ratelimit_reset(&dc->writeback_rate);
/* Punt to workqueue only so we don't recurse and blow the stack */
continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
}
bch_keybuf_del(&dc->writeback_keys, w);
- atomic_dec_bug(&dc->in_flight);
-
- closure_wake_up(&dc->writeback_wait);
+ up(&dc->in_flight);
closure_return_with_destructor(cl, dirty_io_destructor);
}
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
closure_bio_submit(&io->bio, cl, &io->dc->disk);
- continue_at(cl, write_dirty_finish, dirty_wq);
+ continue_at(cl, write_dirty_finish, system_wq);
}
static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
closure_bio_submit(&io->bio, cl, &io->dc->disk);
- continue_at(cl, write_dirty, dirty_wq);
+ continue_at(cl, write_dirty, system_wq);
}
static void read_dirty(struct closure *cl)
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)
if (delay > 0 &&
(KEY_START(&w->key) != dc->last_read ||
- jiffies_to_msecs(delay) > 50)) {
- w->private = NULL;
-
- closure_delay(&dc->writeback, delay);
- continue_at(cl, read_dirty, dirty_wq);
- }
+ jiffies_to_msecs(delay) > 50))
+ delay = schedule_timeout_uninterruptible(delay);
dc->last_read = KEY_OFFSET(&w->key);
@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)
trace_bcache_writeback(&w->key);
- closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
+ down(&dc->in_flight);
+ closure_call(&io->cl, read_dirty_submit, NULL, cl);
delay = writeback_delay(dc, KEY_SIZE(&w->key));
-
- atomic_inc(&dc->in_flight);
-
- if (!closure_wait_event(&dc->writeback_wait, cl,
- atomic_read(&dc->in_flight) < 64))
- continue_at(cl, read_dirty, dirty_wq);
}
if (0) {
@@ -442,7 +435,11 @@ err:
bch_keybuf_del(&dc->writeback_keys, w);
}
- refill_dirty(cl);
+ /*
+ * Wait for outstanding writeback IOs to finish (and keybuf slots to be
+ * freed) before refilling again
+ */
+ continue_at(cl, refill_dirty, dirty_wq);
}
/* Init */
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
+ sema_init(&dc->in_flight, 64);
closure_init_unlocked(&dc->writeback);
init_rwsem(&dc->writeback_lock);
@@ -513,7 +511,7 @@ void bch_writeback_exit(void)
int __init bch_writeback_init(void)
{
- dirty_wq = create_singlethread_workqueue("bcache_writeback");
+ dirty_wq = create_workqueue("bcache_writeback");
if (!dirty_wq)
return -ENOMEM;
diff --git a/fs/bio.c b/fs/bio.c
index b3b20ed9510..ea5035da4d9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
src_p = kmap_atomic(src_bv->bv_page);
dst_p = kmap_atomic(dst_bv->bv_page);
- memcpy(dst_p + dst_bv->bv_offset,
- src_p + src_bv->bv_offset,
+ memcpy(dst_p + dst_offset,
+ src_p + src_offset,
bytes);
kunmap_atomic(dst_p);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 121da2dc3be..d4e81e4a9b0 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
{
int tmp, hangup_needed = 0;
struct ocfs2_super *osb = NULL;
- char nodestr[8];
+ char nodestr[12];
trace_ocfs2_dismount_volume(sb);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 60e95872da2..ecc82b37c4c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie {
unsigned int generation;
};
-enum mem_cgroup_filter_t {
- VISIT, /* visit current node */
- SKIP, /* skip the current node and continue traversal */
- SKIP_TREE, /* skip the whole subtree and continue traversal */
-};
-
-/*
- * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
- * iterate through the hierarchy tree. Each tree element is checked by the
- * predicate before it is returned by the iterator. If a filter returns
- * SKIP or SKIP_TREE then the iterator code continues traversal (with the
- * next node down the hierarchy or the next node that doesn't belong under the
- * memcg's subtree).
- */
-typedef enum mem_cgroup_filter_t
-(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
-
#ifdef CONFIG_MEMCG
/*
* All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok);
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
- struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim,
- mem_cgroup_iter_filter cond);
-
-static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
- struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim)
-{
- return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
-}
-
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+ struct mem_cgroup *,
+ struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
/*
@@ -260,9 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
mem_cgroup_update_page_stat(page, idx, -1);
}
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
- struct mem_cgroup *root);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned);
void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -376,15 +350,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok)
{
}
-static inline struct mem_cgroup *
-mem_cgroup_iter_cond(struct mem_cgroup *root,
- struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim,
- mem_cgroup_iter_filter cond)
-{
- /* first call must return non-NULL, second return NULL */
- return (struct mem_cgroup *)(unsigned long)!prev;
-}
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
@@ -471,11 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
}
static inline
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
- struct mem_cgroup *root)
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
{
- return VISIT;
+ return 0;
}
static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index cfb7ca094b3..731f5237d5f 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -155,6 +155,12 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
static inline void kick_all_cpus_sync(void) { }
+static inline void __smp_call_function_single(int cpuid,
+ struct call_single_data *data, int wait)
+{
+ on_each_cpu(data->func, data->info, wait);
+}
+
#endif /* !SMP */
/*
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 40a1fb80739..009a655a5d3 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -380,10 +380,13 @@ struct perf_event_mmap_page {
union {
__u64 capabilities;
struct {
- __u64 cap_usr_time : 1,
- cap_usr_rdpmc : 1,
- cap_usr_time_zero : 1,
- cap_____res : 61;
+ __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+ cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
+
+ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
+ cap_user_time : 1, /* The time_* fields are used */
+ cap_user_time_zero : 1, /* The time_zero field is used */
+ cap_____res : 59;
};
};
@@ -442,12 +445,13 @@ struct perf_event_mmap_page {
* ((rem * time_mult) >> time_shift);
*/
__u64 time_zero;
+ __u32 size; /* Header size up to __reserved[] fields. */
/*
* Hole for extension of the self monitor capabilities
*/
- __u64 __reserved[119]; /* align to 1k */
+ __u8 __reserved[118*8+4]; /* align to 1k. */
/*
* Control data for the mmap() data buffer.
@@ -528,6 +532,7 @@ enum perf_event_type {
* u64 len;
* u64 pgoff;
* char filename[];
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP = 1,
diff --git a/ipc/msg.c b/ipc/msg.c
index b0d541d4267..9e4310c546a 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
ipc_rmid(&msg_ids(ns), &s->q_perm);
}
+static void msg_rcu_free(struct rcu_head *head)
+{
+ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+ struct msg_queue *msq = ipc_rcu_to_struct(p);
+
+ security_msg_queue_free(msq);
+ ipc_rcu_free(head);
+}
+
/**
* newque - Create a new msg queue
* @ns: namespace
@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq);
if (retval) {
- ipc_rcu_putref(msq);
+ ipc_rcu_putref(msq, ipc_rcu_free);
return retval;
}
/* ipc_addid() locks msq upon success. */
id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
if (id < 0) {
- security_msg_queue_free(msq);
- ipc_rcu_putref(msq);
+ ipc_rcu_putref(msq, msg_rcu_free);
return id;
}
@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
free_msg(msg);
}
atomic_sub(msq->q_cbytes, &ns->msg_bytes);
- security_msg_queue_free(msq);
- ipc_rcu_putref(msq);
+ ipc_rcu_putref(msq, msg_rcu_free);
}
/*
@@ -717,7 +724,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
rcu_read_lock();
ipc_lock_object(&msq->q_perm);
- ipc_rcu_putref(msq);
+ ipc_rcu_putref(msq, ipc_rcu_free);
if (msq->q_perm.deleted) {
err = -EIDRM;
goto out_unlock0;
diff --git a/ipc/sem.c b/ipc/sem.c
index 69b6a21f384..19c8b980d1f 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,6 +243,15 @@ static void merge_queues(struct sem_array *sma)
}
}
+static void sem_rcu_free(struct rcu_head *head)
+{
+ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+ struct sem_array *sma = ipc_rcu_to_struct(p);
+
+ security_sem_free(sma);
+ ipc_rcu_free(head);
+}
+
/*
* If the request contains only one semaphore operation, and there are
* no complex transactions pending, lock only the semaphore involved.
@@ -374,12 +383,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
static inline void sem_lock_and_putref(struct sem_array *sma)
{
sem_lock(sma, NULL, -1);
- ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
}
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +462,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
sma->sem_perm.security = NULL;
retval = security_sem_alloc(sma);
if (retval) {
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return retval;
}
id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
if (id < 0) {
- security_sem_free(sma);
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, sem_rcu_free);
return id;
}
ns->used_sems += nsems;
@@ -1047,8 +1050,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
wake_up_sem_queue_do(&tasks);
ns->used_sems -= sma->sem_nsems;
- security_sem_free(sma);
- ipc_rcu_putref(sma);
+ ipc_rcu_putref(sma, sem_rcu_free);
}
static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1292,7 +1294,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
rcu_read_unlock();
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if(sem_io == NULL) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return -ENOMEM;
}
@@ -1328,20 +1330,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
if(nsems > SEMMSL_FAST) {
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if(sem_io == NULL) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return -ENOMEM;
}
}
if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
err = -EFAULT;
goto out_free;
}
for (i = 0; i < nsems; i++) {
if (sem_io[i] > SEMVMX) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
err = -ERANGE;
goto out_free;
}
@@ -1629,7 +1631,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
/* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
if (!new) {
- sem_putref(sma);
+ ipc_rcu_putref(sma, ipc_rcu_free);
return ERR_PTR(-ENOMEM);
}
diff --git a/ipc/shm.c b/ipc/shm.c
index 2821cdf93ad..d69739610fd 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
ipc_lock_object(&ipcp->shm_perm);
}
+static void shm_rcu_free(struct rcu_head *head)
+{
+ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+ struct shmid_kernel *shp = ipc_rcu_to_struct(p);
+
+ security_shm_free(shp);
+ ipc_rcu_free(head);
+}
+
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
ipc_rmid(&shm_ids(ns), &s->shm_perm);
@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
user_shm_unlock(file_inode(shp->shm_file)->i_size,
shp->mlock_user);
fput (shp->shm_file);
- security_shm_free(shp);
- ipc_rcu_putref(shp);
+ ipc_rcu_putref(shp, shm_rcu_free);
}
/*
@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_perm.security = NULL;
error = security_shm_alloc(shp);
if (error) {
- ipc_rcu_putref(shp);
+ ipc_rcu_putref(shp, ipc_rcu_free);
return error;
}
@@ -566,8 +574,7 @@ no_id:
user_shm_unlock(size, shp->mlock_user);
fput(file);
no_file:
- security_shm_free(shp);
- ipc_rcu_putref(shp);
+ ipc_rcu_putref(shp, shm_rcu_free);
return error;
}
diff --git a/ipc/util.c b/ipc/util.c
index e829da9ed01..fdb8ae74077 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size)
kfree(ptr);
}
-struct ipc_rcu {
- struct rcu_head rcu;
- atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
/**
* ipc_rcu_alloc - allocate ipc and rcu space
* @size: size desired
@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr)
return atomic_inc_not_zero(&p->refcount);
}
-/**
- * ipc_schedule_free - free ipc + rcu space
- * @head: RCU callback structure for queued work
- */
-static void ipc_schedule_free(struct rcu_head *head)
-{
- vfree(container_of(head, struct ipc_rcu, rcu));
-}
-
-void ipc_rcu_putref(void *ptr)
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
{
struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
if (!atomic_dec_and_test(&p->refcount))
return;
- if (is_vmalloc_addr(ptr)) {
- call_rcu(&p->rcu, ipc_schedule_free);
- } else {
- kfree_rcu(p, rcu);
- }
+ call_rcu(&p->rcu, func);
+}
+
+void ipc_rcu_free(struct rcu_head *head)
+{
+ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+
+ if (is_vmalloc_addr(p))
+ vfree(p);
+ else
+ kfree(p);
}
/**
diff --git a/ipc/util.h b/ipc/util.h
index c5f3338ba1f..f2f5036f2ee 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
static inline void shm_exit_ns(struct ipc_namespace *ns) { }
#endif
+struct ipc_rcu {
+ struct rcu_head rcu;
+ atomic_t refcount;
+} ____cacheline_aligned_in_smp;
+
+#define ipc_rcu_to_struct(p) ((void *)(p+1))
+
/*
* Structure that holds the parameters needed by the ipc operations
* (see after)
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
*/
void* ipc_rcu_alloc(int size);
int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr);
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
+void ipc_rcu_free(struct rcu_head *head);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
diff --git a/kernel/audit.c b/kernel/audit.c
index 91e53d04b6a..7b0e23a740c 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1117,9 +1117,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
sleep_time = timeout_start + audit_backlog_wait_time -
jiffies;
- if ((long)sleep_time > 0)
+ if ((long)sleep_time > 0) {
wait_for_auditd(sleep_time);
- continue;
+ continue;
+ }
}
if (audit_rate_check() && printk_ratelimit())
printk(KERN_WARNING
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dd236b66ca3..cb4238e85b3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event,
*running = ctx_time - event->tstamp_running;
}
+static void perf_event_init_userpage(struct perf_event *event)
+{
+ struct perf_event_mmap_page *userpg;
+ struct ring_buffer *rb;
+
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (!rb)
+ goto unlock;
+
+ userpg = rb->user_page;
+
+ /* Allow new userspace to detect that bit 0 is deprecated */
+ userpg->cap_bit0_is_deprecated = 1;
+ userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
+
+unlock:
+ rcu_read_unlock();
+}
+
void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
}
@@ -4044,6 +4064,7 @@ again:
ring_buffer_attach(event, rb);
rcu_assign_pointer(event->rb, rb);
+ perf_event_init_userpage(event);
perf_event_update_userpage(event);
unlock:
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 269ed9384cc..f813b347464 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -32,7 +32,14 @@ EXPORT_SYMBOL(cad_pid);
#endif
enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
-int reboot_default;
+/*
+ * This variable is used privately to keep track of whether or not
+ * reboot_type is still set to its default value (i.e., reboot= hasn't
+ * been set on the command line). This is needed so that we can
+ * suppress DMI scanning for reboot quirks. Without it, it's
+ * impossible to override a faulty reboot quirk without recompiling.
+ */
+int reboot_default = 1;
int reboot_cpu;
enum reboot_type reboot_type = BOOT_ACPI;
int reboot_force;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 11cd1366735..7c70201fbc6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4242,7 +4242,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
}
if (!se) {
- cfs_rq->h_load = rq->avg.load_avg_contrib;
+ cfs_rq->h_load = cfs_rq->runnable_load_avg;
cfs_rq->last_h_load_update = now;
}
@@ -4823,8 +4823,8 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
(busiest->load_per_task * SCHED_POWER_SCALE) /
busiest->group_power;
- if (busiest->avg_load - local->avg_load + scaled_busy_load_per_task >=
- (scaled_busy_load_per_task * imbn)) {
+ if (busiest->avg_load + scaled_busy_load_per_task >=
+ local->avg_load + (scaled_busy_load_per_task * imbn)) {
env->imbalance = busiest->load_per_task;
return;
}
@@ -4896,7 +4896,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* max load less than avg load(as we skip the groups at or below
* its cpu_power, while calculating max_load..)
*/
- if (busiest->avg_load < sds->avg_load) {
+ if (busiest->avg_load <= sds->avg_load ||
+ local->avg_load >= sds->avg_load) {
env->imbalance = 0;
return fix_small_imbalance(env, sds);
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51c4f34d258..4431610f049 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = {
.unpark = watchdog_enable,
};
-static int watchdog_enable_all_cpus(void)
+static void restart_watchdog_hrtimer(void *info)
+{
+ struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
+ int ret;
+
+ /*
+ * No need to cancel and restart hrtimer if it is currently executing
+ * because it will reprogram itself with the new period now.
+ * We should never see it unqueued here because we are running per-cpu
+ * with interrupts disabled.
+ */
+ ret = hrtimer_try_to_cancel(hrtimer);
+ if (ret == 1)
+ hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static void update_timers(int cpu)
+{
+ struct call_single_data data = {.func = restart_watchdog_hrtimer};
+ /*
+ * Make sure that perf event counter will adopt to a new
+ * sampling period. Updating the sampling period directly would
+ * be much nicer but we do not have an API for that now so
+ * let's use a big hammer.
+ * Hrtimer will adopt the new period on the next tick but this
+ * might be late already so we have to restart the timer as well.
+ */
+ watchdog_nmi_disable(cpu);
+ __smp_call_function_single(cpu, &data, 1);
+ watchdog_nmi_enable(cpu);
+}
+
+static void update_timers_all_cpus(void)
+{
+ int cpu;
+
+ get_online_cpus();
+ preempt_disable();
+ for_each_online_cpu(cpu)
+ update_timers(cpu);
+ preempt_enable();
+ put_online_cpus();
+}
+
+static int watchdog_enable_all_cpus(bool sample_period_changed)
{
int err = 0;
@@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void)
pr_err("Failed to create watchdog threads, disabled\n");
else
watchdog_running = 1;
+ } else if (sample_period_changed) {
+ update_timers_all_cpus();
}
return err;
@@ -520,13 +567,15 @@ int proc_dowatchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int err, old_thresh, old_enabled;
+ static DEFINE_MUTEX(watchdog_proc_mutex);
+ mutex_lock(&watchdog_proc_mutex);
old_thresh = ACCESS_ONCE(watchdog_thresh);
old_enabled = ACCESS_ONCE(watchdog_user_enabled);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (err || !write)
- return err;
+ goto out;
set_sample_period();
/*
@@ -535,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
* watchdog_*_all_cpus() function takes care of this.
*/
if (watchdog_user_enabled && watchdog_thresh)
- err = watchdog_enable_all_cpus();
+ err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
else
watchdog_disable_all_cpus();
@@ -544,7 +593,8 @@ int proc_dowatchdog(struct ctl_table *table, int write,
watchdog_thresh = old_thresh;
watchdog_user_enabled = old_enabled;
}
-
+out:
+ mutex_unlock(&watchdog_proc_mutex);
return err;
}
#endif /* CONFIG_SYSCTL */
@@ -554,5 +604,5 @@ void __init lockup_detector_init(void)
set_sample_period();
if (watchdog_user_enabled)
- watchdog_enable_all_cpus();
+ watchdog_enable_all_cpus(false);
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d5ff3ce1302..1c52ddbc839 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -39,6 +39,7 @@
#include <linux/limits.h>
#include <linux/export.h>
#include <linux/mutex.h>
+#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
@@ -160,6 +161,10 @@ struct mem_cgroup_per_zone {
struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
+ struct rb_node tree_node; /* RB tree node */
+ unsigned long long usage_in_excess;/* Set to the value by which */
+ /* the soft limit is exceeded*/
+ bool on_tree;
struct mem_cgroup *memcg; /* Back pointer, we cannot */
/* use container_of */
};
@@ -168,6 +173,26 @@ struct mem_cgroup_per_node {
struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
};
+/*
+ * Cgroups above their limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation
+ */
+
+struct mem_cgroup_tree_per_zone {
+ struct rb_root rb_root;
+ spinlock_t lock;
+};
+
+struct mem_cgroup_tree_per_node {
+ struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
+};
+
+struct mem_cgroup_tree {
+ struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
+};
+
+static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+
struct mem_cgroup_threshold {
struct eventfd_ctx *eventfd;
u64 threshold;
@@ -303,22 +328,6 @@ struct mem_cgroup {
atomic_t numainfo_events;
atomic_t numainfo_updating;
#endif
- /*
- * Protects soft_contributed transitions.
- * See mem_cgroup_update_soft_limit
- */
- spinlock_t soft_lock;
-
- /*
- * If true then this group has increased parents' children_in_excess
- * when it got over the soft limit.
- * When a group falls bellow the soft limit, parents' children_in_excess
- * is decreased and soft_contributed changed to false.
- */
- bool soft_contributed;
-
- /* Number of children that are in soft limit excess */
- atomic_t children_in_excess;
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
@@ -422,6 +431,7 @@ static bool move_file(void)
* limit reclaim to prevent infinite loops, if they ever occur.
*/
#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100
+#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -648,6 +658,164 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
return mem_cgroup_zoneinfo(memcg, nid, zid);
}
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_node_zone(int nid, int zid)
+{
+ return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_from_page(struct page *page)
+{
+ int nid = page_to_nid(page);
+ int zid = page_zonenum(page);
+
+ return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static void
+__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz,
+ unsigned long long new_usage_in_excess)
+{
+ struct rb_node **p = &mctz->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct mem_cgroup_per_zone *mz_node;
+
+ if (mz->on_tree)
+ return;
+
+ mz->usage_in_excess = new_usage_in_excess;
+ if (!mz->usage_in_excess)
+ return;
+ while (*p) {
+ parent = *p;
+ mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+ tree_node);
+ if (mz->usage_in_excess < mz_node->usage_in_excess)
+ p = &(*p)->rb_left;
+ /*
+ * We can't avoid mem cgroups that are over their soft
+ * limit by the same amount
+ */
+ else if (mz->usage_in_excess >= mz_node->usage_in_excess)
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&mz->tree_node, parent, p);
+ rb_insert_color(&mz->tree_node, &mctz->rb_root);
+ mz->on_tree = true;
+}
+
+static void
+__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ if (!mz->on_tree)
+ return;
+ rb_erase(&mz->tree_node, &mctz->rb_root);
+ mz->on_tree = false;
+}
+
+static void
+mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
+ struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
+{
+ spin_lock(&mctz->lock);
+ __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ spin_unlock(&mctz->lock);
+}
+
+
+static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
+{
+ unsigned long long excess;
+ struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_tree_per_zone *mctz;
+ int nid = page_to_nid(page);
+ int zid = page_zonenum(page);
+ mctz = soft_limit_tree_from_page(page);
+
+ /*
+ * Necessary to update all ancestors when hierarchy is used.
+ * because their event counter is not touched.
+ */
+ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+ mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+ excess = res_counter_soft_limit_excess(&memcg->res);
+ /*
+ * We have to update the tree if mz is on RB-tree or
+ * mem is over its softlimit.
+ */
+ if (excess || mz->on_tree) {
+ spin_lock(&mctz->lock);
+ /* if on-tree, remove it */
+ if (mz->on_tree)
+ __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ /*
+ * Insert again. mz->usage_in_excess will be updated.
+ * If excess is 0, no tree ops.
+ */
+ __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
+ spin_unlock(&mctz->lock);
+ }
+ }
+}
+
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
+{
+ int node, zone;
+ struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_tree_per_zone *mctz;
+
+ for_each_node(node) {
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ mz = mem_cgroup_zoneinfo(memcg, node, zone);
+ mctz = soft_limit_tree_node_zone(node, zone);
+ mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ }
+ }
+}
+
+static struct mem_cgroup_per_zone *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct rb_node *rightmost = NULL;
+ struct mem_cgroup_per_zone *mz;
+
+retry:
+ mz = NULL;
+ rightmost = rb_last(&mctz->rb_root);
+ if (!rightmost)
+ goto done; /* Nothing to reclaim from */
+
+ mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+ /*
+ * Remove the node now but someone else can add it back,
+ * we will to add it back at the end of reclaim to its correct
+ * position in the tree.
+ */
+ __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+ if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
+ !css_tryget(&mz->memcg->css))
+ goto retry;
+done:
+ return mz;
+}
+
+static struct mem_cgroup_per_zone *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+ struct mem_cgroup_per_zone *mz;
+
+ spin_lock(&mctz->lock);
+ mz = __mem_cgroup_largest_soft_limit_node(mctz);
+ spin_unlock(&mctz->lock);
+ return mz;
+}
+
/*
* Implementation Note: reading percpu statistics for memcg.
*
@@ -822,48 +990,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
}
/*
- * Called from rate-limited memcg_check_events when enough
- * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure
- * that all the parents up the hierarchy will be notified that this group
- * is in excess or that it is not in excess anymore. mmecg->soft_contributed
- * makes the transition a single action whenever the state flips from one to
- * the other.
- */
-static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
-{
- unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
- struct mem_cgroup *parent = memcg;
- int delta = 0;
-
- spin_lock(&memcg->soft_lock);
- if (excess) {
- if (!memcg->soft_contributed) {
- delta = 1;
- memcg->soft_contributed = true;
- }
- } else {
- if (memcg->soft_contributed) {
- delta = -1;
- memcg->soft_contributed = false;
- }
- }
-
- /*
- * Necessary to update all ancestors when hierarchy is used
- * because their event counter is not touched.
- * We track children even outside the hierarchy for the root
- * cgroup because tree walk starting at root should visit
- * all cgroups and we want to prevent from pointless tree
- * walk if no children is below the limit.
- */
- while (delta && (parent = parent_mem_cgroup(parent)))
- atomic_add(delta, &parent->children_in_excess);
- if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
- atomic_add(delta, &root_mem_cgroup->children_in_excess);
- spin_unlock(&memcg->soft_lock);
-}
-
-/*
* Check events in order.
*
*/
@@ -886,7 +1012,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
mem_cgroup_threshold(memcg);
if (unlikely(do_softlimit))
- mem_cgroup_update_soft_limit(memcg);
+ mem_cgroup_update_tree(memcg, page);
#if MAX_NUMNODES > 1
if (unlikely(do_numainfo))
atomic_inc(&memcg->numainfo_events);
@@ -929,15 +1055,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
return memcg;
}
-static enum mem_cgroup_filter_t
-mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
- mem_cgroup_iter_filter cond)
-{
- if (!cond)
- return VISIT;
- return cond(memcg, root);
-}
-
/*
* Returns a next (in a pre-order walk) alive memcg (with elevated css
* ref. count) or NULL if the whole root's subtree has been visited.
@@ -945,7 +1062,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
* helper function to be used by mem_cgroup_iter
*/
static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
- struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
+ struct mem_cgroup *last_visited)
{
struct cgroup_subsys_state *prev_css, *next_css;
@@ -963,31 +1080,11 @@ skip_node:
if (next_css) {
struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
- switch (mem_cgroup_filter(mem, root, cond)) {
- case SKIP:
+ if (css_tryget(&mem->css))
+ return mem;
+ else {
prev_css = next_css;
goto skip_node;
- case SKIP_TREE:
- if (mem == root)
- return NULL;
- /*
- * css_rightmost_descendant is not an optimal way to
- * skip through a subtree (especially for imbalanced
- * trees leaning to right) but that's what we have right
- * now. More effective solution would be traversing
- * right-up for first non-NULL without calling
- * css_next_descendant_pre afterwards.
- */
- prev_css = css_rightmost_descendant(next_css);
- goto skip_node;
- case VISIT:
- if (css_tryget(&mem->css))
- return mem;
- else {
- prev_css = next_css;
- goto skip_node;
- }
- break;
}
}
@@ -1051,7 +1148,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
* @root: hierarchy root
* @prev: previously returned memcg, NULL on first invocation
* @reclaim: cookie for shared reclaim walks, NULL for full walks
- * @cond: filter for visited nodes, NULL for no filter
*
* Returns references to children of the hierarchy below @root, or
* @root itself, or %NULL after a full round-trip.
@@ -1064,18 +1160,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
* divide up the memcgs in the hierarchy among all concurrent
* reclaimers operating on the same zone and priority.
*/
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim,
- mem_cgroup_iter_filter cond)
+ struct mem_cgroup_reclaim_cookie *reclaim)
{
struct mem_cgroup *memcg = NULL;
struct mem_cgroup *last_visited = NULL;
- if (mem_cgroup_disabled()) {
- /* first call must return non-NULL, second return NULL */
- return (struct mem_cgroup *)(unsigned long)!prev;
- }
+ if (mem_cgroup_disabled())
+ return NULL;
if (!root)
root = root_mem_cgroup;
@@ -1086,9 +1179,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
if (!root->use_hierarchy && root != root_mem_cgroup) {
if (prev)
goto out_css_put;
- if (mem_cgroup_filter(root, root, cond) == VISIT)
- return root;
- return NULL;
+ return root;
}
rcu_read_lock();
@@ -1111,7 +1202,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
last_visited = mem_cgroup_iter_load(iter, root, &seq);
}
- memcg = __mem_cgroup_iter_next(root, last_visited, cond);
+ memcg = __mem_cgroup_iter_next(root, last_visited);
if (reclaim) {
mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1122,11 +1213,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
reclaim->generation = iter->generation;
}
- /*
- * We have finished the whole tree walk or no group has been
- * visited because filter told us to skip the root node.
- */
- if (!memcg && (prev || (cond && !last_visited)))
+ if (prev && !memcg)
goto out_unlock;
}
out_unlock:
@@ -1767,7 +1854,6 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
return total;
}
-#if MAX_NUMNODES > 1
/**
* test_mem_cgroup_node_reclaimable
* @memcg: the target memcg
@@ -1790,6 +1876,7 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
return false;
}
+#if MAX_NUMNODES > 1
/*
* Always updating the nodemask is not very good - even if we have an empty
@@ -1857,50 +1944,104 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
return node;
}
+/*
+ * Check all nodes whether it contains reclaimable pages or not.
+ * For quick scan, we make use of scan_nodes. This will allow us to skip
+ * unused nodes. But scan_nodes is lazily updated and may not cotain
+ * enough new information. We need to do double check.
+ */
+static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
+{
+ int nid;
+
+ /*
+ * quick check...making use of scan_node.
+ * We can skip unused nodes.
+ */
+ if (!nodes_empty(memcg->scan_nodes)) {
+ for (nid = first_node(memcg->scan_nodes);
+ nid < MAX_NUMNODES;
+ nid = next_node(nid, memcg->scan_nodes)) {
+
+ if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
+ return true;
+ }
+ }
+ /*
+ * Check rest of nodes.
+ */
+ for_each_node_state(nid, N_MEMORY) {
+ if (node_isset(nid, memcg->scan_nodes))
+ continue;
+ if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
+ return true;
+ }
+ return false;
+}
+
#else
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
{
return 0;
}
-#endif
-
-/*
- * A group is eligible for the soft limit reclaim under the given root
- * hierarchy if
- * a) it is over its soft limit
- * b) any parent up the hierarchy is over its soft limit
- *
- * If the given group doesn't have any children over the limit then it
- * doesn't make any sense to iterate its subtree.
- */
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
- struct mem_cgroup *root)
+static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
{
- struct mem_cgroup *parent;
-
- if (!memcg)
- memcg = root_mem_cgroup;
- parent = memcg;
-
- if (res_counter_soft_limit_excess(&memcg->res))
- return VISIT;
+ return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
+}
+#endif
- /*
- * If any parent up to the root in the hierarchy is over its soft limit
- * then we have to obey and reclaim from this group as well.
- */
- while ((parent = parent_mem_cgroup(parent))) {
- if (res_counter_soft_limit_excess(&parent->res))
- return VISIT;
- if (parent == root)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+ struct zone *zone,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
+{
+ struct mem_cgroup *victim = NULL;
+ int total = 0;
+ int loop = 0;
+ unsigned long excess;
+ unsigned long nr_scanned;
+ struct mem_cgroup_reclaim_cookie reclaim = {
+ .zone = zone,
+ .priority = 0,
+ };
+
+ excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+ while (1) {
+ victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+ if (!victim) {
+ loop++;
+ if (loop >= 2) {
+ /*
+ * If we have not been able to reclaim
+ * anything, it might because there are
+ * no reclaimable pages under this hierarchy
+ */
+ if (!total)
+ break;
+ /*
+ * We want to do more targeted reclaim.
+ * excess >> 2 is not to excessive so as to
+ * reclaim too much, nor too less that we keep
+ * coming back to reclaim from this cgroup
+ */
+ if (total >= (excess >> 2) ||
+ (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+ break;
+ }
+ continue;
+ }
+ if (!mem_cgroup_reclaimable(victim, false))
+ continue;
+ total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+ zone, &nr_scanned);
+ *total_scanned += nr_scanned;
+ if (!res_counter_soft_limit_excess(&root_memcg->res))
break;
}
-
- if (!atomic_read(&memcg->children_in_excess))
- return SKIP_TREE;
- return SKIP;
+ mem_cgroup_iter_break(root_memcg, victim);
+ return total;
}
static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -2812,7 +2953,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
unlock_page_cgroup(pc);
/*
- * "charge_statistics" updated event counter.
+ * "charge_statistics" updated event counter. Then, check it.
+ * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+ * if they exceeds softlimit.
*/
memcg_check_events(memcg, page);
}
@@ -4647,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
return ret;
}
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
+{
+ unsigned long nr_reclaimed = 0;
+ struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+ unsigned long reclaimed;
+ int loop = 0;
+ struct mem_cgroup_tree_per_zone *mctz;
+ unsigned long long excess;
+ unsigned long nr_scanned;
+
+ if (order > 0)
+ return 0;
+
+ mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+ /*
+ * This loop can run a while, specially if mem_cgroup's continuously
+ * keep exceeding their soft limit and putting the system under
+ * pressure
+ */
+ do {
+ if (next_mz)
+ mz = next_mz;
+ else
+ mz = mem_cgroup_largest_soft_limit_node(mctz);
+ if (!mz)
+ break;
+
+ nr_scanned = 0;
+ reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+ gfp_mask, &nr_scanned);
+ nr_reclaimed += reclaimed;
+ *total_scanned += nr_scanned;
+ spin_lock(&mctz->lock);
+
+ /*
+ * If we failed to reclaim anything from this memory cgroup
+ * it is time to move on to the next cgroup
+ */
+ next_mz = NULL;
+ if (!reclaimed) {
+ do {
+ /*
+ * Loop until we find yet another one.
+ *
+ * By the time we get the soft_limit lock
+ * again, someone might have aded the
+ * group back on the RB tree. Iterate to
+ * make sure we get a different mem.
+ * mem_cgroup_largest_soft_limit_node returns
+ * NULL if no other cgroup is present on
+ * the tree
+ */
+ next_mz =
+ __mem_cgroup_largest_soft_limit_node(mctz);
+ if (next_mz == mz)
+ css_put(&next_mz->memcg->css);
+ else /* next_mz == NULL or other memcg */
+ break;
+ } while (1);
+ }
+ __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+ excess = res_counter_soft_limit_excess(&mz->memcg->res);
+ /*
+ * One school of thought says that we should not add
+ * back the node to the tree if reclaim returns 0.
+ * But our reclaim could return 0, simply because due
+ * to priority we are exposing a smaller subset of
+ * memory to reclaim from. Consider this as a longer
+ * term TODO.
+ */
+ /* If excess == 0, no tree ops */
+ __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+ spin_unlock(&mctz->lock);
+ css_put(&mz->memcg->css);
+ loop++;
+ /*
+ * Could not reclaim anything and there are no more
+ * mem cgroups to try or we seem to be looping without
+ * reclaiming anything.
+ */
+ if (!nr_reclaimed &&
+ (next_mz == NULL ||
+ loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+ break;
+ } while (!nr_reclaimed);
+ if (next_mz)
+ css_put(&next_mz->memcg->css);
+ return nr_reclaimed;
+}
+
/**
* mem_cgroup_force_empty_list - clears LRU of a group
* @memcg: group to clear
@@ -5911,6 +6146,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
mz = &pn->zoneinfo[zone];
lruvec_init(&mz->lruvec);
+ mz->usage_in_excess = 0;
+ mz->on_tree = false;
mz->memcg = memcg;
}
memcg->nodeinfo[node] = pn;
@@ -5966,6 +6203,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
int node;
size_t size = memcg_size();
+ mem_cgroup_remove_from_trees(memcg);
free_css_id(&mem_cgroup_subsys, &memcg->css);
for_each_node(node)
@@ -6002,6 +6240,29 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
}
EXPORT_SYMBOL(parent_mem_cgroup);
+static void __init mem_cgroup_soft_limit_tree_init(void)
+{
+ struct mem_cgroup_tree_per_node *rtpn;
+ struct mem_cgroup_tree_per_zone *rtpz;
+ int tmp, node, zone;
+
+ for_each_node(node) {
+ tmp = node;
+ if (!node_state(node, N_NORMAL_MEMORY))
+ tmp = -1;
+ rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
+ BUG_ON(!rtpn);
+
+ soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ rtpz = &rtpn->rb_tree_per_zone[zone];
+ rtpz->rb_root = RB_ROOT;
+ spin_lock_init(&rtpz->lock);
+ }
+ }
+}
+
static struct cgroup_subsys_state * __ref
mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
@@ -6031,7 +6292,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock);
vmpressure_init(&memcg->vmpressure);
- spin_lock_init(&memcg->soft_lock);
return &memcg->css;
@@ -6109,13 +6369,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
mem_cgroup_invalidate_reclaim_iterators(memcg);
mem_cgroup_reparent_charges(memcg);
- if (memcg->soft_contributed) {
- while ((memcg = parent_mem_cgroup(memcg)))
- atomic_dec(&memcg->children_in_excess);
-
- if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
- atomic_dec(&root_mem_cgroup->children_in_excess);
- }
mem_cgroup_destroy_all_caches(memcg);
vmpressure_cleanup(&memcg->vmpressure);
}
@@ -6790,6 +7043,7 @@ static int __init mem_cgroup_init(void)
{
hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
enable_swap_cgroup();
+ mem_cgroup_soft_limit_tree_init();
memcg_stock_init();
return 0;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index d6380266324..67ba6da7d0e 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -736,6 +736,7 @@ static int do_mlockall(int flags)
/* Ignore errors */
mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
+ cond_resched();
}
out:
return 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8ed1b775bdc..beb35778c69 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,23 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
{
return !sc->target_mem_cgroup;
}
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
- struct mem_cgroup *root = sc->target_mem_cgroup;
- return !mem_cgroup_disabled() &&
- mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE;
-}
#else
static bool global_reclaim(struct scan_control *sc)
{
return true;
}
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
- return false;
-}
#endif
unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2176,11 +2164,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static int
-__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
- int groups_scanned = 0;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2188,17 +2174,15 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
.zone = zone,
.priority = sc->priority,
};
- struct mem_cgroup *memcg = NULL;
- mem_cgroup_iter_filter filter = (soft_reclaim) ?
- mem_cgroup_soft_reclaim_eligible : NULL;
+ struct mem_cgroup *memcg;
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
- while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
+ memcg = mem_cgroup_iter(root, NULL, &reclaim);
+ do {
struct lruvec *lruvec;
- groups_scanned++;
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
shrink_lruvec(lruvec, sc);
@@ -2218,7 +2202,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
mem_cgroup_iter_break(root, memcg);
break;
}
- }
+ memcg = mem_cgroup_iter(root, memcg, &reclaim);
+ } while (memcg);
vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
sc->nr_scanned - nr_scanned,
@@ -2226,37 +2211,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
-
- return groups_scanned;
-}
-
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
- bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
- unsigned long nr_scanned = sc->nr_scanned;
- int scanned_groups;
-
- scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim);
- /*
- * memcg iterator might race with other reclaimer or start from
- * a incomplete tree walk so the tree walk in __shrink_zone
- * might have missed groups that are above the soft limit. Try
- * another loop to catch up with others. Do it just once to
- * prevent from reclaim latencies when other reclaimers always
- * preempt this one.
- */
- if (do_soft_reclaim && !scanned_groups)
- __shrink_zone(zone, sc, do_soft_reclaim);
-
- /*
- * No group is over the soft limit or those that are do not have
- * pages in the zone we are reclaiming so we have to reclaim everybody
- */
- if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
- __shrink_zone(zone, sc, false);
- return;
- }
}
/* Returns true if compaction should go ahead for a high-order request */
@@ -2320,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
struct zoneref *z;
struct zone *zone;
+ unsigned long nr_soft_reclaimed;
+ unsigned long nr_soft_scanned;
bool aborted_reclaim = false;
/*
@@ -2359,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
continue;
}
}
+ /*
+ * This steals pages from memory cgroups over softlimit
+ * and returns the number of reclaimed pages and
+ * scanned pages. This works for global memory pressure
+ * and balancing, not for a memcg's limit.
+ */
+ nr_soft_scanned = 0;
+ nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+ sc->order, sc->gfp_mask,
+ &nr_soft_scanned);
+ sc->nr_reclaimed += nr_soft_reclaimed;
+ sc->nr_scanned += nr_soft_scanned;
/* need some check for avoid more shrink_zone() */
}
@@ -2952,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
{
int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
+ unsigned long nr_soft_reclaimed;
+ unsigned long nr_soft_scanned;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
@@ -3066,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
sc.nr_scanned = 0;
+ nr_soft_scanned = 0;
+ /*
+ * Call soft limit reclaim before calling shrink_zone.
+ */
+ nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+ order, sc.gfp_mask,
+ &nr_soft_scanned);
+ sc.nr_reclaimed += nr_soft_reclaimed;
+
/*
* There should be no need to raise the scanning
* priority if enough pages are already being scanned
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 47016c304c8..66cad506b8a 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3975,8 +3975,8 @@ sub string_find_replace {
# check for new externs in .h files.
if ($realfile =~ /\.h$/ &&
$line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) {
- if (WARN("AVOID_EXTERNS",
- "extern prototypes should be avoided in .h files\n" . $herecurr) &&
+ if (CHK("AVOID_EXTERNS",
+ "extern prototypes should be avoided in .h files\n" . $herecurr) &&
$fix) {
$fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
}
diff --git a/tools/lib/lk/debugfs.c b/tools/lib/lk/debugfs.c
index 099e7cd022e..7c434796235 100644
--- a/tools/lib/lk/debugfs.c
+++ b/tools/lib/lk/debugfs.c
@@ -5,7 +5,6 @@
#include <stdbool.h>
#include <sys/vfs.h>
#include <sys/mount.h>
-#include <linux/magic.h>
#include <linux/kernel.h>
#include "debugfs.h"
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 9570c2b0f83..b2519e49424 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
struct perf_tsc_conversion *tc)
{
- bool cap_usr_time_zero;
+ bool cap_user_time_zero;
u32 seq;
int i = 0;
@@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
tc->time_mult = pc->time_mult;
tc->time_shift = pc->time_shift;
tc->time_zero = pc->time_zero;
- cap_usr_time_zero = pc->cap_usr_time_zero;
+ cap_user_time_zero = pc->cap_user_time_zero;
rmb();
if (pc->lock == seq && !(seq & 1))
break;
@@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
}
}
- if (!cap_usr_time_zero)
+ if (!cap_user_time_zero)
return -EOPNOTSUPP;
return 0;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 423875c999b..afe377b2884 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -321,8 +321,6 @@ found:
return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
-extern volatile int session_done;
-
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8e50d8d7741..72eae7498c0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -401,8 +401,6 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
return 0;
}
-extern volatile int session_done;
-
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
@@ -568,6 +566,9 @@ static int __cmd_report(struct perf_report *rep)
}
}
+ if (session_done())
+ return 0;
+
if (nr_samples == 0) {
ui__error("The %s file has no samples!\n", session->filename);
return 0;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7f31a3ded1b..9c333ff3dfe 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -553,8 +553,6 @@ static struct perf_tool perf_script = {
.ordering_requires_timestamps = true,
};
-extern volatile int session_done;
-
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f5aa6375e3e..fd485340472 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -16,6 +16,23 @@
#include <sys/mman.h>
#include <linux/futex.h>
+/* For older distros: */
+#ifndef MAP_STACK
+# define MAP_STACK 0x20000
+#endif
+
+#ifndef MADV_HWPOISON
+# define MADV_HWPOISON 100
+#endif
+
+#ifndef MADV_MERGEABLE
+# define MADV_MERGEABLE 12
+#endif
+
+#ifndef MADV_UNMERGEABLE
+# define MADV_UNMERGEABLE 13
+#endif
+
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
unsigned long arg,
u8 arg_idx __maybe_unused,
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 214e17e97e5..346ee929d25 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -180,6 +180,9 @@ FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
CFLAGS += -DLIBELF_MMAP
endif
+ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y)
+ CFLAGS += -DHAVE_ELF_GETPHDRNUM
+endif
# include ARCH specific config
-include $(src-perf)/arch/$(ARCH)/Makefile
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index 708fb8e9822..d5a8dd44945 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -61,6 +61,15 @@ int main(void)
}
endef
+define SOURCE_ELF_GETPHDRNUM
+#include <libelf.h>
+int main(void)
+{
+ size_t dst;
+ return elf_getphdrnum(0, &dst);
+}
+endef
+
ifndef NO_SLANG
define SOURCE_SLANG
#include <slang.h>
@@ -210,6 +219,7 @@ define SOURCE_LIBAUDIT
int main(void)
{
+ printf(\"error message: %s\n\", audit_errno_to_name(0));
return audit_open();
}
endef
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index bfc5a27597d..7eae5488ece 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -809,7 +809,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
end = map__rip_2objdump(map, sym->end);
offset = line_ip - start;
- if (offset < 0 || (u64)line_ip > end)
+ if ((u64)line_ip < start || (u64)line_ip > end)
offset = -1;
else
parsed_line = tmp2 + 1;
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 3e5f5430a28..e23bde19d59 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -263,6 +263,21 @@ bool die_is_signed_type(Dwarf_Die *tp_die)
}
/**
+ * die_is_func_def - Ensure that this DIE is a subprogram and definition
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is a subprogram and NOT a declaration. This
+ * returns true if @dw_die is a function definition.
+ **/
+bool die_is_func_def(Dwarf_Die *dw_die)
+{
+ Dwarf_Attribute attr;
+
+ return (dwarf_tag(dw_die) == DW_TAG_subprogram &&
+ dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL);
+}
+
+/**
* die_get_data_member_location - Get the data-member offset
* @mb_die: a DIE of a member of a data structure
* @offs: The offset of the member in the data structure
@@ -392,6 +407,10 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
{
struct __addr_die_search_param *ad = data;
+ /*
+ * Since a declaration entry doesn't has given pc, this always returns
+ * function definition entry.
+ */
if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
dwarf_haspc(fn_die, ad->addr)) {
memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 6ce1717784b..8658d41697d 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -38,6 +38,9 @@ extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
int (*callback)(Dwarf_Die *, void *), void *data);
+/* Ensure that this DIE is a subprogram and definition (not declaration) */
+extern bool die_is_func_def(Dwarf_Die *dw_die);
+
/* Compare diename and tname */
extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 26441d0e571..ce69901176d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -199,9 +199,11 @@ static int write_buildid(char *name, size_t name_len, u8 *build_id,
return write_padded(fd, name, name_len + 1, len);
}
-static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
- u16 misc, int fd)
+static int __dsos__write_buildid_table(struct list_head *head,
+ struct machine *machine,
+ pid_t pid, u16 misc, int fd)
{
+ char nm[PATH_MAX];
struct dso *pos;
dsos__for_each_with_build_id(pos, head) {
@@ -215,6 +217,10 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
if (is_vdso_map(pos->short_name)) {
name = (char *) VDSO__MAP_NAME;
name_len = sizeof(VDSO__MAP_NAME) + 1;
+ } else if (dso__is_kcore(pos)) {
+ machine__mmap_name(machine, nm, sizeof(nm));
+ name = nm;
+ name_len = strlen(nm) + 1;
} else {
name = pos->long_name;
name_len = pos->long_name_len + 1;
@@ -240,10 +246,10 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
umisc = PERF_RECORD_MISC_GUEST_USER;
}
- err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
- kmisc, fd);
+ err = __dsos__write_buildid_table(&machine->kernel_dsos, machine,
+ machine->pid, kmisc, fd);
if (err == 0)
- err = __dsos__write_buildid_table(&machine->user_dsos,
+ err = __dsos__write_buildid_table(&machine->user_dsos, machine,
machine->pid, umisc, fd);
return err;
}
@@ -375,23 +381,31 @@ out_free:
return err;
}
-static int dso__cache_build_id(struct dso *dso, const char *debugdir)
+static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+ const char *debugdir)
{
bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
bool is_vdso = is_vdso_map(dso->short_name);
+ char *name = dso->long_name;
+ char nm[PATH_MAX];
- return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
- dso->long_name, debugdir,
- is_kallsyms, is_vdso);
+ if (dso__is_kcore(dso)) {
+ is_kallsyms = true;
+ machine__mmap_name(machine, nm, sizeof(nm));
+ name = nm;
+ }
+ return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
+ debugdir, is_kallsyms, is_vdso);
}
-static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
+static int __dsos__cache_build_ids(struct list_head *head,
+ struct machine *machine, const char *debugdir)
{
struct dso *pos;
int err = 0;
dsos__for_each_with_build_id(pos, head)
- if (dso__cache_build_id(pos, debugdir))
+ if (dso__cache_build_id(pos, machine, debugdir))
err = -1;
return err;
@@ -399,8 +413,9 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
{
- int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
- ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
+ int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine,
+ debugdir);
+ ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir);
return ret;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 46a0d35a05e..9ff6cf3e9a9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -611,6 +611,8 @@ void hists__collapse_resort(struct hists *hists)
next = rb_first(root);
while (next) {
+ if (session_done())
+ break;
n = rb_entry(next, struct hist_entry, rb_node_in);
next = rb_next(&n->rb_node_in);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index be0329394d5..20c7299a9d4 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -734,7 +734,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
}
/* If not a real subprogram, find a real one */
- if (dwarf_tag(sc_die) != DW_TAG_subprogram) {
+ if (!die_is_func_def(sc_die)) {
if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
pr_warning("Failed to find probe point in any "
"functions.\n");
@@ -980,12 +980,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
struct dwarf_callback_param *param = data;
struct probe_finder *pf = param->data;
struct perf_probe_point *pp = &pf->pev->point;
- Dwarf_Attribute attr;
/* Check tag and diename */
- if (dwarf_tag(sp_die) != DW_TAG_subprogram ||
- !die_compare_name(sp_die, pp->function) ||
- dwarf_attr(sp_die, DW_AT_declaration, &attr))
+ if (!die_is_func_def(sp_die) ||
+ !die_compare_name(sp_die, pp->function))
return DWARF_CB_OK;
/* Check declared file */
@@ -1474,7 +1472,7 @@ static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
return 0;
}
-/* Search function from function name */
+/* Search function definition from function name */
static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
{
struct dwarf_callback_param *param = data;
@@ -1485,7 +1483,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
return DWARF_CB_OK;
- if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
+ if (die_is_func_def(sp_die) &&
die_compare_name(sp_die, lr->function)) {
lf->fname = dwarf_decl_file(sp_die);
dwarf_decl_line(sp_die, &lr->offset);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 51f5edf2a6d..70ffa41518f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -531,6 +531,9 @@ static int flush_sample_queue(struct perf_session *s,
return 0;
list_for_each_entry_safe(iter, tmp, head, list) {
+ if (session_done())
+ return 0;
+
if (iter->timestamp > limit)
break;
@@ -1160,7 +1163,6 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
}
}
-#define session_done() (*(volatile int *)(&session_done))
volatile int session_done;
static int __perf_session__process_pipe_events(struct perf_session *self,
@@ -1372,10 +1374,13 @@ more:
"Processing events...");
}
+ err = 0;
+ if (session_done())
+ goto out_err;
+
if (file_pos < file_size)
goto more;
- err = 0;
/* do the final flush for ordered samples */
session->ordered_samples.next_flush = ULLONG_MAX;
err = flush_sample_queue(session, tool);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 3aa75fb2225..04bf7373a7e 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -124,4 +124,8 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session,
#define perf_session__set_tracepoints_handlers(session, array) \
__perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
+
+extern volatile int session_done;
+
+#define session_done() (*(volatile int *)(&session_done))
#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a7b9ab55738..a9c829be521 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -8,6 +8,22 @@
#include "symbol.h"
#include "debug.h"
+#ifndef HAVE_ELF_GETPHDRNUM
+static int elf_getphdrnum(Elf *elf, size_t *dst)
+{
+ GElf_Ehdr gehdr;
+ GElf_Ehdr *ehdr;
+
+ ehdr = gelf_getehdr(elf, &gehdr);
+ if (!ehdr)
+ return -1;
+
+ *dst = ehdr->e_phnum;
+
+ return 0;
+}
+#endif
+
#ifndef NT_GNU_BUILD_ID
#define NT_GNU_BUILD_ID 3
#endif
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index fe7a27d67d2..e9e1c03f927 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -186,7 +186,7 @@ void parse_proc_kallsyms(struct pevent *pevent,
char *next = NULL;
char *addr_str;
char *mod;
- char *fmt;
+ char *fmt = NULL;
line = strtok_r(file, "\n", &next);
while (line) {