aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt3
-rw-r--r--arch/arm/include/asm/pmu.h12
-rw-r--r--arch/arm/include/asm/topology.h3
-rw-r--r--arch/arm/kernel/hw_breakpoint.c3
-rw-r--r--arch/arm/kernel/perf_event.c19
-rw-r--r--arch/arm/kernel/perf_event_cpu.c117
-rw-r--r--arch/arm/kernel/perf_event_v7.c57
-rw-r--r--arch/arm/kernel/topology.c27
-rw-r--r--include/linux/vmstat.h2
-rw-r--r--mm/vmstat.c95
10 files changed, 289 insertions, 49 deletions
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 343781b9f24..4ce82d045a6 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -16,6 +16,9 @@ Required properties:
"arm,arm1176-pmu"
"arm,arm1136-pmu"
- interrupts : 1 combined interrupt or 1 per core.
+- cluster : a phandle to the cluster to which it belongs
+ If there are more than one cluster with same CPU type
+ then there should be separate PMU nodes per cluster.
Example:
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f24edad26c7..0cd7824ca76 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -62,9 +62,19 @@ struct pmu_hw_events {
raw_spinlock_t pmu_lock;
};
+struct cpupmu_regs {
+ u32 pmc;
+ u32 pmcntenset;
+ u32 pmuseren;
+ u32 pmintenset;
+ u32 pmxevttype[8];
+ u32 pmxevtcnt[8];
+};
+
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
+ cpumask_t valid_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct perf_event *event);
@@ -81,6 +91,8 @@ struct arm_pmu {
int (*request_irq)(struct arm_pmu *, irq_handler_t handler);
void (*free_irq)(struct arm_pmu *);
int (*map_event)(struct perf_event *event);
+ void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
+ void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 5692ba11322..983fa7c153a 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -26,6 +26,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
/* Common values for CPUs */
@@ -62,6 +63,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
static inline void init_cpu_topology(void) { }
static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+ cpumask_t *cluster_mask) { return -EINVAL; }
#endif
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 1fd749ee4a1..1b803117ed9 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = {
static void __init pm_init(void)
{
- cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+ if (has_ossr)
+ cpu_pm_register_notifier(&dbg_cpu_pm_nb);
}
#else
static inline void pm_init(void)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8c3094d0f7b..d847c622a7b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
+#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -81,6 +82,9 @@ armpmu_map_event(struct perf_event *event,
return armpmu_map_cache_event(cache_map, config);
case PERF_TYPE_RAW:
return armpmu_map_raw_event(raw_event_mask, config);
+ default:
+ if (event->attr.type >= PERF_TYPE_MAX)
+ return armpmu_map_raw_event(raw_event_mask, config);
}
return -ENOENT;
@@ -158,6 +162,8 @@ armpmu_stop(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
@@ -174,6 +180,8 @@ static void armpmu_start(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
@@ -201,6 +209,9 @@ armpmu_del(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
armpmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);
@@ -217,6 +228,10 @@ armpmu_add(struct perf_event *event, int flags)
int idx;
int err = 0;
+ /* An event following a process won't be stopped earlier */
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return 0;
+
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
@@ -416,6 +431,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
+ if (event->cpu != -1 &&
+ !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
+ return -ENOENT;
+
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e3dbc..0b48a38e3cf 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,6 +19,7 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of.h>
@@ -31,33 +32,36 @@
#include <asm/pmu.h>
/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+
/*
* Despite the names, these two functions are CPU-specific and are used
* by the OProfile/perf code.
*/
const char *perf_pmu_name(void)
{
- if (!cpu_pmu)
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ if (!pmu)
return NULL;
- return cpu_pmu->name;
+ return pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
- int max_events = 0;
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
- if (cpu_pmu != NULL)
- max_events = cpu_pmu->num_events;
+ if (!pmu)
+ return 0;
- return max_events;
+ return pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);
@@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
{
int i, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
+ int cpu = -1;
irqs = min(pmu_device->num_resources, num_possible_cpus());
for (i = 0; i < irqs; ++i) {
- if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+ cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
+ if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
if (irq >= 0)
@@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
+ int cpu = -1;
if (!pmu_device)
return -ENODEV;
@@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
for (i = 0; i < irqs; ++i) {
err = 0;
+ cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
irq = platform_get_irq(pmu_device, i);
if (irq < 0)
continue;
@@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
* assume that we're running on a uniprocessor machine and
* continue. Otherwise, continue without this interrupt.
*/
- if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+ if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
irq, i);
continue;
@@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
return err;
}
- cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+ cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
}
return 0;
@@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int cpu;
- for_each_possible_cpu(cpu) {
+ for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
events->events = per_cpu(hw_events, cpu);
events->used_mask = per_cpu(used_mask, cpu);
@@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
/* Ensure the PMU has sane values out of reset. */
if (cpu_pmu->reset)
- on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+ on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
}
/*
@@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
- if (cpu_pmu && cpu_pmu->reset)
- cpu_pmu->reset(cpu_pmu);
+ if (pmu && pmu->reset)
+ pmu->reset(pmu);
else
return NOTIFY_DONE;
return NOTIFY_OK;
}
+static int cpu_pmu_pm_notify(struct notifier_block *b,
+ unsigned long action, void *hcpu)
+{
+ int cpu = smp_processor_id();
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
+ struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+
+ if (!pmu)
+ return NOTIFY_DONE;
+
+ if (action == CPU_PM_ENTER && pmu->save_regs) {
+ pmu->save_regs(pmu, pmuregs);
+ } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
+ pmu->restore_regs(pmu, pmuregs);
+ }
+
+ return NOTIFY_OK;
+}
+
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
.notifier_call = cpu_pmu_notify,
};
+static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
+ .notifier_call = cpu_pmu_pm_notify,
+};
+
/*
* PMU platform driver and devicetree bindings.
*/
@@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
}
}
+ /* assume PMU support all the CPUs in this case */
+ cpumask_setall(&pmu->valid_cpus);
+
put_cpu();
return ret;
}
@@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
- int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
- int ret = -ENODEV;
-
- if (cpu_pmu) {
- pr_info("attempt to register multiple PMU devices!");
- return -ENOSPC;
- }
+ int ret = 0;
+ int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
if (!pmu) {
@@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
}
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
- init_fn = of_id->data;
- ret = init_fn(pmu);
+ smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
+ struct device_node *ncluster;
+ int cluster = -1;
+ cpumask_t sibling_mask;
+
+ ncluster = of_parse_phandle(node, "cluster", 0);
+ if (ncluster) {
+ int len;
+ const u32 *hwid;
+ hwid = of_get_property(ncluster, "reg", &len);
+ if (hwid && len == 4)
+ cluster = be32_to_cpup(hwid);
+ }
+ /* set sibling mask to all cpu mask if socket is not specified */
+ if (cluster == -1 ||
+ cluster_to_logical_mask(cluster, &sibling_mask))
+ cpumask_setall(&sibling_mask);
+
+ smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+
+ /* now set the valid_cpus after init */
+ cpumask_copy(&pmu->valid_cpus, &sibling_mask);
} else {
ret = probe_current_pmu(pmu);
}
@@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
goto out_free;
}
- cpu_pmu = pmu;
- cpu_pmu->plat_device = pdev;
- cpu_pmu_init(cpu_pmu);
- ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+ for_each_cpu_mask(cpu, pmu->valid_cpus)
+ per_cpu(cpu_pmu, cpu) = pmu;
+
+ pmu->plat_device = pdev;
+ cpu_pmu_init(pmu);
+ ret = armpmu_register(pmu, -1);
if (!ret)
return 0;
@@ -313,9 +366,17 @@ static int __init register_pmu_driver(void)
if (err)
return err;
+ err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
+ if (err) {
+ unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+ return err;
+ }
+
err = platform_driver_register(&cpu_pmu_driver);
- if (err)
+ if (err) {
+ cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+ }
return err;
}
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 039cffb053a..654db5030c3 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
}
#endif
+static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+ struct cpupmu_regs *regs)
+{
+ unsigned int cnt;
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
+ if (!(regs->pmc & ARMV7_PMNC_E))
+ return;
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
+ asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
+ asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+ for (cnt = ARMV7_IDX_COUNTER0;
+ cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mrc p15, 0, %0, c9, c13, 1"
+ : "=r"(regs->pmxevttype[cnt]));
+ asm volatile("mrc p15, 0, %0, c9, c13, 2"
+ : "=r"(regs->pmxevtcnt[cnt]));
+ }
+ return;
+}
+
+static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+ struct cpupmu_regs *regs)
+{
+ unsigned int cnt;
+ if (!(regs->pmc & ARMV7_PMNC_E))
+ return;
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
+ asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
+ asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+ for (cnt = ARMV7_IDX_COUNTER0;
+ cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mcr p15, 0, %0, c9, c13, 1"
+ : : "r"(regs->pmxevttype[cnt]));
+ asm volatile("mcr p15, 0, %0, c9, c13, 2"
+ : : "r"(regs->pmxevtcnt[cnt]));
+ }
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+}
+
static void armv7pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
@@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->start = armv7pmu_start;
cpu_pmu->stop = armv7pmu_stop;
cpu_pmu->reset = armv7pmu_reset;
+ cpu_pmu->save_regs = armv7pmu_save_regs;
+ cpu_pmu->restore_regs = armv7pmu_restore_regs;
cpu_pmu->max_period = (1LLU << 32) - 1;
};
@@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void)
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A8";
+ cpu_pmu->name = "ARMv7_Cortex_A8";
cpu_pmu->map_event = armv7_a8_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A9";
+ cpu_pmu->name = "ARMv7_Cortex_A9";
cpu_pmu->map_event = armv7_a9_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A5";
+ cpu_pmu->name = "ARMv7_Cortex_A5";
cpu_pmu->map_event = armv7_a5_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A15";
+ cpu_pmu->name = "ARMv7_Cortex_A15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A7";
+ cpu_pmu->name = "ARMv7_Cortex_A7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 4459c0b4e91..677da58d9e8 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -398,6 +398,33 @@ void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id: cluster HW identifier
+ * @cluster_mask: the cpumask location to be initialized, modified by the
+ * function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+ int cpu;
+
+ if (!cluster_mask)
+ return -EINVAL;
+
+ for_each_online_cpu(cpu)
+ if (socket_id == topology_physical_package_id(cpu)) {
+ cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/*
* init_cpu_topology is called at boot when only one cpu is running
* which prevent simultaneous write access to cpu_topology array
*/
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index c586679b6fe..a30ab7910ff 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -198,7 +198,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
-void refresh_cpu_vm_stats(int);
+bool refresh_cpu_vm_stats(int);
void refresh_zone_stat_thresholds(void);
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f42745e6578..b916a43a6b3 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
@@ -432,11 +433,12 @@ EXPORT_SYMBOL(dec_zone_page_state);
* with the global counters. These could cause remote node cache line
* bouncing and will have to be only done when necessary.
*/
-void refresh_cpu_vm_stats(int cpu)
+bool refresh_cpu_vm_stats(int cpu)
{
struct zone *zone;
int i;
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+ bool vm_activity = false;
for_each_populated_zone(zone) {
struct per_cpu_pageset *p;
@@ -483,14 +485,21 @@ void refresh_cpu_vm_stats(int cpu)
if (p->expire)
continue;
- if (p->pcp.count)
+ if (p->pcp.count) {
+ vm_activity = true;
drain_zone_pages(zone, &p->pcp);
+ }
#endif
}
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (global_diff[i])
+ if (global_diff[i]) {
atomic_long_add(global_diff[i], &vm_stat[i]);
+ vm_activity = true;
+ }
+
+ return vm_activity;
+
}
/*
@@ -1174,22 +1183,72 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
+static struct cpumask vmstat_off_cpus;
+struct delayed_work vmstat_monitor_work;
-static void vmstat_update(struct work_struct *w)
+static inline bool need_vmstat(int cpu)
{
- refresh_cpu_vm_stats(smp_processor_id());
- schedule_delayed_work(&__get_cpu_var(vmstat_work),
- round_jiffies_relative(sysctl_stat_interval));
+ struct zone *zone;
+ int i;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pageset *p;
+
+ p = per_cpu_ptr(zone->pageset, cpu);
+
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+ if (p->vm_stat_diff[i])
+ return true;
+
+ if (zone_to_nid(zone) != numa_node_id() && p->pcp.count)
+ return true;
+ }
+
+ return false;
}
-static void __cpuinit start_cpu_timer(int cpu)
+static void vmstat_update(struct work_struct *w);
+
+static void start_cpu_timer(int cpu)
{
struct delayed_work *work = &per_cpu(vmstat_work, cpu);
- INIT_DEFERRABLE_WORK(work, vmstat_update);
+ cpumask_clear_cpu(cpu, &vmstat_off_cpus);
schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}
+static void __cpuinit setup_cpu_timer(int cpu)
+{
+ struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+
+ INIT_DEFERRABLE_WORK(work, vmstat_update);
+ start_cpu_timer(cpu);
+}
+
+static void vmstat_update_monitor(struct work_struct *w)
+{
+ int cpu;
+
+ for_each_cpu_and(cpu, &vmstat_off_cpus, cpu_online_mask)
+ if (need_vmstat(cpu))
+ start_cpu_timer(cpu);
+
+ queue_delayed_work(system_unbound_wq, &vmstat_monitor_work,
+ round_jiffies_relative(sysctl_stat_interval));
+}
+
+
+static void vmstat_update(struct work_struct *w)
+{
+ int cpu = smp_processor_id();
+
+ if (likely(refresh_cpu_vm_stats(cpu)))
+ schedule_delayed_work(&__get_cpu_var(vmstat_work),
+ round_jiffies_relative(sysctl_stat_interval));
+ else
+ cpumask_set_cpu(cpu, &vmstat_off_cpus);
+}
+
/*
* Use the cpu notifier to insure that the thresholds are recalculated
* when necessary.
@@ -1204,17 +1263,19 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
refresh_zone_stat_thresholds();
- start_cpu_timer(cpu);
+ setup_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
- per_cpu(vmstat_work, cpu).work.func = NULL;
+ if (!cpumask_test_cpu(cpu, &vmstat_off_cpus)) {
+ cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+ per_cpu(vmstat_work, cpu).work.func = NULL;
+ }
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
+ setup_cpu_timer(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -1237,8 +1298,14 @@ static int __init setup_vmstat(void)
register_cpu_notifier(&vmstat_notifier);
+ INIT_DEFERRABLE_WORK(&vmstat_monitor_work,
+ vmstat_update_monitor);
+ queue_delayed_work(system_unbound_wq,
+ &vmstat_monitor_work,
+ round_jiffies_relative(HZ));
+
for_each_online_cpu(cpu)
- start_cpu_timer(cpu);
+ setup_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);