author     Jon Medhurst <tixy@linaro.org>   2013-10-11 17:12:02 +0100
committer  Jon Medhurst <tixy@linaro.org>   2013-10-11 17:12:02 +0100
commit     68f98fec62bada031c89ee61b5bccd61914c23c3 (patch)
tree       19fcbf6c9a5dc405505375129e26d6231d4a867e
parent     7ac3860c98608e2f9782902b4439e0ade53c2364 (diff)
parent     d8063e7015122eb3f6173acf496171def8941734 (diff)
Merge tag 'big-LITTLE-MP-13.10' into for-lsk
-rw-r--r--  arch/arm/Kconfig                  12
-rw-r--r--  arch/arm/kernel/smp.c              5
-rw-r--r--  drivers/irqchip/irq-gic.c          5
-rw-r--r--  include/trace/events/arm-ipi.h   100
-rw-r--r--  include/trace/events/sched.h      72
-rw-r--r--  include/trace/events/smp.h        91
-rw-r--r--  kernel/sched/fair.c              302
-rw-r--r--  kernel/smp.c                      12
8 files changed, 514 insertions(+), 85 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2a5f5b8c385..e79dfda6644 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1578,6 +1578,18 @@ config HMP_FREQUENCY_INVARIANT_SCALE
migration strategy to interact more predictably with CPUFreq's
asynchronous compute capacity changes.
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP Scheduler to pack small tasks into CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in /sys/kernel/hmp.
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio where a RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config HAVE_ARM_SCU
bool
help
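
As a quick illustration of the new SCHED_HMP_LITTLE_PACKING knobs described in the help text above, here is a standalone userspace sketch (an assumption for illustration, not part of this patch) that writes the two sysfs files created under /sys/kernel/hmp by hmp_attr_init() later in this series:

/* Hypothetical helper; assumes /sys/kernel/hmp/packing_enable and
 * /sys/kernel/hmp/packing_limit exist as the help text describes. */
#include <stdio.h>

static int write_hmp(const char *name, int value)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/hmp/%s", name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", value);
	return fclose(f);
}

int main(void)
{
	write_hmp("packing_enable", 1);		/* 1 = pack, 0 = spread */
	write_hmp("packing_limit", 650);	/* e.g. the TC2 default used below */
	return 0;
}
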
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb451bb..80cd1ac1e0d 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -46,6 +46,9 @@
#include <asm/virt.h>
#include <asm/mach/arch.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/arm-ipi.h>
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -604,6 +607,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
if (ipinr < NR_IPI)
__inc_irq_stat(cpu, ipi_irqs[ipinr]);
+ trace_arm_ipi_entry(ipinr);
switch (ipinr) {
case IPI_WAKEUP:
break;
@@ -643,6 +647,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
cpu, ipinr);
break;
}
+ trace_arm_ipi_exit(ipinr);
set_irq_regs(old_regs);
}
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 19ceaa60e0f..5dc511c5872 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqchip/arm-gic.h>
+#include <trace/events/arm-ipi.h>
#include <asm/irq.h>
#include <asm/exception.h>
@@ -649,8 +650,10 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
unsigned long map = 0;
/* Convert our logical CPU mask into a physical one. */
- for_each_cpu(cpu, mask)
+ for_each_cpu(cpu, mask) {
+ trace_arm_ipi_send(irq, cpu);
map |= gic_cpu_map[cpu];
+ }
/*
* Ensure that stores to Normal memory are visible to the
diff --git a/include/trace/events/arm-ipi.h b/include/trace/events/arm-ipi.h
new file mode 100644
index 00000000000..5d3bd21827b
--- /dev/null
+++ b/include/trace/events/arm-ipi.h
@@ -0,0 +1,100 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM arm-ipi
+
+#if !defined(_TRACE_ARM_IPI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM_IPI_H
+
+#include <linux/tracepoint.h>
+
+#define show_arm_ipi_name(val) \
+ __print_symbolic(val, \
+ { 0, "IPI_WAKEUP" }, \
+ { 1, "IPI_TIMER" }, \
+ { 2, "IPI_RESCHEDULE" }, \
+ { 3, "IPI_CALL_FUNC" }, \
+ { 4, "IPI_CALL_FUNC_SINGLE" }, \
+ { 5, "IPI_CPU_STOP" }, \
+ { 6, "IPI_COMPLETION" }, \
+ { 7, "IPI_CPU_BACKTRACE" })
+
+DECLARE_EVENT_CLASS(arm_ipi,
+
+ TP_PROTO(unsigned int ipi_nr),
+
+ TP_ARGS(ipi_nr),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ipi )
+ ),
+
+ TP_fast_assign(
+ __entry->ipi = ipi_nr;
+ ),
+
+ TP_printk("ipi=%u [action=%s]", __entry->ipi,
+ show_arm_ipi_name(__entry->ipi))
+);
+
+/**
+ * arm_ipi_entry - called in the arm-generic ipi handler immediately before
+ * entering ipi-type handler
+ * @ipi_nr: ipi number
+ *
+ * When used in combination with the arm_ipi_exit tracepoint
+ * we can determine the ipi handler runtime.
+ */
+DEFINE_EVENT(arm_ipi, arm_ipi_entry,
+
+ TP_PROTO(unsigned int ipi_nr),
+
+ TP_ARGS(ipi_nr)
+);
+
+/**
+ * arm_ipi_exit - called in the arm-generic ipi handler immediately
+ * after the ipi-type handler returns
+ * @ipi_nr: ipi number
+ *
+ * When used in combination with the arm_ipi_entry tracepoint
+ * we can determine the ipi handler runtime.
+ */
+DEFINE_EVENT(arm_ipi, arm_ipi_exit,
+
+ TP_PROTO(unsigned int ipi_nr),
+
+ TP_ARGS(ipi_nr)
+);
+
+/**
+ * arm_ipi_send - called as the ipi target mask is built, immediately
+ * before the register is written
+ * @ipi_nr: ipi number
+ * @dest: cpu to send to
+ *
+ * When used in combination with the arm_ipi_entry tracepoint
+ * we can determine the ipi raise to run latency.
+ */
+TRACE_EVENT(arm_ipi_send,
+
+ TP_PROTO(unsigned int ipi_nr, int dest),
+
+ TP_ARGS(ipi_nr, dest),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ipi )
+ __field( int , dest )
+ ),
+
+ TP_fast_assign(
+ __entry->ipi = ipi_nr;
+ __entry->dest = dest;
+ ),
+
+ TP_printk("dest=%d ipi=%u [action=%s]", __entry->dest,
+ __entry->ipi, show_arm_ipi_name(__entry->ipi))
+);
+
+#endif /* _TRACE_ARM_IPI_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
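
The kernel-doc above pairs arm_ipi_entry with arm_ipi_exit to derive per-IPI handler runtime. A minimal in-kernel consumer of these events could look like the following sketch (a hypothetical module, not part of this series), which registers probes on the two tracepoints:

/* Hypothetical probe module; relies only on the tracepoints added above. */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/ktime.h>
#include <trace/events/arm-ipi.h>

static DEFINE_PER_CPU(ktime_t, ipi_start);

static void probe_ipi_entry(void *data, unsigned int ipi_nr)
{
	this_cpu_write(ipi_start, ktime_get());
}

static void probe_ipi_exit(void *data, unsigned int ipi_nr)
{
	s64 ns = ktime_to_ns(ktime_sub(ktime_get(), this_cpu_read(ipi_start)));

	pr_debug("ipi %u handled in %lld ns\n", ipi_nr, ns);
}

static int __init ipi_probe_init(void)
{
	register_trace_arm_ipi_entry(probe_ipi_entry, NULL);
	register_trace_arm_ipi_exit(probe_ipi_exit, NULL);
	return 0;
}

static void __exit ipi_probe_exit(void)
{
	unregister_trace_arm_ipi_exit(probe_ipi_exit, NULL);
	unregister_trace_arm_ipi_entry(probe_ipi_entry, NULL);
	tracepoint_synchronize_unregister();
}

module_init(ipi_probe_init);
module_exit(ipi_probe_exit);
MODULE_LICENSE("GPL");
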
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 203e8e9933b..66dc53bca19 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -530,6 +530,29 @@ TRACE_EVENT(sched_rq_runnable_load,
__entry->load)
);
+TRACE_EVENT(sched_rq_nr_running,
+
+ TP_PROTO(int cpu, unsigned int nr_running, int nr_iowait),
+
+ TP_ARGS(cpu, nr_running, nr_iowait),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(unsigned int, nr_running)
+ __field(int, nr_iowait)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->nr_running = nr_running;
+ __entry->nr_iowait = nr_iowait;
+ ),
+
+ TP_printk("cpu=%d nr_running=%u nr_iowait=%d",
+ __entry->cpu,
+ __entry->nr_running, __entry->nr_iowait)
+);
+
/*
* Tracepoint for showing tracked task cpu usage ratio [0..1023].
*/
@@ -559,6 +582,10 @@ TRACE_EVENT(sched_task_usage_ratio,
/*
* Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations.
*/
+#define HMP_MIGRATE_WAKEUP 0
+#define HMP_MIGRATE_FORCE 1
+#define HMP_MIGRATE_OFFLOAD 2
+#define HMP_MIGRATE_IDLE_PULL 3
TRACE_EVENT(sched_hmp_migrate,
TP_PROTO(struct task_struct *tsk, int dest, int force),
@@ -583,6 +610,51 @@ TRACE_EVENT(sched_hmp_migrate,
__entry->comm, __entry->pid,
__entry->dest, __entry->force)
);
+
+TRACE_EVENT(sched_hmp_offload_abort,
+
+ TP_PROTO(int cpu, int data, char *label),
+
+ TP_ARGS(cpu, data, label),
+
+ TP_STRUCT__entry(
+ __array(char, label, 64)
+ __field(int, cpu)
+ __field(int, data)
+ ),
+
+ TP_fast_assign(
+ strncpy(__entry->label, label, 64);
+ __entry->cpu = cpu;
+ __entry->data = data;
+ ),
+
+ TP_printk("cpu=%d data=%d label=%63s",
+ __entry->cpu, __entry->data,
+ __entry->label)
+);
+
+TRACE_EVENT(sched_hmp_offload_succeed,
+
+ TP_PROTO(int cpu, int dest_cpu),
+
+ TP_ARGS(cpu, dest_cpu),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(int, dest_cpu)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->dest_cpu = dest_cpu;
+ ),
+
+ TP_printk("cpu=%d dest=%d",
+ __entry->cpu,
+ __entry->dest_cpu)
+);
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
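
With the force argument of sched_hmp_migrate now drawn from the HMP_MIGRATE_* codes defined above, a post-processing tool can translate the numeric field back into a migration reason. A tiny illustrative decoder (an assumption, not part of the patch):

/* Hypothetical decoder for the 'force' field emitted by sched_hmp_migrate. */
static const char *hmp_migrate_reason(int force)
{
	switch (force) {
	case HMP_MIGRATE_WAKEUP:	return "wakeup";
	case HMP_MIGRATE_FORCE:		return "force";
	case HMP_MIGRATE_OFFLOAD:	return "offload";
	case HMP_MIGRATE_IDLE_PULL:	return "idle-pull";
	default:			return "unknown";
	}
}
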
diff --git a/include/trace/events/smp.h b/include/trace/events/smp.h
new file mode 100644
index 00000000000..c8abfd74472
--- /dev/null
+++ b/include/trace/events/smp.h
@@ -0,0 +1,91 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM smp
+
+#if !defined(_TRACE_SMP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SMP_H
+
+#include <linux/tracepoint.h>
+typedef void (*__smp_call_func_t)(void *info);
+
+DECLARE_EVENT_CLASS(smp_call_class,
+
+ TP_PROTO(__smp_call_func_t fnc),
+
+ TP_ARGS(fnc),
+
+ TP_STRUCT__entry(
+ __field( void *, func )
+ ),
+
+ TP_fast_assign(
+ __entry->func = fnc;
+ ),
+
+ TP_printk("func=%pf", __entry->func)
+);
+
+/**
+ * smp_call_func_entry - called in the generic smp-cross-call-handler
+ * immediately before calling the destination
+ * function
+ * @func: function pointer
+ *
+ * When used in combination with the smp_call_func_exit tracepoint
+ * we can determine the cross-call runtime.
+ */
+DEFINE_EVENT(smp_call_class, smp_call_func_entry,
+
+ TP_PROTO(__smp_call_func_t fnc),
+
+ TP_ARGS(fnc)
+);
+
+/**
+ * smp_call_func_exit - called in the generic smp-cross-call-handler
+ * immediately after the destination function
+ * returns
+ * @func: function pointer
+ *
+ * When used in combination with the smp_call_func_entry tracepoint
+ * we can determine the cross-call runtime.
+ */
+DEFINE_EVENT(smp_call_class, smp_call_func_exit,
+
+ TP_PROTO(__smp_call_func_t fnc),
+
+ TP_ARGS(fnc)
+);
+
+/**
+ * smp_call_func_send - called as destination function is set
+ * in the per-cpu storage
+ * @func: function pointer
+ * @dest: cpu to send to
+ *
+ * When used in combination with the smp_call_func_entry tracepoint
+ * we can determine the call-to-run latency.
+ */
+TRACE_EVENT(smp_call_func_send,
+
+ TP_PROTO(__smp_call_func_t func, int dest),
+
+ TP_ARGS(func, dest),
+
+ TP_STRUCT__entry(
+ __field( void * , func )
+ __field( int , dest )
+ ),
+
+ TP_fast_assign(
+ __entry->func = func;
+ __entry->dest = dest;
+ ),
+
+ TP_printk("dest=%d func=%pf", __entry->dest,
+ __entry->func)
+);
+
+#endif /* _TRACE_SMP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
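
Any existing smp_call_function_single() caller now exercises all three new events: smp_call_func_send on the requesting CPU, then smp_call_func_entry/exit around the callback on the destination. A minimal caller sketch (hypothetical, for illustration only; the caller itself needs no changes):

/* Hypothetical caller; the tracepoints fire inside kernel/smp.c. */
#include <linux/smp.h>

static void remote_bump(void *info)
{
	/* Runs on the destination CPU, bracketed by entry/exit events. */
	(*(int *)info)++;
}

static void poke_cpu(int cpu)
{
	int hits = 0;

	/* wait=1: returns once remote_bump() has completed on 'cpu'. */
	smp_call_function_single(cpu, remote_bump, &hits, 1);
}
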
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 754634e774a..66b5b30159f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1226,11 +1226,7 @@ struct hmp_global_attr {
int (*from_sysfs)(int);
};
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
struct hmp_data_struct {
#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -1688,6 +1684,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
__update_tg_runnable_avg(&rq->avg, &rq->cfs);
trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio);
trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg);
+ trace_sched_rq_nr_running(cpu_of(rq), rq->nr_running, rq->nr_iowait.counter);
}
/* Add the load generated by se into cfs_rq's child load-average */
@@ -3663,25 +3660,46 @@ static struct sched_entity *hmp_get_lightest_task(
* Migration thresholds should be in the range [0..1023]
* hmp_up_threshold: min. load required for migrating tasks to a faster cpu
* hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu
- * The default values (512, 256) offer good responsiveness, but may need
- * tweaking suit particular needs.
*
* hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
* hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
* hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanism. This behavior is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
*/
-unsigned int hmp_up_threshold = 512;
-unsigned int hmp_down_threshold = 256;
+unsigned int hmp_up_threshold = 700;
+unsigned int hmp_down_threshold = 512;
#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
#endif
unsigned int hmp_next_up_threshold = 4096;
unsigned int hmp_next_down_threshold = 4096;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve at around 800MHz, so
+ we aim to spread the load around that frequency. */
+unsigned int hmp_full_threshold = 650; /* 80% of the 800MHz freq * NICE_0_LOAD */
+#endif
+#endif
+
static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu);
+ int *min_cpu, struct cpumask *affinity);
+
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+ return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
/* Check if cpu is in fastest hmp_domain */
static inline unsigned int hmp_cpu_is_fastest(int cpu)
@@ -3721,22 +3739,23 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu)
/*
* Selects a cpu in previous (faster) hmp_domain
- * Note that cpumask_any_and() returns the first cpu in the cpumask
*/
static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
int cpu)
{
int lowest_cpu=NR_CPUS;
- __always_unused int lowest_ratio = hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu);
- /*
- * If the lowest-loaded CPU in the domain is allowed by the task affinity
- * select that one, otherwise select one which is allowed
- */
- if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
- return lowest_cpu;
+ __always_unused int lowest_ratio;
+ struct hmp_domain *hmp;
+
+ if (hmp_cpu_is_fastest(cpu))
+ hmp = hmp_cpu_domain(cpu);
else
- return cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(tsk));
+ hmp = hmp_faster_domain(cpu);
+
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
}
/*
@@ -3755,18 +3774,54 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
else
hmp = hmp_slower_domain(cpu);
- lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu);
- /*
- * If the lowest-loaded CPU in the domain is allowed by the task affinity
- * select that one, otherwise select one which is allowed
- */
- if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
- return lowest_cpu;
- else
- return cpumask_any_and(&hmp_slower_domain(cpu)->cpus,
- tsk_cpus_allowed(tsk));
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
}
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPUs in
+ * logical CPU order, and selects the first which will
+ * have at least 10% capacity available, according to
+ * both tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+ int cpu) {
+ int tmp_cpu;
+ unsigned long estimated_load;
+ struct hmp_domain *hmp;
+ struct sched_avg *avg;
+ struct cpumask allowed_hmp_cpus;
+
+ if (!hmp_packing_enabled ||
+ tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+ return hmp_select_slower_cpu(tsk, cpu);
+
+ if (hmp_cpu_is_slowest(cpu))
+ hmp = hmp_cpu_domain(cpu);
+ else
+ hmp = hmp_slower_domain(cpu);
+ /* respect affinity */
+ cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+ tsk_cpus_allowed(tsk));
+
+ for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+ avg = &cpu_rq(tmp_cpu)->avg;
+ /* estimate new rq load if we add this task */
+ estimated_load = avg->load_avg_ratio +
+ tsk->se.avg.load_avg_ratio;
+ if (estimated_load <= hmp_full_threshold) {
+ cpu = tmp_cpu;
+ break;
+ }
+ }
+ /* if no match was found, the task uses the initial value */
+ return cpu;
+}
+#endif
static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
{
/* hack - always use clock from first online CPU */
@@ -3890,6 +3945,15 @@ static int hmp_freqinvar_from_sysfs(int value)
return value;
}
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+ if (value < 0)
+ return -1;
+ return value;
+}
+#endif
static void hmp_attr_add(
const char *name,
int *value,
@@ -3942,6 +4006,16 @@ static int hmp_attr_init(void)
NULL,
hmp_freqinvar_from_sysfs);
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ hmp_attr_add("packing_enable",
+ &hmp_packing_enabled,
+ NULL,
+ hmp_freqinvar_from_sysfs);
+ hmp_attr_add("packing_limit",
+ &hmp_full_threshold,
+ NULL,
+ hmp_packing_from_sysfs);
+#endif
hmp_data.attr_group.name = "hmp";
hmp_data.attr_group.attrs = hmp_data.attributes;
ret = sysfs_create_group(kernel_kobj,
@@ -3950,9 +4024,24 @@ static int hmp_attr_init(void)
}
late_initcall(hmp_attr_init);
#endif /* CONFIG_HMP_VARIABLE_SCALE */
-
+/*
+ * return the load of the lowest-loaded CPU in a given HMP domain
+ * min_cpu optionally points to an int to receive the CPU.
+ * affinity optionally points to a cpumask containing the
+ * CPUs to be considered. note:
+ * + min_cpu = NR_CPUS only if no CPUs are in the set of
+ * affinity && hmp_domain cpus
+ * + min_cpu will always otherwise equal one of the CPUs in
+ * the hmp domain
+ * + when more than one CPU has the same load, the one which
+ * is least-recently-disturbed by an HMP migration will be
+ * selected
+ * + if all CPUs are equally loaded or idle and the times are
+ * all the same, the first in the set will be used
+ * + if affinity is not set, cpu_online_mask is used
+ */
static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu)
+ int *min_cpu, struct cpumask *affinity)
{
int cpu;
int min_cpu_runnable_temp = NR_CPUS;
@@ -3961,8 +4050,15 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
unsigned long min_runnable_load = INT_MAX;
unsigned long contrib;
struct sched_avg *avg;
+ struct cpumask temp_cpumask;
+ /*
+ * only look at CPUs allowed if specified,
+ * otherwise look at all online CPUs in the
+ * right HMP domain
+ */
+ cpumask_and(&temp_cpumask, &hmpd->cpus, affinity ? affinity : cpu_online_mask);
- for_each_cpu_mask(cpu, hmpd->cpus) {
+ for_each_cpu_mask(cpu, temp_cpumask) {
avg = &cpu_rq(cpu)->avg;
/* used for both up and down migration */
curr_last_migration = avg->hmp_last_up_migration ?
@@ -4024,27 +4120,36 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
return NR_CPUS;
/* Is there an idle CPU in the current domain */
- min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL);
- if (min_usage == 0)
+ min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL, NULL);
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_abort(cpu, min_usage, "load");
return NR_CPUS;
+ }
/* Is the task alone on the cpu? */
- if (cpu_rq(cpu)->cfs.h_nr_running < 2)
+ if (cpu_rq(cpu)->cfs.h_nr_running < 2) {
+ trace_sched_hmp_offload_abort(cpu,
+ cpu_rq(cpu)->cfs.h_nr_running, "nr_running");
return NR_CPUS;
+ }
/* Is the task actually starving? */
/* >=25% ratio running/runnable = starving */
- if (hmp_task_starvation(se) > 768)
+ if (hmp_task_starvation(se) > 768) {
+ trace_sched_hmp_offload_abort(cpu, hmp_task_starvation(se),
+ "starvation");
return NR_CPUS;
+ }
/* Does the slower domain have any idle CPUs? */
- min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu);
- if (min_usage > 0)
- return NR_CPUS;
+ min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu,
+ tsk_cpus_allowed(task_of(se)));
- if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus))
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_succeed(cpu, dest_cpu);
return dest_cpu;
-
+ } else
+ trace_sched_hmp_offload_abort(cpu, min_usage, "slowdomain");
return NR_CPUS;
}
#endif /* CONFIG_SCHED_HMP */
@@ -4076,30 +4181,13 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
#ifdef CONFIG_SCHED_HMP
/* always put non-kernel forking tasks on a big domain */
if (p->mm && (sd_flag & SD_BALANCE_FORK)) {
- if(hmp_cpu_is_fastest(prev_cpu)) {
- struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains);
- __always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu);
- if (new_cpu != NR_CPUS &&
- cpumask_test_cpu(new_cpu,
- tsk_cpus_allowed(p))) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- } else {
- new_cpu = cpumask_any_and(
- &hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(p));
- if (new_cpu < nr_cpu_ids) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- }
- }
- } else {
- new_cpu = hmp_select_faster_cpu(p, prev_cpu);
- if (new_cpu != NR_CPUS) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- }
+ new_cpu = hmp_select_faster_cpu(p, prev_cpu);
+ if (new_cpu != NR_CPUS) {
+ hmp_next_up_delay(&p->se, new_cpu);
+ return new_cpu;
}
+ /* failed to perform HMP fork balance, use normal balance */
+ new_cpu = cpu;
}
#endif
@@ -4178,16 +4266,24 @@ unlock:
rcu_read_unlock();
#ifdef CONFIG_SCHED_HMP
+ prev_cpu = task_cpu(p);
+
if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) {
hmp_next_up_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, 0);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
return new_cpu;
}
if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
new_cpu = hmp_select_slower_cpu(p, prev_cpu);
- hmp_next_down_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, 0);
- return new_cpu;
+#endif
+ if (new_cpu != prev_cpu) {
+ hmp_next_down_delay(&p->se, new_cpu);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+ return new_cpu;
+ }
}
/* Make sure that the task stays in its previous hmp domain */
if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -6154,16 +6250,49 @@ static struct {
unsigned long next_balance; /* in jiffy units */
} nohz ____cacheline_aligned;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+ struct hmp_domain *hmp;
+ /* always allow ilb on non-slowest domain */
+ if (!hmp_cpu_is_slowest(cpu))
+ return 1;
+
+ hmp = hmp_cpu_domain(cpu);
+ for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+ /* only idle balance if a CPU is loaded over threshold */
+ if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+ return 1;
+ }
+ return 0;
+}
+#endif
+
static inline int find_new_ilb(int call_cpu)
{
int ilb = cpumask_first(nohz.idle_cpus_mask);
#ifdef CONFIG_SCHED_HMP
+ int ilb_needed = 1;
+
/* restrict nohz balancing to occur in the same hmp domain */
ilb = cpumask_first_and(nohz.idle_cpus_mask,
&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if (ilb < nr_cpu_ids)
+ ilb_needed = hmp_packing_ilb_needed(ilb);
#endif
+
+ if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+ return ilb;
+#else
if (ilb < nr_cpu_ids && idle_cpu(ilb))
return ilb;
+#endif
return nr_cpu_ids;
}
@@ -6489,11 +6618,9 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se)
{
struct task_struct *p = task_of(se);
+ int temp_target_cpu;
u64 now;
- if (target_cpu)
- *target_cpu = NR_CPUS;
-
if (hmp_cpu_is_fastest(cpu))
return 0;
@@ -6516,13 +6643,12 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti
* idle CPU or 1023 for any partly-busy one.
* Be explicit about requirement for an idle CPU.
*/
- if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) != 0)
- return 0;
-
- if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(p)))
+ if (hmp_domain_min_load(hmp_faster_domain(cpu), &temp_target_cpu,
+ tsk_cpus_allowed(p)) == 0 && temp_target_cpu != NR_CPUS) {
+ if (target_cpu)
+ *target_cpu = temp_target_cpu;
return 1;
-
+ }
return 0;
}
@@ -6532,8 +6658,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
struct task_struct *p = task_of(se);
u64 now;
- if (hmp_cpu_is_slowest(cpu))
+ if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if (hmp_packing_enabled)
+ return 1;
+ else
+#endif
return 0;
+ }
#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
/* Filter by task priority */
@@ -6702,6 +6834,7 @@ static int hmp_active_task_migration_cpu_stop(void *data)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
+ put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
@@ -6775,6 +6908,7 @@ static int hmp_idle_pull_cpu_stop(void *data)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
+ put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
@@ -6820,11 +6954,12 @@ static void hmp_force_up_migration(int this_cpu)
p = task_of(curr);
if (hmp_up_migration(cpu, &target_cpu, curr)) {
if (!target->active_balance) {
+ get_task_struct(p);
target->active_balance = 1;
target->push_cpu = target_cpu;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 1);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE);
hmp_next_up_delay(&p->se, target->push_cpu);
}
}
@@ -6835,12 +6970,14 @@ static void hmp_force_up_migration(int this_cpu)
* require extensive book keeping.
*/
curr = hmp_get_lightest_task(orig, 1);
+ p = task_of(curr);
target->push_cpu = hmp_offload_down(cpu, curr);
if (target->push_cpu < NR_CPUS) {
+ get_task_struct(p);
target->active_balance = 1;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 2);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD);
hmp_next_down_delay(&p->se, target->push_cpu);
}
}
@@ -6915,11 +7052,12 @@ static unsigned int hmp_idle_pull(int this_cpu)
/* now we have a candidate */
raw_spin_lock_irqsave(&target->lock, flags);
if (!target->active_balance && task_rq(p) == target) {
+ get_task_struct(p);
target->active_balance = 1;
target->push_cpu = this_cpu;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 3);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
hmp_next_up_delay(&p->se, target->push_cpu);
}
raw_spin_unlock_irqrestore(&target->lock, flags);
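
To make the packing threshold concrete: with NICE_0_LOAD = 1024 the default hmp_full_threshold is 1024 * 9/8 = 1152, and hmp_best_little_cpu() admits a waking task onto a little CPU while the runqueue's load_avg_ratio plus the task's load_avg_ratio stays at or below that limit (tasks above 90% of NICE_0_LOAD skip packing entirely). A standalone sketch of the same admission test, using hypothetical load values:

/* Illustration only; mirrors the checks made in hmp_best_little_cpu(). */
#include <stdio.h>

#define NICE_0_LOAD 1024

int main(void)
{
	unsigned long full_threshold = (NICE_0_LOAD * 9) / 8;	/* 1152 by default */
	unsigned long rq_load = 400;	/* hypothetical little-CPU load ratio */
	unsigned long task_load = 300;	/* hypothetical waking-task load ratio */

	if (task_load > (NICE_0_LOAD * 90) / 100)
		printf("task too heavy, fall back to hmp_select_slower_cpu()\n");
	else if (rq_load + task_load <= full_threshold)
		printf("pack: %lu + %lu <= %lu\n", rq_load, task_load, full_threshold);
	else
		printf("CPU considered full, try the next little CPU\n");
	return 0;
}
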
diff --git a/kernel/smp.c b/kernel/smp.c
index 4dba0f7b72a..23ccc67dcbb 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -12,6 +12,8 @@
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/smp.h>
#include "smpboot.h"
@@ -159,8 +161,10 @@ void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
* locking and barrier primitives. Generic code isn't really
* equipped to do the right thing...
*/
- if (ipi)
+ if (ipi) {
+ trace_smp_call_func_send(csd->func, cpu);
arch_send_call_function_single_ipi(cpu);
+ }
if (wait)
csd_lock_wait(csd);
@@ -197,8 +201,9 @@ void generic_smp_call_function_single_interrupt(void)
* so save them away before making the call:
*/
csd_flags = csd->flags;
-
+ trace_smp_call_func_entry(csd->func);
csd->func(csd->info);
+ trace_smp_call_func_exit(csd->func);
/*
* Unlocked CSDs are valid through generic_exec_single():
@@ -228,6 +233,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
int this_cpu;
int err = 0;
+ trace_smp_call_func_send(func, cpu);
/*
* prevent preemption and reschedule on another processor,
* as well as CPU removal
@@ -245,7 +251,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
if (cpu == this_cpu) {
local_irq_save(flags);
+ trace_smp_call_func_entry(func);
func(info);
+ trace_smp_call_func_exit(func);
local_irq_restore(flags);
} else {
if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {