diff options
author | Jon Medhurst <tixy@linaro.org> | 2013-10-11 17:12:02 +0100 |
---|---|---|
committer | Jon Medhurst <tixy@linaro.org> | 2013-10-11 17:12:02 +0100 |
commit | 68f98fec62bada031c89ee61b5bccd61914c23c3 (patch) | |
tree | 19fcbf6c9a5dc405505375129e26d6231d4a867e | |
parent | 7ac3860c98608e2f9782902b4439e0ade53c2364 (diff) | |
parent | d8063e7015122eb3f6173acf496171def8941734 (diff) |
Merge tag 'big-LITTLE-MP-13.10' into for-lsk
-rw-r--r-- | arch/arm/Kconfig | 12 | ||||
-rw-r--r-- | arch/arm/kernel/smp.c | 5 | ||||
-rw-r--r-- | drivers/irqchip/irq-gic.c | 5 | ||||
-rw-r--r-- | include/trace/events/arm-ipi.h | 100 | ||||
-rw-r--r-- | include/trace/events/sched.h | 72 | ||||
-rw-r--r-- | include/trace/events/smp.h | 91 | ||||
-rw-r--r-- | kernel/sched/fair.c | 302 | ||||
-rw-r--r-- | kernel/smp.c | 12 |
8 files changed, 514 insertions, 85 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2a5f5b8c385..e79dfda6644 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1578,6 +1578,18 @@ config HMP_FREQUENCY_INVARIANT_SCALE migration strategy to interact more predictably with CPUFreq's asynchronous compute capacity changes. +config SCHED_HMP_LITTLE_PACKING + bool "Small task packing for HMP" + depends on SCHED_HMP + default n + help + Allows the HMP Scheduler to pack small tasks into CPUs in the + smallest HMP domain. + Controlled by two sysfs files in sys/kernel/hmp. + packing_enable: 1 to enable, 0 to disable packing. Default 1. + packing_limit: runqueue load ratio where a RQ is considered + to be full. Default is NICE_0_LOAD * 9/8. + config HAVE_ARM_SCU bool help diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5919eb451bb..80cd1ac1e0d 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -46,6 +46,9 @@ #include <asm/virt.h> #include <asm/mach/arch.h> +#define CREATE_TRACE_POINTS +#include <trace/events/arm-ipi.h> + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -604,6 +607,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) if (ipinr < NR_IPI) __inc_irq_stat(cpu, ipi_irqs[ipinr]); + trace_arm_ipi_entry(ipinr); switch (ipinr) { case IPI_WAKEUP: break; @@ -643,6 +647,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) cpu, ipinr); break; } + trace_arm_ipi_exit(ipinr); set_irq_regs(old_regs); } diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 19ceaa60e0f..5dc511c5872 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -41,6 +41,7 @@ #include <linux/slab.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqchip/arm-gic.h> +#include <trace/events/arm-ipi.h> #include <asm/irq.h> #include <asm/exception.h> @@ -649,8 +650,10 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) unsigned long map = 0; /* Convert our logical CPU mask into a physical one. */ - for_each_cpu(cpu, mask) + for_each_cpu(cpu, mask) { + trace_arm_ipi_send(irq, cpu); map |= gic_cpu_map[cpu]; + } /* * Ensure that stores to Normal memory are visible to the diff --git a/include/trace/events/arm-ipi.h b/include/trace/events/arm-ipi.h new file mode 100644 index 00000000000..5d3bd21827b --- /dev/null +++ b/include/trace/events/arm-ipi.h @@ -0,0 +1,100 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM arm-ipi + +#if !defined(_TRACE_ARM_IPI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ARM_IPI_H + +#include <linux/tracepoint.h> + +#define show_arm_ipi_name(val) \ + __print_symbolic(val, \ + { 0, "IPI_WAKEUP" }, \ + { 1, "IPI_TIMER" }, \ + { 2, "IPI_RESCHEDULE" }, \ + { 3, "IPI_CALL_FUNC" }, \ + { 4, "IPI_CALL_FUNC_SINGLE" }, \ + { 5, "IPI_CPU_STOP" }, \ + { 6, "IPI_COMPLETION" }, \ + { 7, "IPI_CPU_BACKTRACE" }) + +DECLARE_EVENT_CLASS(arm_ipi, + + TP_PROTO(unsigned int ipi_nr), + + TP_ARGS(ipi_nr), + + TP_STRUCT__entry( + __field( unsigned int, ipi ) + ), + + TP_fast_assign( + __entry->ipi = ipi_nr; + ), + + TP_printk("ipi=%u [action=%s]", __entry->ipi, + show_arm_ipi_name(__entry->ipi)) +); + +/** + * arm_ipi_entry - called in the arm-generic ipi handler immediately before + * entering ipi-type handler + * @ipi_nr: ipi number + * + * When used in combination with the arm_ipi_exit tracepoint + * we can determine the ipi handler runtine. + */ +DEFINE_EVENT(arm_ipi, arm_ipi_entry, + + TP_PROTO(unsigned int ipi_nr), + + TP_ARGS(ipi_nr) +); + +/** + * arm_ipi_exit - called in the arm-generic ipi handler immediately + * after the ipi-type handler returns + * @ipi_nr: ipi number + * + * When used in combination with the arm_ipi_entry tracepoint + * we can determine the ipi handler runtine. + */ +DEFINE_EVENT(arm_ipi, arm_ipi_exit, + + TP_PROTO(unsigned int ipi_nr), + + TP_ARGS(ipi_nr) +); + +/** + * arm_ipi_send - called as the ipi target mask is built, immediately + * before the register is written + * @ipi_nr: ipi number + * @dest: cpu to send to + * + * When used in combination with the arm_ipi_entry tracepoint + * we can determine the ipi raise to run latency. + */ +TRACE_EVENT(arm_ipi_send, + + TP_PROTO(unsigned int ipi_nr, int dest), + + TP_ARGS(ipi_nr, dest), + + TP_STRUCT__entry( + __field( unsigned int, ipi ) + __field( int , dest ) + ), + + TP_fast_assign( + __entry->ipi = ipi_nr; + __entry->dest = dest; + ), + + TP_printk("dest=%d ipi=%u [action=%s]", __entry->dest, + __entry->ipi, show_arm_ipi_name(__entry->ipi)) +); + +#endif /* _TRACE_ARM_IPI_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 203e8e9933b..66dc53bca19 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -530,6 +530,29 @@ TRACE_EVENT(sched_rq_runnable_load, __entry->load) ); +TRACE_EVENT(sched_rq_nr_running, + + TP_PROTO(int cpu, unsigned int nr_running, int nr_iowait), + + TP_ARGS(cpu, nr_running, nr_iowait), + + TP_STRUCT__entry( + __field(int, cpu) + __field(unsigned int, nr_running) + __field(int, nr_iowait) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->nr_running = nr_running; + __entry->nr_iowait = nr_iowait; + ), + + TP_printk("cpu=%d nr_running=%u nr_iowait=%d", + __entry->cpu, + __entry->nr_running, __entry->nr_iowait) +); + /* * Tracepoint for showing tracked task cpu usage ratio [0..1023]. */ @@ -559,6 +582,10 @@ TRACE_EVENT(sched_task_usage_ratio, /* * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations. */ +#define HMP_MIGRATE_WAKEUP 0 +#define HMP_MIGRATE_FORCE 1 +#define HMP_MIGRATE_OFFLOAD 2 +#define HMP_MIGRATE_IDLE_PULL 3 TRACE_EVENT(sched_hmp_migrate, TP_PROTO(struct task_struct *tsk, int dest, int force), @@ -583,6 +610,51 @@ TRACE_EVENT(sched_hmp_migrate, __entry->comm, __entry->pid, __entry->dest, __entry->force) ); + +TRACE_EVENT(sched_hmp_offload_abort, + + TP_PROTO(int cpu, int data, char *label), + + TP_ARGS(cpu,data,label), + + TP_STRUCT__entry( + __array(char, label, 64) + __field(int, cpu) + __field(int, data) + ), + + TP_fast_assign( + strncpy(__entry->label, label, 64); + __entry->cpu = cpu; + __entry->data = data; + ), + + TP_printk("cpu=%d data=%d label=%63s", + __entry->cpu, __entry->data, + __entry->label) +); + +TRACE_EVENT(sched_hmp_offload_succeed, + + TP_PROTO(int cpu, int dest_cpu), + + TP_ARGS(cpu,dest_cpu), + + TP_STRUCT__entry( + __field(int, cpu) + __field(int, dest_cpu) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("cpu=%d dest=%d", + __entry->cpu, + __entry->dest_cpu) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/include/trace/events/smp.h b/include/trace/events/smp.h new file mode 100644 index 00000000000..c8abfd74472 --- /dev/null +++ b/include/trace/events/smp.h @@ -0,0 +1,91 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM smp + +#if !defined(_TRACE_SMP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SMP_H + +#include <linux/tracepoint.h> +typedef void (*__smp_call_func_t)(void *info); + +DECLARE_EVENT_CLASS(smp_call_class, + + TP_PROTO(__smp_call_func_t fnc), + + TP_ARGS(fnc), + + TP_STRUCT__entry( + __field( void *, func ) + ), + + TP_fast_assign( + __entry->func = fnc; + ), + + TP_printk("func=%pf", __entry->func) +); + +/** + * smp_call_func_entry - called in the generic smp-cross-call-handler + * immediately before calling the destination + * function + * @func: function pointer + * + * When used in combination with the smp_call_func_exit tracepoint + * we can determine the cross-call runtime. + */ +DEFINE_EVENT(smp_call_class, smp_call_func_entry, + + TP_PROTO(__smp_call_func_t fnc), + + TP_ARGS(fnc) +); + +/** + * smp_call_func_exit - called in the generic smp-cross-call-handler + * immediately after the destination function + * returns + * @func: function pointer + * + * When used in combination with the smp_call_entry tracepoint + * we can determine the cross-call runtime. + */ +DEFINE_EVENT(smp_call_class, smp_call_func_exit, + + TP_PROTO(__smp_call_func_t fnc), + + TP_ARGS(fnc) +); + +/** + * smp_call_func_send - called as destination function is set + * in the per-cpu storage + * @func: function pointer + * @dest: cpu to send to + * + * When used in combination with the smp_cross_call_entry tracepoint + * we can determine the call-to-run latency. + */ +TRACE_EVENT(smp_call_func_send, + + TP_PROTO(__smp_call_func_t func, int dest), + + TP_ARGS(func, dest), + + TP_STRUCT__entry( + __field( void * , func ) + __field( int , dest ) + ), + + TP_fast_assign( + __entry->func = func; + __entry->dest = dest; + ), + + TP_printk("dest=%d func=%pf", __entry->dest, + __entry->func) +); + +#endif /* _TRACE_SMP_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 754634e774a..66b5b30159f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1226,11 +1226,7 @@ struct hmp_global_attr { int (*from_sysfs)(int); }; -#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE -#define HMP_DATA_SYSFS_MAX 4 -#else -#define HMP_DATA_SYSFS_MAX 3 -#endif +#define HMP_DATA_SYSFS_MAX 8 struct hmp_data_struct { #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE @@ -1688,6 +1684,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable) __update_tg_runnable_avg(&rq->avg, &rq->cfs); trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio); trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg); + trace_sched_rq_nr_running(cpu_of(rq), rq->nr_running, rq->nr_iowait.counter); } /* Add the load generated by se into cfs_rq's child load-average */ @@ -3663,25 +3660,46 @@ static struct sched_entity *hmp_get_lightest_task( * Migration thresholds should be in the range [0..1023] * hmp_up_threshold: min. load required for migrating tasks to a faster cpu * hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu - * The default values (512, 256) offer good responsiveness, but may need - * tweaking suit particular needs. * * hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio) * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms) * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms) + * + * Small Task Packing: + * We can choose to fill the littlest CPUs in an HMP system rather than + * the typical spreading mechanic. This behavior is controllable using + * two variables. + * hmp_packing_enabled: runtime control over pack/spread + * hmp_full_threshold: Consider a CPU with this much unweighted load full */ -unsigned int hmp_up_threshold = 512; -unsigned int hmp_down_threshold = 256; +unsigned int hmp_up_threshold = 700; +unsigned int hmp_down_threshold = 512; #ifdef CONFIG_SCHED_HMP_PRIO_FILTER unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL); #endif unsigned int hmp_next_up_threshold = 4096; unsigned int hmp_next_down_threshold = 4096; +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING +unsigned int hmp_packing_enabled = 1; +#ifndef CONFIG_ARCH_VEXPRESS_TC2 +unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8; +#else +/* TC2 has a sharp consumption curve @ around 800Mhz, so + we aim to spread the load around that frequency. */ +unsigned int hmp_full_threshold = 650; /* 80% of the 800Mhz freq * NICE_0_LOAD */ +#endif +#endif + static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se); static unsigned int hmp_down_migration(int cpu, struct sched_entity *se); static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, - int *min_cpu); + int *min_cpu, struct cpumask *affinity); + +static inline struct hmp_domain *hmp_smallest_domain(void) +{ + return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains); +} /* Check if cpu is in fastest hmp_domain */ static inline unsigned int hmp_cpu_is_fastest(int cpu) @@ -3721,22 +3739,23 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu) /* * Selects a cpu in previous (faster) hmp_domain - * Note that cpumask_any_and() returns the first cpu in the cpumask */ static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk, int cpu) { int lowest_cpu=NR_CPUS; - __always_unused int lowest_ratio = hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu); - /* - * If the lowest-loaded CPU in the domain is allowed by the task affinity - * select that one, otherwise select one which is allowed - */ - if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk))) - return lowest_cpu; + __always_unused int lowest_ratio; + struct hmp_domain *hmp; + + if (hmp_cpu_is_fastest(cpu)) + hmp = hmp_cpu_domain(cpu); else - return cpumask_any_and(&hmp_faster_domain(cpu)->cpus, - tsk_cpus_allowed(tsk)); + hmp = hmp_faster_domain(cpu); + + lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu, + tsk_cpus_allowed(tsk)); + + return lowest_cpu; } /* @@ -3755,18 +3774,54 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk, else hmp = hmp_slower_domain(cpu); - lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu); - /* - * If the lowest-loaded CPU in the domain is allowed by the task affinity - * select that one, otherwise select one which is allowed - */ - if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk))) - return lowest_cpu; - else - return cpumask_any_and(&hmp_slower_domain(cpu)->cpus, - tsk_cpus_allowed(tsk)); + lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu, + tsk_cpus_allowed(tsk)); + + return lowest_cpu; } +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING +/* + * Select the 'best' candidate little CPU to wake up on. + * Implements a packing strategy which examines CPU in + * logical CPU order, and selects the first which will + * have at least 10% capacity available, according to + * both tracked load of the runqueue and the task. + */ +static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk, + int cpu) { + int tmp_cpu; + unsigned long estimated_load; + struct hmp_domain *hmp; + struct sched_avg *avg; + struct cpumask allowed_hmp_cpus; + + if(!hmp_packing_enabled || + tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100)) + return hmp_select_slower_cpu(tsk, cpu); + + if (hmp_cpu_is_slowest(cpu)) + hmp = hmp_cpu_domain(cpu); + else + hmp = hmp_slower_domain(cpu); + /* respect affinity */ + cpumask_and(&allowed_hmp_cpus, &hmp->cpus, + tsk_cpus_allowed(tsk)); + + for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) { + avg = &cpu_rq(tmp_cpu)->avg; + /* estimate new rq load if we add this task */ + estimated_load = avg->load_avg_ratio + + tsk->se.avg.load_avg_ratio; + if (estimated_load <= hmp_full_threshold) { + cpu = tmp_cpu; + break; + } + } + /* if no match was found, the task uses the initial value */ + return cpu; +} +#endif static inline void hmp_next_up_delay(struct sched_entity *se, int cpu) { /* hack - always use clock from first online CPU */ @@ -3890,6 +3945,15 @@ static int hmp_freqinvar_from_sysfs(int value) return value; } #endif +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING +/* packing value must be non-negative */ +static int hmp_packing_from_sysfs(int value) +{ + if (value < 0) + return -1; + return value; +} +#endif static void hmp_attr_add( const char *name, int *value, @@ -3942,6 +4006,16 @@ static int hmp_attr_init(void) NULL, hmp_freqinvar_from_sysfs); #endif +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING + hmp_attr_add("packing_enable", + &hmp_packing_enabled, + NULL, + hmp_freqinvar_from_sysfs); + hmp_attr_add("packing_limit", + &hmp_full_threshold, + NULL, + hmp_packing_from_sysfs); +#endif hmp_data.attr_group.name = "hmp"; hmp_data.attr_group.attrs = hmp_data.attributes; ret = sysfs_create_group(kernel_kobj, @@ -3950,9 +4024,24 @@ static int hmp_attr_init(void) } late_initcall(hmp_attr_init); #endif /* CONFIG_HMP_VARIABLE_SCALE */ - +/* + * return the load of the lowest-loaded CPU in a given HMP domain + * min_cpu optionally points to an int to receive the CPU. + * affinity optionally points to a cpumask containing the + * CPUs to be considered. note: + * + min_cpu = NR_CPUS only if no CPUs are in the set of + * affinity && hmp_domain cpus + * + min_cpu will always otherwise equal one of the CPUs in + * the hmp domain + * + when more than one CPU has the same load, the one which + * is least-recently-disturbed by an HMP migration will be + * selected + * + if all CPUs are equally loaded or idle and the times are + * all the same, the first in the set will be used + * + if affinity is not set, cpu_online_mask is used + */ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, - int *min_cpu) + int *min_cpu, struct cpumask *affinity) { int cpu; int min_cpu_runnable_temp = NR_CPUS; @@ -3961,8 +4050,15 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, unsigned long min_runnable_load = INT_MAX; unsigned long contrib; struct sched_avg *avg; + struct cpumask temp_cpumask; + /* + * only look at CPUs allowed if specified, + * otherwise look at all online CPUs in the + * right HMP domain + */ + cpumask_and(&temp_cpumask, &hmpd->cpus, affinity ? affinity : cpu_online_mask); - for_each_cpu_mask(cpu, hmpd->cpus) { + for_each_cpu_mask(cpu, temp_cpumask) { avg = &cpu_rq(cpu)->avg; /* used for both up and down migration */ curr_last_migration = avg->hmp_last_up_migration ? @@ -4024,27 +4120,36 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se) return NR_CPUS; /* Is there an idle CPU in the current domain */ - min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL); - if (min_usage == 0) + min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL, NULL); + if (min_usage == 0) { + trace_sched_hmp_offload_abort(cpu, min_usage, "load"); return NR_CPUS; + } /* Is the task alone on the cpu? */ - if (cpu_rq(cpu)->cfs.h_nr_running < 2) + if (cpu_rq(cpu)->cfs.h_nr_running < 2) { + trace_sched_hmp_offload_abort(cpu, + cpu_rq(cpu)->cfs.h_nr_running, "nr_running"); return NR_CPUS; + } /* Is the task actually starving? */ /* >=25% ratio running/runnable = starving */ - if (hmp_task_starvation(se) > 768) + if (hmp_task_starvation(se) > 768) { + trace_sched_hmp_offload_abort(cpu, hmp_task_starvation(se), + "starvation"); return NR_CPUS; + } /* Does the slower domain have any idle CPUs? */ - min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu); - if (min_usage > 0) - return NR_CPUS; + min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu, + tsk_cpus_allowed(task_of(se))); - if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus)) + if (min_usage == 0) { + trace_sched_hmp_offload_succeed(cpu, dest_cpu); return dest_cpu; - + } else + trace_sched_hmp_offload_abort(cpu,min_usage,"slowdomain"); return NR_CPUS; } #endif /* CONFIG_SCHED_HMP */ @@ -4076,30 +4181,13 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) #ifdef CONFIG_SCHED_HMP /* always put non-kernel forking tasks on a big domain */ if (p->mm && (sd_flag & SD_BALANCE_FORK)) { - if(hmp_cpu_is_fastest(prev_cpu)) { - struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains); - __always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu); - if (new_cpu != NR_CPUS && - cpumask_test_cpu(new_cpu, - tsk_cpus_allowed(p))) { - hmp_next_up_delay(&p->se, new_cpu); - return new_cpu; - } else { - new_cpu = cpumask_any_and( - &hmp_faster_domain(cpu)->cpus, - tsk_cpus_allowed(p)); - if (new_cpu < nr_cpu_ids) { - hmp_next_up_delay(&p->se, new_cpu); - return new_cpu; - } - } - } else { - new_cpu = hmp_select_faster_cpu(p, prev_cpu); - if (new_cpu != NR_CPUS) { - hmp_next_up_delay(&p->se, new_cpu); - return new_cpu; - } + new_cpu = hmp_select_faster_cpu(p, prev_cpu); + if (new_cpu != NR_CPUS) { + hmp_next_up_delay(&p->se, new_cpu); + return new_cpu; } + /* failed to perform HMP fork balance, use normal balance */ + new_cpu = cpu; } #endif @@ -4178,16 +4266,24 @@ unlock: rcu_read_unlock(); #ifdef CONFIG_SCHED_HMP + prev_cpu = task_cpu(p); + if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) { hmp_next_up_delay(&p->se, new_cpu); - trace_sched_hmp_migrate(p, new_cpu, 0); + trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP); return new_cpu; } if (hmp_down_migration(prev_cpu, &p->se)) { +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING + new_cpu = hmp_best_little_cpu(p, prev_cpu); +#else new_cpu = hmp_select_slower_cpu(p, prev_cpu); - hmp_next_down_delay(&p->se, new_cpu); - trace_sched_hmp_migrate(p, new_cpu, 0); - return new_cpu; +#endif + if (new_cpu != prev_cpu) { + hmp_next_down_delay(&p->se, new_cpu); + trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP); + return new_cpu; + } } /* Make sure that the task stays in its previous hmp domain */ if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus)) @@ -6154,16 +6250,49 @@ static struct { unsigned long next_balance; /* in jiffy units */ } nohz ____cacheline_aligned; +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING +/* + * Decide if the tasks on the busy CPUs in the + * littlest domain would benefit from an idle balance + */ +static int hmp_packing_ilb_needed(int cpu) +{ + struct hmp_domain *hmp; + /* always allow ilb on non-slowest domain */ + if (!hmp_cpu_is_slowest(cpu)) + return 1; + + hmp = hmp_cpu_domain(cpu); + for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) { + /* only idle balance if a CPU is loaded over threshold */ + if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold) + return 1; + } + return 0; +} +#endif + static inline int find_new_ilb(int call_cpu) { int ilb = cpumask_first(nohz.idle_cpus_mask); #ifdef CONFIG_SCHED_HMP + int ilb_needed = 1; + /* restrict nohz balancing to occur in the same hmp domain */ ilb = cpumask_first_and(nohz.idle_cpus_mask, &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus); + +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING + if (ilb < nr_cpu_ids) + ilb_needed = hmp_packing_ilb_needed(ilb); #endif + + if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb)) + return ilb; +#else if (ilb < nr_cpu_ids && idle_cpu(ilb)) return ilb; +#endif return nr_cpu_ids; } @@ -6489,11 +6618,9 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { } static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se) { struct task_struct *p = task_of(se); + int temp_target_cpu; u64 now; - if (target_cpu) - *target_cpu = NR_CPUS; - if (hmp_cpu_is_fastest(cpu)) return 0; @@ -6516,13 +6643,12 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti * idle CPU or 1023 for any partly-busy one. * Be explicit about requirement for an idle CPU. */ - if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) != 0) - return 0; - - if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus, - tsk_cpus_allowed(p))) + if (hmp_domain_min_load(hmp_faster_domain(cpu), &temp_target_cpu, + tsk_cpus_allowed(p)) == 0 && temp_target_cpu != NR_CPUS) { + if(target_cpu) + *target_cpu = temp_target_cpu; return 1; - + } return 0; } @@ -6532,8 +6658,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se) struct task_struct *p = task_of(se); u64 now; - if (hmp_cpu_is_slowest(cpu)) + if (hmp_cpu_is_slowest(cpu)) { +#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING + if(hmp_packing_enabled) + return 1; + else +#endif return 0; + } #ifdef CONFIG_SCHED_HMP_PRIO_FILTER /* Filter by task priority */ @@ -6702,6 +6834,7 @@ static int hmp_active_task_migration_cpu_stop(void *data) rcu_read_unlock(); double_unlock_balance(busiest_rq, target_rq); out_unlock: + put_task_struct(p); busiest_rq->active_balance = 0; raw_spin_unlock_irq(&busiest_rq->lock); return 0; @@ -6775,6 +6908,7 @@ static int hmp_idle_pull_cpu_stop(void *data) rcu_read_unlock(); double_unlock_balance(busiest_rq, target_rq); out_unlock: + put_task_struct(p); busiest_rq->active_balance = 0; raw_spin_unlock_irq(&busiest_rq->lock); return 0; @@ -6820,11 +6954,12 @@ static void hmp_force_up_migration(int this_cpu) p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { if (!target->active_balance) { + get_task_struct(p); target->active_balance = 1; target->push_cpu = target_cpu; target->migrate_task = p; force = 1; - trace_sched_hmp_migrate(p, target->push_cpu, 1); + trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE); hmp_next_up_delay(&p->se, target->push_cpu); } } @@ -6835,12 +6970,14 @@ static void hmp_force_up_migration(int this_cpu) * require extensive book keeping. */ curr = hmp_get_lightest_task(orig, 1); + p = task_of(curr); target->push_cpu = hmp_offload_down(cpu, curr); if (target->push_cpu < NR_CPUS) { + get_task_struct(p); target->active_balance = 1; target->migrate_task = p; force = 1; - trace_sched_hmp_migrate(p, target->push_cpu, 2); + trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD); hmp_next_down_delay(&p->se, target->push_cpu); } } @@ -6915,11 +7052,12 @@ static unsigned int hmp_idle_pull(int this_cpu) /* now we have a candidate */ raw_spin_lock_irqsave(&target->lock, flags); if (!target->active_balance && task_rq(p) == target) { + get_task_struct(p); target->active_balance = 1; target->push_cpu = this_cpu; target->migrate_task = p; force = 1; - trace_sched_hmp_migrate(p, target->push_cpu, 3); + trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL); hmp_next_up_delay(&p->se, target->push_cpu); } raw_spin_unlock_irqrestore(&target->lock, flags); diff --git a/kernel/smp.c b/kernel/smp.c index 4dba0f7b72a..23ccc67dcbb 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -12,6 +12,8 @@ #include <linux/gfp.h> #include <linux/smp.h> #include <linux/cpu.h> +#define CREATE_TRACE_POINTS +#include <trace/events/smp.h> #include "smpboot.h" @@ -159,8 +161,10 @@ void generic_exec_single(int cpu, struct call_single_data *csd, int wait) * locking and barrier primitives. Generic code isn't really * equipped to do the right thing... */ - if (ipi) + if (ipi) { + trace_smp_call_func_send(csd->func, cpu); arch_send_call_function_single_ipi(cpu); + } if (wait) csd_lock_wait(csd); @@ -197,8 +201,9 @@ void generic_smp_call_function_single_interrupt(void) * so save them away before making the call: */ csd_flags = csd->flags; - + trace_smp_call_func_entry(csd->func); csd->func(csd->info); + trace_smp_call_func_exit(csd->func); /* * Unlocked CSDs are valid through generic_exec_single(): @@ -228,6 +233,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int this_cpu; int err = 0; + trace_smp_call_func_send(func, cpu); /* * prevent preemption and reschedule on another processor, * as well as CPU removal @@ -245,7 +251,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, if (cpu == this_cpu) { local_irq_save(flags); + trace_smp_call_func_entry(func); func(info); + trace_smp_call_func_exit(func); local_irq_restore(flags); } else { if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { |