author     Mark Brown <broonie@kernel.org>  2014-11-28 19:38:10 +0000
committer  Robin Randhawa <robin.randhawa@arm.com>  2015-04-09 12:25:44 +0100
commit     51f266e848e6e566495b62b61a3a886397dce6ea (patch)
tree       95bf37db6540ae51e89add36a1be42cdb71018cb
parent     a0f3f42e205b0d1a3cb3d9138b3dc3f046aa74f3 (diff)
gts: Revert GTS code
In order to facilitate EAS development, revert the GTS changes to bring us
closer to a vanilla kernel.

Signed-off-by: Mark Brown <broonie@linaro.org>
-rw-r--r--  Documentation/arm/small_task_packing.txt  136
-rw-r--r--  Documentation/devicetree/bindings/arm/pmu.txt  3
-rw-r--r--  Documentation/kernel-parameters.txt  9
-rw-r--r--  arch/arm/Kconfig  103
-rw-r--r--  arch/arm/include/asm/pmu.h  12
-rw-r--r--  arch/arm/include/asm/topology.h  34
-rw-r--r--  arch/arm/kernel/hw_breakpoint.c  3
-rw-r--r--  arch/arm/kernel/perf_event.c  19
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c  117
-rw-r--r--  arch/arm/kernel/perf_event_v7.c  57
-rw-r--r--  arch/arm/kernel/smp.c  5
-rw-r--r--  arch/arm/kernel/topology.c  135
-rw-r--r--  arch/arm64/kernel/smp.c  3
-rw-r--r--  drivers/irqchip/irq-gic.c  5
-rw-r--r--  include/linux/sched.h  21
-rw-r--r--  include/linux/vmstat.h  2
-rw-r--r--  include/trace/events/arm-ipi.h  100
-rw-r--r--  include/trace/events/sched.h  274
-rw-r--r--  include/trace/events/smp.h  90
-rw-r--r--  kernel/irq/irqdesc.c  29
-rw-r--r--  kernel/sched/core.c  38
-rw-r--r--  kernel/sched/debug.c  9
-rw-r--r--  kernel/sched/fair.c  1858
-rw-r--r--  kernel/sched/sched.h  14
-rw-r--r--  kernel/smp.c  12
-rw-r--r--  linaro/configs/big-LITTLE-MP.conf  12
-rw-r--r--  mm/vmstat.c  95
27 files changed, 90 insertions, 3105 deletions
diff --git a/Documentation/arm/small_task_packing.txt b/Documentation/arm/small_task_packing.txt
deleted file mode 100644
index 43f0a8b80234..000000000000
--- a/Documentation/arm/small_task_packing.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-Small Task Packing in the big.LITTLE MP Reference Patch Set
-
-What is small task packing?
-----
-Simply that the scheduler will fit as many small tasks on a single CPU
-as possible before using other CPUs. A small task is defined as one
-whose tracked load is less than 90% of a NICE_0 task. This is a change
-from the usual behavior since the scheduler will normally use an idle
-CPU for a waking task unless that task is considered cache hot.
-
-
-How is it implemented?
-----
-Since all small tasks must wake up relatively frequently, the main
-requirement for packing small tasks is to select a partly-busy CPU when
-waking rather than looking for an idle CPU. We use the tracked load of
-the CPU runqueue to determine how heavily loaded each CPU is and the
-tracked load of the task to determine if it will fit on the CPU. We
-always start with the lowest-numbered CPU in a sched domain and stop
-looking when we find a CPU with enough space for the task.
-
-Some further tweaks are necessary to suppress load balancing when the
-CPU is not fully loaded, otherwise the scheduler attempts to spread
-tasks evenly across the domain.
-
-
-How does it interact with the HMP patches?
-----
-Firstly, we only enable packing on the little domain. The big domain
-is intended to spread tasks amongst the available CPUs,
-one task per CPU. The little domain, however, attempts to use as
-little power as possible while servicing its tasks.
-
-Secondly, since we offload big tasks onto little CPUs in order to try
-to devote one CPU to each task, we have a threshold above which we do
-not try to pack a task and instead will select an idle CPU if possible.
-This maintains maximum forward progress for busy tasks temporarily
-demoted from big CPUs.
-
-
-Can the behaviour be tuned?
-----
-Yes, the load level of a 'full' CPU can be easily modified in the source
-and is exposed through sysfs as /sys/kernel/hmp/packing_limit to be
-changed at runtime. The presence of the packing behaviour is controlled
-by CONFIG_SCHED_HMP_LITTLE_PACKING and can be disabled at run-time
-using /sys/kernel/hmp/packing_enable.
-The definition of a small task is hard coded as 90% of NICE_0_LOAD
-and cannot be modified at run time.
-
-
-Why do I need to tune it?
-----
-The optimal configuration is likely to be different depending upon the
-design and manufacturing of your SoC.
-
-In the main, there are two system effects from enabling small task
-packing.
-
-1. CPU operating point may increase
-2. wakeup latency of tasks may be increased
-
-There are also likely to be secondary effects from loading one CPU
-rather than spreading tasks.
-
-Note that all of these system effects are dependent upon the workload
-under consideration.
-
-
-CPU Operating Point
-----
-The primary impact of loading one CPU with a number of light tasks is to
-increase the compute requirement of that CPU since it is no longer idle
-as often. Increased compute requirement causes an increase in the
-frequency of the CPU through CPUfreq.
-
-Consider this example:
-We have a system with 3 CPUs which can operate at any frequency between
-350MHz and 1GHz. The system has 6 tasks which would each produce 10%
-load at 1GHz. The scheduler has frequency-invariant load scaling
-enabled. Our DVFS governor aims for 80% utilization at the chosen
-frequency.
-
-Without task packing, these tasks will be spread out amongst all CPUs
-such that each has 2. This will produce roughly 20% system load, and
-the frequency of the package will remain at 350MHz.
-
-With task packing set to the default packing_limit, all of these tasks
-will sit on one CPU and require a package frequency of ~750MHz to reach
-80% utilization. (0.75 = 0.6 / 0.8).
-
-When a package operates on a single frequency domain, all CPUs in that
-package share frequency and voltage.
-
-Depending upon the SoC implementation there can be a significant amount
-of energy lost to leakage from idle CPUs. The decision about how
-loaded a CPU must be to be considered 'full' is therefore controllable
-through sysfs (sys/kernel/hmp/packing_limit) and directly in the code.
-
-Continuing the example, let's set packing_limit to 450, which means we
-will pack tasks until the total load of all running tasks >= 450. In
-practice, this is very similar to a 55% idle 1GHz CPU.
-
-Now we are only able to place 4 tasks on CPU0, and two will overflow
-onto CPU1. CPU0 will have a load of 40% and CPU1 will have a load of
-20%. In order to still hit 80% utilization, CPU0 now only needs to
-operate at (0.4*0.8=0.32) 320MHz, which means that the lowest operating
-point will be selected, the same as in the non-packing case, except that
-now CPU2 is no longer needed and can be power-gated.
-
-In order to use less energy, the saving from power-gating CPU2 must be
-more than the energy spent running CPU0 for the extra cycles. This
-depends upon the SoC implementation.
-
-This is obviously a contrived example requiring all the tasks to
-be runnable at the same time, but it illustrates the point.
-
-
-Wakeup Latency
-----
-This is an unavoidable consequence of trying to pack tasks together
-rather than giving them a CPU each. If you cannot find an acceptable
-level of wakeup latency, you should turn packing off.
-
-Cyclictest is a good test application for determining the added latency
-when configuring packing.
-
-
-Why is it turned off for the VersatileExpress V2P_CA15A7 CoreTile?
-----
-Simply, this core tile only has power gating for the whole A7 package.
-When small task packing is enabled, all our low-energy use cases
-normally fit onto one A7 CPU. We therefore end up with 2 mostly-idle
-CPUs and one mostly-busy CPU. This decreases the amount of time
-available where the whole package is idle and can be turned off.
-
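The document removed above describes the packing decision only in prose. As a
rough, self-contained sketch of that first-fit selection (all names, constants
and loads below are illustrative assumptions, not the deleted scheduler code):

/*
 * Sketch of the small-task packing decision described in the removed
 * document: walk the little domain from the lowest-numbered CPU and
 * pick the first one with room for the waking task.
 */
#include <stdio.h>

#define NR_LITTLE_CPUS  4
#define NICE_0_LOAD     1024
#define PACKING_LIMIT   (NICE_0_LOAD * 9 / 8)   /* default "full" threshold */

static int select_packing_cpu(const unsigned int rq_load[], unsigned int task_load)
{
        int cpu;

        for (cpu = 0; cpu < NR_LITTLE_CPUS; cpu++)
                if (rq_load[cpu] + task_load < PACKING_LIMIT)
                        return cpu;

        return -1;      /* nothing has room; fall back to the usual idle-CPU search */
}

int main(void)
{
        unsigned int rq_load[NR_LITTLE_CPUS] = { 900, 400, 0, 0 };     /* tracked runqueue loads */
        unsigned int task_load = 300;   /* "small": well under 90% of NICE_0_LOAD */

        printf("waking task packs onto CPU%d\n", select_packing_cpu(rq_load, task_load));
        return 0;
}

With these example loads the task overflows CPU0 (900 + 300 exceeds the limit)
and lands on CPU1, mirroring the packing_limit overflow behaviour described in
the text.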
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 4ce82d045a6b..343781b9f246 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -16,9 +16,6 @@ Required properties:
"arm,arm1176-pmu"
"arm,arm1136-pmu"
- interrupts : 1 combined interrupt or 1 per core.
-- cluster : a phandle to the cluster to which it belongs
- If there is more than one cluster with the same CPU type
- then there should be separate PMU nodes per cluster.
Example:
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d592974d77d7..76b5347357e2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1241,15 +1241,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
See comment before ip2_setup() in
drivers/char/ip2/ip2base.c.
- irqaffinity= [SMP] Set the default irq affinity mask
- Format:
- <cpu number>,...,<cpu number>
- or
- <cpu number>-<cpu number>
- (must be a positive range in ascending order)
- or a mixture
- <cpu number>,...,<cpu number>-<cpu number>
-
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
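For reference, the removed irqaffinity= parameter used the cpu-list syntax shown
above; a hypothetical command line combining single CPUs and a range would be

        irqaffinity=0,3,5-7

which restricts the default IRQ affinity mask to CPUs 0, 3 and 5 through 7.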
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1a86004f3f27..e1fa1c229a5b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1497,109 +1497,6 @@ config SCHED_SMT
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
-config DISABLE_CPU_SCHED_DOMAIN_BALANCE
- bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
- help
- Disables scheduler load-balancing at CPU sched domain level.
-
-config SCHED_HMP
- bool "(EXPERIMENTAL) Heterogeneous multiprocessor scheduling"
- depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
- help
- Experimental scheduler optimizations for heterogeneous platforms.
- Attempts to introspectively select task affinity to optimize power
- and performance. Basic support for multiple (>2) cpu types is in place,
- but it has only been tested with two types of cpus.
- There is currently no support for migration of task groups, hence
- !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
- between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
- When turned on, this option adds sys/kernel/hmp directory which
- contains the following files:
- up_threshold - the load average threshold used for up migration
- (0 - 1023)
- down_threshold - the load average threshold used for down migration
- (0 - 1023)
- hmp_domains - a list of cpumasks for the present HMP domains,
- starting with the 'biggest' and ending with the
- 'smallest'.
- Note that both the threshold files can be written at runtime to
- control scheduler behaviour.
-
-config SCHED_HMP_PRIO_FILTER
- bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
- depends on SCHED_HMP
- help
- Enables task priority based HMP migration filter. Any task with
- a NICE value above the threshold will always be on low-power cpus
- with less compute capacity.
-
-config SCHED_HMP_PRIO_FILTER_VAL
- int "NICE priority threshold"
- default 5
- depends on SCHED_HMP_PRIO_FILTER
-
-config HMP_FAST_CPU_MASK
- string "HMP scheduler fast CPU mask"
- depends on SCHED_HMP
- help
- Leave empty to use device tree information.
- Specify the cpuids of the fast CPUs in the system as a list string,
- e.g. cpuid 0+1 should be specified as 0-1.
-
-config HMP_SLOW_CPU_MASK
- string "HMP scheduler slow CPU mask"
- depends on SCHED_HMP
- help
- Leave empty to use device tree information.
- Specify the cpuids of the slow CPUs in the system as a list string,
- e.g. cpuid 0+1 should be specified as 0-1.
-
-config HMP_VARIABLE_SCALE
- bool "Allows changing the load tracking scale through sysfs"
- depends on SCHED_HMP
- help
- When turned on, this option exports the load average period value
- for the load tracking patches through sysfs.
- The values can be modified to change the rate of load accumulation
- used for HMP migration. 'load_avg_period_ms' is the time in ms to
- reach a load average of 0.5 for an idle task of 0 load average
- ratio which becomes 100% busy.
- For example, with load_avg_period_ms = 128 and up_threshold = 512,
- a running task with a load of 0 will be migrated to a bigger CPU after
- 128ms, because after 128ms its load_avg_ratio is 0.5 and the real
- up_threshold is 0.5.
- This patch has the same behavior as changing the Y of the load
- average computation to
- (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
- but removes intermediate overflows in computation.
-
-config HMP_FREQUENCY_INVARIANT_SCALE
- bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
- depends on SCHED_HMP && CPU_FREQ
- help
- Scales the current load contribution in line with the frequency
- of the CPU that the task was executed on.
- In this version, we use a simple linear scale derived from the
- maximum frequency reported by CPUFreq.
- Restricting tracked load to be scaled by the CPU's frequency
- represents the consumption of possible compute capacity
- (rather than consumption of actual instantaneous capacity as
- normal) and allows the HMP migration's simple threshold
- migration strategy to interact more predictably with CPUFreq's
- asynchronous compute capacity changes.
-
-config SCHED_HMP_LITTLE_PACKING
- bool "Small task packing for HMP"
- depends on SCHED_HMP
- default n
- help
- Allows the HMP Scheduler to pack small tasks into CPUs in the
- smallest HMP domain.
- Controlled by two sysfs files in sys/kernel/hmp.
- packing_enable: 1 to enable, 0 to disable packing. Default 1.
- packing_limit: runqueue load ratio where a RQ is considered
- to be full. Default is NICE_0_LOAD * 9/8.
-
config HAVE_ARM_SCU
bool
help
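The HMP_VARIABLE_SCALE help text removed above relates load_avg_period_ms to an
equivalent decay factor, (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms). A
small stand-alone numeric check of that relation (constants are assumptions
taken from the help text, not from kernel sources) might look like:

/*
 * Numeric illustration of the decay relation quoted in the removed
 * HMP_VARIABLE_SCALE help text. Compile with: cc decay.c -lm
 */
#include <math.h>
#include <stdio.h>

#define LOAD_AVG_PERIOD 32.0    /* assumed default half-life of the load series, in ms */

int main(void)
{
        double load_avg_period_ms = 128.0;      /* example value from the help text */
        double y = pow(1002.0 / 1024.0, LOAD_AVG_PERIOD / load_avg_period_ms);
        /* ratio reached by a previously idle task that then runs 100% busy */
        double ratio = 1.0 - pow(y, load_avg_period_ms);

        printf("effective per-ms decay y = %.6f, ratio after %.0f ms = %.3f\n",
               y, load_avg_period_ms, ratio);
        return 0;
}

This prints a ratio of roughly 0.5 after 128 ms, matching the up_threshold = 512
example given in the help text.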
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 0cd7824ca762..f24edad26c70 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -62,19 +62,9 @@ struct pmu_hw_events {
raw_spinlock_t pmu_lock;
};
-struct cpupmu_regs {
- u32 pmc;
- u32 pmcntenset;
- u32 pmuseren;
- u32 pmintenset;
- u32 pmxevttype[8];
- u32 pmxevtcnt[8];
-};
-
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
- cpumask_t valid_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct perf_event *event);
@@ -91,8 +81,6 @@ struct arm_pmu {
int (*request_irq)(struct arm_pmu *, irq_handler_t handler);
void (*free_irq)(struct arm_pmu *);
int (*map_event)(struct perf_event *event);
- void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
- void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 983fa7c153a2..58b8b84adcd2 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -26,45 +26,11 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
-int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
-
-#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
-/* Common values for CPUs */
-#ifndef SD_CPU_INIT
-#define SD_CPU_INIT (struct sched_domain) { \
- .min_interval = 1, \
- .max_interval = 4, \
- .busy_factor = 64, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 2, \
- .idle_idx = 1, \
- .newidle_idx = 0, \
- .wake_idx = 0, \
- .forkexec_idx = 0, \
- \
- .flags = 0*SD_LOAD_BALANCE \
- | 1*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 1*SD_WAKE_AFFINE \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 0*SD_SERIALIZE \
- , \
- .last_balance = jiffies, \
- .balance_interval = 1, \
-}
-#endif
-#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
#else
static inline void init_cpu_topology(void) { }
static inline void store_cpu_topology(unsigned int cpuid) { }
-static inline int cluster_to_logical_mask(unsigned int socket_id,
- cpumask_t *cluster_mask) { return -EINVAL; }
#endif
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7eee611b6ee5..2bdcb0f4cf5d 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1049,8 +1049,7 @@ static struct notifier_block dbg_cpu_pm_nb = {
static void __init pm_init(void)
{
- if (has_ossr)
- cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+ cpu_pm_register_notifier(&dbg_cpu_pm_nb);
}
#else
static inline void pm_init(void)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index b41749fe56dc..ace0ce8f6641 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,7 +12,6 @@
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
-#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -87,9 +86,6 @@ armpmu_map_event(struct perf_event *event,
return armpmu_map_cache_event(cache_map, config);
case PERF_TYPE_RAW:
return armpmu_map_raw_event(raw_event_mask, config);
- default:
- if (event->attr.type >= PERF_TYPE_MAX)
- return armpmu_map_raw_event(raw_event_mask, config);
}
return -ENOENT;
@@ -167,8 +163,6 @@ armpmu_stop(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
- return;
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
@@ -185,8 +179,6 @@ static void armpmu_start(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
- return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
@@ -214,9 +206,6 @@ armpmu_del(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
- return;
-
armpmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);
@@ -233,10 +222,6 @@ armpmu_add(struct perf_event *event, int flags)
int idx;
int err = 0;
- /* An event following a process won't be stopped earlier */
- if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
- return 0;
-
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
@@ -446,10 +431,6 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
- if (event->cpu != -1 &&
- !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
- return -ENOENT;
-
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index e0665b871f5b..0e9609657c79 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,7 +19,6 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
-#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of.h>
@@ -32,36 +31,33 @@
#include <asm/pmu.h>
/* Set at runtime when we know what CPU type we are. */
-static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
+static struct arm_pmu *cpu_pmu;
static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
-
/*
* Despite the names, these two functions are CPU-specific and are used
* by the OProfile/perf code.
*/
const char *perf_pmu_name(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
- if (!pmu)
+ if (!cpu_pmu)
return NULL;
- return pmu->name;
+ return cpu_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ int max_events = 0;
- if (!pmu)
- return 0;
+ if (cpu_pmu != NULL)
+ max_events = cpu_pmu->num_events;
- return pmu->num_events;
+ return max_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);
@@ -79,13 +75,11 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
{
int i, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
irqs = min(pmu_device->num_resources, num_possible_cpus());
for (i = 0; i < irqs; ++i) {
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
- if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+ if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
if (irq >= 0)
@@ -97,7 +91,6 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
if (!pmu_device)
return -ENODEV;
@@ -110,7 +103,6 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
for (i = 0; i < irqs; ++i) {
err = 0;
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
irq = platform_get_irq(pmu_device, i);
if (irq < 0)
continue;
@@ -120,7 +112,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
* assume that we're running on a uniprocessor machine and
* continue. Otherwise, continue without this interrupt.
*/
- if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
+ if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
irq, i);
continue;
@@ -134,7 +126,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
return err;
}
- cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+ cpumask_set_cpu(i, &cpu_pmu->active_irqs);
}
return 0;
@@ -143,7 +135,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int cpu;
- for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
+ for_each_possible_cpu(cpu) {
struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
events->events = per_cpu(hw_events, cpu);
events->used_mask = per_cpu(used_mask, cpu);
@@ -156,7 +148,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
/* Ensure the PMU has sane values out of reset. */
if (cpu_pmu->reset)
- on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
+ on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
}
/*
@@ -168,46 +160,21 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
-
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
- if (pmu && pmu->reset)
- pmu->reset(pmu);
+ if (cpu_pmu && cpu_pmu->reset)
+ cpu_pmu->reset(cpu_pmu);
else
return NOTIFY_DONE;
return NOTIFY_OK;
}
-static int cpu_pmu_pm_notify(struct notifier_block *b,
- unsigned long action, void *hcpu)
-{
- int cpu = smp_processor_id();
- struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
- struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
-
- if (!pmu)
- return NOTIFY_DONE;
-
- if (action == CPU_PM_ENTER && pmu->save_regs) {
- pmu->save_regs(pmu, pmuregs);
- } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
- pmu->restore_regs(pmu, pmuregs);
- }
-
- return NOTIFY_OK;
-}
-
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
.notifier_call = cpu_pmu_notify,
};
-static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
- .notifier_call = cpu_pmu_pm_notify,
-};
-
/*
* PMU platform driver and devicetree bindings.
*/
@@ -269,9 +236,6 @@ static int probe_current_pmu(struct arm_pmu *pmu)
break;
}
- /* assume PMU support all the CPUs in this case */
- cpumask_setall(&pmu->valid_cpus);
-
put_cpu();
return ret;
}
@@ -279,10 +243,15 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
+ int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
- int ret = 0;
- int cpu;
+ int ret = -ENODEV;
+
+ if (cpu_pmu) {
+ pr_info("attempt to register multiple PMU devices!");
+ return -ENOSPC;
+ }
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
if (!pmu) {
@@ -291,28 +260,8 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
}
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
- smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
- struct device_node *ncluster;
- int cluster = -1;
- cpumask_t sibling_mask;
-
- ncluster = of_parse_phandle(node, "cluster", 0);
- if (ncluster) {
- int len;
- const u32 *hwid;
- hwid = of_get_property(ncluster, "reg", &len);
- if (hwid && len == 4)
- cluster = be32_to_cpup(hwid);
- }
- /* set sibling mask to all cpu mask if socket is not specified */
- if (cluster == -1 ||
- cluster_to_logical_mask(cluster, &sibling_mask))
- cpumask_setall(&sibling_mask);
-
- smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
-
- /* now set the valid_cpus after init */
- cpumask_copy(&pmu->valid_cpus, &sibling_mask);
+ init_fn = of_id->data;
+ ret = init_fn(pmu);
} else {
ret = probe_current_pmu(pmu);
}
@@ -322,12 +271,10 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
goto out_free;
}
- for_each_cpu_mask(cpu, pmu->valid_cpus)
- per_cpu(cpu_pmu, cpu) = pmu;
-
- pmu->plat_device = pdev;
- cpu_pmu_init(pmu);
- ret = armpmu_register(pmu, -1);
+ cpu_pmu = pmu;
+ cpu_pmu->plat_device = pdev;
+ cpu_pmu_init(cpu_pmu);
+ ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
if (!ret)
return 0;
@@ -356,17 +303,9 @@ static int __init register_pmu_driver(void)
if (err)
return err;
- err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
- if (err) {
- unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
- return err;
- }
-
err = platform_driver_register(&cpu_pmu_driver);
- if (err) {
- cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
+ if (err)
unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
- }
return err;
}
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 654db5030c31..039cffb053a7 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -950,51 +950,6 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
}
#endif
-static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
- struct cpupmu_regs *regs)
-{
- unsigned int cnt;
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
- if (!(regs->pmc & ARMV7_PMNC_E))
- return;
-
- asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
- asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
- asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
- for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mrc p15, 0, %0, c9, c13, 1"
- : "=r"(regs->pmxevttype[cnt]));
- asm volatile("mrc p15, 0, %0, c9, c13, 2"
- : "=r"(regs->pmxevtcnt[cnt]));
- }
- return;
-}
-
-static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
- struct cpupmu_regs *regs)
-{
- unsigned int cnt;
- if (!(regs->pmc & ARMV7_PMNC_E))
- return;
-
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
- asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
- asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
- for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mcr p15, 0, %0, c9, c13, 1"
- : : "r"(regs->pmxevttype[cnt]));
- asm volatile("mcr p15, 0, %0, c9, c13, 2"
- : : "r"(regs->pmxevtcnt[cnt]));
- }
- asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
-}
-
static void armv7pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
@@ -1268,8 +1223,6 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->start = armv7pmu_start;
cpu_pmu->stop = armv7pmu_stop;
cpu_pmu->reset = armv7pmu_reset;
- cpu_pmu->save_regs = armv7pmu_save_regs;
- cpu_pmu->restore_regs = armv7pmu_restore_regs;
cpu_pmu->max_period = (1LLU << 32) - 1;
};
@@ -1287,7 +1240,7 @@ static u32 armv7_read_num_pmnc_events(void)
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7_Cortex_A8";
+ cpu_pmu->name = "ARMv7 Cortex-A8";
cpu_pmu->map_event = armv7_a8_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1296,7 +1249,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7_Cortex_A9";
+ cpu_pmu->name = "ARMv7 Cortex-A9";
cpu_pmu->map_event = armv7_a9_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1305,7 +1258,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7_Cortex_A5";
+ cpu_pmu->name = "ARMv7 Cortex-A5";
cpu_pmu->map_event = armv7_a5_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1314,7 +1267,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7_Cortex_A15";
+ cpu_pmu->name = "ARMv7 Cortex-A15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1324,7 +1277,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7_Cortex_A7";
+ cpu_pmu->name = "ARMv7 Cortex-A7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index f2724e475b96..40bbafe7a1b7 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -46,9 +46,6 @@
#include <asm/virt.h>
#include <asm/mach/arch.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/arm-ipi.h>
-
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -676,7 +673,6 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
if (ipinr < NR_IPI)
__inc_irq_stat(cpu, ipi_irqs[ipinr]);
- trace_arm_ipi_entry(ipinr);
switch (ipinr) {
case IPI_WAKEUP:
break;
@@ -726,7 +722,6 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
cpu, ipinr);
break;
}
- trace_arm_ipi_exit(ipinr);
set_irq_regs(old_regs);
}
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 677da58d9e88..c5a59546a256 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -23,7 +23,6 @@
#include <linux/slab.h>
#include <asm/cputype.h>
-#include <asm/smp_plat.h>
#include <asm/topology.h>
/*
@@ -290,140 +289,6 @@ void store_cpu_topology(unsigned int cpuid)
cpu_topology[cpuid].socket_id, mpidr);
}
-
-#ifdef CONFIG_SCHED_HMP
-
-static const char * const little_cores[] = {
- "arm,cortex-a7",
- NULL,
-};
-
-static bool is_little_cpu(struct device_node *cn)
-{
- const char * const *lc;
- for (lc = little_cores; *lc; lc++)
- if (of_device_is_compatible(cn, *lc))
- return true;
- return false;
-}
-
-void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
- struct cpumask *slow)
-{
- struct device_node *cn = NULL;
- int cpu;
-
- cpumask_clear(fast);
- cpumask_clear(slow);
-
- /*
- * Use the config options if they are given. This helps testing
- * HMP scheduling on systems without a big.LITTLE architecture.
- */
- if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
- if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
- WARN(1, "Failed to parse HMP fast cpu mask!\n");
- if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
- WARN(1, "Failed to parse HMP slow cpu mask!\n");
- return;
- }
-
- /*
- * Else, parse device tree for little cores.
- */
- while ((cn = of_find_node_by_type(cn, "cpu"))) {
-
- const u32 *mpidr;
- int len;
-
- mpidr = of_get_property(cn, "reg", &len);
- if (!mpidr || len != 4) {
- pr_err("* %s missing reg property\n", cn->full_name);
- continue;
- }
-
- cpu = get_logical_index(be32_to_cpup(mpidr));
- if (cpu == -EINVAL) {
- pr_err("couldn't get logical index for mpidr %x\n",
- be32_to_cpup(mpidr));
- break;
- }
-
- if (is_little_cpu(cn))
- cpumask_set_cpu(cpu, slow);
- else
- cpumask_set_cpu(cpu, fast);
- }
-
- if (!cpumask_empty(fast) && !cpumask_empty(slow))
- return;
-
- /*
- * We didn't find both big and little cores so let's call all cores
- * fast as this will keep the system running, with all cores being
- * treated equal.
- */
- cpumask_setall(fast);
- cpumask_clear(slow);
-}
-
-struct cpumask hmp_slow_cpu_mask;
-
-void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
-{
- struct cpumask hmp_fast_cpu_mask;
- struct hmp_domain *domain;
-
- arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
-
- /*
- * Initialize hmp_domains
- * Must be ordered with respect to compute capacity.
- * Fastest domain at head of list.
- */
- if(!cpumask_empty(&hmp_slow_cpu_mask)) {
- domain = (struct hmp_domain *)
- kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
- cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
- cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
- list_add(&domain->hmp_domains, hmp_domains_list);
- }
- domain = (struct hmp_domain *)
- kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
- cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
- cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
- list_add(&domain->hmp_domains, hmp_domains_list);
-}
-#endif /* CONFIG_SCHED_HMP */
-
-
-/*
- * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
- * @socket_id: cluster HW identifier
- * @cluster_mask: the cpumask location to be initialized, modified by the
- * function only if return value == 0
- *
- * Return:
- *
- * 0 on success
- * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
- */
-int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
-{
- int cpu;
-
- if (!cluster_mask)
- return -EINVAL;
-
- for_each_online_cpu(cpu)
- if (socket_id == topology_physical_package_id(cpu)) {
- cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
- return 0;
- }
-
- return -EINVAL;
-}
-
/*
* init_cpu_topology is called at boot when only one cpu is running
* which prevent simultaneous write access to cpu_topology array
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 9a3c7ef182fd..c4e6c2fc8b63 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -49,9 +49,6 @@
#include <asm/tlbflush.h>
#include <asm/ptrace.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/arm-ipi.h>
-
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4cb670e61707..7a17951a9d66 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -39,7 +39,6 @@
#include <linux/slab.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqchip/arm-gic.h>
-#include <trace/events/arm-ipi.h>
#include <asm/cputype.h>
#include <asm/irq.h>
@@ -605,10 +604,8 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
raw_spin_lock_irqsave(&irq_controller_lock, flags);
/* Convert our logical CPU mask into a physical one. */
- for_each_cpu(cpu, mask) {
- trace_arm_ipi_send(irq, cpu);
+ for_each_cpu(cpu, mask)
map |= gic_cpu_map[cpu];
- }
/*
* Ensure that stores to Normal memory are visible to the
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5229df9d7107..b97ba064a195 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -890,13 +890,6 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
bool cpus_share_cache(int this_cpu, int that_cpu);
-#ifdef CONFIG_SCHED_HMP
-struct hmp_domain {
- struct cpumask cpus;
- struct cpumask possible_cpus;
- struct list_head hmp_domains;
-};
-#endif /* CONFIG_SCHED_HMP */
#else /* CONFIG_SMP */
struct sched_domain_attr;
@@ -943,22 +936,8 @@ struct sched_avg {
u64 last_runnable_update;
s64 decay_count;
unsigned long load_avg_contrib;
- unsigned long load_avg_ratio;
-#ifdef CONFIG_SCHED_HMP
- u64 hmp_last_up_migration;
- u64 hmp_last_down_migration;
-#endif
- u32 usage_avg_sum;
};
-#ifdef CONFIG_SCHED_HMP
-/*
- * We want to avoid boosting any processes forked from init (PID 1)
- * and kthreadd (assumed to be PID 2).
- */
-#define hmp_task_should_forkboost(task) ((task->parent && task->parent->pid > 2))
-#endif
-
#ifdef CONFIG_SCHEDSTATS
struct sched_statistics {
u64 wait_start;
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d4e3d793f79..9044769f2296 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -195,7 +195,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
-bool refresh_cpu_vm_stats(int);
+void refresh_cpu_vm_stats(int);
void refresh_zone_stat_thresholds(void);
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
diff --git a/include/trace/events/arm-ipi.h b/include/trace/events/arm-ipi.h
deleted file mode 100644
index 5d3bd21827be..000000000000
--- a/include/trace/events/arm-ipi.h
+++ /dev/null
@@ -1,100 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM arm-ipi
-
-#if !defined(_TRACE_ARM_IPI_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_ARM_IPI_H
-
-#include <linux/tracepoint.h>
-
-#define show_arm_ipi_name(val) \
- __print_symbolic(val, \
- { 0, "IPI_WAKEUP" }, \
- { 1, "IPI_TIMER" }, \
- { 2, "IPI_RESCHEDULE" }, \
- { 3, "IPI_CALL_FUNC" }, \
- { 4, "IPI_CALL_FUNC_SINGLE" }, \
- { 5, "IPI_CPU_STOP" }, \
- { 6, "IPI_COMPLETION" }, \
- { 7, "IPI_CPU_BACKTRACE" })
-
-DECLARE_EVENT_CLASS(arm_ipi,
-
- TP_PROTO(unsigned int ipi_nr),
-
- TP_ARGS(ipi_nr),
-
- TP_STRUCT__entry(
- __field( unsigned int, ipi )
- ),
-
- TP_fast_assign(
- __entry->ipi = ipi_nr;
- ),
-
- TP_printk("ipi=%u [action=%s]", __entry->ipi,
- show_arm_ipi_name(__entry->ipi))
-);
-
-/**
- * arm_ipi_entry - called in the arm-generic ipi handler immediately before
- * entering ipi-type handler
- * @ipi_nr: ipi number
- *
- * When used in combination with the arm_ipi_exit tracepoint
- * we can determine the ipi handler runtime.
- */
-DEFINE_EVENT(arm_ipi, arm_ipi_entry,
-
- TP_PROTO(unsigned int ipi_nr),
-
- TP_ARGS(ipi_nr)
-);
-
-/**
- * arm_ipi_exit - called in the arm-generic ipi handler immediately
- * after the ipi-type handler returns
- * @ipi_nr: ipi number
- *
- * When used in combination with the arm_ipi_entry tracepoint
- * we can determine the ipi handler runtime.
- */
-DEFINE_EVENT(arm_ipi, arm_ipi_exit,
-
- TP_PROTO(unsigned int ipi_nr),
-
- TP_ARGS(ipi_nr)
-);
-
-/**
- * arm_ipi_send - called as the ipi target mask is built, immediately
- * before the register is written
- * @ipi_nr: ipi number
- * @dest: cpu to send to
- *
- * When used in combination with the arm_ipi_entry tracepoint
- * we can determine the ipi raise to run latency.
- */
-TRACE_EVENT(arm_ipi_send,
-
- TP_PROTO(unsigned int ipi_nr, int dest),
-
- TP_ARGS(ipi_nr, dest),
-
- TP_STRUCT__entry(
- __field( unsigned int, ipi )
- __field( int , dest )
- ),
-
- TP_fast_assign(
- __entry->ipi = ipi_nr;
- __entry->dest = dest;
- ),
-
- TP_printk("dest=%d ipi=%u [action=%s]", __entry->dest,
- __entry->ipi, show_arm_ipi_name(__entry->ipi))
-);
-
-#endif /* _TRACE_ARM_IPI_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 2afcb71857fd..e5586caff67a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -430,280 +430,6 @@ TRACE_EVENT(sched_pi_setprio,
__entry->oldprio, __entry->newprio)
);
-/*
- * Tracepoint for showing tracked load contribution.
- */
-TRACE_EVENT(sched_task_load_contrib,
-
- TP_PROTO(struct task_struct *tsk, unsigned long load_contrib),
-
- TP_ARGS(tsk, load_contrib),
-
- TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN)
- __field(pid_t, pid)
- __field(unsigned long, load_contrib)
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
- __entry->pid = tsk->pid;
- __entry->load_contrib = load_contrib;
- ),
-
- TP_printk("comm=%s pid=%d load_contrib=%lu",
- __entry->comm, __entry->pid,
- __entry->load_contrib)
-);
-
-/*
- * Tracepoint for showing tracked task runnable ratio [0..1023].
- */
-TRACE_EVENT(sched_task_runnable_ratio,
-
- TP_PROTO(struct task_struct *tsk, unsigned long ratio),
-
- TP_ARGS(tsk, ratio),
-
- TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN)
- __field(pid_t, pid)
- __field(unsigned long, ratio)
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
- __entry->pid = tsk->pid;
- __entry->ratio = ratio;
- ),
-
- TP_printk("comm=%s pid=%d ratio=%lu",
- __entry->comm, __entry->pid,
- __entry->ratio)
-);
-
-/*
- * Tracepoint for showing tracked rq runnable ratio [0..1023].
- */
-TRACE_EVENT(sched_rq_runnable_ratio,
-
- TP_PROTO(int cpu, unsigned long ratio),
-
- TP_ARGS(cpu, ratio),
-
- TP_STRUCT__entry(
- __field(int, cpu)
- __field(unsigned long, ratio)
- ),
-
- TP_fast_assign(
- __entry->cpu = cpu;
- __entry->ratio = ratio;
- ),
-
- TP_printk("cpu=%d ratio=%lu",
- __entry->cpu,
- __entry->ratio)
-);
-
-/*
- * Tracepoint for showing tracked rq runnable load.
- */
-TRACE_EVENT(sched_rq_runnable_load,
-
- TP_PROTO(int cpu, u64 load),
-
- TP_ARGS(cpu, load),
-
- TP_STRUCT__entry(
- __field(int, cpu)
- __field(u64, load)
- ),
-
- TP_fast_assign(
- __entry->cpu = cpu;
- __entry->load = load;
- ),
-
- TP_printk("cpu=%d load=%llu",
- __entry->cpu,
- __entry->load)
-);
-
-TRACE_EVENT(sched_rq_nr_running,
-
- TP_PROTO(int cpu, unsigned int nr_running, int nr_iowait),
-
- TP_ARGS(cpu, nr_running, nr_iowait),
-
- TP_STRUCT__entry(
- __field(int, cpu)
- __field(unsigned int, nr_running)
- __field(int, nr_iowait)
- ),
-
- TP_fast_assign(
- __entry->cpu = cpu;
- __entry->nr_running = nr_running;
- __entry->nr_iowait = nr_iowait;
- ),
-
- TP_printk("cpu=%d nr_running=%u nr_iowait=%d",
- __entry->cpu,
- __entry->nr_running, __entry->nr_iowait)
-);
-
-/*
- * Tracepoint for showing tracked task cpu usage ratio [0..1023].
- */
-TRACE_EVENT(sched_task_usage_ratio,
-
- TP_PROTO(struct task_struct *tsk, unsigned long ratio),
-
- TP_ARGS(tsk, ratio),
-
- TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN)
- __field(pid_t, pid)
- __field(unsigned long, ratio)
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
- __entry->pid = tsk->pid;
- __entry->ratio = ratio;
- ),
-
- TP_printk("comm=%s pid=%d ratio=%lu",
- __entry->comm, __entry->pid,
- __entry->ratio)
-);
-
-/*
- * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations,
- * marking the forced transition of runnable or running tasks.
- */
-TRACE_EVENT(sched_hmp_migrate_force_running,
-
- TP_PROTO(struct task_struct *tsk, int running),
-
- TP_ARGS(tsk, running),
-
- TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN)
- __field(int, running)
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
- __entry->running = running;
- ),
-
- TP_printk("running=%d comm=%s",
- __entry->running, __entry->comm)
-);
-
-/*
- * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations,
- * marking the forced transition of runnable or running
- * tasks when a task is about to go idle.
- */
-TRACE_EVENT(sched_hmp_migrate_idle_running,
-
- TP_PROTO(struct task_struct *tsk, int running),
-
- TP_ARGS(tsk, running),
-
- TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN)
- __field(int, running)
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
- __entry->running = running;
- ),
-
- TP_printk("running=%d comm=%s",
- __entry->running, __entry->comm)
-);
-
-/*
- * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations.
- */
-#define HMP_MIGRATE_WAKEUP 0
-#define HMP_MIGRATE_FORCE 1
-#define HMP_MIGRATE_OFFLOAD 2
-#define HMP_MIGRATE_IDLE_PULL 3
-TRACE_EVENT(sched_hmp_migrate,
-
- TP_PROTO(struct task_struct *tsk, int dest, int force),
-
- TP_ARGS(tsk, dest, force),
-
- TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN)
- __field(pid_t, pid)
- __field(int, dest)
- __field(int, force)
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
- __entry->pid = tsk->pid;
- __entry->dest = dest;
- __entry->force = force;
- ),
-
- TP_printk("comm=%s pid=%d dest=%d force=%d",
- __entry->comm, __entry->pid,
- __entry->dest, __entry->force)
-);
-
-TRACE_EVENT(sched_hmp_offload_abort,
-
- TP_PROTO(int cpu, int data, char *label),
-
- TP_ARGS(cpu,data,label),
-
- TP_STRUCT__entry(
- __array(char, label, 64)
- __field(int, cpu)
- __field(int, data)
- ),
-
- TP_fast_assign(
- strncpy(__entry->label, label, 64);
- __entry->cpu = cpu;
- __entry->data = data;
- ),
-
- TP_printk("cpu=%d data=%d label=%63s",
- __entry->cpu, __entry->data,
- __entry->label)
-);
-
-TRACE_EVENT(sched_hmp_offload_succeed,
-
- TP_PROTO(int cpu, int dest_cpu),
-
- TP_ARGS(cpu,dest_cpu),
-
- TP_STRUCT__entry(
- __field(int, cpu)
- __field(int, dest_cpu)
- ),
-
- TP_fast_assign(
- __entry->cpu = cpu;
- __entry->dest_cpu = dest_cpu;
- ),
-
- TP_printk("cpu=%d dest=%d",
- __entry->cpu,
- __entry->dest_cpu)
-);
-
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/include/trace/events/smp.h b/include/trace/events/smp.h
deleted file mode 100644
index da0baf27a39a..000000000000
--- a/include/trace/events/smp.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM smp
-
-#if !defined(_TRACE_SMP_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_SMP_H
-
-#include <linux/tracepoint.h>
-
-DECLARE_EVENT_CLASS(smp_call_class,
-
- TP_PROTO(void * fnc),
-
- TP_ARGS(fnc),
-
- TP_STRUCT__entry(
- __field( void *, func )
- ),
-
- TP_fast_assign(
- __entry->func = fnc;
- ),
-
- TP_printk("func=%pf", __entry->func)
-);
-
-/**
- * smp_call_func_entry - called in the generic smp-cross-call-handler
- * immediately before calling the destination
- * function
- * @func: function pointer
- *
- * When used in combination with the smp_call_func_exit tracepoint
- * we can determine the cross-call runtime.
- */
-DEFINE_EVENT(smp_call_class, smp_call_func_entry,
-
- TP_PROTO(void * fnc),
-
- TP_ARGS(fnc)
-);
-
-/**
- * smp_call_func_exit - called in the generic smp-cross-call-handler
- * immediately after the destination function
- * returns
- * @func: function pointer
- *
- * When used in combination with the smp_call_entry tracepoint
- * we can determine the cross-call runtime.
- */
-DEFINE_EVENT(smp_call_class, smp_call_func_exit,
-
- TP_PROTO(void * fnc),
-
- TP_ARGS(fnc)
-);
-
-/**
- * smp_call_func_send - called as destination function is set
- * in the per-cpu storage
- * @func: function pointer
- * @dest: cpu to send to
- *
- * When used in combination with the smp_cross_call_entry tracepoint
- * we can determine the call-to-run latency.
- */
-TRACE_EVENT(smp_call_func_send,
-
- TP_PROTO(void * func, int dest),
-
- TP_ARGS(func, dest),
-
- TP_STRUCT__entry(
- __field( void * , func )
- __field( int , dest )
- ),
-
- TP_fast_assign(
- __entry->func = func;
- __entry->dest = dest;
- ),
-
- TP_printk("dest=%d func=%pf", __entry->dest,
- __entry->func)
-);
-
-#endif /* _TRACE_SMP_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 3fcb6faa5fa6..8ab8e9390297 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -23,35 +23,10 @@
static struct lock_class_key irq_desc_lock_class;
#if defined(CONFIG_SMP)
-static int __init irq_affinity_setup(char *str)
-{
- zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
- cpulist_parse(str, irq_default_affinity);
- /*
- * Set at least the boot cpu. We don't want to end up with
- * bug reports caused by random command line masks
- */
- cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
- return 1;
-}
-__setup("irqaffinity=", irq_affinity_setup);
-
-extern struct cpumask hmp_slow_cpu_mask;
-
static void __init init_irq_default_affinity(void)
{
-#ifdef CONFIG_CPUMASK_OFFSTACK
- if (!irq_default_affinity)
- zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
-#endif
-#ifdef CONFIG_SCHED_HMP
- if (!cpumask_empty(&hmp_slow_cpu_mask)) {
- cpumask_copy(irq_default_affinity, &hmp_slow_cpu_mask);
- return;
- }
-#endif
- if (cpumask_empty(irq_default_affinity))
- cpumask_setall(irq_default_affinity);
+ alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+ cpumask_setall(irq_default_affinity);
}
#else
static void __init init_irq_default_affinity(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ea4e780697b4..48ae418a4fd3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1407,11 +1407,7 @@ void scheduler_ipi(void)
{
if (llist_empty(&this_rq()->wake_list)
&& !tick_nohz_full_cpu(smp_processor_id())
- && !got_nohz_idle_kick()
-#ifdef CONFIG_SCHED_HMP
- && !this_rq()->wake_for_idle_pull
-#endif
- )
+ && !got_nohz_idle_kick())
return;
/*
@@ -1438,11 +1434,6 @@ void scheduler_ipi(void)
this_rq()->idle_balance = 1;
raise_softirq_irqoff(SCHED_SOFTIRQ);
}
-#ifdef CONFIG_SCHED_HMP
- else if (unlikely(this_rq()->wake_for_idle_pull))
- raise_softirq_irqoff(SCHED_SOFTIRQ);
-#endif
-
irq_exit();
}
@@ -1632,20 +1623,6 @@ static void __sched_fork(struct task_struct *p)
#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
p->se.avg.runnable_avg_period = 0;
p->se.avg.runnable_avg_sum = 0;
-#ifdef CONFIG_SCHED_HMP
- /* keep LOAD_AVG_MAX in sync with fair.c if load avg series is changed */
-#define LOAD_AVG_MAX 47742
- p->se.avg.hmp_last_up_migration = 0;
- p->se.avg.hmp_last_down_migration = 0;
- if (hmp_task_should_forkboost(p)) {
- p->se.avg.load_avg_ratio = 1023;
- p->se.avg.load_avg_contrib =
- (1023 * scale_load_down(p->se.load.weight));
- p->se.avg.runnable_avg_period = LOAD_AVG_MAX;
- p->se.avg.runnable_avg_sum = LOAD_AVG_MAX;
- p->se.avg.usage_avg_sum = LOAD_AVG_MAX;
- }
-#endif
#endif
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3848,8 +3825,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
return pid ? find_task_by_vpid(pid) : current;
}
-extern struct cpumask hmp_slow_cpu_mask;
-
/* Actually do priority change: must hold rq lock. */
static void
__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
@@ -3859,17 +3834,8 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
p->normal_prio = normal_prio(p);
/* we are holding p->pi_lock already */
p->prio = rt_mutex_getprio(p);
- if (rt_prio(p->prio)) {
+ if (rt_prio(p->prio))
p->sched_class = &rt_sched_class;
-#ifdef CONFIG_SCHED_HMP
- if (!cpumask_empty(&hmp_slow_cpu_mask))
- if (cpumask_equal(&p->cpus_allowed, cpu_all_mask)) {
- p->nr_cpus_allowed =
- cpumask_weight(&hmp_slow_cpu_mask);
- do_set_cpus_allowed(p, &hmp_slow_cpu_mask);
- }
-#endif
- }
else
p->sched_class = &fair_sched_class;
set_load_weight(p);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 1e23284fd692..701b6c8a4b12 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -94,7 +94,6 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
#ifdef CONFIG_SMP
P(se->avg.runnable_avg_sum);
P(se->avg.runnable_avg_period);
- P(se->avg.usage_avg_sum);
P(se->avg.load_avg_contrib);
P(se->avg.decay_count);
#endif
@@ -224,8 +223,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
cfs_rq->tg_runnable_contrib);
SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg",
atomic_read(&cfs_rq->tg->runnable_avg));
- SEQ_printf(m, " .%-30s: %d\n", "tg->usage_avg",
- atomic_read(&cfs_rq->tg->usage_avg));
#endif
#ifdef CONFIG_CFS_BANDWIDTH
SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active",
@@ -577,12 +574,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
"nr_involuntary_switches", (long long)p->nivcsw);
P(se.load.weight);
-#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
- P(se.avg.runnable_avg_sum);
- P(se.avg.runnable_avg_period);
- P(se.avg.load_avg_contrib);
- P(se.avg.decay_count);
-#endif
P(policy);
P(prio);
#undef PN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 41d0cbda605d..c7ab8eab5427 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -31,21 +31,9 @@
#include <linux/task_work.h>
#include <trace/events/sched.h>
-#include <linux/sysfs.h>
-#include <linux/vmalloc.h>
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-/* Include cpufreq header to add a notifier so that cpu frequency
- * scaling can track the current CPU frequency
- */
-#include <linux/cpufreq.h>
-#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
-#ifdef CONFIG_SCHED_HMP
-#include <linux/cpuidle.h>
-#endif
#include "sched.h"
-
/*
* Targeted preemption latency for CPU-bound tasks:
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
@@ -1220,91 +1208,8 @@ static u32 __compute_runnable_contrib(u64 n)
return contrib + runnable_avg_yN_sum[n];
}
-#ifdef CONFIG_SCHED_HMP
-#define HMP_VARIABLE_SCALE_SHIFT 16ULL
-struct hmp_global_attr {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj,
- struct attribute *attr, char *buf);
- ssize_t (*store)(struct kobject *a, struct attribute *b,
- const char *c, size_t count);
- int *value;
- int (*to_sysfs)(int);
- int (*from_sysfs)(int);
- ssize_t (*to_sysfs_text)(char *buf, int buf_size);
-};
-
-#define HMP_DATA_SYSFS_MAX 8
-
-struct hmp_data_struct {
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- int freqinvar_load_scale_enabled;
-#endif
- int multiplier; /* used to scale the time delta */
- struct attribute_group attr_group;
- struct attribute *attributes[HMP_DATA_SYSFS_MAX + 1];
- struct hmp_global_attr attr[HMP_DATA_SYSFS_MAX];
-} hmp_data;
-
-static u64 hmp_variable_scale_convert(u64 delta);
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-/* Frequency-Invariant Load Modification:
- * Loads are calculated as in PJT's patch however we also scale the current
- * contribution in line with the frequency of the CPU that the task was
- * executed on.
- * In this version, we use a simple linear scale derived from the maximum
- * frequency reported by CPUFreq. As an example:
- *
- * Consider that we ran a task for 100% of the previous interval.
- *
- * Our CPU was under asynchronous frequency control through one of the
- * CPUFreq governors.
- *
- * The CPUFreq governor reports that it is able to scale the CPU between
- * 500MHz and 1GHz.
- *
- * During the period, the CPU was running at 1GHz.
- *
- * In this case, our load contribution for that period is calculated as
- * 1 * (number_of_active_microseconds)
- *
- * This results in our task being able to accumulate maximum load as normal.
- *
- *
- * Consider now that our CPU was executing at 500MHz.
- *
- * We now scale the load contribution such that it is calculated as
- * 0.5 * (number_of_active_microseconds)
- *
- * Our task can only record 50% maximum load during this period.
- *
- * This represents the task consuming 50% of the CPU's *possible* compute
- * capacity. However the task did consume 100% of the CPU's *available*
- * compute capacity which is the value seen by the CPUFreq governor and
- * user-side CPU Utilization tools.
- *
- * Restricting tracked load to be scaled by the CPU's frequency accurately
- * represents the consumption of possible compute capacity and allows the
- * HMP migration's simple threshold migration strategy to interact more
- * predictably with CPUFreq's asynchronous compute capacity changes.
- */
-#define SCHED_FREQSCALE_SHIFT 10
-struct cpufreq_extents {
- u32 curr_scale;
- u32 min;
- u32 max;
- u32 flags;
-};
-/* Flag set when the governor in use only allows one frequency.
- * Disables scaling.
- */
-#define SCHED_LOAD_FREQINVAR_SINGLEFREQ 0x01
-
-static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS];
-#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
-#endif /* CONFIG_SCHED_HMP */
-
-/* We can represent the historical contribution to runnable average as the
+/*
+ * We can represent the historical contribution to runnable average as the
* coefficients of a geometric series. To do this we sub-divide our runnable
* history into segments of approximately 1ms (1024us); label the segment that
* occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
@@ -1333,24 +1238,13 @@ static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS];
*/
static __always_inline int __update_entity_runnable_avg(u64 now,
struct sched_avg *sa,
- int runnable,
- int running,
- int cpu)
+ int runnable)
{
u64 delta, periods;
u32 runnable_contrib;
int delta_w, decayed = 0;
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- u64 scaled_delta;
- u32 scaled_runnable_contrib;
- int scaled_delta_w;
- u32 curr_scale = 1024;
-#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
delta = now - sa->last_runnable_update;
-#ifdef CONFIG_SCHED_HMP
- delta = hmp_variable_scale_convert(delta);
-#endif
/*
* This should only happen when time goes backwards, which it
* unfortunately does during sched clock init when we swap over to TSC.
@@ -1369,12 +1263,6 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
return 0;
sa->last_runnable_update = now;
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- /* retrieve scale factor for load */
- if (hmp_data.freqinvar_load_scale_enabled)
- curr_scale = freq_scale[cpu].curr_scale;
-#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
-
/* delta_w is the amount already accumulated against our next period */
delta_w = sa->runnable_avg_period % 1024;
if (delta + delta_w >= 1024) {
@@ -1387,20 +1275,8 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
* period and accrue it.
*/
delta_w = 1024 - delta_w;
- /* scale runnable time if necessary */
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- scaled_delta_w = (delta_w * curr_scale)
- >> SCHED_FREQSCALE_SHIFT;
- if (runnable)
- sa->runnable_avg_sum += scaled_delta_w;
- if (running)
- sa->usage_avg_sum += scaled_delta_w;
-#else
if (runnable)
sa->runnable_avg_sum += delta_w;
- if (running)
- sa->usage_avg_sum += delta_w;
-#endif /* #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
sa->runnable_avg_period += delta_w;
delta -= delta_w;
@@ -1408,49 +1284,22 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
/* Figure out how many additional periods this update spans */
periods = delta / 1024;
delta %= 1024;
- /* decay the load we have accumulated so far */
+
sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
periods + 1);
sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
periods + 1);
- sa->usage_avg_sum = decay_load(sa->usage_avg_sum, periods + 1);
- /* add the contribution from this period */
+
/* Efficiently calculate \sum (1..n_period) 1024*y^i */
runnable_contrib = __compute_runnable_contrib(periods);
- /* Apply load scaling if necessary.
- * Note that multiplying the whole series is same as
- * multiplying all terms
- */
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- scaled_runnable_contrib = (runnable_contrib * curr_scale)
- >> SCHED_FREQSCALE_SHIFT;
- if (runnable)
- sa->runnable_avg_sum += scaled_runnable_contrib;
- if (running)
- sa->usage_avg_sum += scaled_runnable_contrib;
-#else
if (runnable)
sa->runnable_avg_sum += runnable_contrib;
- if (running)
- sa->usage_avg_sum += runnable_contrib;
-#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
sa->runnable_avg_period += runnable_contrib;
}
/* Remainder of delta accrued against u_0` */
- /* scale if necessary */
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- scaled_delta = ((delta * curr_scale) >> SCHED_FREQSCALE_SHIFT);
- if (runnable)
- sa->runnable_avg_sum += scaled_delta;
- if (running)
- sa->usage_avg_sum += scaled_delta;
-#else
if (runnable)
sa->runnable_avg_sum += delta;
- if (running)
- sa->usage_avg_sum += delta;
-#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
sa->runnable_avg_period += delta;
return decayed;
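For reference, the decay applied above is geometric: each 1024us period scales the sums by y, with y chosen so that y^32 is about 0.5 (LOAD_AVG_PERIOD = 32ms, as noted further down). A rough illustrative approximation, not the kernel's table-driven decay_load(), looks like:

    /* Approximate the geometric decay: halve once per 32 elapsed periods,
     * then apply y ~= 1002/1024 per remaining period. Illustrative only;
     * the real implementation uses precomputed fixed-point tables.
     */
    static u64 example_decay_load(u64 val, u64 n)
    {
            if (n / 32 >= 64)
                    return 0;               /* fully decayed */
            val >>= n / 32;                 /* y^32 == 1/2 */
            for (n %= 32; n; n--)
                    val = (val * 1002) >> 10;
            return val;
    }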
@@ -1463,9 +1312,12 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se)
u64 decays = atomic64_read(&cfs_rq->decay_counter);
decays -= se->avg.decay_count;
- if (decays)
- se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
+ if (!decays)
+ return 0;
+
+ se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
se->avg.decay_count = 0;
+
return decays;
}
@@ -1493,28 +1345,16 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
struct cfs_rq *cfs_rq)
{
struct task_group *tg = cfs_rq->tg;
- long contrib, usage_contrib;
+ long contrib;
/* The fraction of a cpu used by this cfs_rq */
contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
sa->runnable_avg_period + 1);
contrib -= cfs_rq->tg_runnable_contrib;
- usage_contrib = div_u64(sa->usage_avg_sum << NICE_0_SHIFT,
- sa->runnable_avg_period + 1);
- usage_contrib -= cfs_rq->tg_usage_contrib;
-
- /*
- * contrib/usage at this point represent deltas, only update if they
- * are substantive.
- */
- if ((abs(contrib) > cfs_rq->tg_runnable_contrib / 64) ||
- (abs(usage_contrib) > cfs_rq->tg_usage_contrib / 64)) {
+ if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
atomic_add(contrib, &tg->runnable_avg);
cfs_rq->tg_runnable_contrib += contrib;
-
- atomic_add(usage_contrib, &tg->usage_avg);
- cfs_rq->tg_usage_contrib += usage_contrib;
}
}
@@ -1575,18 +1415,12 @@ static inline void __update_task_entity_contrib(struct sched_entity *se)
contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);
contrib /= (se->avg.runnable_avg_period + 1);
se->avg.load_avg_contrib = scale_load(contrib);
- trace_sched_task_load_contrib(task_of(se), se->avg.load_avg_contrib);
- contrib = se->avg.runnable_avg_sum * scale_load_down(NICE_0_LOAD);
- contrib /= (se->avg.runnable_avg_period + 1);
- se->avg.load_avg_ratio = scale_load(contrib);
- trace_sched_task_runnable_ratio(task_of(se), se->avg.load_avg_ratio);
}
/* Compute the current contribution to load_avg by se, return any delta */
-static long __update_entity_load_avg_contrib(struct sched_entity *se, long *ratio)
+static long __update_entity_load_avg_contrib(struct sched_entity *se)
{
long old_contrib = se->avg.load_avg_contrib;
- long old_ratio = se->avg.load_avg_ratio;
if (entity_is_task(se)) {
__update_task_entity_contrib(se);
@@ -1595,8 +1429,6 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se, long *rati
__update_group_entity_contrib(se);
}
- if (ratio)
- *ratio = se->avg.load_avg_ratio - old_ratio;
return se->avg.load_avg_contrib - old_contrib;
}
@@ -1616,13 +1448,9 @@ static inline void update_entity_load_avg(struct sched_entity *se,
int update_cfs_rq)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- long contrib_delta, ratio_delta;
+ long contrib_delta;
u64 now;
- int cpu = -1; /* not used in normal case */
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- cpu = cfs_rq->rq->cpu;
-#endif
/*
* For a group entity we need to use their owned cfs_rq_clock_task() in
* case they are the parent of a throttled hierarchy.
@@ -1632,21 +1460,18 @@ static inline void update_entity_load_avg(struct sched_entity *se,
else
now = cfs_rq_clock_task(group_cfs_rq(se));
- if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq,
- cfs_rq->curr == se, cpu))
+ if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
return;
- contrib_delta = __update_entity_load_avg_contrib(se, &ratio_delta);
+ contrib_delta = __update_entity_load_avg_contrib(se);
if (!update_cfs_rq)
return;
- if (se->on_rq) {
+ if (se->on_rq)
cfs_rq->runnable_load_avg += contrib_delta;
- rq_of(cfs_rq)->avg.load_avg_ratio += ratio_delta;
- } else {
+ else
subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
- }
}
/*
@@ -1679,17 +1504,8 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
{
- int cpu = -1; /* not used in normal case */
-
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- cpu = rq->cpu;
-#endif
- __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable,
- runnable, cpu);
+ __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
__update_tg_runnable_avg(&rq->avg, &rq->cfs);
- trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio);
- trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg);
- trace_sched_rq_nr_running(cpu_of(rq), rq->nr_running, rq->nr_iowait.counter);
}
/* Add the load generated by se into cfs_rq's child load-average */
@@ -1731,8 +1547,6 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
}
cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
- rq_of(cfs_rq)->avg.load_avg_ratio += se->avg.load_avg_ratio;
-
/* we force update consideration on load-balancer moves */
update_cfs_rq_blocked_load(cfs_rq, !wakeup);
}
@@ -1751,8 +1565,6 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
update_cfs_rq_blocked_load(cfs_rq, !sleep);
cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
- rq_of(cfs_rq)->avg.load_avg_ratio -= se->avg.load_avg_ratio;
-
if (sleep) {
cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
@@ -2081,7 +1893,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
update_stats_wait_end(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
- update_entity_load_avg(se, 1);
}
update_stats_curr_start(cfs_rq, se);
@@ -3530,835 +3341,6 @@ done:
return target;
}
-#ifdef CONFIG_SCHED_HMP
-/*
- * Heterogeneous multiprocessor (HMP) optimizations
- *
- * The cpu types are distinguished using a list of hmp_domains
- * which each represent one cpu type using a cpumask.
- * The list is assumed ordered by compute capacity with the
- * fastest domain first.
- */
-DEFINE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
-static const int hmp_max_tasks = 5;
-
-extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list);
-
-#ifdef CONFIG_CPU_IDLE
-/*
- * hmp_idle_pull:
- *
- * In this version we have stopped using forced up migrations when we
- * detect that a task running on a little CPU should be moved to a bigger
- * CPU. In most cases, the bigger CPU is in a deep sleep state and a forced
- * migration means we stop the task immediately but need to wait for the
- * target CPU to wake up before we can restart the task which is being
- * moved. Instead, we now wake a big CPU with an IPI and ask it to pull
- * a task when ready. This allows the task to continue executing on its
- * current CPU, reducing the amount of time that the task is stalled for.
- *
- * keepalive timers:
- *
- * The keepalive timer is used as a way to keep a CPU engaged in an
- * idle pull operation out of idle while waiting for the source
- * CPU to stop and move the task. Ideally this would not be necessary
- * and we could impose a temporary zero-latency requirement on the
- * current CPU, but in the current QoS framework this will result in
- * all CPUs in the system being unable to enter idle states which is
- * not desirable. The timer does not perform any work when it expires.
- */
-struct hmp_keepalive {
- bool init;
- ktime_t delay; /* if zero, no need for timer */
- struct hrtimer timer;
-};
-DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive);
-
-/* setup per-cpu keepalive timers */
-static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer)
-{
- return HRTIMER_NORESTART;
-}
-
-/*
- * Work out if any of the idle states have an exit latency too high for us.
- * ns_delay is passed in containing the max we are willing to tolerate.
- * If there are none, set ns_delay to zero.
- * If there are any, set ns_delay to
- * ('target_residency of state with shortest too-big latency' - 1) * 1000.
- */
-static void hmp_keepalive_delay(int cpu, unsigned int *ns_delay)
-{
- struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
- struct cpuidle_driver *drv;
-
- drv = cpuidle_get_cpu_driver(dev);
- if (drv) {
- unsigned int us_delay = UINT_MAX;
- unsigned int us_max_delay = *ns_delay / 1000;
- int idx;
- /* if cpuidle states are guaranteed to be sorted we
- * could stop at the first match.
- */
- for (idx = 0; idx < drv->state_count; idx++) {
- if (drv->states[idx].exit_latency > us_max_delay &&
- drv->states[idx].target_residency < us_delay) {
- us_delay = drv->states[idx].target_residency;
- }
- }
- if (us_delay == UINT_MAX)
- *ns_delay = 0; /* no timer required */
- else
- *ns_delay = 1000 * (us_delay - 1);
- }
-}
-
-static void hmp_cpu_keepalive_trigger(void)
-{
- int cpu = smp_processor_id();
- struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
- if (!keepalive->init) {
- unsigned int ns_delay = 100000; /* tolerate 100usec delay */
-
- hrtimer_init(&keepalive->timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
- keepalive->timer.function = hmp_cpu_keepalive_notify;
-
- hmp_keepalive_delay(cpu, &ns_delay);
- keepalive->delay = ns_to_ktime(ns_delay);
- keepalive->init = true;
- }
- if (ktime_to_ns(keepalive->delay))
- hrtimer_start(&keepalive->timer,
- keepalive->delay, HRTIMER_MODE_REL_PINNED);
-}
-
-static void hmp_cpu_keepalive_cancel(int cpu)
-{
- struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
- if (keepalive->init)
- hrtimer_cancel(&keepalive->timer);
-}
-#else /* !CONFIG_CPU_IDLE */
-static void hmp_cpu_keepalive_trigger(void)
-{
-}
-
-static void hmp_cpu_keepalive_cancel(int cpu)
-{
-}
-#endif
-
-/* Setup hmp_domains */
-static int __init hmp_cpu_mask_setup(void)
-{
- char buf[64];
- struct hmp_domain *domain;
- struct list_head *pos;
- int dc, cpu;
-
- pr_debug("Initializing HMP scheduler:\n");
-
- /* Initialize hmp_domains using platform code */
- arch_get_hmp_domains(&hmp_domains);
- if (list_empty(&hmp_domains)) {
- pr_debug("HMP domain list is empty!\n");
- return 0;
- }
-
- /* Print hmp_domains */
- dc = 0;
- list_for_each(pos, &hmp_domains) {
- domain = list_entry(pos, struct hmp_domain, hmp_domains);
- cpulist_scnprintf(buf, 64, &domain->possible_cpus);
- pr_debug(" HMP domain %d: %s\n", dc, buf);
-
- for_each_cpu_mask(cpu, domain->possible_cpus) {
- per_cpu(hmp_cpu_domain, cpu) = domain;
- }
- dc++;
- }
-
- return 1;
-}
-
-static struct hmp_domain *hmp_get_hmp_domain_for_cpu(int cpu)
-{
- struct hmp_domain *domain;
- struct list_head *pos;
-
- list_for_each(pos, &hmp_domains) {
- domain = list_entry(pos, struct hmp_domain, hmp_domains);
- if(cpumask_test_cpu(cpu, &domain->possible_cpus))
- return domain;
- }
- return NULL;
-}
-
-static void hmp_online_cpu(int cpu)
-{
- struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu);
-
- if(domain)
- cpumask_set_cpu(cpu, &domain->cpus);
-}
-
-static void hmp_offline_cpu(int cpu)
-{
- struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu);
-
- if(domain)
- cpumask_clear_cpu(cpu, &domain->cpus);
-
- hmp_cpu_keepalive_cancel(cpu);
-}
-/*
- * Needed to determine heaviest tasks etc.
- */
-static inline unsigned int hmp_cpu_is_fastest(int cpu);
-static inline unsigned int hmp_cpu_is_slowest(int cpu);
-static inline struct hmp_domain *hmp_slower_domain(int cpu);
-static inline struct hmp_domain *hmp_faster_domain(int cpu);
-
-/* must hold runqueue lock for queue se is currently on */
-static struct sched_entity *hmp_get_heaviest_task(
- struct sched_entity *se, int target_cpu)
-{
- int num_tasks = hmp_max_tasks;
- struct sched_entity *max_se = se;
- unsigned long int max_ratio = se->avg.load_avg_ratio;
- const struct cpumask *hmp_target_mask = NULL;
- struct hmp_domain *hmp;
-
- if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq)))
- return max_se;
-
- hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
- hmp_target_mask = &hmp->cpus;
- if (target_cpu >= 0) {
- /* idle_balance gets run on a CPU while
- * it is in the middle of being hotplugged
- * out. Bail early in that case.
- */
- if(!cpumask_test_cpu(target_cpu, hmp_target_mask))
- return NULL;
- hmp_target_mask = cpumask_of(target_cpu);
- }
- /* The currently running task is not on the runqueue */
- se = __pick_first_entity(cfs_rq_of(se));
-
- while (num_tasks && se) {
- if (entity_is_task(se) &&
- se->avg.load_avg_ratio > max_ratio &&
- cpumask_intersects(hmp_target_mask,
- tsk_cpus_allowed(task_of(se)))) {
- max_se = se;
- max_ratio = se->avg.load_avg_ratio;
- }
- se = __pick_next_entity(se);
- num_tasks--;
- }
- return max_se;
-}
-
-static struct sched_entity *hmp_get_lightest_task(
- struct sched_entity *se, int migrate_down)
-{
- int num_tasks = hmp_max_tasks;
- struct sched_entity *min_se = se;
- unsigned long int min_ratio = se->avg.load_avg_ratio;
- const struct cpumask *hmp_target_mask = NULL;
-
- if (migrate_down) {
- struct hmp_domain *hmp;
- if (hmp_cpu_is_slowest(cpu_of(se->cfs_rq->rq)))
- return min_se;
- hmp = hmp_slower_domain(cpu_of(se->cfs_rq->rq));
- hmp_target_mask = &hmp->cpus;
- }
- /* The currently running task is not on the runqueue */
- se = __pick_first_entity(cfs_rq_of(se));
-
- while (num_tasks && se) {
- if (entity_is_task(se) &&
- (se->avg.load_avg_ratio < min_ratio &&
- hmp_target_mask &&
- cpumask_intersects(hmp_target_mask,
- tsk_cpus_allowed(task_of(se))))) {
- min_se = se;
- min_ratio = se->avg.load_avg_ratio;
- }
- se = __pick_next_entity(se);
- num_tasks--;
- }
- return min_se;
-}
-
-/*
- * Migration thresholds should be in the range [0..1023]
- * hmp_up_threshold: min. load required for migrating tasks to a faster cpu
- * hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu
- *
- * hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
- * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
- * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
- *
- * Small Task Packing:
- * We can choose to fill the littlest CPUs in an HMP system rather than
- * the typical spreading mechanism. This behavior is controllable using
- * two variables.
- * hmp_packing_enabled: runtime control over pack/spread
- * hmp_full_threshold: Consider a CPU with this much unweighted load full
- */
-unsigned int hmp_up_threshold = 700;
-unsigned int hmp_down_threshold = 512;
-#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
-unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
-#endif
-unsigned int hmp_next_up_threshold = 4096;
-unsigned int hmp_next_down_threshold = 4096;
-
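Taking the defaults above at face value: load_avg_ratio is on a 0..1023 scale, so hmp_up_threshold = 700 asks for roughly 68% of a NICE_0 task's full load before an up-migration is considered, and hmp_down_threshold = 512 allows a down-migration below about 50%. The rate limits compare nanosecond timestamps shifted right by 10 (roughly microseconds), so 4096 corresponds to about 4ms, consistent with the "1024 ~= 1 ms" note above. An illustrative restatement of the settle-time check used in hmp_up_migration() further down (hypothetical helper, not part of the patch):

    /* Illustrative only: now_ns and last_up_ns come from rq->clock_task
     * (nanoseconds); (delta >> 10) is ~microseconds, so 4096 is ~4ms.
     */
    static int example_up_migration_throttled(u64 now_ns, u64 last_up_ns)
    {
            return ((now_ns - last_up_ns) >> 10) < 4096; /* hmp_next_up_threshold */
    }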
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
-/*
- * Set the default packing threshold to try to keep little
- * CPUs at no more than 80% of their maximum frequency if only
- * packing a small number of small tasks. Bigger tasks will
- * raise frequency as normal.
- * In order to pack a task onto a CPU, the sum of the
- * unweighted runnable_avg load of existing tasks plus the
- * load of the new task must be less than hmp_full_threshold.
- *
- * This works in conjunction with frequency-invariant load
- * and DVFS governors. Since most DVFS governors aim for 80%
- * utilisation, we arrive at (0.8*0.8*(max_load=1024))=655
- * and use a value slightly lower to give a little headroom
- * in the decision.
- * Note that the most efficient frequency is different for
- * each system so /sys/kernel/hmp/packing_limit should be
- * configured at runtime for any given platform to achieve
- * optimal energy usage. Some systems may not benefit from
- * packing, so this feature can also be disabled at runtime
- * with /sys/kernel/hmp/packing_enable
- */
-unsigned int hmp_packing_enabled = 1;
-unsigned int hmp_full_threshold = 650;
-#endif
-
-static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
-static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
-static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu, struct cpumask *affinity);
-
-static inline struct hmp_domain *hmp_smallest_domain(void)
-{
- return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
-}
-
-/* Check if cpu is in fastest hmp_domain */
-static inline unsigned int hmp_cpu_is_fastest(int cpu)
-{
- struct list_head *pos;
-
- pos = &hmp_cpu_domain(cpu)->hmp_domains;
- return pos == hmp_domains.next;
-}
-
-/* Check if cpu is in slowest hmp_domain */
-static inline unsigned int hmp_cpu_is_slowest(int cpu)
-{
- struct list_head *pos;
-
- pos = &hmp_cpu_domain(cpu)->hmp_domains;
- return list_is_last(pos, &hmp_domains);
-}
-
-/* Next (slower) hmp_domain relative to cpu */
-static inline struct hmp_domain *hmp_slower_domain(int cpu)
-{
- struct list_head *pos;
-
- pos = &hmp_cpu_domain(cpu)->hmp_domains;
- return list_entry(pos->next, struct hmp_domain, hmp_domains);
-}
-
-/* Previous (faster) hmp_domain relative to cpu */
-static inline struct hmp_domain *hmp_faster_domain(int cpu)
-{
- struct list_head *pos;
-
- pos = &hmp_cpu_domain(cpu)->hmp_domains;
- return list_entry(pos->prev, struct hmp_domain, hmp_domains);
-}
-
-/*
- * Selects a cpu in previous (faster) hmp_domain
- */
-static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
- int cpu)
-{
- int lowest_cpu=NR_CPUS;
- __always_unused int lowest_ratio;
- struct hmp_domain *hmp;
-
- if (hmp_cpu_is_fastest(cpu))
- hmp = hmp_cpu_domain(cpu);
- else
- hmp = hmp_faster_domain(cpu);
-
- lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
- tsk_cpus_allowed(tsk));
-
- return lowest_cpu;
-}
-
-/*
- * Selects a cpu in next (slower) hmp_domain
- * Note that cpumask_any_and() returns the first cpu in the cpumask
- */
-static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
- int cpu)
-{
- int lowest_cpu=NR_CPUS;
- struct hmp_domain *hmp;
- __always_unused int lowest_ratio;
-
- if (hmp_cpu_is_slowest(cpu))
- hmp = hmp_cpu_domain(cpu);
- else
- hmp = hmp_slower_domain(cpu);
-
- lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
- tsk_cpus_allowed(tsk));
-
- return lowest_cpu;
-}
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
-/*
- * Select the 'best' candidate little CPU to wake up on.
- * Implements a packing strategy which examines CPUs in
- * logical CPU order, and selects the first which will
- * be loaded less than hmp_full_threshold according to
- * the sum of the tracked load of the runqueue and the task.
- */
-static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
- int cpu) {
- int tmp_cpu;
- unsigned long estimated_load;
- struct hmp_domain *hmp;
- struct sched_avg *avg;
- struct cpumask allowed_hmp_cpus;
-
- if(!hmp_packing_enabled ||
- tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
- return hmp_select_slower_cpu(tsk, cpu);
-
- if (hmp_cpu_is_slowest(cpu))
- hmp = hmp_cpu_domain(cpu);
- else
- hmp = hmp_slower_domain(cpu);
-
- /* respect affinity */
- cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
- tsk_cpus_allowed(tsk));
-
- for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
- avg = &cpu_rq(tmp_cpu)->avg;
- /* estimate new rq load if we add this task */
- estimated_load = avg->load_avg_ratio +
- tsk->se.avg.load_avg_ratio;
- if (estimated_load <= hmp_full_threshold) {
- cpu = tmp_cpu;
- break;
- }
- }
- /* if no match was found, the task uses the initial value */
- return cpu;
-}
-#endif
-static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
-{
- /* hack - always use clock from first online CPU */
- u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
- se->avg.hmp_last_up_migration = now;
- se->avg.hmp_last_down_migration = 0;
- cpu_rq(cpu)->avg.hmp_last_up_migration = now;
- cpu_rq(cpu)->avg.hmp_last_down_migration = 0;
-}
-
-static inline void hmp_next_down_delay(struct sched_entity *se, int cpu)
-{
- /* hack - always use clock from first online CPU */
- u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
- se->avg.hmp_last_down_migration = now;
- se->avg.hmp_last_up_migration = 0;
- cpu_rq(cpu)->avg.hmp_last_down_migration = now;
- cpu_rq(cpu)->avg.hmp_last_up_migration = 0;
-}
-
-/*
- * Heterogeneous multiprocessor (HMP) optimizations
- *
- * These functions allow changing the growth rate of the load_avg_ratio,
- * which by default goes from 0 to 0.5 in LOAD_AVG_PERIOD = 32ms.
- * This can now be changed with /sys/kernel/hmp/load_avg_period_ms.
- *
- * These functions also allow changing the up and down thresholds of HMP
- * using /sys/kernel/hmp/{up,down}_threshold.
- * Both must be between 0 and 1023. The threshold that is compared
- * to the load_avg_ratio is up_threshold/1024 and down_threshold/1024.
- *
- * For instance, if load_avg_period = 64 and up_threshold = 512, an idle
- * task with a load of 0 will reach the threshold after 64ms of busy looping.
- *
- * Changing load_avg_period_ms has the same effect as changing the
- * default scaling factor Y=1002/1024 in the load_avg_ratio computation to
- * (1002/1024.0)^(LOAD_AVG_PERIOD/load_avg_period_ms), but the latter
- * could trigger overflows.
- * For instance, with Y = 1023/1024 in __update_task_entity_contrib()
- * "contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);"
- * could overflow for a weight > 2^12 even if the load_avg_contrib
- * should still be a 32-bit result. This would not happen when multiplying
- * the delta time by 1/22 and setting load_avg_period_ms = 706.
- */
-
-/*
- * Scaling the delta time ends up increasing or decreasing the
- * growth rate of the per-entity load_avg_ratio.
- * The scale factor hmp_data.multiplier is a fixed point
- * number: (32-HMP_VARIABLE_SCALE_SHIFT).HMP_VARIABLE_SCALE_SHIFT
- */
-static inline u64 hmp_variable_scale_convert(u64 delta)
-{
-#ifdef CONFIG_HMP_VARIABLE_SCALE
- u64 high = delta >> 32ULL;
- u64 low = delta & 0xffffffffULL;
- low *= hmp_data.multiplier;
- high *= hmp_data.multiplier;
- return (low >> HMP_VARIABLE_SCALE_SHIFT)
- + (high << (32ULL - HMP_VARIABLE_SCALE_SHIFT));
-#else
- return delta;
-#endif
-}
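A quick worked check of the conversion above, assuming HMP_VARIABLE_SCALE_SHIFT = 16 and LOAD_AVG_PERIOD = 32 as stated earlier: the sysfs helper hmp_period_tofrom_sysfs() below turns load_avg_period_ms = 32 into a multiplier of 1 << 16, leaving deltas untouched, while load_avg_period_ms = 64 halves the multiplier so each real millisecond only counts as half a millisecond of tracked time, which is why a fresh task then needs 64ms of busy looping to reach a ratio of 512. Illustrative sketch for small deltas only (the high/low split in the real code exists solely to avoid 64x64-bit overflow):

    /* Illustrative only, for deltas small enough not to overflow 64 bits;
     * load_avg_period_ms must be non-zero.
     */
    static u64 example_scale_convert(u64 delta, unsigned int load_avg_period_ms)
    {
            /* same formula as hmp_period_tofrom_sysfs(); 32 == LOAD_AVG_PERIOD */
            u64 multiplier = (32ULL << 16) / load_avg_period_ms;

            return (delta * multiplier) >> 16;      /* 64ms period => delta / 2 */
    }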
-
-static ssize_t hmp_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct hmp_global_attr *hmp_attr =
- container_of(attr, struct hmp_global_attr, attr);
- int temp;
-
- if (hmp_attr->to_sysfs_text != NULL)
- return hmp_attr->to_sysfs_text(buf, PAGE_SIZE);
-
- temp = *(hmp_attr->value);
- if (hmp_attr->to_sysfs != NULL)
- temp = hmp_attr->to_sysfs(temp);
-
- return (ssize_t)sprintf(buf, "%d\n", temp);
-}
-
-static ssize_t hmp_store(struct kobject *a, struct attribute *attr,
- const char *buf, size_t count)
-{
- int temp;
- ssize_t ret = count;
- struct hmp_global_attr *hmp_attr =
- container_of(attr, struct hmp_global_attr, attr);
- char *str = vmalloc(count + 1);
- if (str == NULL)
- return -ENOMEM;
- memcpy(str, buf, count);
- str[count] = 0;
- if (sscanf(str, "%d", &temp) < 1)
- ret = -EINVAL;
- else {
- if (hmp_attr->from_sysfs != NULL)
- temp = hmp_attr->from_sysfs(temp);
- if (temp < 0)
- ret = -EINVAL;
- else
- *(hmp_attr->value) = temp;
- }
- vfree(str);
- return ret;
-}
-
-static ssize_t hmp_print_domains(char *outbuf, int outbufsize)
-{
- char buf[64];
- const char nospace[] = "%s", space[] = " %s";
- const char *fmt = nospace;
- struct hmp_domain *domain;
- struct list_head *pos;
- int outpos = 0;
- list_for_each(pos, &hmp_domains) {
- domain = list_entry(pos, struct hmp_domain, hmp_domains);
- if (cpumask_scnprintf(buf, 64, &domain->possible_cpus)) {
- outpos += sprintf(outbuf+outpos, fmt, buf);
- fmt = space;
- }
- }
- strcat(outbuf, "\n");
- return outpos+1;
-}
-
-#ifdef CONFIG_HMP_VARIABLE_SCALE
-static int hmp_period_tofrom_sysfs(int value)
-{
- return (LOAD_AVG_PERIOD << HMP_VARIABLE_SCALE_SHIFT) / value;
-}
-#endif
-/* max value for threshold is 1024 */
-static int hmp_theshold_from_sysfs(int value)
-{
- if (value > 1024)
- return -1;
- return value;
-}
-#if defined(CONFIG_SCHED_HMP_LITTLE_PACKING) || \
- defined(CONFIG_HMP_FREQUENCY_INVARIANT_SCALE)
-/* toggle control is only 0,1 off/on */
-static int hmp_toggle_from_sysfs(int value)
-{
- if (value < 0 || value > 1)
- return -1;
- return value;
-}
-#endif
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
-/* packing value must be non-negative */
-static int hmp_packing_from_sysfs(int value)
-{
- if (value < 0)
- return -1;
- return value;
-}
-#endif
-static void hmp_attr_add(
- const char *name,
- int *value,
- int (*to_sysfs)(int),
- int (*from_sysfs)(int),
- ssize_t (*to_sysfs_text)(char *, int),
- umode_t mode)
-{
- int i = 0;
- while (hmp_data.attributes[i] != NULL) {
- i++;
- if (i >= HMP_DATA_SYSFS_MAX)
- return;
- }
- if (mode)
- hmp_data.attr[i].attr.mode = mode;
- else
- hmp_data.attr[i].attr.mode = 0644;
- hmp_data.attr[i].show = hmp_show;
- hmp_data.attr[i].store = hmp_store;
- hmp_data.attr[i].attr.name = name;
- hmp_data.attr[i].value = value;
- hmp_data.attr[i].to_sysfs = to_sysfs;
- hmp_data.attr[i].from_sysfs = from_sysfs;
- hmp_data.attr[i].to_sysfs_text = to_sysfs_text;
- hmp_data.attributes[i] = &hmp_data.attr[i].attr;
- hmp_data.attributes[i + 1] = NULL;
-}
-
-static int hmp_attr_init(void)
-{
- int ret;
-	memset(&hmp_data, 0, sizeof(hmp_data));
- hmp_attr_add("hmp_domains",
- NULL,
- NULL,
- NULL,
- hmp_print_domains,
- 0444);
- hmp_attr_add("up_threshold",
- &hmp_up_threshold,
- NULL,
- hmp_theshold_from_sysfs,
- NULL,
- 0);
- hmp_attr_add("down_threshold",
- &hmp_down_threshold,
- NULL,
- hmp_theshold_from_sysfs,
- NULL,
- 0);
-#ifdef CONFIG_HMP_VARIABLE_SCALE
- /* by default load_avg_period_ms == LOAD_AVG_PERIOD
- * meaning no change
- */
- hmp_data.multiplier = hmp_period_tofrom_sysfs(LOAD_AVG_PERIOD);
- hmp_attr_add("load_avg_period_ms",
- &hmp_data.multiplier,
- hmp_period_tofrom_sysfs,
- hmp_period_tofrom_sysfs,
- NULL,
- 0);
-#endif
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
- /* default frequency-invariant scaling ON */
- hmp_data.freqinvar_load_scale_enabled = 1;
- hmp_attr_add("frequency_invariant_load_scale",
- &hmp_data.freqinvar_load_scale_enabled,
- NULL,
- hmp_toggle_from_sysfs,
- NULL,
- 0);
-#endif
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
- hmp_attr_add("packing_enable",
- &hmp_packing_enabled,
- NULL,
- hmp_toggle_from_sysfs,
- NULL,
- 0);
- hmp_attr_add("packing_limit",
- &hmp_full_threshold,
- NULL,
- hmp_packing_from_sysfs,
- NULL,
- 0);
-#endif
- hmp_data.attr_group.name = "hmp";
- hmp_data.attr_group.attrs = hmp_data.attributes;
- ret = sysfs_create_group(kernel_kobj,
- &hmp_data.attr_group);
- return 0;
-}
-late_initcall(hmp_attr_init);
-/*
- * return the load of the lowest-loaded CPU in a given HMP domain
- * min_cpu optionally points to an int to receive the CPU.
- * affinity optionally points to a cpumask containing the
- * CPUs to be considered. note:
- * + min_cpu = NR_CPUS only if no CPUs are in the set of
- * affinity && hmp_domain cpus
- * + min_cpu will always otherwise equal one of the CPUs in
- * the hmp domain
- * + when more than one CPU has the same load, the one which
- * is least-recently-disturbed by an HMP migration will be
- * selected
- * + if all CPUs are equally loaded or idle and the times are
- * all the same, the first in the set will be used
- * + if affinity is not set, cpu_online_mask is used
- */
-static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu, struct cpumask *affinity)
-{
- int cpu;
- int min_cpu_runnable_temp = NR_CPUS;
- u64 min_target_last_migration = ULLONG_MAX;
- u64 curr_last_migration;
- unsigned long min_runnable_load = INT_MAX;
- unsigned long contrib;
- struct sched_avg *avg;
- struct cpumask temp_cpumask;
- /*
- * only look at CPUs allowed if specified,
- * otherwise look at all online CPUs in the
- * right HMP domain
- */
- cpumask_and(&temp_cpumask, &hmpd->cpus, affinity ? affinity : cpu_online_mask);
-
- for_each_cpu_mask(cpu, temp_cpumask) {
- avg = &cpu_rq(cpu)->avg;
- /* used for both up and down migration */
- curr_last_migration = avg->hmp_last_up_migration ?
- avg->hmp_last_up_migration : avg->hmp_last_down_migration;
-
- contrib = avg->load_avg_ratio;
- /*
- * Consider a runqueue completely busy if there is any load
- * on it. Definitely not the best for overall fairness, but
- * does well in typical Android use cases.
- */
- if (contrib)
- contrib = 1023;
-
- if ((contrib < min_runnable_load) ||
- (contrib == min_runnable_load &&
- curr_last_migration < min_target_last_migration)) {
- /*
- * if the load is the same, target the CPU with
- * the longest time since a migration.
- * This is to spread migration load between
- * members of a domain more evenly when the
- * domain is fully loaded
- */
- min_runnable_load = contrib;
- min_cpu_runnable_temp = cpu;
- min_target_last_migration = curr_last_migration;
- }
- }
-
- if (min_cpu)
- *min_cpu = min_cpu_runnable_temp;
-
- return min_runnable_load;
-}
-
-/*
- * Calculate the task starvation
- * This is the ratio of actually running time vs. runnable time.
- * If the two are equal the task is getting the cpu time it needs or
- * it is alone on the cpu and the cpu is fully utilized.
- */
-static inline unsigned int hmp_task_starvation(struct sched_entity *se)
-{
- u32 starvation;
-
- starvation = se->avg.usage_avg_sum * scale_load_down(NICE_0_LOAD);
- starvation /= (se->avg.runnable_avg_sum + 1);
-
- return scale_load(starvation);
-}
-
-static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
-{
- int min_usage;
- int dest_cpu = NR_CPUS;
-
- if (hmp_cpu_is_slowest(cpu))
- return NR_CPUS;
-
- /* Is there an idle CPU in the current domain */
- min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL, NULL);
- if (min_usage == 0) {
- trace_sched_hmp_offload_abort(cpu, min_usage, "load");
- return NR_CPUS;
- }
-
- /* Is the task alone on the cpu? */
- if (cpu_rq(cpu)->cfs.h_nr_running < 2) {
- trace_sched_hmp_offload_abort(cpu,
- cpu_rq(cpu)->cfs.h_nr_running, "nr_running");
- return NR_CPUS;
- }
-
- /* Is the task actually starving? */
- /* >=25% ratio running/runnable = starving */
- if (hmp_task_starvation(se) > 768) {
- trace_sched_hmp_offload_abort(cpu, hmp_task_starvation(se),
- "starvation");
- return NR_CPUS;
- }
-
- /* Does the slower domain have any idle CPUs? */
- min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu,
- tsk_cpus_allowed(task_of(se)));
-
- if (min_usage == 0) {
- trace_sched_hmp_offload_succeed(cpu, dest_cpu);
- return dest_cpu;
- } else
- trace_sched_hmp_offload_abort(cpu,min_usage,"slowdomain");
- return NR_CPUS;
-}
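To make the starvation metric concrete, assuming NICE_0_LOAD == 1024 and no extra group-scheduling scaling: a task that has been running for 256 out of 1024 units of runnable time scores roughly 255, i.e. about 25%; since that is below the 768 (~75%) threshold checked in hmp_offload_down() above, this particular test does not veto the offload. Minimal illustrative restatement, not part of the patch:

    /* Illustrative only, assuming NICE_0_LOAD == 1024 and no group scaling. */
    static u32 example_starvation(u32 usage_avg_sum, u32 runnable_avg_sum)
    {
            /* e.g. usage 256, runnable 1024 -> 256 * 1024 / 1025 ~= 255 (~25%) */
            return (usage_avg_sum * 1024) / (runnable_avg_sum + 1);
    }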
-#endif /* CONFIG_SCHED_HMP */
-
/*
* sched_balance_self: balance the current task (running on cpu) in domains
* that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -4383,19 +3365,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
if (p->nr_cpus_allowed == 1)
return prev_cpu;
-#ifdef CONFIG_SCHED_HMP
- /* always put non-kernel forking tasks on a big domain */
- if (unlikely(sd_flag & SD_BALANCE_FORK) && hmp_task_should_forkboost(p)) {
- new_cpu = hmp_select_faster_cpu(p, prev_cpu);
- if (new_cpu != NR_CPUS) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- }
- /* failed to perform HMP fork balance, use normal balance */
- new_cpu = cpu;
- }
-#endif
-
if (sd_flag & SD_BALANCE_WAKE) {
if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
want_affine = 1;
@@ -4470,35 +3439,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
unlock:
rcu_read_unlock();
-#ifdef CONFIG_SCHED_HMP
- prev_cpu = task_cpu(p);
-
- if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) {
- hmp_next_up_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
- return new_cpu;
- }
- if (hmp_down_migration(prev_cpu, &p->se)) {
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
- new_cpu = hmp_best_little_cpu(p, prev_cpu);
-#else
- new_cpu = hmp_select_slower_cpu(p, prev_cpu);
-#endif
- /*
- * we might have no suitable CPU
- * in which case new_cpu == NR_CPUS
- */
- if (new_cpu < NR_CPUS && new_cpu != prev_cpu) {
- hmp_next_down_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
- return new_cpu;
- }
- }
- /* Make sure that the task stays in its previous hmp domain */
- if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
- return prev_cpu;
-#endif
-
return new_cpu;
}
@@ -4508,16 +3448,6 @@ unlock:
* load-balance).
*/
#ifdef CONFIG_FAIR_GROUP_SCHED
-
-#ifdef CONFIG_NO_HZ_COMMON
-static int nohz_test_cpu(int cpu);
-#else
-static inline int nohz_test_cpu(int cpu)
-{
- return 0;
-}
-#endif
-
/*
* Called immediately before a task is migrated to a new cpu; task_cpu(p) and
* cfs_rq_of(p) references at time of call are still valid and identify the
@@ -4537,25 +3467,6 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu)
* be negative here since on-rq tasks have decay-count == 0.
*/
if (se->avg.decay_count) {
- /*
- * If we migrate a sleeping task away from a CPU
- * which has the tick stopped, then both the clock_task
- * and decay_counter will be out of date for that CPU
- * and we will not decay load correctly.
- */
- if (!se->on_rq && nohz_test_cpu(task_cpu(p))) {
- struct rq *rq = cpu_rq(task_cpu(p));
- unsigned long flags;
- /*
- * Current CPU cannot be holding rq->lock in this
- * circumstance, but another might be. We must hold
- * rq->lock before we go poking around in its clocks
- */
- raw_spin_lock_irqsave(&rq->lock, flags);
- update_rq_clock(rq);
- update_cfs_rq_blocked_load(cfs_rq, 0);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- }
se->avg.decay_count = -__synchronize_entity_decay(se);
atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load);
}
@@ -5061,6 +3972,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* 1) task is cache cold, or
* 2) too many balance attempts have failed.
*/
+
tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
if (!tsk_cache_hot ||
env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
@@ -6346,16 +5258,6 @@ out:
return ld_moved;
}
-#ifdef CONFIG_SCHED_HMP
-static unsigned int hmp_idle_pull(int this_cpu);
-static int move_specific_task(struct lb_env *env, struct task_struct *pm);
-#else
-static int move_specific_task(struct lb_env *env, struct task_struct *pm)
-{
- return 0;
-}
-#endif
-
/*
* idle_balance is called by schedule() if this_cpu is about to become
* idle. Attempts to pull tasks from other CPUs.
@@ -6400,10 +5302,7 @@ void idle_balance(int this_cpu, struct rq *this_rq)
}
}
rcu_read_unlock();
-#ifdef CONFIG_SCHED_HMP
- if (!pulled_task)
- pulled_task = hmp_idle_pull(this_cpu);
-#endif
+
raw_spin_lock(&this_rq->lock);
if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
@@ -6415,19 +5314,22 @@ void idle_balance(int this_cpu, struct rq *this_rq)
}
}
-static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag)
+/*
+ * active_load_balance_cpu_stop is run by cpu stopper. It pushes
+ * running tasks off the busiest CPU onto idle CPUs. It requires at
+ * least 1 task to be running on each physical CPU where possible, and
+ * avoids physical / logical imbalances.
+ */
+static int active_load_balance_cpu_stop(void *data)
{
struct rq *busiest_rq = data;
int busiest_cpu = cpu_of(busiest_rq);
int target_cpu = busiest_rq->push_cpu;
struct rq *target_rq = cpu_rq(target_cpu);
struct sched_domain *sd;
- struct task_struct *p = NULL;
raw_spin_lock_irq(&busiest_rq->lock);
-#ifdef CONFIG_SCHED_HMP
- p = busiest_rq->migrate_task;
-#endif
+
/* make sure the requested cpu hasn't gone down in the meantime */
if (unlikely(busiest_cpu != smp_processor_id() ||
!busiest_rq->active_balance))
@@ -6437,11 +5339,6 @@ static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag)
if (busiest_rq->nr_running <= 1)
goto out_unlock;
- if (!check_sd_lb_flag) {
- /* Task has migrated meanwhile, abort forced migration */
- if (task_rq(p) != busiest_rq)
- goto out_unlock;
- }
/*
* This condition is "impossible", if it occurs
* we need to fix it. Originally reported by
@@ -6455,14 +5352,12 @@ static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag)
/* Search for an sd spanning us and the target CPU. */
rcu_read_lock();
for_each_domain(target_cpu, sd) {
- if (((check_sd_lb_flag && sd->flags & SD_LOAD_BALANCE) ||
- !check_sd_lb_flag) &&
- cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
+ if ((sd->flags & SD_LOAD_BALANCE) &&
+ cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
break;
}
if (likely(sd)) {
- bool success = false;
struct lb_env env = {
.sd = sd,
.dst_cpu = target_cpu,
@@ -6474,14 +5369,7 @@ static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag)
schedstat_inc(sd, alb_count);
- if (check_sd_lb_flag) {
- if (move_one_task(&env))
- success = true;
- } else {
- if (move_specific_task(&env, p))
- success = true;
- }
- if (success)
+ if (move_one_task(&env))
schedstat_inc(sd, alb_pushed);
else
schedstat_inc(sd, alb_failed);
@@ -6489,24 +5377,11 @@ static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
- if (!check_sd_lb_flag)
- put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
}
-/*
- * active_load_balance_cpu_stop is run by cpu stopper. It pushes
- * running tasks off the busiest CPU onto idle CPUs. It requires at
- * least 1 task to be running on each physical CPU where possible, and
- * avoids physical / logical imbalances.
- */
-static int active_load_balance_cpu_stop(void *data)
-{
- return __do_active_load_balance_cpu_stop(data, true);
-}
-
#ifdef CONFIG_NO_HZ_COMMON
/*
* idle load balancing details
@@ -6520,80 +5395,12 @@ static struct {
unsigned long next_balance; /* in jiffy units */
} nohz ____cacheline_aligned;
-/*
- * nohz_test_cpu used when load tracking is enabled. FAIR_GROUP_SCHED
- * dependency below may be removed when load tracking guards are
- * removed.
- */
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static int nohz_test_cpu(int cpu)
-{
- return cpumask_test_cpu(cpu, nohz.idle_cpus_mask);
-}
-#endif
-
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
-/*
- * Decide if the tasks on the busy CPUs in the
- * littlest domain would benefit from an idle balance
- */
-static int hmp_packing_ilb_needed(int cpu, int ilb_needed)
-{
- struct hmp_domain *hmp;
- /* allow previous decision on non-slowest domain */
- if (!hmp_cpu_is_slowest(cpu))
- return ilb_needed;
-
- /* if disabled, use normal ILB behaviour */
- if (!hmp_packing_enabled)
- return ilb_needed;
-
- hmp = hmp_cpu_domain(cpu);
- for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
- /* only idle balance if a CPU is loaded over threshold */
- if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
- return 1;
- }
- return 0;
-}
-#endif
-
-DEFINE_PER_CPU(cpumask_var_t, ilb_tmpmask);
-
static inline int find_new_ilb(int call_cpu)
{
int ilb = cpumask_first(nohz.idle_cpus_mask);
-#ifdef CONFIG_SCHED_HMP
- int ilb_needed = 0;
- int cpu;
- struct cpumask* tmp = per_cpu(ilb_tmpmask, smp_processor_id());
-
- /* restrict nohz balancing to occur in the same hmp domain */
- ilb = cpumask_first_and(nohz.idle_cpus_mask,
- &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
-
- /* check to see if it's necessary within this domain */
- cpumask_andnot(tmp,
- &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus,
- nohz.idle_cpus_mask);
- for_each_cpu(cpu, tmp) {
- if (cpu_rq(cpu)->nr_running > 1) {
- ilb_needed = 1;
- break;
- }
- }
-
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
- if (ilb < nr_cpu_ids)
- ilb_needed = hmp_packing_ilb_needed(ilb, ilb_needed);
-#endif
- if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
- return ilb;
-#else
if (ilb < nr_cpu_ids && idle_cpu(ilb))
return ilb;
-#endif
return nr_cpu_ids;
}
@@ -6870,18 +5677,6 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
if (time_before(now, nohz.next_balance))
return 0;
-#ifdef CONFIG_SCHED_HMP
- /*
- * Bail out if there are no nohz CPUs in our
- * HMP domain, since we will move tasks between
- * domains through wakeup and force balancing
- * as necessary based upon task load.
- */
- if (cpumask_first_and(nohz.idle_cpus_mask,
- &((struct hmp_domain *)hmp_cpu_domain(cpu))->cpus) >= nr_cpu_ids)
- return 0;
-#endif
-
if (rq->nr_running >= 2)
goto need_kick;
@@ -6914,442 +5709,6 @@ need_kick:
static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
#endif
-#ifdef CONFIG_SCHED_HMP
-static unsigned int hmp_task_eligible_for_up_migration(struct sched_entity *se)
-{
- /* below hmp_up_threshold, never eligible */
- if (se->avg.load_avg_ratio < hmp_up_threshold)
- return 0;
- return 1;
-}
-
-/* Check if task should migrate to a faster cpu */
-static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se)
-{
- struct task_struct *p = task_of(se);
- int temp_target_cpu;
- u64 now;
-
- if (hmp_cpu_is_fastest(cpu))
- return 0;
-
-#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
- /* Filter by task priority */
- if (p->prio >= hmp_up_prio)
- return 0;
-#endif
- if (!hmp_task_eligible_for_up_migration(se))
- return 0;
-
- /* Let the task load settle before doing another up migration */
- /* hack - always use clock from first online CPU */
- now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
- if (((now - se->avg.hmp_last_up_migration) >> 10)
- < hmp_next_up_threshold)
- return 0;
-
- /* hmp_domain_min_load only returns 0 for an
- * idle CPU or 1023 for any partly-busy one.
- * Be explicit about requirement for an idle CPU.
- */
- if (hmp_domain_min_load(hmp_faster_domain(cpu), &temp_target_cpu,
- tsk_cpus_allowed(p)) == 0 && temp_target_cpu != NR_CPUS) {
- if(target_cpu)
- *target_cpu = temp_target_cpu;
- return 1;
- }
- return 0;
-}
-
-/* Check if task should migrate to a slower cpu */
-static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
-{
- struct task_struct *p = task_of(se);
- u64 now;
-
- if (hmp_cpu_is_slowest(cpu)) {
-#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
- if(hmp_packing_enabled)
- return 1;
- else
-#endif
- return 0;
- }
-
-#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
- /* Filter by task priority */
- if ((p->prio >= hmp_up_prio) &&
- cpumask_intersects(&hmp_slower_domain(cpu)->cpus,
- tsk_cpus_allowed(p))) {
- return 1;
- }
-#endif
-
- /* Let the task load settle before doing another down migration */
- /* hack - always use clock from first online CPU */
- now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
- if (((now - se->avg.hmp_last_down_migration) >> 10)
- < hmp_next_down_threshold)
- return 0;
-
- if (cpumask_intersects(&hmp_slower_domain(cpu)->cpus,
- tsk_cpus_allowed(p))
- && se->avg.load_avg_ratio < hmp_down_threshold) {
- return 1;
- }
- return 0;
-}
-
-/*
- * hmp_can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
- * Ideally this function should be merged with can_migrate_task() to avoid
- * redundant code.
- */
-static int hmp_can_migrate_task(struct task_struct *p, struct lb_env *env)
-{
- int tsk_cache_hot = 0;
-
- /*
- * We do not migrate tasks that are:
- * 1) running (obviously), or
- * 2) cannot be migrated to this CPU due to cpus_allowed
- */
- if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
- schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
- return 0;
- }
- env->flags &= ~LBF_ALL_PINNED;
-
- if (task_running(env->src_rq, p)) {
- schedstat_inc(p, se.statistics.nr_failed_migrations_running);
- return 0;
- }
-
- /*
- * Aggressive migration if:
- * 1) task is cache cold, or
- * 2) too many balance attempts have failed.
- */
-
- tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
- if (!tsk_cache_hot ||
- env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
-#ifdef CONFIG_SCHEDSTATS
- if (tsk_cache_hot) {
- schedstat_inc(env->sd, lb_hot_gained[env->idle]);
- schedstat_inc(p, se.statistics.nr_forced_migrations);
- }
-#endif
- return 1;
- }
-
- return 1;
-}
-
-/*
- * move_specific_task tries to move a specific task.
- * Returns 1 if successful and 0 otherwise.
- * Called with both runqueues locked.
- */
-static int move_specific_task(struct lb_env *env, struct task_struct *pm)
-{
- struct task_struct *p, *n;
-
- list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
- if (throttled_lb_pair(task_group(p), env->src_rq->cpu,
- env->dst_cpu))
- continue;
-
- if (!hmp_can_migrate_task(p, env))
- continue;
- /* Check if we found the right task */
- if (p != pm)
- continue;
-
- move_task(p, env);
- /*
- * Right now, this is only the third place move_task()
- * is called, so we can safely collect move_task()
- * stats here rather than inside move_task().
- */
- schedstat_inc(env->sd, lb_gained[env->idle]);
- return 1;
- }
- return 0;
-}
-
-/*
- * hmp_active_task_migration_cpu_stop is run by cpu stopper and used to
- * migrate a specific task from one runqueue to another.
- * hmp_force_up_migration uses this to push a currently running task
- * off a runqueue. hmp_idle_pull uses this to pull a currently
- * running task to an idle runqueue.
- * Reuses __do_active_load_balance_cpu_stop to actually do the work.
- */
-static int hmp_active_task_migration_cpu_stop(void *data)
-{
- return __do_active_load_balance_cpu_stop(data, false);
-}
-
-/*
- * Move task in a runnable state to another CPU.
- *
- * Tailored on 'active_load_balance_cpu_stop' with slight
- * modification to locking and pre-transfer checks. Note
- * rq->lock must be held before calling.
- */
-static void hmp_migrate_runnable_task(struct rq *rq)
-{
- struct sched_domain *sd;
- int src_cpu = cpu_of(rq);
- struct rq *src_rq = rq;
- int dst_cpu = rq->push_cpu;
- struct rq *dst_rq = cpu_rq(dst_cpu);
- struct task_struct *p = rq->migrate_task;
- /*
- * One last check to make sure nobody else is playing
- * with the source rq.
- */
- if (src_rq->active_balance)
- goto out;
-
- if (src_rq->nr_running <= 1)
- goto out;
-
- if (task_rq(p) != src_rq)
- goto out;
- /*
- * Not sure if this applies here but one can never
- * be too cautious
- */
- BUG_ON(src_rq == dst_rq);
-
- double_lock_balance(src_rq, dst_rq);
-
- rcu_read_lock();
- for_each_domain(dst_cpu, sd) {
- if (cpumask_test_cpu(src_cpu, sched_domain_span(sd)))
- break;
- }
-
- if (likely(sd)) {
- struct lb_env env = {
- .sd = sd,
- .dst_cpu = dst_cpu,
- .dst_rq = dst_rq,
- .src_cpu = src_cpu,
- .src_rq = src_rq,
- .idle = CPU_IDLE,
- };
-
- schedstat_inc(sd, alb_count);
-
- if (move_specific_task(&env, p))
- schedstat_inc(sd, alb_pushed);
- else
- schedstat_inc(sd, alb_failed);
- }
-
- rcu_read_unlock();
- double_unlock_balance(src_rq, dst_rq);
-out:
- put_task_struct(p);
-}
-
-static DEFINE_SPINLOCK(hmp_force_migration);
-
-/*
- * hmp_force_up_migration checks runqueues for tasks that need to
- * be actively migrated to a faster cpu.
- */
-static void hmp_force_up_migration(int this_cpu)
-{
- int cpu, target_cpu;
- struct sched_entity *curr, *orig;
- struct rq *target;
- unsigned long flags;
- unsigned int force, got_target;
- struct task_struct *p;
-
- if (!spin_trylock(&hmp_force_migration))
- return;
- for_each_online_cpu(cpu) {
- force = 0;
- got_target = 0;
- target = cpu_rq(cpu);
- raw_spin_lock_irqsave(&target->lock, flags);
- curr = target->cfs.curr;
- if (!curr || target->active_balance) {
- raw_spin_unlock_irqrestore(&target->lock, flags);
- continue;
- }
- if (!entity_is_task(curr)) {
- struct cfs_rq *cfs_rq;
-
- cfs_rq = group_cfs_rq(curr);
- while (cfs_rq) {
- curr = cfs_rq->curr;
- cfs_rq = group_cfs_rq(curr);
- }
- }
- orig = curr;
- curr = hmp_get_heaviest_task(curr, -1);
- if (!curr) {
- raw_spin_unlock_irqrestore(&target->lock, flags);
- continue;
- }
- p = task_of(curr);
- if (hmp_up_migration(cpu, &target_cpu, curr)) {
- cpu_rq(target_cpu)->wake_for_idle_pull = 1;
- raw_spin_unlock_irqrestore(&target->lock, flags);
- spin_unlock(&hmp_force_migration);
- smp_send_reschedule(target_cpu);
- return;
- }
- if (!got_target) {
- /*
- * For now we just check the currently running task.
- * Selecting the lightest task for offloading will
- * require extensive bookkeeping.
- */
- curr = hmp_get_lightest_task(orig, 1);
- p = task_of(curr);
- target->push_cpu = hmp_offload_down(cpu, curr);
- if (target->push_cpu < NR_CPUS) {
- get_task_struct(p);
- target->migrate_task = p;
- got_target = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD);
- hmp_next_down_delay(&p->se, target->push_cpu);
- }
- }
- /*
- * We have a target with no active_balance. If the task
- * is not currently running move it, otherwise let the
- * CPU stopper take care of it.
- */
- if (got_target) {
- if (!task_running(target, p)) {
- trace_sched_hmp_migrate_force_running(p, 0);
- hmp_migrate_runnable_task(target);
- } else {
- target->active_balance = 1;
- force = 1;
- }
- }
-
- raw_spin_unlock_irqrestore(&target->lock, flags);
-
- if (force)
- stop_one_cpu_nowait(cpu_of(target),
- hmp_active_task_migration_cpu_stop,
- target, &target->active_balance_work);
- }
- spin_unlock(&hmp_force_migration);
-}
-/*
- * hmp_idle_pull looks at little domain runqueues to see
- * if a task should be pulled.
- *
- * Reuses hmp_force_migration spinlock.
- *
- */
-static unsigned int hmp_idle_pull(int this_cpu)
-{
- int cpu;
- struct sched_entity *curr, *orig;
- struct hmp_domain *hmp_domain = NULL;
- struct rq *target = NULL, *rq;
- unsigned long flags, ratio = 0;
- unsigned int force = 0;
- struct task_struct *p = NULL;
-
- if (!hmp_cpu_is_slowest(this_cpu))
- hmp_domain = hmp_slower_domain(this_cpu);
- if (!hmp_domain)
- return 0;
-
- if (!spin_trylock(&hmp_force_migration))
- return 0;
-
- /* first select a task */
- for_each_cpu(cpu, &hmp_domain->cpus) {
- rq = cpu_rq(cpu);
- raw_spin_lock_irqsave(&rq->lock, flags);
- curr = rq->cfs.curr;
- if (!curr) {
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- continue;
- }
- if (!entity_is_task(curr)) {
- struct cfs_rq *cfs_rq;
-
- cfs_rq = group_cfs_rq(curr);
- while (cfs_rq) {
- curr = cfs_rq->curr;
- if (!entity_is_task(curr))
- cfs_rq = group_cfs_rq(curr);
- else
- cfs_rq = NULL;
- }
- }
- orig = curr;
- curr = hmp_get_heaviest_task(curr, this_cpu);
- /* check if heaviest eligible task on this
- * CPU is heavier than previous task
- */
- if (curr && hmp_task_eligible_for_up_migration(curr) &&
- curr->avg.load_avg_ratio > ratio &&
- cpumask_test_cpu(this_cpu,
- tsk_cpus_allowed(task_of(curr)))) {
- p = task_of(curr);
- target = rq;
- ratio = curr->avg.load_avg_ratio;
- }
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- }
-
- if (!p)
- goto done;
-
- /* now we have a candidate */
- raw_spin_lock_irqsave(&target->lock, flags);
- if (!target->active_balance && task_rq(p) == target) {
- get_task_struct(p);
- target->push_cpu = this_cpu;
- target->migrate_task = p;
- trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
- hmp_next_up_delay(&p->se, target->push_cpu);
- /*
- * if the task isn't running move it right away.
- * Otherwise setup the active_balance mechanic and let
- * the CPU stopper do its job.
- */
- if (!task_running(target, p)) {
- trace_sched_hmp_migrate_idle_running(p, 0);
- hmp_migrate_runnable_task(target);
- } else {
- target->active_balance = 1;
- force = 1;
- }
- }
- raw_spin_unlock_irqrestore(&target->lock, flags);
-
- if (force) {
- /* start timer to keep us awake */
- hmp_cpu_keepalive_trigger();
- stop_one_cpu_nowait(cpu_of(target),
- hmp_active_task_migration_cpu_stop,
- target, &target->active_balance_work);
- }
-done:
- spin_unlock(&hmp_force_migration);
- return force;
-}
-#else
-static void hmp_force_up_migration(int this_cpu) { }
-#endif /* CONFIG_SCHED_HMP */
-
/*
* run_rebalance_domains is triggered when needed from the scheduler tick.
* Also triggered for nohz idle balancing (with nohz_balancing_kick set).
@@ -7361,20 +5720,6 @@ static void run_rebalance_domains(struct softirq_action *h)
enum cpu_idle_type idle = this_rq->idle_balance ?
CPU_IDLE : CPU_NOT_IDLE;
-#ifdef CONFIG_SCHED_HMP
- /* shortcut for hmp idle pull wakeups */
- if (unlikely(this_rq->wake_for_idle_pull)) {
- this_rq->wake_for_idle_pull = 0;
- if (hmp_idle_pull(this_cpu)) {
- /* break out unless running nohz idle as well */
- if (idle != CPU_IDLE)
- return;
- }
- }
-#endif
-
- hmp_force_up_migration(this_cpu);
-
rebalance_domains(this_cpu, idle);
/*
@@ -7407,17 +5752,11 @@ void trigger_load_balance(struct rq *rq, int cpu)
static void rq_online_fair(struct rq *rq)
{
-#ifdef CONFIG_SCHED_HMP
- hmp_online_cpu(rq->cpu);
-#endif
update_sysctl();
}
static void rq_offline_fair(struct rq *rq)
{
-#ifdef CONFIG_SCHED_HMP
- hmp_offline_cpu(rq->cpu);
-#endif
update_sysctl();
/* Ensure any throttled groups are reachable by pick_next_task */
@@ -7885,139 +6224,6 @@ __init void init_sched_fair_class(void)
zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
cpu_notifier(sched_ilb_notifier, 0);
#endif
-
-#ifdef CONFIG_SCHED_HMP
- hmp_cpu_mask_setup();
-#endif
#endif /* SMP */
}
-
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-static u32 cpufreq_calc_scale(u32 min, u32 max, u32 curr)
-{
- u32 result = curr / max;
- return result;
-}
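The bare integer division above only produces a useful ratio because the policy callback below stores extents->max and extents->min pre-shifted by SCHED_FREQSCALE_SHIFT, so curr / max behaves like (curr << 10) / policy->max. A worked example under the assumption of a 1GHz policy maximum:

    /* Illustrative only: policy->max = 1000000 kHz gives extents->max =
     * 1000000 >> 10 = 976; running at 500000 kHz then yields
     * 500000 / 976 = 512, i.e. ~0.5 in the same 10-bit fixed point used
     * to scale the load deltas earlier in this file.
     */
    static u32 example_scale_at_half_speed(void)
    {
            u32 max = 1000000 >> 10;        /* as stored by cpufreq_policy_callback() */

            return 500000 / max;            /* == 512 */
    }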
-
-/* Called when the CPU Frequency is changed.
- * Once for each CPU.
- */
-static int cpufreq_callback(struct notifier_block *nb,
- unsigned long val, void *data)
-{
- struct cpufreq_freqs *freq = data;
- int cpu = freq->cpu;
- struct cpufreq_extents *extents;
-
- if (freq->flags & CPUFREQ_CONST_LOOPS)
- return NOTIFY_OK;
-
- if (val != CPUFREQ_POSTCHANGE)
- return NOTIFY_OK;
-
- /* if dynamic load scale is disabled, set the load scale to 1.0 */
- if (!hmp_data.freqinvar_load_scale_enabled) {
- freq_scale[cpu].curr_scale = 1024;
- return NOTIFY_OK;
- }
-
- extents = &freq_scale[cpu];
- if (extents->flags & SCHED_LOAD_FREQINVAR_SINGLEFREQ) {
- /* If our governor was recognised as a single-freq governor,
- * use 1.0
- */
- extents->curr_scale = 1024;
- } else {
- extents->curr_scale = cpufreq_calc_scale(extents->min,
- extents->max, freq->new);
- }
-
- return NOTIFY_OK;
-}
-
-/* Called when the CPUFreq governor is changed.
- * Only called for the CPUs which are actually changed by the
- * userspace.
- */
-static int cpufreq_policy_callback(struct notifier_block *nb,
- unsigned long event, void *data)
-{
- struct cpufreq_policy *policy = data;
- struct cpufreq_extents *extents;
- int cpu, singleFreq = 0;
- static const char performance_governor[] = "performance";
- static const char powersave_governor[] = "powersave";
-
- if (event == CPUFREQ_START)
- return 0;
-
- if (event != CPUFREQ_INCOMPATIBLE)
- return 0;
-
- /* CPUFreq governors do not accurately report the range of
- * CPU Frequencies they will choose from.
- * We recognise performance and powersave governors as
- * single-frequency only.
- */
- if (!strncmp(policy->governor->name, performance_governor,
- strlen(performance_governor)) ||
- !strncmp(policy->governor->name, powersave_governor,
- strlen(powersave_governor)))
- singleFreq = 1;
-
- /* Make sure that all CPUs impacted by this policy are
- * updated since we will only get a notification when the
- * user explicitly changes the policy on a CPU.
- */
- for_each_cpu(cpu, policy->cpus) {
- extents = &freq_scale[cpu];
- extents->max = policy->max >> SCHED_FREQSCALE_SHIFT;
- extents->min = policy->min >> SCHED_FREQSCALE_SHIFT;
- if (!hmp_data.freqinvar_load_scale_enabled) {
- extents->curr_scale = 1024;
- } else if (singleFreq) {
- extents->flags |= SCHED_LOAD_FREQINVAR_SINGLEFREQ;
- extents->curr_scale = 1024;
- } else {
- extents->flags &= ~SCHED_LOAD_FREQINVAR_SINGLEFREQ;
- extents->curr_scale = cpufreq_calc_scale(extents->min,
- extents->max, policy->cur);
- }
- }
-
- return 0;
-}
-
-static struct notifier_block cpufreq_notifier = {
- .notifier_call = cpufreq_callback,
-};
-static struct notifier_block cpufreq_policy_notifier = {
- .notifier_call = cpufreq_policy_callback,
-};
-
-static int __init register_sched_cpufreq_notifier(void)
-{
- int ret = 0;
-
- /* init safe defaults since there are no policies at registration */
- for (ret = 0; ret < CONFIG_NR_CPUS; ret++) {
- /* safe defaults */
- freq_scale[ret].max = 1024;
- freq_scale[ret].min = 1024;
- freq_scale[ret].curr_scale = 1024;
- }
-
- pr_info("sched: registering cpufreq notifiers for scale-invariant loads\n");
- ret = cpufreq_register_notifier(&cpufreq_policy_notifier,
- CPUFREQ_POLICY_NOTIFIER);
-
- if (ret != -EINVAL)
- ret = cpufreq_register_notifier(&cpufreq_notifier,
- CPUFREQ_TRANSITION_NOTIFIER);
-
- return ret;
-}
-
-core_initcall(register_sched_cpufreq_notifier);
-#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
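For reference, the block removed above implemented frequency-invariant load scaling: each cpufreq transition recomputed a per-CPU fixed-point scale in which 1024 represents 1.0. Because the policy limits are stored pre-shifted by SCHED_FREQSCALE_SHIFT while the current frequency is not, the plain division in cpufreq_calc_scale() effectively yields (curr << SHIFT) / max. The standalone C sketch below illustrates only that arithmetic; the shift value of 10 is inferred from the 1024 == 1.0 convention, and the helper and parameter names are illustrative rather than copies of the deleted kernel code.

/* Standalone illustration of the reverted fixed-point frequency scale. */
#include <stdio.h>

#define SCHED_FREQSCALE_SHIFT 10	/* assumed: 1024 == 1.0 */

static unsigned int calc_scale(unsigned int policy_max_khz,
			       unsigned int curr_khz)
{
	/*
	 * The policy maximum is kept pre-shifted, so dividing the
	 * unshifted current frequency by it returns a value scaled
	 * by 2^SCHED_FREQSCALE_SHIFT.
	 */
	unsigned int max = policy_max_khz >> SCHED_FREQSCALE_SHIFT;

	return curr_khz / max;
}

int main(void)
{
	printf("%u\n", calc_scale(1800000, 1800000));	/* ~1024: full speed */
	printf("%u\n", calc_scale(1800000,  900000));	/*  ~512: half speed */
	return 0;
}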
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0d19ede6849e..dfa31d533e3f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -142,7 +142,7 @@ struct task_group {
atomic_t load_weight;
atomic64_t load_avg;
- atomic_t runnable_avg, usage_avg;
+ atomic_t runnable_avg;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@@ -279,7 +279,7 @@ struct cfs_rq {
#endif /* CONFIG_FAIR_GROUP_SCHED */
/* These always depend on CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_FAIR_GROUP_SCHED
- u32 tg_runnable_contrib, tg_usage_contrib;
+ u32 tg_runnable_contrib;
u64 tg_load_contrib;
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -464,10 +464,6 @@ struct rq {
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
-#ifdef CONFIG_SCHED_HMP
- struct task_struct *migrate_task;
- int wake_for_idle_pull;
-#endif
/* cpu of this runqueue: */
int cpu;
int online;
@@ -646,12 +642,6 @@ static inline unsigned int group_first_cpu(struct sched_group *group)
extern int group_balance_cpu(struct sched_group *sg);
-#ifdef CONFIG_SCHED_HMP
-static LIST_HEAD(hmp_domains);
-DECLARE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
-#define hmp_cpu_domain(cpu) (per_cpu(hmp_cpu_domain, (cpu)))
-#endif /* CONFIG_SCHED_HMP */
-
#endif /* CONFIG_SMP */
#include "stats.h"
diff --git a/kernel/smp.c b/kernel/smp.c
index 74ba5e2c037c..88797cb0d23a 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -12,8 +12,6 @@
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/smp.h>
#include "smpboot.h"
@@ -161,10 +159,8 @@ void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
* locking and barrier primitives. Generic code isn't really
* equipped to do the right thing...
*/
- if (ipi) {
- trace_smp_call_func_send(csd->func, cpu);
+ if (ipi)
arch_send_call_function_single_ipi(cpu);
- }
if (wait)
csd_lock_wait(csd);
@@ -201,9 +197,8 @@ void generic_smp_call_function_single_interrupt(void)
* so save them away before making the call:
*/
csd_flags = csd->flags;
- trace_smp_call_func_entry(csd->func);
+
csd->func(csd->info);
- trace_smp_call_func_exit(csd->func);
/*
* Unlocked CSDs are valid through generic_exec_single():
@@ -233,7 +228,6 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
int this_cpu;
int err = 0;
- trace_smp_call_func_send(func, cpu);
/*
* prevent preemption and reschedule on another processor,
* as well as CPU removal
@@ -251,9 +245,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
if (cpu == this_cpu) {
local_irq_save(flags);
- trace_smp_call_func_entry(func);
func(info);
- trace_smp_call_func_exit(func);
local_irq_restore(flags);
} else {
if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
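The kernel/smp.c hunks above drop both the CREATE_TRACE_POINTS instantiation of <trace/events/smp.h> and the trace_smp_call_func_send/entry/exit call sites bracketing cross-CPU function calls. For readers unfamiliar with the mechanism being torn out, such a header normally follows the kernel's standard TRACE_EVENT pattern; the sketch below shows that generic shape for a single-argument event matching the removed trace_smp_call_func_entry(csd->func) call site. It is an assumed illustration of the pattern, not a reconstruction of the deleted header, whose exact fields are not visible in this section.

/* Illustrative TRACE_EVENT shape only; the field layout is assumed. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM smp

#if !defined(_TRACE_SMP_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SMP_H

#include <linux/tracepoint.h>

TRACE_EVENT(smp_call_func_entry,

	TP_PROTO(void *func),

	TP_ARGS(func),

	TP_STRUCT__entry(
		__field(void *, func)
	),

	TP_fast_assign(
		__entry->func = func;
	),

	TP_printk("func=%pf", __entry->func)
);

#endif /* _TRACE_SMP_H */

/* This part must be outside the include guard. */
#include <trace/define_trace.h>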
diff --git a/linaro/configs/big-LITTLE-MP.conf b/linaro/configs/big-LITTLE-MP.conf
deleted file mode 100644
index ced3cf974f13..000000000000
--- a/linaro/configs/big-LITTLE-MP.conf
+++ /dev/null
@@ -1,12 +0,0 @@
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_FAIR_GROUP_SCHED=y
-CONFIG_NO_HZ=y
-CONFIG_SCHED_MC=y
-CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE=y
-CONFIG_SCHED_HMP=y
-CONFIG_HMP_FAST_CPU_MASK=""
-CONFIG_HMP_SLOW_CPU_MASK=""
-CONFIG_HMP_VARIABLE_SCALE=y
-CONFIG_HMP_FREQUENCY_INVARIANT_SCALE=y
-CONFIG_SCHED_HMP_LITTLE_PACKING=y
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6d9bace4e589..10bbb5427a6d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
-#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
@@ -433,12 +432,11 @@ EXPORT_SYMBOL(dec_zone_page_state);
* with the global counters. These could cause remote node cache line
* bouncing and will have to be only done when necessary.
*/
-bool refresh_cpu_vm_stats(int cpu)
+void refresh_cpu_vm_stats(int cpu)
{
struct zone *zone;
int i;
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
- bool vm_activity = false;
for_each_populated_zone(zone) {
struct per_cpu_pageset *p;
@@ -485,21 +483,14 @@ bool refresh_cpu_vm_stats(int cpu)
if (p->expire)
continue;
- if (p->pcp.count) {
- vm_activity = true;
+ if (p->pcp.count)
drain_zone_pages(zone, &p->pcp);
- }
#endif
}
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (global_diff[i]) {
+ if (global_diff[i])
atomic_long_add(global_diff[i], &vm_stat[i]);
- vm_activity = true;
- }
-
- return vm_activity;
-
}
/*
@@ -1184,70 +1175,20 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
-static struct cpumask vmstat_off_cpus;
-struct delayed_work vmstat_monitor_work;
-
-static inline bool need_vmstat(int cpu)
-{
- struct zone *zone;
- int i;
-
- for_each_populated_zone(zone) {
- struct per_cpu_pageset *p;
- p = per_cpu_ptr(zone->pageset, cpu);
-
- for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (p->vm_stat_diff[i])
- return true;
-
- if (zone_to_nid(zone) != numa_node_id() && p->pcp.count)
- return true;
- }
-
- return false;
-}
-
-static void vmstat_update(struct work_struct *w);
-
-static void start_cpu_timer(int cpu)
+static void vmstat_update(struct work_struct *w)
{
- struct delayed_work *work = &per_cpu(vmstat_work, cpu);
-
- cpumask_clear_cpu(cpu, &vmstat_off_cpus);
- schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
+ refresh_cpu_vm_stats(smp_processor_id());
+ schedule_delayed_work(&__get_cpu_var(vmstat_work),
+ round_jiffies_relative(sysctl_stat_interval));
}
-static void __cpuinit setup_cpu_timer(int cpu)
+static void __cpuinit start_cpu_timer(int cpu)
{
struct delayed_work *work = &per_cpu(vmstat_work, cpu);
INIT_DEFERRABLE_WORK(work, vmstat_update);
- start_cpu_timer(cpu);
-}
-
-static void vmstat_update_monitor(struct work_struct *w)
-{
- int cpu;
-
- for_each_cpu_and(cpu, &vmstat_off_cpus, cpu_online_mask)
- if (need_vmstat(cpu))
- start_cpu_timer(cpu);
-
- queue_delayed_work(system_unbound_wq, &vmstat_monitor_work,
- round_jiffies_relative(sysctl_stat_interval));
-}
-
-
-static void vmstat_update(struct work_struct *w)
-{
- int cpu = smp_processor_id();
-
- if (likely(refresh_cpu_vm_stats(cpu)))
- schedule_delayed_work(&__get_cpu_var(vmstat_work),
- round_jiffies_relative(sysctl_stat_interval));
- else
- cpumask_set_cpu(cpu, &vmstat_off_cpus);
+ schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}
/*
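Pulling the '+' lines of the mm/vmstat.c hunks above together: the revert restores the vanilla behaviour in which every online CPU's deferrable work simply folds its own counters and unconditionally re-arms itself each sysctl_stat_interval, rather than parking idle CPUs on the removed vmstat_off_cpus mask and waking them from a separate monitor work. Assembled here for readability; this is not a literal copy of the resulting file.

static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

static void vmstat_update(struct work_struct *w)
{
	/* Fold this CPU's counters into the global ones, then re-arm. */
	refresh_cpu_vm_stats(smp_processor_id());
	schedule_delayed_work(&__get_cpu_var(vmstat_work),
			      round_jiffies_relative(sysctl_stat_interval));
}

static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *work = &per_cpu(vmstat_work, cpu);

	/* Deferrable work: an idle CPU is not woken just for statistics. */
	INIT_DEFERRABLE_WORK(work, vmstat_update);
	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}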
@@ -1264,19 +1205,17 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
refresh_zone_stat_thresholds();
- setup_cpu_timer(cpu);
+ start_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- if (!cpumask_test_cpu(cpu, &vmstat_off_cpus)) {
- cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
- per_cpu(vmstat_work, cpu).work.func = NULL;
- }
+ cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+ per_cpu(vmstat_work, cpu).work.func = NULL;
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- setup_cpu_timer(cpu);
+ start_cpu_timer(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -1299,14 +1238,8 @@ static int __init setup_vmstat(void)
register_cpu_notifier(&vmstat_notifier);
- INIT_DEFERRABLE_WORK(&vmstat_monitor_work,
- vmstat_update_monitor);
- queue_delayed_work(system_unbound_wq,
- &vmstat_monitor_work,
- round_jiffies_relative(HZ));
-
for_each_online_cpu(cpu)
- setup_cpu_timer(cpu);
+ start_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);