aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Lezcano <daniel.lezcano@linaro.org>2014-09-04 11:32:09 -0400
committerAlex Shi <alex.shi@linaro.org>2014-12-04 10:50:04 +0800
commit52f44fdb8c730bdb45f70adbf67031bb23cdf37f (patch)
tree16face7515bfdb223d5292ba7337134b8d64bd32
parent3ccdf3283e757fcc0aa394de53edce3de1a3e92b (diff)
sched: Let the scheduler see CPU idle states
When the cpu enters idle, it stores the cpuidle state pointer in its struct rq instance which in turn could be used to make a better decision when balancing tasks. As soon as the cpu exits its idle state, the struct rq reference is cleared. There are a couple of situations where the idle state pointer could be changed while it is being consulted: 1. For x86/acpi with dynamic c-states, when a laptop switches from battery to AC that could result on removing the deeper idle state. The acpi driver triggers: 'acpi_processor_cst_has_changed' 'cpuidle_pause_and_lock' 'cpuidle_uninstall_idle_handler' 'kick_all_cpus_sync'. All cpus will exit their idle state and the pointed object will be set to NULL. 2. The cpuidle driver is unloaded. Logically that could happen but not in practice because the drivers are always compiled in and 95% of them are not coded to unregister themselves. In any case, the unloading code must call 'cpuidle_unregister_device', that calls 'cpuidle_pause_and_lock' leading to 'kick_all_cpus_sync' as mentioned above. A race can happen if we use the pointer and then one of these two scenarios occurs at the same moment. In order to be safe, the idle state pointer stored in the rq must be used inside a rcu_read_lock section where we are protected with the 'rcu_barrier' in the 'cpuidle_uninstall_idle_handler' function. The idle_get_state() and idle_put_state() accessors should be used to that effect. Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> Signed-off-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: linux-pm@vger.kernel.org Cc: linaro-kernel@lists.linaro.org Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Link: http://lkml.kernel.org/n/tip-@git.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org> (cherry picked from commit 442bf3aaf55a91ebfec71da46a4ee10a3c905bcc) Signed-off-by: Alex Shi <alex.shi@linaro.org>
-rw-r--r--drivers/cpuidle/cpuidle.c6
-rw-r--r--kernel/sched/idle.c6
-rw-r--r--kernel/sched/sched.h30
3 files changed, 42 insertions, 0 deletions
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 08f5b55ec62..6d64077dc93 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -181,6 +181,12 @@ void cpuidle_uninstall_idle_handler(void)
initialized = 0;
kick_all_cpus_sync();
}
+
+ /*
+ * Make sure external observers (such as the scheduler)
+ * are done looking at pointed idle states.
+ */
+ synchronize_rcu();
}
/**
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c4cbd040943..eb368d79e4c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -147,6 +147,9 @@ use_default:
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
goto use_default;
+ /* Take note of the planned idle state. */
+ idle_set_state(this_rq(), &drv->states[next_state]);
+
/*
* Enter the idle state previously returned by the governor decision.
* This function will block until an interrupt occurs and will take
@@ -154,6 +157,9 @@ use_default:
*/
entered_state = cpuidle_enter(drv, dev, next_state);
+ /* The cpu is no longer idle or about to enter idle. */
+ idle_set_state(this_rq(), NULL);
+
if (broadcast)
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f0907f6d39c..459b2d439c0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -11,6 +11,7 @@
#include "cpuacct.h"
struct rq;
+struct cpuidle_state;
extern __read_mostly int scheduler_running;
@@ -511,6 +512,11 @@ struct rq {
#ifdef CONFIG_SMP
struct llist_head wake_list;
#endif
+
+#ifdef CONFIG_CPU_IDLE
+ /* Must be inspected within a rcu lock section */
+ struct cpuidle_state *idle_state;
+#endif
};
static inline int cpu_of(struct rq *rq)
@@ -1041,6 +1047,30 @@ static inline void idle_exit_fair(struct rq *rq) { }
#endif
+#ifdef CONFIG_CPU_IDLE
+static inline void idle_set_state(struct rq *rq,
+ struct cpuidle_state *idle_state)
+{
+ rq->idle_state = idle_state;
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+ WARN_ON(!rcu_read_lock_held());
+ return rq->idle_state;
+}
+#else
+static inline void idle_set_state(struct rq *rq,
+ struct cpuidle_state *idle_state)
+{
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+ return NULL;
+}
+#endif
+
extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);