diff options
-rw-r--r-- | arch/x86/include/asm/preempt.h | 12 | ||||
-rw-r--r-- | include/linux/ftrace_event.h | 1 | ||||
-rw-r--r-- | include/linux/preempt.h | 33 | ||||
-rw-r--r-- | include/linux/sched.h | 37 | ||||
-rw-r--r-- | include/linux/thread_info.h | 13 | ||||
-rw-r--r-- | kernel/Kconfig.preempt | 6 | ||||
-rw-r--r-- | kernel/sched/core.c | 49 | ||||
-rw-r--r-- | kernel/sched/fair.c | 16 | ||||
-rw-r--r-- | kernel/sched/features.h | 3 | ||||
-rw-r--r-- | kernel/sched/sched.h | 9 | ||||
-rw-r--r-- | kernel/trace/trace.c | 41 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 13 |
13 files changed, 201 insertions, 34 deletions
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index c8b051933b1b..752fe5647288 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -85,17 +85,25 @@ static __always_inline void __preempt_count_sub(int val) * a decrement which hits zero means we have no preempt_count and should * reschedule. */ -static __always_inline bool __preempt_count_dec_and_test(void) +static __always_inline bool ____preempt_count_dec_and_test(void) { GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); } +static __always_inline bool __preempt_count_dec_and_test(void) +{ + if (____preempt_count_dec_and_test()) + return true; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +} + /* * Returns true when we need to resched and can (barring IRQ state). */ static __always_inline bool should_resched(void) { - return unlikely(!__this_cpu_read_4(__preempt_count)); + return unlikely(!__this_cpu_read_4(__preempt_count) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY)); } #ifdef CONFIG_PREEMPT diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 1b21090db5e6..a8390fad3993 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -59,6 +59,7 @@ struct trace_entry { int pid; unsigned short migrate_disable; unsigned short padding; + unsigned char preempt_lazy_count; }; #define FTRACE_MAX_EVENT \ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 933912ec8268..116af6a9c380 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -33,6 +33,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) +#ifdef CONFIG_PREEMPT_LAZY +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) +#else +#define add_preempt_lazy_count(val) do { } while (0) +#define sub_preempt_lazy_count(val) do { } while (0) +#define inc_preempt_lazy_count() do { } while (0) +#define dec_preempt_lazy_count() do { } while (0) +#define preempt_lazy_count() (0) +#endif + #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ @@ -41,6 +55,12 @@ do { \ barrier(); \ } while (0) +#define preempt_lazy_disable() \ +do { \ + inc_preempt_lazy_count(); \ + barrier(); \ +} while (0) + #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ @@ -69,6 +89,13 @@ do { \ __preempt_schedule(); \ } while (0) +#define preempt_lazy_enable() \ +do { \ + dec_preempt_lazy_count(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + #else #define preempt_enable() \ do { \ @@ -99,7 +126,8 @@ do { \ #define preempt_enable_notrace() \ do { \ barrier(); \ - if (unlikely(__preempt_count_dec_and_test())) \ + if (unlikely(__preempt_count_dec_and_test() || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY))) \ __preempt_schedule_context(); \ } while (0) #else @@ -122,7 +150,6 @@ do { \ #define sched_preempt_enable_no_resched() barrier() #define preempt_enable_no_resched() barrier() #define preempt_enable() barrier() -#define preempt_check_resched() do { } while (0) #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() @@ -147,7 +174,7 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ - if (tif_need_resched()) \ + if (tif_need_resched_now()) \ set_preempt_need_resched(); \ } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 318e77bc1b23..64b9569079a8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2650,6 +2650,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +#ifdef CONFIG_PREEMPT_LAZY +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) +{ + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) +{ + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); +} + +static inline int need_resched_lazy(void) +{ + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +} + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +#else +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } +static inline int need_resched_lazy(void) { return 0; } + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +#endif + static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index fddbe2023a5d..16e543dd0240 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -118,7 +118,18 @@ static inline __deprecated void set_need_resched(void) */ } -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#ifdef CONFIG_PREEMPT_LAZY +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY)) +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY)) + +#else +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED)) +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) +#define tif_need_resched_lazy() (0) + +#endif #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK /* diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index f8a2982bdbde..11dbe26a8279 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -6,6 +6,12 @@ config PREEMPT_RT_BASE bool select PREEMPT +config HAVE_PREEMPT_LAZY + bool + +config PREEMPT_LAZY + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL + choice prompt "Preemption Model" default PREEMPT_NONE diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 21d7f4a997b4..7366d643c199 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -539,6 +539,37 @@ void resched_task(struct task_struct *p) smp_send_reschedule(cpu); } +#ifdef CONFIG_PREEMPT_LAZY +void resched_task_lazy(struct task_struct *p) +{ + int cpu; + + if (!sched_feat(PREEMPT_LAZY)) { + resched_task(p); + return; + } + + lockdep_assert_held(&task_rq(p)->lock); + + if (test_tsk_need_resched(p)) + return; + + if (test_tsk_need_resched_lazy(p)) + return; + + set_tsk_need_resched_lazy(p); + + cpu = task_cpu(p); + if (cpu == smp_processor_id()) + return; + + /* NEED_RESCHED_LAZY must be visible before we test polling */ + smp_mb(); + if (!tsk_is_polling(p)) + smp_send_reschedule(cpu); +} +#endif + void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -1882,6 +1913,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->on_cpu = 0; #endif init_task_preempt_count(p); +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(p)->preempt_lazy_count = 0; +#endif #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); @@ -2664,6 +2698,7 @@ void migrate_disable(void) } preempt_disable(); + preempt_lazy_disable(); pin_current_cpu(); p->migrate_disable = 1; preempt_enable(); @@ -2718,6 +2753,7 @@ void migrate_enable(void) unpin_current_cpu(); preempt_enable(); + preempt_lazy_enable(); } EXPORT_SYMBOL(migrate_enable); #else @@ -2845,6 +2881,7 @@ need_resched: put_prev_task(rq, prev); next = pick_next_task(rq); clear_tsk_need_resched(prev); + clear_tsk_need_resched_lazy(prev); clear_preempt_need_resched(); rq->skip_clock_update = 0; @@ -2950,6 +2987,14 @@ asmlinkage void __sched notrace preempt_schedule(void) if (likely(!preemptible())) return; +#ifdef CONFIG_PREEMPT_LAZY + /* + * Check for lazy preemption + */ + if (current_thread_info()->preempt_lazy_count && + !test_thread_flag(TIF_NEED_RESCHED)) + return; +#endif do { __preempt_count_add(PREEMPT_ACTIVE); /* @@ -4694,7 +4739,9 @@ void init_idle(struct task_struct *idle, int cpu) /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); - +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(idle)->preempt_lazy_count = 0; +#endif /* * The idle tasks have their own, simple scheduling class: */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9b4c4f320130..ffe5ac103efa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2679,7 +2679,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. @@ -2703,7 +2703,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); } static void @@ -2824,7 +2824,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. */ if (queued) { - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); return; } /* @@ -3013,7 +3013,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); } static __always_inline @@ -3612,7 +3612,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (rq->curr == p) - resched_task(p); + resched_task_lazy(p); return; } @@ -4477,7 +4477,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: - resched_task(curr); + resched_task_lazy(curr); /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved @@ -6965,7 +6965,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); - resched_task(rq->curr); + resched_task_lazy(rq->curr); } se->vruntime -= cfs_rq->min_vruntime; @@ -6990,7 +6990,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (rq->curr == p) { if (p->prio > oldprio) - resched_task(rq->curr); + resched_task_lazy(rq->curr); } else check_preempt_curr(rq, p, 0); } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index d6eeac9695b6..ef66ab95271c 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -58,6 +58,9 @@ SCHED_FEAT(NONTASK_POWER, true) SCHED_FEAT(TTWU_QUEUE, true) #else SCHED_FEAT(TTWU_QUEUE, false) +# ifdef CONFIG_PREEMPT_LAZY +SCHED_FEAT(PREEMPT_LAZY, true) +# endif #endif SCHED_FEAT(FORCE_SD_OVERLAP, false) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 8e7ee2cd20fc..e97603dde6f8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1201,6 +1201,15 @@ extern void init_sched_dl_class(void); extern void resched_task(struct task_struct *p); extern void resched_cpu(int cpu); +#ifdef CONFIG_PREEMPT_LAZY +extern void resched_task_lazy(struct task_struct *tsk); +#else +static inline void resched_task_lazy(struct task_struct *tsk) +{ + resched_task(tsk); +} +#endif + extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 79df6c90042e..5ee8c83eb085 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1541,6 +1541,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, struct task_struct *tsk = current; entry->preempt_count = pc & 0xff; + entry->preempt_lazy_count = preempt_lazy_count(); entry->pid = (tsk) ? tsk->pid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -1550,7 +1551,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, #endif ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; @@ -2459,15 +2461,17 @@ get_total_entries(struct trace_buffer *buf, static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n"); - seq_puts(m, "# / _-----=> irqs-off \n"); - seq_puts(m, "# | / _----=> need-resched \n"); - seq_puts(m, "# || / _---=> hardirq/softirq \n"); - seq_puts(m, "# ||| / _--=> preempt-depth \n"); - seq_puts(m, "# |||| / _--=> migrate-disable\n"); - seq_puts(m, "# ||||| / delay \n"); - seq_puts(m, "# cmd pid |||||| time | caller \n"); - seq_puts(m, "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# _--------=> CPU# \n"); + seq_puts(m, "# / _-------=> irqs-off \n"); + seq_puts(m, "# | / _------=> need-resched \n"); + seq_puts(m, "# || / _-----=> need-resched_lazy \n"); + seq_puts(m, "# ||| / _----=> hardirq/softirq \n"); + seq_puts(m, "# |||| / _---=> preempt-depth \n"); + seq_puts(m, "# ||||| / _--=> preempt-lazy-depth\n"); + seq_puts(m, "# |||||| / _-=> migrate-disable \n"); + seq_puts(m, "# ||||||| / delay \n"); + seq_puts(m, "# cmd pid |||||||| time | caller \n"); + seq_puts(m, "# \\ / |||||||| \\ | / \n"); } static void print_event_info(struct trace_buffer *buf, struct seq_file *m) @@ -2491,13 +2495,16 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) { print_event_info(buf, m); - seq_puts(m, "# _-----=> irqs-off\n"); - seq_puts(m, "# / _----=> need-resched\n"); - seq_puts(m, "# | / _---=> hardirq/softirq\n"); - seq_puts(m, "# || / _--=> preempt-depth\n"); - seq_puts(m, "# ||| / delay\n"); - seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"); - seq_puts(m, "# | | | |||| | |\n"); + seq_puts(m, "# _-------=> irqs-off \n"); + seq_puts(m, "# / _------=> need-resched \n"); + seq_puts(m, "# |/ _-----=> need-resched_lazy \n"); + seq_puts(m, "# ||/ _----=> hardirq/softirq \n"); + seq_puts(m, "# |||/ _---=> preempt-depth \n"); + seq_puts(m, "# ||||/ _--=> preempt-lazy-depth\n"); + seq_puts(m, "# ||||| / _-=> migrate-disable \n"); + seq_puts(m, "# |||||| / delay\n"); + seq_puts(m, "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | |||||| | |\n"); } void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 02b592f2d4b7..2fd14b61929d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -118,6 +118,7 @@ struct kretprobe_trace_entry_head { * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler + * NEED_RESCHED_LAZY - lazy reschedule is requested */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, @@ -126,6 +127,7 @@ enum trace_flag_type { TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, + TRACE_FLAG_NEED_RESCHED_LAZY = 0x40, }; #define TRACE_BUF_SIZE 1024 diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index e3b74184594f..e97e5cf0ebcd 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -606,6 +606,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { char hardsoft_irq; char need_resched; + char need_resched_lazy; char irqs_off; int hardirq; int softirq; @@ -634,6 +635,8 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) need_resched = '.'; break; } + need_resched_lazy = + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; hardsoft_irq = (hardirq && softirq) ? 'H' : @@ -641,8 +644,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) softirq ? 's' : '.'; - if (!trace_seq_printf(s, "%c%c%c", - irqs_off, need_resched, hardsoft_irq)) + if (!trace_seq_printf(s, "%c%c%c%c", + irqs_off, need_resched, need_resched_lazy, + hardsoft_irq)) return 0; if (entry->preempt_count) @@ -650,6 +654,11 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) else ret = trace_seq_putc(s, '.'); + if (entry->preempt_lazy_count) + ret = trace_seq_printf(s, "%x", entry->preempt_lazy_count); + else + ret = trace_seq_putc(s, '.'); + if (entry->migrate_disable) ret = trace_seq_printf(s, "%x", entry->migrate_disable); else |