diff options
Diffstat (limited to 'include')
72 files changed, 2124 insertions, 217 deletions
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 68534ef86ec8..0ef299da5a84 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -72,6 +72,7 @@ #define acpi_cache_t struct kmem_cache #define acpi_spinlock spinlock_t * +#define acpi_raw_spinlock raw_spinlock_t * #define acpi_cpu_flags unsigned long #else /* !__KERNEL__ */ @@ -174,6 +175,19 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache) lock ? AE_OK : AE_NO_MEMORY; \ }) +#define acpi_os_create_raw_lock(__handle) \ +({ \ + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ + \ + if (lock) { \ + *(__handle) = lock; \ + raw_spin_lock_init(*(__handle)); \ + } \ + lock ? AE_OK : AE_NO_MEMORY; \ +}) + +#define acpi_os_delete_raw_lock(__handle) kfree(__handle) + #endif /* __KERNEL__ */ #endif /* __ACLINUX_H__ */ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 7d10f962aa13..aee7fd2eefc5 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -202,6 +202,20 @@ extern void warn_slowpath_null(const char *file, const int line); # define WARN_ON_SMP(x) ({0;}) #endif +#ifdef CONFIG_PREEMPT_RT_BASE +# define BUG_ON_RT(c) BUG_ON(c) +# define BUG_ON_NONRT(c) do { } while (0) +# define WARN_ON_RT(condition) WARN_ON(condition) +# define WARN_ON_NONRT(condition) do { } while (0) +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) +#else +# define BUG_ON_RT(c) do { } while (0) +# define BUG_ON_NONRT(c) BUG_ON(c) +# define WARN_ON_RT(condition) do { } while (0) +# define WARN_ON_NONRT(condition) WARN_ON(condition) +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9e52b0626b39..2f4697c8cb46 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -74,8 +74,52 @@ struct buffer_head { struct address_space *b_assoc_map; /* mapping this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ +#ifdef CONFIG_PREEMPT_RT_BASE + spinlock_t b_uptodate_lock; +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \ + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE) + spinlock_t b_state_lock; + spinlock_t b_journal_head_lock; +#endif +#endif }; +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) +{ + unsigned long flags; + +#ifndef CONFIG_PREEMPT_RT_BASE + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); +#else + spin_lock_irqsave(&bh->b_uptodate_lock, flags); +#endif + return flags; +} + +static inline void +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) +{ +#ifndef CONFIG_PREEMPT_RT_BASE + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); + local_irq_restore(flags); +#else + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); +#endif +} + +static inline void buffer_head_init_locks(struct buffer_head *bh) +{ +#ifdef CONFIG_PREEMPT_RT_BASE + spin_lock_init(&bh->b_uptodate_lock); +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \ + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE) + spin_lock_init(&bh->b_state_lock); + spin_lock_init(&bh->b_journal_head_lock); +#endif +#endif +} + /* * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * and buffer_foo() functions. diff --git a/include/linux/completion.h b/include/linux/completion.h index 33f0280fd533..e197dcde3eb6 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -8,7 +8,7 @@ * See kernel/sched.c for details. */ -#include <linux/wait.h> +#include <linux/wait-simple.h> /* * struct completion - structure used to maintain state for a "completion" @@ -24,11 +24,11 @@ */ struct completion { unsigned int done; - wait_queue_head_t wait; + struct swait_head wait; }; #define COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + { 0, SWAIT_HEAD_INITIALIZER((work).wait) } #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) @@ -73,7 +73,7 @@ struct completion { static inline void init_completion(struct completion *x) { x->done = 0; - init_waitqueue_head(&x->wait); + init_swait_head(&x->wait); } extern void wait_for_completion(struct completion *); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 322e0afc8634..88e6a6827775 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -1,15 +1,15 @@ /* * include/linux/cpu.h - generic cpu definition * - * This is mainly for topological representation. We define the - * basic 'struct cpu' here, which can be embedded in per-arch + * This is mainly for topological representation. We define the + * basic 'struct cpu' here, which can be embedded in per-arch * definitions of processors. * * Basic handling of the devices is done in drivers/base/cpu.c - * and system devices are handled in drivers/base/sys.c. + * and system devices are handled in drivers/base/sys.c. * * CPUs are exported via sysfs in the class/cpu/devices/ - * directory. + * directory. */ #ifndef _LINUX_CPU_H_ #define _LINUX_CPU_H_ @@ -178,6 +178,8 @@ extern void get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); +extern void pin_current_cpu(void); +extern void unpin_current_cpu(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) @@ -203,6 +205,8 @@ static inline void cpu_hotplug_driver_unlock(void) #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) +static inline void pin_current_cpu(void) { } +static inline void unpin_current_cpu(void) { } #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) diff --git a/include/linux/delay.h b/include/linux/delay.h index a6ecb34cf547..37caab306336 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds) msleep(seconds * 1000); } +#ifdef CONFIG_PREEMPT_RT_FULL +extern void cpu_chill(void); +#else +# define cpu_chill() cpu_relax() +#endif + #endif /* defined(_LINUX_DELAY_H) */ diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b5e36017acd7..fa6412dd5b10 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -56,6 +56,9 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + unsigned short migrate_disable; + unsigned short padding; + unsigned char preempt_lazy_count; }; #define FTRACE_MAX_EVENT \ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index c1d6555d2567..260deed1721c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -61,7 +61,11 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) +#ifndef CONFIG_PREEMPT_RT_FULL +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) +#else +# define SOFTIRQ_DISABLE_OFFSET (0) +#endif #ifndef PREEMPT_ACTIVE #define PREEMPT_ACTIVE_BITS 1 @@ -74,10 +78,17 @@ #endif #define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | NMI_MASK)) +#ifndef CONFIG_PREEMPT_RT_FULL +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) +#else +# define softirq_count() (0UL) +extern int in_serving_softirq(void); +#endif + /* * Are we doing bottom half or hardware interrupt processing? * Are we in a softirq context? Interrupt context? @@ -87,7 +98,6 @@ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) /* * Are we in NMI context? diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7fb31da45d03..821d523cf888 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> +#include <linux/sched.h> #include <asm/cacheflush.h> @@ -85,32 +86,51 @@ static inline void __kunmap_atomic(void *addr) #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) +#ifndef CONFIG_PREEMPT_RT_FULL DECLARE_PER_CPU(int, __kmap_atomic_idx); +#endif static inline int kmap_atomic_idx_push(void) { +#ifndef CONFIG_PREEMPT_RT_FULL int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; -#ifdef CONFIG_DEBUG_HIGHMEM +# ifdef CONFIG_DEBUG_HIGHMEM WARN_ON_ONCE(in_irq() && !irqs_disabled()); BUG_ON(idx > KM_TYPE_NR); -#endif +# endif return idx; +#else + current->kmap_idx++; + BUG_ON(current->kmap_idx > KM_TYPE_NR); + return current->kmap_idx - 1; +#endif } static inline int kmap_atomic_idx(void) { +#ifndef CONFIG_PREEMPT_RT_FULL return __this_cpu_read(__kmap_atomic_idx) - 1; +#else + return current->kmap_idx - 1; +#endif } static inline void kmap_atomic_idx_pop(void) { -#ifdef CONFIG_DEBUG_HIGHMEM +#ifndef CONFIG_PREEMPT_RT_FULL +# ifdef CONFIG_DEBUG_HIGHMEM int idx = __this_cpu_dec_return(__kmap_atomic_idx); BUG_ON(idx < 0); -#else +# else __this_cpu_dec(__kmap_atomic_idx); +# endif +#else + current->kmap_idx--; +# ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(current->kmap_idx < 0); +# endif #endif } diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d19a5c2d2270..bdbf77db0f4d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -111,6 +111,11 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; + struct list_head cb_entry; + int irqsafe; +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + ktime_t praecox; +#endif #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; @@ -147,6 +152,7 @@ struct hrtimer_clock_base { int index; clockid_t clockid; struct timerqueue_head active; + struct list_head expired; ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; @@ -190,6 +196,9 @@ struct hrtimer_cpu_base { unsigned long nr_hangs; ktime_t max_hang_time; #endif +#ifdef CONFIG_PREEMPT_RT_BASE + wait_queue_head_t wait; +#endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; }; @@ -385,6 +394,13 @@ static inline int hrtimer_restart(struct hrtimer *timer) return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } +/* Softirq preemption could deadlock timer removal */ +#ifdef CONFIG_PREEMPT_RT_BASE + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); +#else +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) +#endif + /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); @@ -445,9 +461,8 @@ extern int schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode, int clock); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); -/* Soft interrupt function to run the hrtimer queues: */ +/* Called from the periodic timer tick */ extern void hrtimer_run_queues(void); -extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/include/linux/idr.h b/include/linux/idr.h index 871a213a8477..267527b0f7d3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -92,10 +92,14 @@ void idr_init(struct idr *idp); * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ +#ifdef CONFIG_PREEMPT_RT_FULL +void idr_preload_end(void); +#else static inline void idr_preload_end(void) { preempt_enable(); } +#endif /** * idr_find - return pointer for given id diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5cd0f0949927..766558a8fe6d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -143,9 +143,16 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif +#ifdef CONFIG_PREEMPT_RT_BASE +# define INIT_TIMER_LIST .posix_timer_list = NULL, +#else +# define INIT_TIMER_LIST +#endif + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ + .vtime_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.vtime_lock), \ + .vtime_seq = SEQCNT_ZERO, \ .vtime_snap = 0, \ .vtime_snap_whence = VTIME_SYS, #else @@ -207,6 +214,7 @@ extern struct task_group root_task_group; .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ + INIT_TIMER_LIST \ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5fa5afeeb759..838d4bdaeed3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -58,6 +58,7 @@ * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) */ #define IRQF_DISABLED 0x00000020 #define IRQF_SHARED 0x00000080 @@ -71,6 +72,7 @@ #define IRQF_FORCE_RESUME 0x00008000 #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 +#define IRQF_NO_SOFTIRQ_CALL 0x00040000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) @@ -211,7 +213,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); #ifdef CONFIG_LOCKDEP # define local_irq_enable_in_hardirq() do { } while (0) #else -# define local_irq_enable_in_hardirq() local_irq_enable() +# define local_irq_enable_in_hardirq() local_irq_enable_nort() #endif extern void disable_irq_nosync(unsigned int irq); @@ -261,6 +263,7 @@ struct irq_affinity_notify { unsigned int irq; struct kref kref; struct work_struct work; + struct list_head list; void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; @@ -383,9 +386,13 @@ static inline int disable_irq_wake(unsigned int irq) #ifdef CONFIG_IRQ_FORCED_THREADING -extern bool force_irqthreads; +# ifndef CONFIG_PREEMPT_RT_BASE + extern bool force_irqthreads; +# else +# define force_irqthreads (true) +# endif #else -#define force_irqthreads (0) +#define force_irqthreads (false) #endif #ifndef __ARCH_SET_SOFTIRQ_PENDING @@ -441,8 +448,14 @@ struct softirq_action void (*action)(struct softirq_action *); }; +#ifndef CONFIG_PREEMPT_RT_FULL asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); +static inline void thread_do_softirq(void) { do_softirq(); } +#else +extern void thread_do_softirq(void); +#endif + extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); @@ -450,6 +463,8 @@ extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +extern void softirq_check_pending_idle(void); + /* This is the worklist that queues up per-cpu softirq work. * * send_remote_sendirq() adds work to these lists, and @@ -490,8 +505,9 @@ extern void __send_remote_softirq(struct call_single_data *cp, int cpu, to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its execution is still not started, it will be executed only once. - * If this tasklet is already running on another CPU (or schedule is called - from tasklet itself), it is rescheduled for later. + * If this tasklet is already running on another CPU, it is rescheduled + for later. + * Schedule must not be called from the tasklet itself (a lockup occurs) * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. @@ -516,27 +532,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ + TASKLET_STATE_PENDING /* Tasklet is pending */ }; -#ifdef CONFIG_SMP +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) + +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } +static inline int tasklet_tryunlock(struct tasklet_struct *t) +{ + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; +} + static inline void tasklet_unlock(struct tasklet_struct *t) { smp_mb__before_clear_bit(); clear_bit(TASKLET_STATE_RUN, &(t)->state); } -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } -} +extern void tasklet_unlock_wait(struct tasklet_struct *t); + #else #define tasklet_trylock(t) 1 +#define tasklet_tryunlock(t) 1 #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif @@ -585,17 +610,8 @@ static inline void tasklet_disable(struct tasklet_struct *t) smp_mb(); } -static inline void tasklet_enable(struct tasklet_struct *t) -{ - smp_mb__before_atomic_dec(); - atomic_dec(&t->count); -} - -static inline void tasklet_hi_enable(struct tasklet_struct *t) -{ - smp_mb__before_atomic_dec(); - atomic_dec(&t->count); -} +extern void tasklet_enable(struct tasklet_struct *t); +extern void tasklet_hi_enable(struct tasklet_struct *t); extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); @@ -627,6 +643,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) tasklet_kill(&ttimer->tasklet); } +#ifdef CONFIG_PREEMPT_RT_FULL +extern void softirq_early_init(void); +#else +static inline void softirq_early_init(void) { } +#endif + /* * Autoprobing for irqs: * diff --git a/include/linux/irq.h b/include/linux/irq.h index bc4e06611958..7fad94a99baf 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -70,6 +70,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data); * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context * IRQ_NESTED_TRHEAD - Interrupt nests into another thread * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) */ enum { IRQ_TYPE_NONE = 0x00000000, @@ -94,12 +95,14 @@ enum { IRQ_NESTED_THREAD = (1 << 15), IRQ_NOTHREAD = (1 << 16), IRQ_PER_CPU_DEVID = (1 << 17), + IRQ_NO_SOFTIRQ_CALL = (1 << 18), }; #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ - IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID) + IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ + IRQ_NO_SOFTIRQ_CALL) #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 66017028dcb3..60c19eeca9e0 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -16,6 +16,7 @@ #define IRQ_WORK_BUSY 2UL #define IRQ_WORK_FLAGS 3UL #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ struct irq_work { unsigned long flags; diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index f4101f01286b..83b9465169f7 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -52,6 +52,7 @@ struct irq_desc { unsigned int irq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ unsigned int irqs_unhandled; + u64 random_ip; raw_spinlock_t lock; struct cpumask *percpu_enabled; #ifdef CONFIG_SMP diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index d176d658fe25..a52b35d4229f 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -25,8 +25,6 @@ # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) @@ -39,9 +37,15 @@ # define trace_softirqs_enabled(p) 0 # define trace_hardirq_enter() do { } while (0) # define trace_hardirq_exit() do { } while (0) +# define INIT_TRACE_IRQFLAGS +#endif + +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) +#else # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS #endif #if defined(CONFIG_IRQSOFF_TRACER) || \ @@ -147,4 +151,23 @@ #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ +/* + * local_irq* variants depending on RT/!RT + */ +#ifdef CONFIG_PREEMPT_RT_FULL +# define local_irq_disable_nort() do { } while (0) +# define local_irq_enable_nort() do { } while (0) +# define local_irq_save_nort(flags) do { local_save_flags(flags); } while (0) +# define local_irq_restore_nort(flags) do { (void)(flags); } while (0) +# define local_irq_disable_rt() local_irq_disable() +# define local_irq_enable_rt() local_irq_enable() +#else +# define local_irq_disable_nort() local_irq_disable() +# define local_irq_enable_nort() local_irq_enable() +# define local_irq_save_nort(flags) local_irq_save(flags) +# define local_irq_restore_nort(flags) local_irq_restore(flags) +# define local_irq_disable_rt() do { } while (0) +# define local_irq_enable_rt() do { } while (0) +#endif + #endif diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h index 6133679bc4c0..0dbc1515154a 100644 --- a/include/linux/jbd_common.h +++ b/include/linux/jbd_common.h @@ -39,32 +39,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) static inline void jbd_lock_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_lock(BH_State, &bh->b_state); +#else + spin_lock(&bh->b_state_lock); +#endif } static inline int jbd_trylock_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE return bit_spin_trylock(BH_State, &bh->b_state); +#else + return spin_trylock(&bh->b_state_lock); +#endif } static inline int jbd_is_locked_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE return bit_spin_is_locked(BH_State, &bh->b_state); +#else + return spin_is_locked(&bh->b_state_lock); +#endif } static inline void jbd_unlock_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_unlock(BH_State, &bh->b_state); +#else + spin_unlock(&bh->b_state_lock); +#endif } static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_lock(BH_JournalHead, &bh->b_state); +#else + spin_lock(&bh->b_journal_head_lock); +#endif } static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_unlock(BH_JournalHead, &bh->b_state); +#else + spin_unlock(&bh->b_journal_head_lock); +#endif } #endif diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0976fc46d1e0..40c876bb31b2 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -50,7 +50,8 @@ #include <linux/compiler.h> #include <linux/workqueue.h> -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) && \ + !defined(CONFIG_PREEMPT_BASE) struct static_key { atomic_t enabled; diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 7f6fe6e015bc..680ad231674f 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -115,7 +115,7 @@ extern int kdb_trap_printk; extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args); extern __printf(1, 2) int kdb_printf(const char *, ...); typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); - +#define in_kdb_printk() (kdb_trap_printk) extern void kdb_init(int level); /* Access to kdb specific polling devices */ @@ -150,6 +150,7 @@ extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, extern int kdb_unregister(char *); #else /* ! CONFIG_KGDB_KDB */ static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } +#define in_kdb_printk() (0) static inline void kdb_init(int level) {} static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, char *help, short minlen) { return 0; } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e9ef6d6b51d5..463f8d746088 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -415,6 +415,7 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, + SYSTEM_SUSPEND, } system_state; #define TAINT_PROPRIETARY_MODULE 0 diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0d24e932db0b..d2c0d6d27163 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -42,22 +42,37 @@ #endif struct lglock { +#ifndef CONFIG_PREEMPT_RT_FULL arch_spinlock_t __percpu *lock; +#else + struct rt_mutex __percpu *lock; +#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lock_class_key lock_key; struct lockdep_map lock_dep_map; #endif }; -#define DEFINE_LGLOCK(name) \ +#ifndef CONFIG_PREEMPT_RT_FULL +# define DEFINE_LGLOCK(name) \ static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ = __ARCH_SPIN_LOCK_UNLOCKED; \ struct lglock name = { .lock = &name ## _lock } -#define DEFINE_STATIC_LGLOCK(name) \ +# define DEFINE_STATIC_LGLOCK(name) \ static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ = __ARCH_SPIN_LOCK_UNLOCKED; \ static struct lglock name = { .lock = &name ## _lock } +#else + +# define DEFINE_LGLOCK(name) \ + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock); \ + struct lglock name = { .lock = &name ## _lock } + +# define DEFINE_STATIC_LGLOCK(name) \ + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock); \ + static struct lglock name = { .lock = &name ## _lock } +#endif void lg_lock_init(struct lglock *lg, char *name); void lg_local_lock(struct lglock *lg); diff --git a/include/linux/list.h b/include/linux/list.h index b83e5657365a..f6a9d3af4caf 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -373,6 +373,17 @@ static inline void list_splice_tail_init(struct list_head *list, (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) /** + * list_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +/** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 2eb88556c5c5..d8876a0cf036 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -2,6 +2,7 @@ #define _LINUX_LIST_BL_H #include <linux/list.h> +#include <linux/spinlock.h> #include <linux/bit_spinlock.h> /* @@ -32,13 +33,22 @@ struct hlist_bl_head { struct hlist_bl_node *first; +#ifdef CONFIG_PREEMPT_RT_BASE + raw_spinlock_t lock; +#endif }; struct hlist_bl_node { struct hlist_bl_node *next, **pprev; }; -#define INIT_HLIST_BL_HEAD(ptr) \ - ((ptr)->first = NULL) + +static inline void INIT_HLIST_BL_HEAD(struct hlist_bl_head *h) +{ + h->first = NULL; +#ifdef CONFIG_PREEMPT_RT_BASE + raw_spin_lock_init(&h->lock); +#endif +} static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) { @@ -117,12 +127,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) static inline void hlist_bl_lock(struct hlist_bl_head *b) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_lock(0, (unsigned long *)b); +#else + raw_spin_lock(&b->lock); +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + __set_bit(0, (unsigned long *)b); +#endif +#endif } static inline void hlist_bl_unlock(struct hlist_bl_head *b) { +#ifndef CONFIG_PREEMPT_RT_BASE __bit_spin_unlock(0, (unsigned long *)b); +#else +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + __clear_bit(0, (unsigned long *)b); +#endif + raw_spin_unlock(&b->lock); +#endif } static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) diff --git a/include/linux/locallock.h b/include/linux/locallock.h new file mode 100644 index 000000000000..e7bd8be71f8c --- /dev/null +++ b/include/linux/locallock.h @@ -0,0 +1,254 @@ +#ifndef _LINUX_LOCALLOCK_H +#define _LINUX_LOCALLOCK_H + +#include <linux/percpu.h> +#include <linux/spinlock.h> + +#ifdef CONFIG_PREEMPT_RT_BASE + +#ifdef CONFIG_DEBUG_SPINLOCK +# define LL_WARN(cond) WARN_ON(cond) +#else +# define LL_WARN(cond) do { } while (0) +#endif + +/* + * per cpu lock based substitute for local_irq_*() + */ +struct local_irq_lock { + spinlock_t lock; + struct task_struct *owner; + int nestcnt; + unsigned long flags; +}; + +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } + +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ + DECLARE_PER_CPU(struct local_irq_lock, lvar) + +#define local_irq_lock_init(lvar) \ + do { \ + int __cpu; \ + for_each_possible_cpu(__cpu) \ + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ + } while (0) + +static inline void __local_lock(struct local_irq_lock *lv) +{ + if (lv->owner != current) { + spin_lock(&lv->lock); + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; + } + lv->nestcnt++; +} + +#define local_lock(lvar) \ + do { __local_lock(&get_local_var(lvar)); } while (0) + +static inline int __local_trylock(struct local_irq_lock *lv) +{ + if (lv->owner != current && spin_trylock(&lv->lock)) { + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; + lv->nestcnt = 1; + return 1; + } + return 0; +} + +#define local_trylock(lvar) \ + ({ \ + int __locked; \ + __locked = __local_trylock(&get_local_var(lvar)); \ + if (!__locked) \ + put_local_var(lvar); \ + __locked; \ + }) + +static inline void __local_unlock(struct local_irq_lock *lv) +{ + LL_WARN(lv->nestcnt == 0); + LL_WARN(lv->owner != current); + if (--lv->nestcnt) + return; + + lv->owner = NULL; + spin_unlock(&lv->lock); +} + +#define local_unlock(lvar) \ + do { \ + __local_unlock(&__get_cpu_var(lvar)); \ + put_local_var(lvar); \ + } while (0) + +static inline void __local_lock_irq(struct local_irq_lock *lv) +{ + spin_lock_irqsave(&lv->lock, lv->flags); + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; + lv->nestcnt = 1; +} + +#define local_lock_irq(lvar) \ + do { __local_lock_irq(&get_local_var(lvar)); } while (0) + +#define local_lock_irq_on(lvar, cpu) \ + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) + +static inline void __local_unlock_irq(struct local_irq_lock *lv) +{ + LL_WARN(!lv->nestcnt); + LL_WARN(lv->owner != current); + lv->owner = NULL; + lv->nestcnt = 0; + spin_unlock_irq(&lv->lock); +} + +#define local_unlock_irq(lvar) \ + do { \ + __local_unlock_irq(&__get_cpu_var(lvar)); \ + put_local_var(lvar); \ + } while (0) + +#define local_unlock_irq_on(lvar, cpu) \ + do { \ + __local_unlock_irq(&per_cpu(lvar, cpu)); \ + } while (0) + +static inline int __local_lock_irqsave(struct local_irq_lock *lv) +{ + if (lv->owner != current) { + __local_lock_irq(lv); + return 0; + } else { + lv->nestcnt++; + return 1; + } +} + +#define local_lock_irqsave(lvar, _flags) \ + do { \ + if (__local_lock_irqsave(&get_local_var(lvar))) \ + put_local_var(lvar); \ + _flags = __get_cpu_var(lvar).flags; \ + } while (0) + +#define local_lock_irqsave_on(lvar, _flags, cpu) \ + do { \ + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ + _flags = per_cpu(lvar, cpu).flags; \ + } while (0) + +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, + unsigned long flags) +{ + LL_WARN(!lv->nestcnt); + LL_WARN(lv->owner != current); + if (--lv->nestcnt) + return 0; + + lv->owner = NULL; + spin_unlock_irqrestore(&lv->lock, lv->flags); + return 1; +} + +#define local_unlock_irqrestore(lvar, flags) \ + do { \ + if (__local_unlock_irqrestore(&__get_cpu_var(lvar), flags)) \ + put_local_var(lvar); \ + } while (0) + +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ + do { \ + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ + } while (0) + +#define local_spin_trylock_irq(lvar, lock) \ + ({ \ + int __locked; \ + local_lock_irq(lvar); \ + __locked = spin_trylock(lock); \ + if (!__locked) \ + local_unlock_irq(lvar); \ + __locked; \ + }) + +#define local_spin_lock_irq(lvar, lock) \ + do { \ + local_lock_irq(lvar); \ + spin_lock(lock); \ + } while (0) + +#define local_spin_unlock_irq(lvar, lock) \ + do { \ + spin_unlock(lock); \ + local_unlock_irq(lvar); \ + } while (0) + +#define local_spin_lock_irqsave(lvar, lock, flags) \ + do { \ + local_lock_irqsave(lvar, flags); \ + spin_lock(lock); \ + } while (0) + +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ + do { \ + spin_unlock(lock); \ + local_unlock_irqrestore(lvar, flags); \ + } while (0) + +#define get_locked_var(lvar, var) \ + (*({ \ + local_lock(lvar); \ + &__get_cpu_var(var); \ + })) + +#define put_locked_var(lvar, var) local_unlock(lvar) + +#define local_lock_cpu(lvar) \ + ({ \ + local_lock(lvar); \ + smp_processor_id(); \ + }) + +#define local_unlock_cpu(lvar) local_unlock(lvar) + +#else /* PREEMPT_RT_BASE */ + +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar + +static inline void local_irq_lock_init(int lvar) { } + +#define local_lock(lvar) preempt_disable() +#define local_unlock(lvar) preempt_enable() +#define local_lock_irq(lvar) local_irq_disable() +#define local_unlock_irq(lvar) local_irq_enable() +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) + +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) +#define local_spin_lock_irqsave(lvar, lock, flags) \ + spin_lock_irqsave(lock, flags) +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ + spin_unlock_irqrestore(lock, flags) + +#define get_locked_var(lvar, var) get_cpu_var(var) +#define put_locked_var(lvar, var) put_cpu_var(var) + +#define local_lock_cpu(lvar) get_cpu() +#define local_unlock_cpu(lvar) put_cpu() + +#endif + +#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index a9a48309f045..af4fe54ccbec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1254,27 +1254,59 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a * overflow into the next struct page (as it might with DEBUG_SPINLOCK). * When freeing, reset page->mapping so free_pages_check won't complain. */ +#ifndef CONFIG_PREEMPT_RT_FULL + #define __pte_lockptr(page) &((page)->ptl) -#define pte_lock_init(_page) do { \ - spin_lock_init(__pte_lockptr(_page)); \ -} while (0) + +static inline struct page *pte_lock_init(struct page *page) +{ + spin_lock_init(__pte_lockptr(page)); + return page; +} + #define pte_lock_deinit(page) ((page)->mapping = NULL) + +#else /* !PREEMPT_RT_FULL */ + +/* + * On PREEMPT_RT_FULL the spinlock_t's are too large to embed in the + * page frame, hence it only has a pointer and we need to dynamically + * allocate the lock when we allocate PTE-pages. + * + * This is an overall win, since only a small fraction of the pages + * will be PTE pages under normal circumstances. + */ + +#define __pte_lockptr(page) ((page)->ptl) + +extern struct page *pte_lock_init(struct page *page); +extern void pte_lock_deinit(struct page *page); + +#endif /* PREEMPT_RT_FULL */ + #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) #else /* !USE_SPLIT_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ -#define pte_lock_init(page) do {} while (0) +static inline struct page *pte_lock_init(struct page *page) { return page; } #define pte_lock_deinit(page) do {} while (0) #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) #endif /* USE_SPLIT_PTLOCKS */ -static inline void pgtable_page_ctor(struct page *page) +static inline struct page *__pgtable_page_ctor(struct page *page) { - pte_lock_init(page); - inc_zone_page_state(page, NR_PAGETABLE); + page = pte_lock_init(page); + if (page) + inc_zone_page_state(page, NR_PAGETABLE); + return page; } +#define pgtable_page_ctor(page) \ +do { \ + page = __pgtable_page_ctor(page); \ +} while (0) + static inline void pgtable_page_dtor(struct page *page) { pte_lock_deinit(page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10a9a17342fc..820a4094918e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -11,6 +11,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/page-debug-flags.h> +#include <linux/rcupdate.h> #include <linux/uprobes.h> #include <linux/page-flags-layout.h> #include <asm/page.h> @@ -142,7 +143,11 @@ struct page { * system if PG_buddy is set. */ #if USE_SPLIT_PTLOCKS +# ifndef CONFIG_PREEMPT_RT_FULL spinlock_t ptl; +# else + spinlock_t *ptl; +# endif #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ struct page *first_page; /* Compound tail pages */ @@ -446,6 +451,9 @@ struct mm_struct { bool tlb_flush_pending; #endif struct uprobes_state uprobes_state; +#ifdef CONFIG_PREEMPT_RT_BASE + struct rcu_head delayed_drop; +#endif }; /* first nid will either be a valid NID or one of these values */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 433da8a1a426..e7d733cd80ed 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -17,6 +17,17 @@ #include <linux/atomic.h> +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ + , .dep_map = { .name = #lockname } +#else +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +#endif + +#ifdef CONFIG_PREEMPT_RT_FULL +# include <linux/mutex_rt.h> +#else + /* * Simple, straightforward mutexes with strict semantics: * @@ -98,13 +109,6 @@ do { \ static inline void mutex_destroy(struct mutex *lock) {} #endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ - , .dep_map = { .name = #lockname } -#else -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -#endif - #define __MUTEX_INITIALIZER(lockname) \ { .count = ATOMIC_INIT(1) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ @@ -170,6 +174,9 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); + +#endif /* !PREEMPT_RT_FULL */ + extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h new file mode 100644 index 000000000000..c38a44b14da5 --- /dev/null +++ b/include/linux/mutex_rt.h @@ -0,0 +1,84 @@ +#ifndef __LINUX_MUTEX_RT_H +#define __LINUX_MUTEX_RT_H + +#ifndef __LINUX_MUTEX_H +#error "Please include mutex.h" +#endif + +#include <linux/rtmutex.h> + +/* FIXME: Just for __lockfunc */ +#include <linux/spinlock.h> + +struct mutex { + struct rt_mutex lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#define __MUTEX_INITIALIZER(mutexname) \ + { \ + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ + } + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) + +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); +extern void __lockfunc _mutex_lock(struct mutex *lock); +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); +extern int __lockfunc _mutex_trylock(struct mutex *lock); +extern void __lockfunc _mutex_unlock(struct mutex *lock); + +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) +#define mutex_lock(l) _mutex_lock(l) +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) +#define mutex_lock_killable(l) _mutex_lock_killable(l) +#define mutex_trylock(l) _mutex_trylock(l) +#define mutex_unlock(l) _mutex_unlock(l) +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) +# define mutex_lock_interruptible_nested(l, s) \ + _mutex_lock_interruptible_nested(l, s) +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable_nested(l, s) + +# define mutex_lock_nest_lock(lock, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ +} while (0) + +#else +# define mutex_lock_nested(l, s) _mutex_lock(l) +# define mutex_lock_interruptible_nested(l, s) \ + _mutex_lock_interruptible(l) +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable(l) +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) +#endif + +# define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + rt_mutex_init(&(mutex)->lock); \ + __mutex_do_init((mutex), #mutex, &__key); \ +} while (0) + +# define __mutex_init(mutex, name, key) \ +do { \ + rt_mutex_init(&(mutex)->lock); \ + __mutex_do_init((mutex), name, key); \ +} while (0) + +#endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d2e0418ab5a..765679036cc7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1139,7 +1139,7 @@ struct net_device { unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_assign_type; /* hw address assignment type */ unsigned char addr_len; /* hardware address length */ - unsigned char neigh_priv_len; + unsigned short neigh_priv_len; unsigned short dev_id; /* for shared network cards */ spinlock_t addr_list_lock; @@ -1601,9 +1601,6 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern rwlock_t dev_base_lock; /* Device list lock */ -extern seqcount_t devnet_rename_seq; /* Device rename seq */ - - #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ @@ -1817,6 +1814,7 @@ struct softnet_data { unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; + struct sk_buff_head tofree_queue; }; static inline void input_queue_head_incr(struct softnet_data *sd) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dd49566315c6..7d083af9ca38 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -3,6 +3,7 @@ #include <linux/netdevice.h> +#include <linux/locallock.h> #include <uapi/linux/netfilter/x_tables.h> /** @@ -284,6 +285,8 @@ extern void xt_free_table_info(struct xt_table_info *info); */ DECLARE_PER_CPU(seqcount_t, xt_recseq); +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); + /** * xt_write_recseq_begin - start of a write section * @@ -298,6 +301,9 @@ static inline unsigned int xt_write_recseq_begin(void) { unsigned int addend; + /* RT protection */ + local_lock(xt_write_lock); + /* * Low order bit of sequence is set if we already * called xt_write_recseq_begin(). @@ -328,6 +334,7 @@ static inline void xt_write_recseq_end(unsigned int addend) /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ smp_wmb(); __this_cpu_add(xt_recseq.sequence, addend); + local_unlock(xt_write_lock); } /* diff --git a/include/linux/notifier.h b/include/linux/notifier.h index d14a4c362465..2e4414a0c1c4 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -6,7 +6,7 @@ * * Alan Cox <Alan.Cox@linux.org> */ - + #ifndef _LINUX_NOTIFIER_H #define _LINUX_NOTIFIER_H #include <linux/errno.h> @@ -42,9 +42,7 @@ * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. * As compensation, srcu_notifier_chain_unregister() is rather expensive. * SRCU notifier chains should be used when the chain will be called very - * often but notifier_blocks will seldom be removed. Also, SRCU notifier - * chains are slightly more difficult to use because they require special - * runtime initialization. + * often but notifier_blocks will seldom be removed. */ typedef int (*notifier_fn_t)(struct notifier_block *nb, @@ -88,7 +86,7 @@ struct srcu_notifier_head { (name)->head = NULL; \ } while (0) -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ +/* srcu_notifier_heads must be cleaned up dynamically */ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define srcu_cleanup_notifier_head(name) \ cleanup_srcu_struct(&(name)->srcu); @@ -101,7 +99,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); .head = NULL } #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } -/* srcu_notifier_heads cannot be initialized statically */ + +#define SRCU_NOTIFIER_INIT(name, pcpu) \ + { \ + .mutex = __MUTEX_INITIALIZER(name.mutex), \ + .head = NULL, \ + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ + } #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ @@ -113,6 +117,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); struct raw_notifier_head name = \ RAW_NOTIFIER_INIT(name) +#define _SRCU_NOTIFIER_HEAD(name, mod) \ + static DEFINE_PER_CPU(struct srcu_struct_array, \ + name##_head_srcu_array); \ + mod struct srcu_notifier_head name = \ + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) + +#define SRCU_NOTIFIER_HEAD(name) \ + _SRCU_NOTIFIER_HEAD(name, ) + +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ + _SRCU_NOTIFIER_HEAD(name, static) + #ifdef __KERNEL__ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, @@ -182,12 +198,12 @@ static inline int notifier_to_errno(int ret) /* * Declared notifiers so far. I can imagine quite a few more chains - * over time (eg laptop power reset chains, reboot chain (to clean + * over time (eg laptop power reset chains, reboot chain (to clean * device units up), device [un]mount chain, module load/unload chain, - * low memory chain, screenblank chain (for plug in modular screenblankers) + * low memory chain, screenblank chain (for plug in modular screenblankers) * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... */ - + /* CPU notfiers are defined in include/linux/cpu.h. */ /* netdevice notifiers are defined in include/linux/netdevice.h */ diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 777a524716db..ca67e80fe525 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -24,6 +24,9 @@ enum { */ struct page_cgroup { unsigned long flags; +#ifdef CONFIG_PREEMPT_RT_BASE + spinlock_t pcg_lock; +#endif struct mem_cgroup *mem_cgroup; }; @@ -74,12 +77,20 @@ static inline void lock_page_cgroup(struct page_cgroup *pc) * Don't take this lock in IRQ context. * This lock is for pc->mem_cgroup, USED, MIGRATION */ +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_lock(PCG_LOCK, &pc->flags); +#else + spin_lock(&pc->pcg_lock); +#endif } static inline void unlock_page_cgroup(struct page_cgroup *pc) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_unlock(PCG_LOCK, &pc->flags); +#else + spin_unlock(&pc->pcg_lock); +#endif } #else /* CONFIG_MEMCG */ @@ -102,6 +113,10 @@ static inline void __init page_cgroup_init_flatmem(void) { } +static inline void page_cgroup_lock_init(struct page_cgroup *pc) +{ +} + #endif /* CONFIG_MEMCG */ #include <linux/swap.h> diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cc88172c7d9a..12b394f75d8e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -48,6 +48,31 @@ preempt_enable(); \ } while (0) +#ifndef CONFIG_PREEMPT_RT_FULL +# define get_local_var(var) get_cpu_var(var) +# define put_local_var(var) put_cpu_var(var) +# define get_local_ptr(var) get_cpu_ptr(var) +# define put_local_ptr(var) put_cpu_ptr(var) +#else +# define get_local_var(var) (*({ \ + migrate_disable(); \ + &__get_cpu_var(var); })) + +# define put_local_var(var) do { \ + (void)&(var); \ + migrate_enable(); \ +} while (0) + +# define get_local_ptr(var) ({ \ + migrate_disable(); \ + this_cpu_ptr(var); }) + +# define put_local_ptr(var) do { \ + (void)(var); \ + migrate_enable(); \ +} while (0) +#endif + /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) diff --git a/include/linux/pid.h b/include/linux/pid.h index a089a3c447fc..dffe39c6be53 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -2,6 +2,7 @@ #define _LINUX_PID_H #include <linux/rcupdate.h> +#include <linux/atomic.h> enum pid_type { diff --git a/include/linux/preempt.h b/include/linux/preempt.h index f5d4723cdb3d..c153cf2f3315 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -23,15 +23,38 @@ #define preempt_count() (current_thread_info()->preempt_count) +#ifdef CONFIG_PREEMPT_LAZY +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) +#else +#define add_preempt_lazy_count(val) do { } while (0) +#define sub_preempt_lazy_count(val) do { } while (0) +#define inc_preempt_lazy_count() do { } while (0) +#define dec_preempt_lazy_count() do { } while (0) +#define preempt_lazy_count() (0) +#endif + #ifdef CONFIG_PREEMPT asmlinkage void preempt_schedule(void); +# ifdef CONFIG_PREEMPT_LAZY #define preempt_check_resched() \ do { \ - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ + if (unlikely(test_thread_flag(TIF_NEED_RESCHED) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY))) \ preempt_schedule(); \ } while (0) +# else +#define preempt_check_resched() \ +do { \ + if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ + preempt_schedule(); \ +} while (0) +# endif #ifdef CONFIG_CONTEXT_TRACKING @@ -64,17 +87,36 @@ do { \ barrier(); \ } while (0) +#define preempt_lazy_disable() \ +do { \ + inc_preempt_lazy_count(); \ + barrier(); \ +} while (0) + #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ dec_preempt_count(); \ } while (0) -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#ifndef CONFIG_PREEMPT_RT_BASE +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() +# define preempt_check_resched_rt() barrier() +#else +# define preempt_enable_no_resched() preempt_enable() +# define preempt_check_resched_rt() preempt_check_resched() +#endif #define preempt_enable() \ do { \ - preempt_enable_no_resched(); \ + sched_preempt_enable_no_resched(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + +#define preempt_lazy_enable() \ +do { \ + dec_preempt_lazy_count(); \ barrier(); \ preempt_check_resched(); \ } while (0) @@ -123,9 +165,31 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() +#define preempt_check_resched_rt() barrier() #endif /* CONFIG_PREEMPT_COUNT */ +#ifdef CONFIG_PREEMPT_RT_FULL +# define preempt_disable_rt() preempt_disable() +# define preempt_enable_rt() preempt_enable() +# define preempt_disable_nort() barrier() +# define preempt_enable_nort() barrier() +# ifdef CONFIG_SMP + extern void migrate_disable(void); + extern void migrate_enable(void); +# else /* CONFIG_SMP */ +# define migrate_disable() barrier() +# define migrate_enable() barrier() +# endif /* CONFIG_SMP */ +#else +# define preempt_disable_rt() barrier() +# define preempt_enable_rt() barrier() +# define preempt_disable_nort() preempt_disable() +# define preempt_enable_nort() preempt_enable() +# define migrate_disable() preempt_disable() +# define migrate_enable() preempt_enable() +#endif + #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; diff --git a/include/linux/printk.h b/include/linux/printk.h index 22c7052e9372..ad239743e738 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -101,9 +101,11 @@ int no_printk(const char *fmt, ...) extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); void early_vprintk(const char *fmt, va_list ap); +extern void printk_kill(void); #else static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } +static inline void printk_kill(void) { } #endif #ifdef CONFIG_PRINTK @@ -137,7 +139,6 @@ extern int __printk_ratelimit(const char *func); #define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); - extern int printk_delay_msec; extern int dmesg_restrict; extern int kptr_restrict; diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index ffc444c38b0a..7ddfbf97e32d 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -230,7 +230,13 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); unsigned long radix_tree_prev_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); + +#ifndef CONFIG_PREEMPT_RT_FULL int radix_tree_preload(gfp_t gfp_mask); +#else +static inline int radix_tree_preload(gfp_t gm) { return 0; } +#endif + void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); @@ -255,7 +261,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); static inline void radix_tree_preload_end(void) { - preempt_enable(); + preempt_enable_nort(); } /** diff --git a/include/linux/random.h b/include/linux/random.h index bf9085e89fb5..de4894a47274 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -12,7 +12,7 @@ extern void add_device_randomness(const void *, unsigned int); extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); -extern void add_interrupt_randomness(int irq, int irq_flags); +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip); extern void get_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ddcc7826d907..9b574a010331 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -128,6 +128,9 @@ extern void call_rcu(struct rcu_head *head, #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_PREEMPT_RT_FULL +#define call_rcu_bh call_rcu +#else /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -151,6 +154,7 @@ extern void call_rcu(struct rcu_head *head, */ extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head)); +#endif /** * call_rcu_sched() - Queue an RCU for invocation after sched grace period. @@ -190,6 +194,11 @@ void synchronize_rcu(void); * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) +#ifndef CONFIG_PREEMPT_RT_FULL +#define sched_rcu_preempt_depth() rcu_preempt_depth() +#else +static inline int sched_rcu_preempt_depth(void) { return 0; } +#endif #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -213,6 +222,8 @@ static inline int rcu_preempt_depth(void) return 0; } +#define sched_rcu_preempt_depth() rcu_preempt_depth() + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ @@ -367,7 +378,14 @@ static inline int rcu_read_lock_held(void) * rcu_read_lock_bh_held() is defined out of line to avoid #include-file * hell. */ +#ifdef CONFIG_PREEMPT_RT_FULL +static inline int rcu_read_lock_bh_held(void) +{ + return rcu_read_lock_held(); +} +#else extern int rcu_read_lock_bh_held(void); +#endif /** * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? @@ -824,10 +842,14 @@ static inline void rcu_read_unlock(void) static inline void rcu_read_lock_bh(void) { local_bh_disable(); +#ifdef CONFIG_PREEMPT_RT_FULL + rcu_read_lock(); +#else __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); rcu_lockdep_assert(!rcu_is_cpu_idle(), "rcu_read_lock_bh() used illegally while idle"); +#endif } /* @@ -837,10 +859,14 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { +#ifdef CONFIG_PREEMPT_RT_FULL + rcu_read_unlock(); +#else rcu_lockdep_assert(!rcu_is_cpu_idle(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); +#endif local_bh_enable(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 952b79339304..f1472a22a82e 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,7 +45,11 @@ static inline void rcu_virt_note_context_switch(int cpu) rcu_note_context_switch(cpu); } +#ifdef CONFIG_PREEMPT_RT_FULL +# define synchronize_rcu_bh synchronize_rcu +#else extern void synchronize_rcu_bh(void); +#endif extern void synchronize_sched_expedited(void); extern void synchronize_rcu_expedited(void); @@ -73,20 +77,30 @@ static inline void synchronize_rcu_bh_expedited(void) } extern void rcu_barrier(void); +#ifdef CONFIG_PREEMPT_RT_FULL +# define rcu_barrier_bh rcu_barrier +#else extern void rcu_barrier_bh(void); +#endif extern void rcu_barrier_sched(void); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); extern void rcu_force_quiescent_state(void); -extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); extern void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; +#ifndef CONFIG_PREEMPT_RT_FULL +extern void rcu_bh_force_quiescent_state(void); +extern long rcu_batches_completed_bh(void); +#else +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state +# define rcu_batches_completed_bh rcu_batches_completed +#endif + #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index de17134244f3..5ebd0bbb6eaa 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -14,7 +14,7 @@ #include <linux/linkage.h> #include <linux/plist.h> -#include <linux/spinlock_types.h> +#include <linux/spinlock_types_raw.h> extern int max_lock_depth; /* for sysctl */ @@ -29,9 +29,10 @@ struct rt_mutex { raw_spinlock_t wait_lock; struct plist_head wait_list; struct task_struct *owner; -#ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; - const char *name, *file; +#ifdef CONFIG_DEBUG_RT_MUTEXES + const char *file; + const char *name; int line; void *magic; #endif @@ -56,19 +57,39 @@ struct hrtimer_sleeper; #ifdef CONFIG_DEBUG_RT_MUTEXES # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ , .name = #mutexname, .file = __FILE__, .line = __LINE__ -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) + +# define rt_mutex_init(mutex) \ + do { \ + raw_spin_lock_init(&(mutex)->wait_lock); \ + __rt_mutex_init(mutex, #mutex); \ + } while (0) + extern void rt_mutex_debug_task_free(struct task_struct *tsk); #else # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) + +# define rt_mutex_init(mutex) \ + do { \ + raw_spin_lock_init(&(mutex)->wait_lock); \ + __rt_mutex_init(mutex, #mutex); \ + } while (0) + # define rt_mutex_debug_task_free(t) do { } while (0) #endif -#define __RT_MUTEX_INITIALIZER(mutexname) \ - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list) \ , .owner = NULL \ - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) + + +#define __RT_MUTEX_INITIALIZER(mutexname) \ + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } + +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ + , .save_state = 1 } #define DEFINE_RT_MUTEX(mutexname) \ struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) @@ -90,6 +111,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock); extern void rt_mutex_lock(struct rt_mutex *lock); extern int rt_mutex_lock_interruptible(struct rt_mutex *lock, int detect_deadlock); +extern int rt_mutex_lock_killable(struct rt_mutex *lock, int detect_deadlock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, int detect_deadlock); diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h new file mode 100644 index 000000000000..853ee367fef4 --- /dev/null +++ b/include/linux/rwlock_rt.h @@ -0,0 +1,123 @@ +#ifndef __LINUX_RWLOCK_RT_H +#define __LINUX_RWLOCK_RT_H + +#ifndef __LINUX_SPINLOCK_H +#error Do not include directly. Use spinlock.h +#endif + +#define rwlock_init(rwl) \ +do { \ + static struct lock_class_key __key; \ + \ + rt_mutex_init(&(rwl)->lock); \ + __rt_rwlock_init(rwl, #rwl, &__key); \ +} while (0) + +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags); +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); + +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) + +#define write_trylock_irqsave(lock, flags) \ + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) + +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + migrate_disable(); \ + flags = rt_read_lock_irqsave(lock); \ + } while (0) + +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + migrate_disable(); \ + flags = rt_write_lock_irqsave(lock); \ + } while (0) + +#define read_lock(lock) \ + do { \ + migrate_disable(); \ + rt_read_lock(lock); \ + } while (0) + +#define read_lock_bh(lock) \ + do { \ + local_bh_disable(); \ + migrate_disable(); \ + rt_read_lock(lock); \ + } while (0) + +#define read_lock_irq(lock) read_lock(lock) + +#define write_lock(lock) \ + do { \ + migrate_disable(); \ + rt_write_lock(lock); \ + } while (0) + +#define write_lock_bh(lock) \ + do { \ + local_bh_disable(); \ + migrate_disable(); \ + rt_write_lock(lock); \ + } while (0) + +#define write_lock_irq(lock) write_lock(lock) + +#define read_unlock(lock) \ + do { \ + rt_read_unlock(lock); \ + migrate_enable(); \ + } while (0) + +#define read_unlock_bh(lock) \ + do { \ + rt_read_unlock(lock); \ + migrate_enable(); \ + local_bh_enable(); \ + } while (0) + +#define read_unlock_irq(lock) read_unlock(lock) + +#define write_unlock(lock) \ + do { \ + rt_write_unlock(lock); \ + migrate_enable(); \ + } while (0) + +#define write_unlock_bh(lock) \ + do { \ + rt_write_unlock(lock); \ + migrate_enable(); \ + local_bh_enable(); \ + } while (0) + +#define write_unlock_irq(lock) write_unlock(lock) + +#define read_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + rt_read_unlock(lock); \ + migrate_enable(); \ + } while (0) + +#define write_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + rt_write_unlock(lock); \ + migrate_enable(); \ + } while (0) + +#endif diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h index cc0072e93e36..d0da966ad7a0 100644 --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -1,6 +1,10 @@ #ifndef __LINUX_RWLOCK_TYPES_H #define __LINUX_RWLOCK_TYPES_H +#if !defined(__LINUX_SPINLOCK_TYPES_H) +# error "Do not include directly, include spinlock_types.h" +#endif + /* * include/linux/rwlock_types.h - generic rwlock type definitions * and initializers @@ -43,6 +47,7 @@ typedef struct { RW_DEP_MAP_INIT(lockname) } #endif -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) +#define DEFINE_RWLOCK(name) \ + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name) #endif /* __LINUX_RWLOCK_TYPES_H */ diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h new file mode 100644 index 000000000000..b13832119591 --- /dev/null +++ b/include/linux/rwlock_types_rt.h @@ -0,0 +1,33 @@ +#ifndef __LINUX_RWLOCK_TYPES_RT_H +#define __LINUX_RWLOCK_TYPES_RT_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +#error "Do not include directly. Include spinlock_types.h instead" +#endif + +/* + * rwlocks - rtmutex which allows single reader recursion + */ +typedef struct { + struct rt_mutex lock; + int read_depth; + unsigned int break_lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} rwlock_t; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define RW_DEP_MAP_INIT(lockname) +#endif + +#define __RW_LOCK_UNLOCKED(name) \ + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ + RW_DEP_MAP_INIT(name) } + +#define DEFINE_RWLOCK(name) \ + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name) + +#endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 0616ffe45702..0ad60708685a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -16,6 +16,10 @@ #include <linux/atomic.h> +#ifdef CONFIG_PREEMPT_RT_FULL +#include <linux/rwsem_rt.h> +#else /* PREEMPT_RT_FULL */ + struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -149,4 +153,6 @@ extern void up_read_non_owner(struct rw_semaphore *sem); # define up_read_non_owner(sem) up_read(sem) #endif +#endif /* !PREEMPT_RT_FULL */ + #endif /* _LINUX_RWSEM_H */ diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h new file mode 100644 index 000000000000..e94d945c5844 --- /dev/null +++ b/include/linux/rwsem_rt.h @@ -0,0 +1,128 @@ +#ifndef _LINUX_RWSEM_RT_H +#define _LINUX_RWSEM_RT_H + +#ifndef _LINUX_RWSEM_H +#error "Include rwsem.h" +#endif + +/* + * RW-semaphores are a spinlock plus a reader-depth count. + * + * Note that the semantics are different from the usual + * Linux rw-sems, in PREEMPT_RT mode we do not allow + * multiple readers to hold the lock at once, we only allow + * a read-lock owner to read-lock recursively. This is + * better for latency, makes the implementation inherently + * fair and makes it simpler as well. + */ + +#include <linux/rtmutex.h> + +struct rw_semaphore { + struct rt_mutex lock; + int read_depth; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#define __RWSEM_INITIALIZER(name) \ + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ + RW_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(lockname) \ + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) + +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, + struct lock_class_key *key); + +#define __rt_init_rwsem(sem, name, key) \ + do { \ + rt_mutex_init(&(sem)->lock); \ + __rt_rwsem_init((sem), (name), (key));\ + } while (0) + +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key) + +# define rt_init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __rt_init_rwsem((sem), #sem, &__key); \ +} while (0) + +extern void rt_down_write(struct rw_semaphore *rwsem); +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem, + struct lockdep_map *nest); +extern void rt_down_read(struct rw_semaphore *rwsem); +extern int rt_down_write_trylock(struct rw_semaphore *rwsem); +extern int rt_down_read_trylock(struct rw_semaphore *rwsem); +extern void rt_up_read(struct rw_semaphore *rwsem); +extern void rt_up_write(struct rw_semaphore *rwsem); +extern void rt_downgrade_write(struct rw_semaphore *rwsem); + +#define init_rwsem(sem) rt_init_rwsem(sem) +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) + +static inline void down_read(struct rw_semaphore *sem) +{ + rt_down_read(sem); +} + +static inline int down_read_trylock(struct rw_semaphore *sem) +{ + return rt_down_read_trylock(sem); +} + +static inline void down_write(struct rw_semaphore *sem) +{ + rt_down_write(sem); +} + +static inline int down_write_trylock(struct rw_semaphore *sem) +{ + return rt_down_write_trylock(sem); +} + +static inline void up_read(struct rw_semaphore *sem) +{ + rt_up_read(sem); +} + +static inline void up_write(struct rw_semaphore *sem) +{ + rt_up_write(sem); +} + +static inline void downgrade_write(struct rw_semaphore *sem) +{ + rt_downgrade_write(sem); +} + +static inline void down_read_nested(struct rw_semaphore *sem, int subclass) +{ + return rt_down_read_nested(sem, subclass); +} + +static inline void down_write_nested(struct rw_semaphore *sem, int subclass) +{ + rt_down_write_nested(sem, subclass); +} +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void down_write_nest_lock(struct rw_semaphore *sem, + struct rw_semaphore *nest_lock) +{ + rt_down_write_nested_lock(sem, &nest_lock->dep_map); +} + +#else + +static inline void down_write_nest_lock(struct rw_semaphore *sem, + struct rw_semaphore *nest_lock) +{ + rt_down_write_nested_lock(sem, NULL); +} +#endif +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index af1adcbf833c..683ad4a5608f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -23,6 +23,7 @@ struct sched_param { #include <linux/nodemask.h> #include <linux/mm_types.h> +#include <asm/kmap_types.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/cputime.h> @@ -52,6 +53,7 @@ struct sched_param { #include <linux/llist.h> #include <linux/uidgid.h> #include <linux/gfp.h> +#include <linux/hardirq.h> #include <asm/processor.h> @@ -1050,6 +1052,7 @@ enum perf_event_task_context { struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ + volatile long saved_state; /* saved state for "spinlock sleepers" */ void *stack; atomic_t usage; unsigned int flags; /* per process flags, defined below */ @@ -1089,6 +1092,12 @@ struct task_struct { #endif unsigned int policy; +#ifdef CONFIG_PREEMPT_RT_FULL + int migrate_disable; +# ifdef CONFIG_SCHED_DEBUG + int migrate_disable_atomic; +# endif +#endif int nr_cpus_allowed; cpumask_t cpus_allowed; @@ -1184,7 +1193,8 @@ struct task_struct { struct cputime prev_cputime; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqlock_t vtime_seqlock; + raw_spinlock_t vtime_lock; + seqcount_t vtime_seq; unsigned long long vtime_snap; enum { VTIME_SLEEPING = 0, @@ -1200,6 +1210,9 @@ struct task_struct { struct task_cputime cputime_expires; struct list_head cpu_timers[3]; +#ifdef CONFIG_PREEMPT_RT_BASE + struct task_struct *posix_timer_list; +#endif /* process credentials */ const struct cred __rcu *real_cred; /* objective and real subjective task @@ -1231,10 +1244,15 @@ struct task_struct { /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; + struct sigqueue *sigqueue_cache; sigset_t blocked, real_blocked; sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ struct sigpending pending; +#ifdef CONFIG_PREEMPT_RT_FULL + /* TODO: move me into ->restart_block ? */ + struct siginfo forced_info; +#endif unsigned long sas_ss_sp; size_t sas_ss_size; @@ -1271,6 +1289,9 @@ struct task_struct { /* mutex deadlock detection */ struct mutex_waiter *blocked_on; #endif +#ifdef CONFIG_PREEMPT_RT_FULL + int pagefault_disabled; +#endif #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; unsigned long hardirq_enable_ip; @@ -1346,6 +1367,9 @@ struct task_struct { struct mutex perf_event_mutex; struct list_head perf_event_list; #endif +#ifdef CONFIG_DEBUG_PREEMPT + unsigned long preempt_disable_ip; +#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* Protected by alloc_lock */ short il_next; @@ -1413,6 +1437,12 @@ struct task_struct { unsigned long trace; /* bitmask and counter of trace recursion */ unsigned long trace_recursion; +#ifdef CONFIG_WAKEUP_LATENCY_HIST + u64 preempt_timestamp_hist; +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + long timer_offset; +#endif +#endif #endif /* CONFIG_TRACING */ #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ struct memcg_batch_info { @@ -1433,11 +1463,19 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif +#ifdef CONFIG_PREEMPT_RT_BASE + struct rcu_head put_rcu; + int softirq_nestcnt; + unsigned int softirqs_raised; +#endif +#ifdef CONFIG_PREEMPT_RT_FULL +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 + int kmap_idx; + pte_t kmap_pte[KM_TYPE_NR]; +# endif +#endif }; -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int node, int pages, bool migrated); extern void set_numabalancing_state(bool enabled); @@ -1450,6 +1488,17 @@ static inline void set_numabalancing_state(bool enabled) } #endif +#ifdef CONFIG_PREEMPT_RT_FULL +static inline bool cur_pf_disabled(void) { return current->pagefault_disabled; } +#else +static inline bool cur_pf_disabled(void) { return false; } +#endif + +static inline bool pagefault_disabled(void) +{ + return in_atomic() || cur_pf_disabled(); +} + static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1581,6 +1630,15 @@ extern struct pid *cad_pid; extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) +#ifdef CONFIG_PREEMPT_RT_BASE +extern void __put_task_struct_cb(struct rcu_head *rhp); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + call_rcu(&t->put_rcu, __put_task_struct_cb); +} +#else extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) @@ -1588,6 +1646,7 @@ static inline void put_task_struct(struct task_struct *t) if (atomic_dec_and_test(&t->usage)) __put_task_struct(t); } +#endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, @@ -1626,6 +1685,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, /* * Per process flags */ +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ @@ -1770,6 +1830,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); +int migrate_me(void); +void tell_sched_cpu_down_begin(int cpu); +void tell_sched_cpu_down_done(int cpu); + #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) @@ -1782,6 +1846,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, return -EINVAL; return 0; } +static inline int migrate_me(void) { return 0; } +static inline void tell_sched_cpu_down_begin(int cpu) { } +static inline void tell_sched_cpu_down_done(int cpu) { } #endif #ifdef CONFIG_NO_HZ_COMMON @@ -1989,6 +2056,7 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); +extern int wake_up_lock_sleeper(struct task_struct * tsk); extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); @@ -2103,12 +2171,24 @@ extern struct mm_struct * mm_alloc(void); /* mmdrop drops the mm and the page tables */ extern void __mmdrop(struct mm_struct *); + static inline void mmdrop(struct mm_struct * mm) { if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } +#ifdef CONFIG_PREEMPT_RT_BASE +extern void __mmdrop_delayed(struct rcu_head *rhp); +static inline void mmdrop_delayed(struct mm_struct *mm) +{ + if (atomic_dec_and_test(&mm->mm_count)) + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else +# define mmdrop_delayed(mm) mmdrop(mm) +#endif + /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); /* Grab a reference to a task's mm, if it is not already going away */ @@ -2391,6 +2471,52 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +#ifdef CONFIG_PREEMPT_LAZY +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) +{ + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) +{ + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); +} + +static inline int need_resched_lazy(void) +{ + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +} + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +static inline int need_resched(void) +{ + return test_thread_flag(TIF_NEED_RESCHED) || + test_thread_flag(TIF_NEED_RESCHED_LAZY); +} +#else +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } +static inline int need_resched_lazy(void) { return 0; } + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +static inline int need_resched(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} +#endif + static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); @@ -2422,11 +2548,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } -static inline int need_resched(void) -{ - return unlikely(test_thread_flag(TIF_NEED_RESCHED)); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -2443,7 +2564,7 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); -#ifdef CONFIG_PREEMPT_COUNT +#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL) #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET #else #define PREEMPT_LOCK_OFFSET 0 @@ -2454,12 +2575,16 @@ extern int __cond_resched_lock(spinlock_t *lock); __cond_resched_lock(lock); \ }) +#ifndef CONFIG_PREEMPT_RT_FULL extern int __cond_resched_softirq(void); #define cond_resched_softirq() ({ \ __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ __cond_resched_softirq(); \ }) +#else +# define cond_resched_softirq() cond_resched() +#endif /* * Does a critical section need to be broken due to another @@ -2636,6 +2761,26 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +static inline int __migrate_disabled(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RT_FULL + return p->migrate_disable; +#else + return 0; +#endif +} + +/* Future-safe accessor for struct task_struct's cpus_allowed. */ +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RT_FULL + if (p->migrate_disable) + return cpumask_of(task_cpu(p)); +#endif + + return &p->cpus_allowed; +} + extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 440434df3627..4d54d6cc47b8 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -35,6 +35,7 @@ static inline int rt_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern int rt_mutex_check_prio(struct task_struct *task, int newprio); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { @@ -45,6 +46,10 @@ static inline int rt_mutex_getprio(struct task_struct *p) { return p->normal_prio; } +static inline int rt_mutex_check_prio(struct task_struct *task, int newprio) +{ + return 0; +} # define rt_mutex_adjust_pi(p) do { } while (0) static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 18299057402f..939ea1a5cb69 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -146,18 +146,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) * Sequence counter only version assumes that callers are using their * own mutexing. */ -static inline void write_seqcount_begin(seqcount_t *s) +static inline void __write_seqcount_begin(seqcount_t *s) { s->sequence++; smp_wmb(); } -static inline void write_seqcount_end(seqcount_t *s) +static inline void write_seqcount_begin(seqcount_t *s) +{ + preempt_disable_rt(); + __write_seqcount_begin(s); +} + +static inline void __write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; } +static inline void write_seqcount_end(seqcount_t *s) +{ + __write_seqcount_end(s); + preempt_enable_rt(); +} + /** * write_seqcount_barrier - invalidate in-progress read-side seq operations * @s: pointer to seqcount_t @@ -198,10 +210,33 @@ typedef struct { /* * Read side functions for starting and finalizing a read side section. */ +#ifndef CONFIG_PREEMPT_RT_FULL static inline unsigned read_seqbegin(const seqlock_t *sl) { return read_seqcount_begin(&sl->seqcount); } +#else +/* + * Starvation safe read side for RT + */ +static inline unsigned read_seqbegin(seqlock_t *sl) +{ + unsigned ret; + +repeat: + ret = ACCESS_ONCE(sl->seqcount.sequence); + if (unlikely(ret & 1)) { + /* + * Take the lock and let the writer proceed (i.e. evtl + * boost it), otherwise we could loop here forever. + */ + spin_lock(&sl->lock); + spin_unlock(&sl->lock); + goto repeat; + } + return ret; +} +#endif static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { @@ -216,36 +251,36 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } static inline void write_sequnlock(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + __write_seqcount_end(&sl->seqcount); spin_unlock(&sl->lock); } static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } static inline void write_sequnlock_bh(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + __write_seqcount_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } static inline void write_sequnlock_irq(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + __write_seqcount_end(&sl->seqcount); spin_unlock_irq(&sl->lock); } @@ -254,7 +289,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); return flags; } @@ -264,7 +299,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { - write_seqcount_end(&sl->seqcount); + __write_seqcount_end(&sl->seqcount); spin_unlock_irqrestore(&sl->lock, flags); } diff --git a/include/linux/signal.h b/include/linux/signal.h index 2ac423bdb676..1414eb203e92 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -226,6 +226,7 @@ static inline void init_sigpending(struct sigpending *sig) } extern void flush_sigqueue(struct sigpending *queue); +extern void flush_task_sigqueue(struct task_struct *tsk); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 478120ae34e5..05b43cecfbcc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -133,6 +133,7 @@ struct sk_buff_head { __u32 qlen; spinlock_t lock; + raw_spinlock_t raw_lock; }; struct sk_buff; @@ -1065,6 +1066,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) __skb_queue_head_init(list); } +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) +{ + raw_spin_lock_init(&list->raw_lock); + __skb_queue_head_init(list); +} + static inline void skb_queue_head_init_class(struct sk_buff_head *list, struct lock_class_key *class) { diff --git a/include/linux/smp.h b/include/linux/smp.h index c8488763277f..5a57c1d9f6b1 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -223,6 +223,9 @@ static inline void kick_all_cpus_sync(void) { } #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) #define put_cpu() preempt_enable() +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) +#define put_cpu_light() migrate_enable() + /* * Callback to arch code if there's nosmp or maxcpus=0 on the * boot command line: diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 75f34949d9ab..a124f92b4055 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -262,7 +262,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) /* Include rwlock functions */ -#include <linux/rwlock.h> +#ifdef CONFIG_PREEMPT_RT_FULL +# include <linux/rwlock_rt.h> +#else +# include <linux/rwlock.h> +#endif /* * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: @@ -273,6 +277,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # include <linux/spinlock_api_up.h> #endif +#ifdef CONFIG_PREEMPT_RT_FULL +# include <linux/spinlock_rt.h> +#else /* PREEMPT_RT_FULL */ + /* * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ @@ -402,4 +410,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +#endif /* !PREEMPT_RT_FULL */ + #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 51df117abe46..3f68f5018fff 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -191,6 +191,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) return 0; } -#include <linux/rwlock_api_smp.h> +#ifndef CONFIG_PREEMPT_RT_FULL +# include <linux/rwlock_api_smp.h> +#endif #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h new file mode 100644 index 000000000000..b3c504bb9852 --- /dev/null +++ b/include/linux/spinlock_rt.h @@ -0,0 +1,169 @@ +#ifndef __LINUX_SPINLOCK_RT_H +#define __LINUX_SPINLOCK_RT_H + +#ifndef __LINUX_SPINLOCK_H +#error Do not include directly. Use spinlock.h +#endif + +#include <linux/bug.h> + +extern void +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); + +#define spin_lock_init(slock) \ +do { \ + static struct lock_class_key __key; \ + \ + rt_mutex_init(&(slock)->lock); \ + __rt_spin_lock_init(slock, #slock, &__key); \ +} while (0) + +extern void __lockfunc rt_spin_lock(spinlock_t *lock); +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); +extern void __lockfunc rt_spin_unlock_after_trylock_in_irq(spinlock_t *lock); +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); + +/* + * lockdep-less calls, for derived types like rwlock: + * (for trylock they can use rt_mutex_trylock() directly. + */ +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); + +#define spin_lock_local(lock) rt_spin_lock(lock) +#define spin_unlock_local(lock) rt_spin_unlock(lock) + +#define spin_lock(lock) \ + do { \ + migrate_disable(); \ + rt_spin_lock(lock); \ + } while (0) + +#define spin_lock_bh(lock) \ + do { \ + local_bh_disable(); \ + migrate_disable(); \ + rt_spin_lock(lock); \ + } while (0) + +#define spin_lock_irq(lock) spin_lock(lock) + +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) + +#define spin_trylock(lock) \ +({ \ + int __locked; \ + migrate_disable(); \ + __locked = spin_do_trylock(lock); \ + if (!__locked) \ + migrate_enable(); \ + __locked; \ +}) + +#ifdef CONFIG_LOCKDEP +# define spin_lock_nested(lock, subclass) \ + do { \ + migrate_disable(); \ + rt_spin_lock_nested(lock, subclass); \ + } while (0) + +# define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + migrate_disable(); \ + rt_spin_lock_nested(lock, subclass); \ + } while (0) +#else +# define spin_lock_nested(lock, subclass) spin_lock(lock) + +# define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + spin_lock(lock); \ + } while (0) +#endif + +#define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + spin_lock(lock); \ + } while (0) + +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) +{ + unsigned long flags = 0; +#ifdef CONFIG_TRACE_IRQFLAGS + flags = rt_spin_lock_trace_flags(lock); +#else + spin_lock(lock); /* lock_local */ +#endif + return flags; +} + +/* FIXME: we need rt_spin_lock_nest_lock */ +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) + +#define spin_unlock(lock) \ + do { \ + rt_spin_unlock(lock); \ + migrate_enable(); \ + } while (0) + +#define spin_unlock_bh(lock) \ + do { \ + rt_spin_unlock(lock); \ + migrate_enable(); \ + local_bh_enable(); \ + } while (0) + +#define spin_unlock_irq(lock) spin_unlock(lock) + +#define spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + spin_unlock(lock); \ + } while (0) + +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) +#define spin_trylock_irq(lock) spin_trylock(lock) + +#define spin_trylock_irqsave(lock, flags) \ + rt_spin_trylock_irqsave(lock, &(flags)) + +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) + +#ifdef CONFIG_GENERIC_LOCKBREAK +# define spin_is_contended(lock) ((lock)->break_lock) +#else +# define spin_is_contended(lock) (((void)(lock), 0)) +#endif + +static inline int spin_can_lock(spinlock_t *lock) +{ + return !rt_mutex_is_locked(&lock->lock); +} + +static inline int spin_is_locked(spinlock_t *lock) +{ + return rt_mutex_is_locked(&lock->lock); +} + +static inline void assert_spin_locked(spinlock_t *lock) +{ + BUG_ON(!spin_is_locked(lock)); +} + +#define atomic_dec_and_lock(atomic, lock) \ + atomic_dec_and_spin_lock(atomic, lock) + +#endif diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 73548eb13a5d..10bac715ea96 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -9,80 +9,15 @@ * Released under the General Public License (GPL). */ -#if defined(CONFIG_SMP) -# include <asm/spinlock_types.h> -#else -# include <linux/spinlock_types_up.h> -#endif - -#include <linux/lockdep.h> - -typedef struct raw_spinlock { - arch_spinlock_t raw_lock; -#ifdef CONFIG_GENERIC_LOCKBREAK - unsigned int break_lock; -#endif -#ifdef CONFIG_DEBUG_SPINLOCK - unsigned int magic, owner_cpu; - void *owner; -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} raw_spinlock_t; - -#define SPINLOCK_MAGIC 0xdead4ead - -#define SPINLOCK_OWNER_INIT ((void *)-1L) - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } -#else -# define SPIN_DEP_MAP_INIT(lockname) -#endif +#include <linux/spinlock_types_raw.h> -#ifdef CONFIG_DEBUG_SPINLOCK -# define SPIN_DEBUG_INIT(lockname) \ - .magic = SPINLOCK_MAGIC, \ - .owner_cpu = -1, \ - .owner = SPINLOCK_OWNER_INIT, +#ifndef CONFIG_PREEMPT_RT_FULL +# include <linux/spinlock_types_nort.h> +# include <linux/rwlock_types.h> #else -# define SPIN_DEBUG_INIT(lockname) +# include <linux/rtmutex.h> +# include <linux/spinlock_types_rt.h> +# include <linux/rwlock_types_rt.h> #endif -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ - { \ - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ - SPIN_DEBUG_INIT(lockname) \ - SPIN_DEP_MAP_INIT(lockname) } - -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) - -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) - -typedef struct spinlock { - union { - struct raw_spinlock rlock; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) - struct { - u8 __padding[LOCK_PADSIZE]; - struct lockdep_map dep_map; - }; -#endif - }; -} spinlock_t; - -#define __SPIN_LOCK_INITIALIZER(lockname) \ - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } - -#define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) - -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) - -#include <linux/rwlock_types.h> - #endif /* __LINUX_SPINLOCK_TYPES_H */ diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h new file mode 100644 index 000000000000..f1dac1fb1d6a --- /dev/null +++ b/include/linux/spinlock_types_nort.h @@ -0,0 +1,33 @@ +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H +#define __LINUX_SPINLOCK_TYPES_NORT_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +#error "Do not include directly. Include spinlock_types.h instead" +#endif + +/* + * The non RT version maps spinlocks to raw_spinlocks + */ +typedef struct spinlock { + union { + struct raw_spinlock rlock; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) + struct { + u8 __padding[LOCK_PADSIZE]; + struct lockdep_map dep_map; + }; +#endif + }; +} spinlock_t; + +#define __SPIN_LOCK_INITIALIZER(lockname) \ + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } + +#define __SPIN_LOCK_UNLOCKED(lockname) \ + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) + +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) + +#endif diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h new file mode 100644 index 000000000000..edffc4d53fc9 --- /dev/null +++ b/include/linux/spinlock_types_raw.h @@ -0,0 +1,56 @@ +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H +#define __LINUX_SPINLOCK_TYPES_RAW_H + +#if defined(CONFIG_SMP) +# include <asm/spinlock_types.h> +#else +# include <linux/spinlock_types_up.h> +#endif + +#include <linux/lockdep.h> + +typedef struct raw_spinlock { + arch_spinlock_t raw_lock; +#ifdef CONFIG_GENERIC_LOCKBREAK + unsigned int break_lock; +#endif +#ifdef CONFIG_DEBUG_SPINLOCK + unsigned int magic, owner_cpu; + void *owner; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} raw_spinlock_t; + +#define SPINLOCK_MAGIC 0xdead4ead + +#define SPINLOCK_OWNER_INIT ((void *)-1L) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define SPIN_DEP_MAP_INIT(lockname) +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK +# define SPIN_DEBUG_INIT(lockname) \ + .magic = SPINLOCK_MAGIC, \ + .owner_cpu = -1, \ + .owner = SPINLOCK_OWNER_INIT, +#else +# define SPIN_DEBUG_INIT(lockname) +#endif + +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ + { \ + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ + SPIN_DEBUG_INIT(lockname) \ + SPIN_DEP_MAP_INIT(lockname) } + +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) + +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) + +#endif diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h new file mode 100644 index 000000000000..9fd431967abc --- /dev/null +++ b/include/linux/spinlock_types_rt.h @@ -0,0 +1,51 @@ +#ifndef __LINUX_SPINLOCK_TYPES_RT_H +#define __LINUX_SPINLOCK_TYPES_RT_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +#error "Do not include directly. Include spinlock_types.h instead" +#endif + +#include <linux/cache.h> + +/* + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: + */ +typedef struct spinlock { + struct rt_mutex lock; + unsigned int break_lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} spinlock_t; + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# define __RT_SPIN_INITIALIZER(name) \ + { \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + .save_state = 1, \ + .file = __FILE__, \ + .line = __LINE__ , \ + } +#else +# define __RT_SPIN_INITIALIZER(name) \ + { \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + .save_state = 1, \ + } +#endif + +/* +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) +*/ + +#define __SPIN_LOCK_UNLOCKED(name) \ + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ + SPIN_DEP_MAP_INIT(name) } + +#define __DEFINE_SPINLOCK(name) \ + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) + +#define DEFINE_SPINLOCK(name) \ + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name) + +#endif diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 04f4121a23ae..60347e5fa1dd 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp); void process_srcu(struct work_struct *work); -#define __SRCU_STRUCT_INIT(name) \ +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ { \ .completed = -300, \ - .per_cpu_ref = &name##_srcu_array, \ + .per_cpu_ref = &pcpu_name, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ .running = false, \ .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ @@ -104,11 +104,11 @@ void process_srcu(struct work_struct *work); */ #define DEFINE_SRCU(name) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ - struct srcu_struct name = __SRCU_STRUCT_INIT(name); + struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array); #define DEFINE_STATIC_SRCU(name) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ - static struct srcu_struct name = __SRCU_STRUCT_INIT(name); + static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array); /** * call_srcu() - Queue a callback for invocation after an SRCU grace period diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 14a8ff2de11e..b15655f84d12 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -25,6 +25,7 @@ #include <linux/rcupdate.h> #include <linux/wait.h> #include <linux/rbtree.h> +#include <linux/atomic.h> #include <uapi/linux/sysctl.h> /* For the /proc/sys support */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 8c5a197e1587..5fcd72c57ebe 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) extern int del_timer_sync(struct timer_list *timer); #else # define del_timer_sync(t) del_timer(t) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5ca0951e1855..44b37510c14f 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -6,38 +6,37 @@ /* * These routines enable/disable the pagefault handler in that - * it will not take any locks and go straight to the fixup table. - * - * They have great resemblance to the preempt_disable/enable calls - * and in fact they are identical; this is because currently there is - * no other way to make the pagefault handlers do this. So we do - * disable preemption but we don't necessarily care about that. + * it will not take any MM locks and go straight to the fixup table. */ -static inline void pagefault_disable(void) +static inline void raw_pagefault_disable(void) { inc_preempt_count(); - /* - * make sure to have issued the store before a pagefault - * can hit. - */ barrier(); } -static inline void pagefault_enable(void) +static inline void raw_pagefault_enable(void) { - /* - * make sure to issue those last loads/stores before enabling - * the pagefault handler again. - */ barrier(); dec_preempt_count(); - /* - * make sure we do.. - */ barrier(); preempt_check_resched(); } +#ifndef CONFIG_PREEMPT_RT_FULL +static inline void pagefault_disable(void) +{ + raw_pagefault_disable(); +} + +static inline void pagefault_enable(void) +{ + raw_pagefault_enable(); +} +#else +extern void pagefault_disable(void); +extern void pagefault_enable(void); +#endif + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, @@ -77,9 +76,9 @@ static inline unsigned long __copy_from_user_nocache(void *to, mm_segment_t old_fs = get_fs(); \ \ set_fs(KERNEL_DS); \ - pagefault_disable(); \ + raw_pagefault_disable(); \ ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ - pagefault_enable(); \ + raw_pagefault_enable(); \ set_fs(old_fs); \ ret; \ }) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 06f28beed7c2..d115f620831d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -26,6 +26,7 @@ #include <linux/errno.h> #include <linux/rbtree.h> +#include <linux/wait.h> struct vm_area_struct; struct mm_struct; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 2d4e3d793f79..0abbc5aaa0cf 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -29,7 +29,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); static inline void __count_vm_event(enum vm_event_item item) { + preempt_disable_rt(); __this_cpu_inc(vm_event_states.event[item]); + preempt_enable_rt(); } static inline void count_vm_event(enum vm_event_item item) @@ -39,7 +41,9 @@ static inline void count_vm_event(enum vm_event_item item) static inline void __count_vm_events(enum vm_event_item item, long delta) { + preempt_disable_rt(); __this_cpu_add(vm_event_states.event[item], delta); + preempt_enable_rt(); } static inline void count_vm_events(enum vm_event_item item, long delta) diff --git a/include/linux/wait-simple.h b/include/linux/wait-simple.h new file mode 100644 index 000000000000..f86bca2c41d5 --- /dev/null +++ b/include/linux/wait-simple.h @@ -0,0 +1,207 @@ +#ifndef _LINUX_WAIT_SIMPLE_H +#define _LINUX_WAIT_SIMPLE_H + +#include <linux/spinlock.h> +#include <linux/list.h> + +#include <asm/current.h> + +struct swaiter { + struct task_struct *task; + struct list_head node; +}; + +#define DEFINE_SWAITER(name) \ + struct swaiter name = { \ + .task = current, \ + .node = LIST_HEAD_INIT((name).node), \ + } + +struct swait_head { + raw_spinlock_t lock; + struct list_head list; +}; + +#define SWAIT_HEAD_INITIALIZER(name) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .list = LIST_HEAD_INIT((name).list), \ + } + +#define DEFINE_SWAIT_HEAD(name) \ + struct swait_head name = SWAIT_HEAD_INITIALIZER(name) + +extern void __init_swait_head(struct swait_head *h, struct lock_class_key *key); + +#define init_swait_head(swh) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_swait_head((swh), &__key); \ + } while (0) + +/* + * Waiter functions + */ +extern void swait_prepare_locked(struct swait_head *head, struct swaiter *w); +extern void swait_prepare(struct swait_head *head, struct swaiter *w, int state); +extern void swait_finish_locked(struct swait_head *head, struct swaiter *w); +extern void swait_finish(struct swait_head *head, struct swaiter *w); + +/* Check whether a head has waiters enqueued */ +static inline bool swaitqueue_active(struct swait_head *h) +{ + /* Make sure the condition is visible before checking list_empty() */ + smp_mb(); + return !list_empty(&h->list); +} + +/* + * Wakeup functions + */ +extern unsigned int __swait_wake(struct swait_head *head, unsigned int state, unsigned int num); +extern unsigned int __swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num); + +#define swait_wake(head) __swait_wake(head, TASK_NORMAL, 1) +#define swait_wake_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 1) +#define swait_wake_all(head) __swait_wake(head, TASK_NORMAL, 0) +#define swait_wake_all_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 0) + +/* + * Event API + */ +#define __swait_event(wq, condition) \ +do { \ + DEFINE_SWAITER(__wait); \ + \ + for (;;) { \ + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + schedule(); \ + } \ + swait_finish(&wq, &__wait); \ +} while (0) + +/** + * swait_event - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + */ +#define swait_event(wq, condition) \ +do { \ + if (condition) \ + break; \ + __swait_event(wq, condition); \ +} while (0) + +#define __swait_event_interruptible(wq, condition, ret) \ +do { \ + DEFINE_SWAITER(__wait); \ + \ + for (;;) { \ + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + schedule(); \ + } \ + swait_finish(&wq, &__wait); \ +} while (0) + +#define __swait_event_interruptible_timeout(wq, condition, ret) \ +do { \ + DEFINE_SWAITER(__wait); \ + \ + for (;;) { \ + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + ret = schedule_timeout(ret); \ + if (!ret) \ + break; \ + } \ + swait_finish(&wq, &__wait); \ +} while (0) + +/** + * swait_event_interruptible - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + */ +#define swait_event_interruptible(wq, condition) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __swait_event_interruptible(wq, condition, __ret); \ + __ret; \ +}) + +#define swait_event_interruptible_timeout(wq, condition, timeout) \ +({ \ + int __ret = timeout; \ + if (!(condition)) \ + __swait_event_interruptible_timeout(wq, condition, __ret); \ + __ret; \ +}) + +#define __swait_event_timeout(wq, condition, ret) \ +do { \ + DEFINE_SWAITER(__wait); \ + \ + for (;;) { \ + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + ret = schedule_timeout(ret); \ + if (!ret) \ + break; \ + } \ + swait_finish(&wq, &__wait); \ +} while (0) + +/** + * swait_event_timeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function returns 0 if the @timeout elapsed, and the remaining + * jiffies if the condition evaluated to true before the timeout elapsed. + */ +#define swait_event_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!(condition)) \ + __swait_event_timeout(wq, condition, __ret); \ + __ret; \ +}) + +#endif diff --git a/include/net/dst.h b/include/net/dst.h index e0c97f5a57cf..bbab9411dd46 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -393,7 +393,7 @@ static inline void dst_confirm(struct dst_entry *dst) static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, struct sk_buff *skb) { - const struct hh_cache *hh; + struct hh_cache *hh; if (dst->pending_confirm) { unsigned long now = jiffies; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7e748ad8b50c..29d842a53324 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -335,7 +335,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) } #endif -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq; int hh_len; @@ -390,7 +390,7 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n, const struct net_device *dev) { unsigned int seq; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2ba9de89e8ec..5602015fc5e9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -57,6 +57,7 @@ struct netns_ipv4 { int sysctl_icmp_echo_ignore_all; int sysctl_icmp_echo_ignore_broadcasts; + int sysctl_icmp_echo_sysrq; int sysctl_icmp_ignore_bogus_error_responses; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h new file mode 100644 index 000000000000..28646db2c775 --- /dev/null +++ b/include/trace/events/hist.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hist + +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HIST_H + +#include "latency_hist.h" +#include <linux/tracepoint.h> + +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST) +#define trace_preemptirqsoff_hist(a,b) +#else +TRACE_EVENT(preemptirqsoff_hist, + + TP_PROTO(int reason, int starthist), + + TP_ARGS(reason, starthist), + + TP_STRUCT__entry( + __field(int, reason ) + __field(int, starthist ) + ), + + TP_fast_assign( + __entry->reason = reason; + __entry->starthist = starthist; + ), + + TP_printk("reason=%s starthist=%s", getaction(__entry->reason), + __entry->starthist ? "start" : "stop") +); +#endif + +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST +#define trace_hrtimer_interrupt(a,b,c,d) +#else +TRACE_EVENT(hrtimer_interrupt, + + TP_PROTO(int cpu, long long offset, struct task_struct *curr, struct task_struct *task), + + TP_ARGS(cpu, offset, curr, task), + + TP_STRUCT__entry( + __field(int, cpu ) + __field(long long, offset ) + __array(char, ccomm, TASK_COMM_LEN) + __field(int, cprio ) + __array(char, tcomm, TASK_COMM_LEN) + __field(int, tprio ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->offset = offset; + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN); + __entry->cprio = curr->prio; + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", task != NULL ? TASK_COMM_LEN : 7); + __entry->tprio = task != NULL ? task->prio : -1; + ), + + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]", + __entry->cpu, __entry->offset, __entry->ccomm, __entry->cprio, __entry->tcomm, __entry->tprio) +); +#endif + +#endif /* _TRACE_HIST_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h new file mode 100644 index 000000000000..7f70794a13e7 --- /dev/null +++ b/include/trace/events/latency_hist.h @@ -0,0 +1,29 @@ +#ifndef _LATENCY_HIST_H +#define _LATENCY_HIST_H + +enum hist_action { + IRQS_ON, + PREEMPT_ON, + TRACE_STOP, + IRQS_OFF, + PREEMPT_OFF, + TRACE_START, +}; + +static char *actions[] = { + "IRQS_ON", + "PREEMPT_ON", + "TRACE_STOP", + "IRQS_OFF", + "PREEMPT_OFF", + "TRACE_START", +}; + +static inline char *getaction(int action) +{ + if (action >= 0 && action <= sizeof(actions)/sizeof(actions[0])) + return(actions[action]); + return("unknown"); +} + +#endif /* _LATENCY_HIST_H */ |