diff options
Diffstat (limited to 'drivers')
59 files changed, 1551 insertions, 238 deletions
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index a0c478784314..166ee955405f 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -112,7 +112,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); * interrupt level */ ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); /* Mutex for _OSI support */ diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 3cf77afd142c..dc32e72132f1 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -269,14 +269,14 @@ acpi_status acpi_hw_clear_acpi_status(void) ACPI_BITMASK_ALL_FIXED_STATUS, ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* Clear the fixed events in PM1 A/B */ status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, ACPI_BITMASK_ALL_FIXED_STATUS); - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); if (ACPI_FAILURE(status)) { goto exit; diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c index 5f97468df8ff..8c017f15da7d 100644 --- a/drivers/acpi/acpica/hwxface.c +++ b/drivers/acpi/acpica/hwxface.c @@ -374,7 +374,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) return_ACPI_STATUS(AE_BAD_PARAMETER); } - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* * At this point, we know that the parent register is one of the @@ -435,7 +435,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 37b8b58fcd56..938795507d87 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void) return_ACPI_STATUS (status); } - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); if (ACPI_FAILURE (status)) { return_ACPI_STATUS (status); } @@ -141,7 +141,7 @@ void acpi_ut_mutex_terminate(void) /* Delete the spinlocks */ acpi_os_delete_lock(acpi_gbl_gpe_lock); - acpi_os_delete_lock(acpi_gbl_hardware_lock); + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); acpi_os_delete_lock(acpi_gbl_reference_count_lock); /* Delete the reader/writer lock */ diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index cdf6215a9a22..c9f2a33b8c0b 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, unsigned long flags; unsigned int consumed; - local_irq_save(flags); + local_irq_save_nort(flags); consumed = ata_sff_data_xfer32(dev, buf, buflen, rw); - local_irq_restore(flags); + local_irq_restore_nort(flags); return consumed; } @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) unsigned long flags; /* FIXME: use a bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page); /* do the actual data transfer */ @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) do_write); kunmap_atomic(buf); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, @@ -864,7 +864,7 @@ next_sg: unsigned long flags; /* FIXME: use bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page); /* do the actual data transfer */ @@ -872,7 +872,7 @@ next_sg: count, rw); kunmap_atomic(buf); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); consumed = ap->ops->sff_data_xfer(dev, buf + offset, diff --git a/drivers/char/random.c b/drivers/char/random.c index 9cd6968e2f92..eb47efec2506 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -776,8 +776,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) } sample; long delta, delta2, delta3; - preempt_disable(); - sample.jiffies = jiffies; sample.cycles = random_get_entropy(); sample.num = num; @@ -818,7 +816,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) */ credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); } - preempt_enable(); } void add_input_randomness(unsigned int type, unsigned int code, @@ -871,28 +868,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) return *(ptr + f->reg_idx++); } -void add_interrupt_randomness(int irq, int irq_flags) +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) { struct entropy_store *r; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); __u32 c_high, j_high; - __u64 ip; unsigned long seed; int credit = 0; if (cycles == 0) - cycles = get_reg(fast_pool, regs); + cycles = get_reg(fast_pool, NULL); c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; j_high = (sizeof(now) > 4) ? now >> 32 : 0; fast_pool->pool[0] ^= cycles ^ j_high ^ irq; fast_pool->pool[1] ^= now ^ c_high; - ip = regs ? instruction_pointer(regs) : _RET_IP_; + if (!ip) + ip = _RET_IP_; fast_pool->pool[2] ^= ip; fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : - get_reg(fast_pool, regs); + get_reg(fast_pool, NULL); fast_mix(fast_pool); add_interrupt_bench(cycles); diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 8bdbc45c6dad..43f1c6bc6e28 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -23,8 +23,7 @@ * this 32 bit free-running counter. the second channel is not used. * * - The third channel may be used to provide a 16-bit clockevent - * source, used in either periodic or oneshot mode. This runs - * at 32 KiHZ, and can handle delays of up to two seconds. + * source, used in either periodic or oneshot mode. * * A boot clocksource and clockevent source are also currently needed, * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so @@ -74,6 +73,7 @@ static struct clocksource clksrc = { struct tc_clkevt_device { struct clock_event_device clkevt; struct clk *clk; + u32 freq; void __iomem *regs; }; @@ -82,13 +82,6 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt) return container_of(clkevt, struct tc_clkevt_device, clkevt); } -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, - * because using one of the divided clocks would usually mean the - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). - * - * A divided clock could be good for high resolution timers, since - * 30.5 usec resolution can seem "low". - */ static u32 timer_clock; static void tc_mode(enum clock_event_mode m, struct clock_event_device *d) @@ -111,11 +104,12 @@ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d) case CLOCK_EVT_MODE_PERIODIC: clk_enable(tcd->clk); - /* slow clock, count up to RC, then irq and restart */ + /* count up to RC, then irq and restart */ __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); - __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); + __raw_writel((tcd->freq + HZ / 2) / HZ, + tcaddr + ATMEL_TC_REG(2, RC)); /* Enable clock and interrupts on RC compare */ __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); @@ -128,7 +122,7 @@ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d) case CLOCK_EVT_MODE_ONESHOT: clk_enable(tcd->clk); - /* slow clock, count up to RC, then irq and stop */ + /* count up to RC, then irq and stop */ __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); @@ -157,8 +151,12 @@ static struct tc_clkevt_device clkevt = { .name = "tc_clkevt", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK /* Should be lower than at91rm9200's system timer */ .rating = 125, +#else + .rating = 200, +#endif .set_next_event = tc_next_event, .set_mode = tc_mode, }, @@ -178,8 +176,9 @@ static irqreturn_t ch2_irq(int irq, void *handle) return IRQ_NONE; } -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) { + unsigned divisor = atmel_tc_divisors[divisor_idx]; int ret; struct clk *t2_clk = tc->clk[2]; int irq = tc->irq[2]; @@ -193,7 +192,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) clkevt.regs = tc->regs; clkevt.clk = t2_clk; - timer_clock = clk32k_divisor_idx; + timer_clock = divisor_idx; + if (!divisor) + clkevt.freq = 32768; + else + clkevt.freq = clk_get_rate(t2_clk) / divisor; clkevt.clkevt.cpumask = cpumask_of(0); @@ -203,7 +206,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) return ret; } - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); return ret; } @@ -340,7 +343,11 @@ static int __init tcb_clksrc_init(void) goto err_disable_t1; /* channel 2: periodic and oneshot timer support */ +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK ret = setup_clkevents(tc, clk32k_divisor_idx); +#else + ret = setup_clkevents(tc, best_divisor_idx); +#endif if (ret) goto err_unregister_clksrc; diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index c0304ff608b0..6eb7bf435d9b 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -90,6 +90,7 @@ static cycle_t read_pit_clk(struct clocksource *cs) return elapsed; } +static struct irqaction at91sam926x_pit_irq; /* * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) */ @@ -100,6 +101,8 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev) switch (mode) { case CLOCK_EVT_MODE_PERIODIC: + /* Set up irq handler */ + setup_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq); /* update clocksource counter */ data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); pit_write(data->base, AT91_PIT_MR, @@ -113,6 +116,7 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev) /* disable irq, leaving the clocksource active */ pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); + remove_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq); break; case CLOCK_EVT_MODE_RESUME: break; diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c index 1692e17e096b..306e2051f112 100644 --- a/drivers/clocksource/timer-atmel-st.c +++ b/drivers/clocksource/timer-atmel-st.c @@ -131,6 +131,7 @@ clkevt32k_mode(enum clock_event_mode mode, struct clock_event_device *dev) break; case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_UNUSED: + remove_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq); case CLOCK_EVT_MODE_RESUME: irqmask = 0; break; diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index c59bdcb83217..8f23161d80be 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -123,7 +123,7 @@ config X86_POWERNOW_K7_ACPI config X86_POWERNOW_K8 tristate "AMD Opteron/Athlon64 PowerNow!" - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE help This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. Support for K10 and newer processors is now in acpi-cpufreq. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8ae655c364f4..ce1d93e93d1a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -64,12 +64,6 @@ static inline bool has_target(void) return cpufreq_driver->target_index || cpufreq_driver->target; } -/* - * rwsem to guarantee that cpufreq driver module doesn't unload during critical - * sections - */ -static DECLARE_RWSEM(cpufreq_rwsem); - /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); @@ -215,9 +209,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) if (cpu >= nr_cpu_ids) return NULL; - if (!down_read_trylock(&cpufreq_rwsem)) - return NULL; - /* get the cpufreq driver */ read_lock_irqsave(&cpufreq_driver_lock, flags); @@ -230,9 +221,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) read_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!policy) - up_read(&cpufreq_rwsem); - return policy; } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); @@ -240,7 +228,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get); void cpufreq_cpu_put(struct cpufreq_policy *policy) { kobject_put(&policy->kobj); - up_read(&cpufreq_rwsem); } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); @@ -765,9 +752,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) struct freq_attr *fattr = to_attr(attr); ssize_t ret; - if (!down_read_trylock(&cpufreq_rwsem)) - return -EINVAL; - down_read(&policy->rwsem); if (fattr->show) @@ -776,7 +760,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) ret = -EIO; up_read(&policy->rwsem); - up_read(&cpufreq_rwsem); return ret; } @@ -793,9 +776,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, if (!cpu_online(policy->cpu)) goto unlock; - if (!down_read_trylock(&cpufreq_rwsem)) - goto unlock; - down_write(&policy->rwsem); if (fattr->store) @@ -804,8 +784,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, ret = -EIO; up_write(&policy->rwsem); - - up_read(&cpufreq_rwsem); unlock: put_online_cpus(); @@ -1117,16 +1095,12 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (unlikely(policy)) return 0; - if (!down_read_trylock(&cpufreq_rwsem)) - return 0; - /* Check if this cpu was hot-unplugged earlier and has siblings */ read_lock_irqsave(&cpufreq_driver_lock, flags); for_each_policy(policy) { if (cpumask_test_cpu(cpu, policy->related_cpus)) { read_unlock_irqrestore(&cpufreq_driver_lock, flags); ret = cpufreq_add_policy_cpu(policy, cpu, dev); - up_read(&cpufreq_rwsem); return ret; } } @@ -1269,8 +1243,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) kobject_uevent(&policy->kobj, KOBJ_ADD); - up_read(&cpufreq_rwsem); - /* Callback for handling stuff after policy is ready */ if (cpufreq_driver->ready) cpufreq_driver->ready(policy); @@ -1304,8 +1276,6 @@ err_set_policy_cpu: cpufreq_policy_free(policy); nomem_out: - up_read(&cpufreq_rwsem); - return ret; } @@ -2499,19 +2469,20 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) pr_debug("unregistering driver %s\n", driver->name); + /* Protect against concurrent cpu hotplug */ + get_online_cpus(); subsys_interface_unregister(&cpufreq_interface); if (cpufreq_boost_supported()) cpufreq_sysfs_remove_file(&boost.attr); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); - down_write(&cpufreq_rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - up_write(&cpufreq_rwsem); + put_online_cpus(); return 0; } diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index b232397ad7ec..a0ace2758e2e 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -57,7 +57,7 @@ struct gpio_bank { u32 saved_datain; u32 level_mask; u32 toggle_mask; - spinlock_t lock; + raw_spinlock_t lock; struct gpio_chip chip; struct clk *dbck; u32 mod_usage; @@ -498,14 +498,14 @@ static int omap_gpio_irq_type(struct irq_data *d, unsigned type) (type & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH))) return -EINVAL; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); retval = omap_set_gpio_triggering(bank, offset, type); omap_gpio_init_irq(bank, offset); if (!omap_gpio_is_input(bank, offset)) { - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return -EINVAL; } - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) __irq_set_handler_locked(d->irq, handle_level_irq); @@ -626,14 +626,14 @@ static int omap_set_gpio_wakeup(struct gpio_bank *bank, unsigned offset, return -EINVAL; } - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); if (enable) bank->context.wake_en |= gpio_bit; else bank->context.wake_en &= ~gpio_bit; writel_relaxed(bank->context.wake_en, bank->base + bank->regs->wkup_en); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -668,7 +668,7 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset) if (!BANK_USED(bank)) pm_runtime_get_sync(bank->dev); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); /* Set trigger to none. You need to enable the desired trigger with * request_irq() or set_irq_type(). Only do this if the IRQ line has * not already been requested. @@ -678,7 +678,7 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset) omap_enable_gpio_module(bank, offset); } bank->mod_usage |= BIT(offset); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -688,11 +688,11 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip); unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); bank->mod_usage &= ~(BIT(offset)); omap_disable_gpio_module(bank, offset); omap_reset_gpio(bank, offset); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); /* * If this is the last gpio to be freed in the bank, @@ -794,9 +794,9 @@ static unsigned int omap_gpio_irq_startup(struct irq_data *d) if (!BANK_USED(bank)) pm_runtime_get_sync(bank->dev); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); omap_gpio_init_irq(bank, offset); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); omap_gpio_unmask_irq(d); return 0; @@ -808,11 +808,11 @@ static void omap_gpio_irq_shutdown(struct irq_data *d) unsigned long flags; unsigned offset = d->hwirq; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); bank->irq_usage &= ~(BIT(offset)); omap_disable_gpio_module(bank, offset); omap_reset_gpio(bank, offset); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); /* * If this is the last IRQ to be freed in the bank, @@ -836,10 +836,10 @@ static void omap_gpio_mask_irq(struct irq_data *d) unsigned offset = d->hwirq; unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); omap_set_gpio_irqenable(bank, offset, 0); omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); } static void omap_gpio_unmask_irq(struct irq_data *d) @@ -849,7 +849,7 @@ static void omap_gpio_unmask_irq(struct irq_data *d) u32 trigger = irqd_get_trigger_type(d); unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); if (trigger) omap_set_gpio_triggering(bank, offset, trigger); @@ -861,7 +861,7 @@ static void omap_gpio_unmask_irq(struct irq_data *d) } omap_set_gpio_irqenable(bank, offset, 1); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); } /*---------------------------------------------------------------------*/ @@ -874,9 +874,9 @@ static int omap_mpuio_suspend_noirq(struct device *dev) OMAP_MPUIO_GPIO_MASKIT / bank->stride; unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); writel_relaxed(0xffff & ~bank->context.wake_en, mask_reg); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -889,9 +889,9 @@ static int omap_mpuio_resume_noirq(struct device *dev) OMAP_MPUIO_GPIO_MASKIT / bank->stride; unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); writel_relaxed(bank->context.wake_en, mask_reg); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -937,9 +937,9 @@ static int omap_gpio_get_direction(struct gpio_chip *chip, unsigned offset) bank = container_of(chip, struct gpio_bank, chip); reg = bank->base + bank->regs->direction; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); dir = !!(readl_relaxed(reg) & BIT(offset)); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return dir; } @@ -949,9 +949,9 @@ static int omap_gpio_input(struct gpio_chip *chip, unsigned offset) unsigned long flags; bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); omap_set_gpio_direction(bank, offset, 1); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -973,10 +973,10 @@ static int omap_gpio_output(struct gpio_chip *chip, unsigned offset, int value) unsigned long flags; bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); bank->set_dataout(bank, offset, value); omap_set_gpio_direction(bank, offset, 0); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -988,9 +988,9 @@ static int omap_gpio_debounce(struct gpio_chip *chip, unsigned offset, bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); omap2_set_gpio_debounce(bank, offset, debounce); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -1001,9 +1001,9 @@ static void omap_gpio_set(struct gpio_chip *chip, unsigned offset, int value) unsigned long flags; bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); bank->set_dataout(bank, offset, value); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); } /*---------------------------------------------------------------------*/ @@ -1199,7 +1199,7 @@ static int omap_gpio_probe(struct platform_device *pdev) else bank->set_dataout = omap_set_gpio_dataout_mask; - spin_lock_init(&bank->lock); + raw_spin_lock_init(&bank->lock); /* Static mapping, never released */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1246,7 +1246,7 @@ static int omap_gpio_runtime_suspend(struct device *dev) unsigned long flags; u32 wake_low, wake_hi; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); /* * Only edges can generate a wakeup event to the PRCM. @@ -1299,7 +1299,7 @@ update_gpio_context_count: bank->get_context_loss_count(bank->dev); omap_gpio_dbck_disable(bank); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -1314,7 +1314,7 @@ static int omap_gpio_runtime_resume(struct device *dev) unsigned long flags; int c; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); /* * On the first resume during the probe, the context has not @@ -1350,14 +1350,14 @@ static int omap_gpio_runtime_resume(struct device *dev) if (c != bank->context_loss_count) { omap_gpio_restore_context(bank); } else { - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } } } if (!bank->workaround_enabled) { - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -1412,7 +1412,7 @@ static int omap_gpio_runtime_resume(struct device *dev) } bank->workaround_enabled = false; - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e793ed4..a3eadb970b35 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -32,6 +32,7 @@ #include "i915_trace.h" #include "intel_drv.h" #include <linux/dma_remapping.h> +#include <linux/uaccess.h> #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) @@ -465,7 +466,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } /* We can't wait for rendering with pagefaults disabled */ - if (obj->active && in_atomic()) + if (obj->active && pagefault_disabled()) return -EFAULT; if (use_cpu_reloc(obj)) @@ -1338,7 +1339,9 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, return ret; } +#ifndef CONFIG_PREEMPT_RT_BASE trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags); +#endif i915_gem_execbuffer_move_to_active(vmas, ring); i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index f7929e769250..8ea68ad4a94e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -39,7 +39,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 57c887843dc3..4021633ca65d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10086,7 +10086,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe) struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - WARN_ON(!in_interrupt()); + WARN_ON_NONRT(!in_interrupt()); if (crtc == NULL) return; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 0e894193accf..2f9de5ecb6ed 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -996,15 +996,12 @@ omap_i2c_isr(int irq, void *dev_id) u16 mask; u16 stat; - spin_lock(&dev->lock); - mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG); stat = omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG); + mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG); if (stat & mask) ret = IRQ_WAKE_THREAD; - spin_unlock(&dev->lock); - return ret; } diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 36f76e28a0bf..394f142f90c7 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision < 0xC2) { /* @@ -325,7 +325,7 @@ out: } pci_dev_put(north); pci_dev_put(isa_dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); return 0; } diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index 696b6c1ec940..0d0a96629b73 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -1241,7 +1241,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, dma_old = inb(base + 2); - local_irq_save(flags); + local_irq_save_nort(flags); dma_new = dma_old; pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); @@ -1252,7 +1252,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, if (dma_new != dma_old) outb(dma_new, base + 2); - local_irq_restore(flags); + local_irq_restore_nort(flags); printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", hwif->name, base, base + 7); diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index 19763977568c..4169433faab5 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, insl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, outsl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 177db6d5b2f5..079ae6bebf18 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data) /* disable_irq_nosync ?? */ disable_irq(hwif->irq); /* local CPU only, as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwif->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 376f2dc410c5..f014dd1b73dc 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, if ((stat & ATA_BUSY) == 0) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *rstat = stat; return -EBUSY; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 0b63facd1d87..4ceba37afc0c 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) int bswap = 1; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* read 512 bytes of id info */ hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); - local_irq_restore(flags); + local_irq_restore_nort(flags); drive->dev_flags |= IDE_DFLAG_ID_READ; #ifdef DEBUG diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index dabb88b1cbec..2cecea72520a 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, page_is_high = PageHighMem(page); if (page_is_high) - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page) + offset; @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, kunmap_atomic(buf); if (page_is_high) - local_irq_restore(flags); + local_irq_restore_nort(flags); len -= nr_bytes; } @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, } if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0d23e0568deb..140c94ce71c5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -821,7 +821,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ipoib_dbg_mcast(priv, "restarting multicast task\n"); - local_irq_save(flags); + local_irq_save_nort(flags); netif_addr_lock(dev); spin_lock(&priv->lock); @@ -903,7 +903,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) spin_unlock(&priv->lock); netif_addr_unlock(dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); /* * make sure the in-flight joins have finished before we attempt diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index e853a2134680..5b6aa39a1de7 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); rdtscl(t1); for (t = 0; t < 50; t++) gameport_read(gameport); rdtscl(t2); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index 49794b47b51c..3d7245d6b2f8 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -61,7 +61,7 @@ config LEDS_TRIGGER_BACKLIGHT config LEDS_TRIGGER_CPU bool "LED CPU Trigger" - depends on LEDS_TRIGGERS + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE help This allows LEDs to be controlled by active CPUs. This shows the active CPUs across an array of LEDs so you can see which diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 4d200883c505..98b64ed5cb81 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -1,6 +1,7 @@ config BCACHE tristate "Block device as cache" + depends on !PREEMPT_RT_FULL ---help--- Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 697f34fba06b..98347e2cf04b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2132,7 +2132,7 @@ static void dm_request_fn(struct request_queue *q) /* Establish tio->ti before queuing work (map_tio_request) */ tio->ti = ti; queue_kthread_work(&md->kworker, &tio->work); - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); } goto out; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b6793d2e051f..47413f7f02b9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1918,8 +1918,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) struct raid5_percpu *percpu; unsigned long cpu; - cpu = get_cpu(); + cpu = get_cpu_light(); percpu = per_cpu_ptr(conf->percpu, cpu); + spin_lock(&percpu->lock); if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; @@ -1975,7 +1976,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } - put_cpu(); + spin_unlock(&percpu->lock); + put_cpu_light(); } static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) @@ -6350,6 +6352,7 @@ static int raid5_alloc_percpu(struct r5conf *conf) __func__, cpu); break; } + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); } put_online_cpus(); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 896d603ad0da..66199a494235 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -494,6 +494,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { + spinlock_t lock; /* Protection for -RT */ struct page *spare_page; /* Used when checking P/Q in raid6 */ struct flex_array *scribble; /* space for constructing buffer * lists and performing address diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 006242c8bca0..453a61660d2d 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI config ATMEL_TCLIB bool "Atmel AT32/AT91 Timer/Counter Library" depends on (AVR32 || ARCH_AT91) + default y if PREEMPT_RT_FULL help Select this if you want a library to allocate the Timer/Counter blocks found on many Atmel processors. This facilitates using @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC are combined to make a single 32-bit timer. When GENERIC_CLOCKEVENTS is defined, the third timer channel - may be used as a clock event device supporting oneshot mode - (delays of up to two seconds) based on the 32 KiHz clock. + may be used as a clock event device supporting oneshot mode. config ATMEL_TCB_CLKSRC_BLOCK int @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK TC can be used for other purposes, such as PWM generation and interval timing. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK + bool "TC Block use 32 KiHz clock" + depends on ATMEL_TCB_CLKSRC + default y if !PREEMPT_RT_FULL + help + Select this to use 32 KiHz base clock rate as TC block clock + source for clock events. + + config DUMMY_IRQ tristate "Dummy IRQ handler" default n @@ -113,6 +122,35 @@ config IBM_ASM for information on the specific driver level and support statement for your IBM server. +config HWLAT_DETECTOR + tristate "Testing module to detect hardware-induced latencies" + depends on DEBUG_FS + depends on RING_BUFFER + default m + ---help--- + A simple hardware latency detector. Use this module to detect + large latencies introduced by the behavior of the underlying + system firmware external to Linux. We do this using periodic + use of stop_machine to grab all available CPUs and measure + for unexplainable gaps in the CPU timestamp counter(s). By + default, the module is not enabled until the "enable" file + within the "hwlat_detector" debugfs directory is toggled. + + This module is often used to detect SMI (System Management + Interrupts) on x86 systems, though is not x86 specific. To + this end, we default to using a sample window of 1 second, + during which we will sample for 0.5 seconds. If an SMI or + similar event occurs during that time, it is recorded + into an 8K samples global ring buffer until retreived. + + WARNING: This software should never be enabled (it can be built + but should not be turned on after it is loaded) in a production + environment where high latencies are a concern since the + sampling mechanism actually introduces latencies for + regular tasks while the CPU(s) are being held. + + If unsure, say N + config PHANTOM tristate "Sensable PHANToM (PCI)" depends on PCI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7d5c4cd118c4..6a8e39388cf9 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_C2PORT) += c2port/ obj-$(CONFIG_HMC6352) += hmc6352.o obj-y += eeprom/ obj-y += cb710/ +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c new file mode 100644 index 000000000000..2429c4331e68 --- /dev/null +++ b/drivers/misc/hwlat_detector.c @@ -0,0 +1,1240 @@ +/* + * hwlat_detector.c - A simple Hardware Latency detector. + * + * Use this module to detect large system latencies induced by the behavior of + * certain underlying system hardware or firmware, independent of Linux itself. + * The code was developed originally to detect the presence of SMIs on Intel + * and AMD systems, although there is no dependency upon x86 herein. + * + * The classical example usage of this module is in detecting the presence of + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a + * somewhat special form of hardware interrupt spawned from earlier CPU debug + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge + * LPC (or other device) to generate a special interrupt under certain + * circumstances, for example, upon expiration of a special SMI timer device, + * due to certain external thermal readings, on certain I/O address accesses, + * and other situations. An SMI hits a special CPU pin, triggers a special + * SMI mode (complete with special memory map), and the OS is unaware. + * + * Although certain hardware-inducing latencies are necessary (for example, + * a modern system often requires an SMI handler for correct thermal control + * and remote management) they can wreak havoc upon any OS-level performance + * guarantees toward low-latency, especially when the OS is not even made + * aware of the presence of these interrupts. For this reason, we need a + * somewhat brute force mechanism to detect these interrupts. In this case, + * we do it by hogging all of the CPU(s) for configurable timer intervals, + * sampling the built-in CPU timer, looking for discontiguous readings. + * + * WARNING: This implementation necessarily introduces latencies. Therefore, + * you should NEVER use this module in a production environment + * requiring any kind of low-latency performance guarantee(s). + * + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> + * + * Includes useful feedback from Clark Williams <clark@redhat.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/ring_buffer.h> +#include <linux/time.h> +#include <linux/hrtimer.h> +#include <linux/kthread.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <linux/version.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/trace_clock.h> + +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ +#define U64STR_SIZE 22 /* 20 digits max */ + +#define VERSION "1.0.0" +#define BANNER "hwlat_detector: " +#define DRVNAME "hwlat_detector" +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ + +/* Module metadata */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); +MODULE_DESCRIPTION("A simple hardware latency detector"); +MODULE_VERSION(VERSION); + +/* Module parameters */ + +static int debug; +static int enabled; +static int threshold; + +module_param(debug, int, 0); /* enable debug */ +module_param(enabled, int, 0); /* enable detector */ +module_param(threshold, int, 0); /* latency threshold */ + +/* Buffering and sampling */ + +static struct ring_buffer *ring_buffer; /* sample buffer */ +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ +static unsigned long buf_size = BUF_SIZE_DEFAULT; +static struct task_struct *kthread; /* sampling thread */ + +/* DebugFS filesystem entries */ + +static struct dentry *debug_dir; /* debugfs directory */ +static struct dentry *debug_max; /* maximum TSC delta */ +static struct dentry *debug_count; /* total detect count */ +static struct dentry *debug_sample_width; /* sample width us */ +static struct dentry *debug_sample_window; /* sample window us */ +static struct dentry *debug_sample; /* raw samples us */ +static struct dentry *debug_threshold; /* threshold us */ +static struct dentry *debug_enable; /* enable/disable */ + +/* Individual samples and global state */ + +struct sample; /* latency sample */ +struct data; /* Global state */ + +/* Sampling functions */ +static int __buffer_add_sample(struct sample *sample); +static struct sample *buffer_get_sample(struct sample *sample); + +/* Threading and state */ +static int kthread_fn(void *unused); +static int start_kthread(void); +static int stop_kthread(void); +static void __reset_stats(void); +static int init_stats(void); + +/* Debugfs interface */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry); +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry); +static int debug_sample_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static int debug_sample_release(struct inode *inode, struct file *filp); +static int debug_enable_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static ssize_t debug_enable_fwrite(struct file *file, + const char __user *user_buffer, + size_t user_size, loff_t *offset); + +/* Initialization functions */ +static int init_debugfs(void); +static void free_debugfs(void); +static int detector_init(void); +static void detector_exit(void); + +/* Individual latency samples are stored here when detected and packed into + * the ring_buffer circular buffer, where they are overwritten when + * more than buf_size/sizeof(sample) samples are received. */ +struct sample { + u64 seqnum; /* unique sequence */ + u64 duration; /* ktime delta */ + u64 outer_duration; /* ktime delta (outer loop) */ + struct timespec timestamp; /* wall time */ + unsigned long lost; +}; + +/* keep the global state somewhere. */ +static struct data { + + struct mutex lock; /* protect changes */ + + u64 count; /* total since reset */ + u64 max_sample; /* max hardware latency */ + u64 threshold; /* sample threshold level */ + + u64 sample_window; /* total sampling window (on+off) */ + u64 sample_width; /* active sampling portion of window */ + + atomic_t sample_open; /* whether the sample file is open */ + + wait_queue_head_t wq; /* waitqeue for new sample values */ + +} data; + +/** + * __buffer_add_sample - add a new latency sample recording to the ring buffer + * @sample: The new latency sample value + * + * This receives a new latency sample and records it in a global ring buffer. + * No additional locking is used in this case. + */ +static int __buffer_add_sample(struct sample *sample) +{ + return ring_buffer_write(ring_buffer, + sizeof(struct sample), sample); +} + +/** + * buffer_get_sample - remove a hardware latency sample from the ring buffer + * @sample: Pre-allocated storage for the sample + * + * This retrieves a hardware latency sample from the global circular buffer + */ +static struct sample *buffer_get_sample(struct sample *sample) +{ + struct ring_buffer_event *e = NULL; + struct sample *s = NULL; + unsigned int cpu = 0; + + if (!sample) + return NULL; + + mutex_lock(&ring_buffer_mutex); + for_each_online_cpu(cpu) { + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); + if (e) + break; + } + + if (e) { + s = ring_buffer_event_data(e); + memcpy(sample, s, sizeof(struct sample)); + } else + sample = NULL; + mutex_unlock(&ring_buffer_mutex); + + return sample; +} + +#ifndef CONFIG_TRACING +#define time_type ktime_t +#define time_get() ktime_get() +#define time_to_us(x) ktime_to_us(x) +#define time_sub(a, b) ktime_sub(a, b) +#define init_time(a, b) (a).tv64 = b +#define time_u64(a) ((a).tv64) +#else +#define time_type u64 +#define time_get() trace_clock_local() +#define time_to_us(x) div_u64(x, 1000) +#define time_sub(a, b) ((a) - (b)) +#define init_time(a, b) (a = b) +#define time_u64(a) a +#endif +/** + * get_sample - sample the CPU TSC and look for likely hardware latencies + * + * Used to repeatedly capture the CPU TSC (or similar), looking for potential + * hardware-induced latency. Called with interrupts disabled and with + * data.lock held. + */ +static int get_sample(void) +{ + time_type start, t1, t2, last_t2; + s64 diff, total = 0; + u64 sample = 0; + u64 outer_sample = 0; + int ret = -1; + + init_time(last_t2, 0); + start = time_get(); /* start timestamp */ + + do { + + t1 = time_get(); /* we'll look for a discontinuity */ + t2 = time_get(); + + if (time_u64(last_t2)) { + /* Check the delta from outer loop (t2 to next t1) */ + diff = time_to_us(time_sub(t1, last_t2)); + /* This shouldn't happen */ + if (diff < 0) { + pr_err(BANNER "time running backwards\n"); + goto out; + } + if (diff > outer_sample) + outer_sample = diff; + } + last_t2 = t2; + + total = time_to_us(time_sub(t2, start)); /* sample width */ + + /* This checks the inner loop (t1 to t2) */ + diff = time_to_us(time_sub(t2, t1)); /* current diff */ + + /* This shouldn't happen */ + if (diff < 0) { + pr_err(BANNER "time running backwards\n"); + goto out; + } + + if (diff > sample) + sample = diff; /* only want highest value */ + + } while (total <= data.sample_width); + + ret = 0; + + /* If we exceed the threshold value, we have found a hardware latency */ + if (sample > data.threshold || outer_sample > data.threshold) { + struct sample s; + + ret = 1; + + data.count++; + s.seqnum = data.count; + s.duration = sample; + s.outer_duration = outer_sample; + s.timestamp = CURRENT_TIME; + __buffer_add_sample(&s); + + /* Keep a running maximum ever recorded hardware latency */ + if (sample > data.max_sample) + data.max_sample = sample; + } + +out: + return ret; +} + +/* + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread + * @unused: A required part of the kthread API. + * + * Used to periodically sample the CPU TSC via a call to get_sample. We + * disable interrupts, which does (intentionally) introduce latency since we + * need to ensure nothing else might be running (and thus pre-empting). + * Obviously this should never be used in production environments. + * + * Currently this runs on which ever CPU it was scheduled on, but most + * real-worald hardware latency situations occur across several CPUs, + * but we might later generalize this if we find there are any actualy + * systems with alternate SMI delivery or other hardware latencies. + */ +static int kthread_fn(void *unused) +{ + int ret; + u64 interval; + + while (!kthread_should_stop()) { + + mutex_lock(&data.lock); + + local_irq_disable(); + ret = get_sample(); + local_irq_enable(); + + if (ret > 0) + wake_up(&data.wq); /* wake up reader(s) */ + + interval = data.sample_window - data.sample_width; + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ + + mutex_unlock(&data.lock); + + if (msleep_interruptible(interval)) + break; + } + + return 0; +} + +/** + * start_kthread - Kick off the hardware latency sampling/detector kthread + * + * This starts a kernel thread that will sit and sample the CPU timestamp + * counter (TSC or similar) and look for potential hardware latencies. + */ +static int start_kthread(void) +{ + kthread = kthread_run(kthread_fn, NULL, + DRVNAME); + if (IS_ERR(kthread)) { + pr_err(BANNER "could not start sampling thread\n"); + enabled = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * stop_kthread - Inform the hardware latency samping/detector kthread to stop + * + * This kicks the running hardware latency sampling/detector kernel thread and + * tells it to stop sampling now. Use this on unload and at system shutdown. + */ +static int stop_kthread(void) +{ + int ret; + + ret = kthread_stop(kthread); + + return ret; +} + +/** + * __reset_stats - Reset statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We call this + * function in order to reset those when "enable" is toggled on or off, and + * also at initialization. Should be called with data.lock held. + */ +static void __reset_stats(void) +{ + data.count = 0; + data.max_sample = 0; + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ +} + +/** + * init_stats - Setup global state statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We also use + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware + * induced system latencies. This function initializes these structures and + * allocates the global ring buffer also. + */ +static int init_stats(void) +{ + int ret = -ENOMEM; + + mutex_init(&data.lock); + init_waitqueue_head(&data.wq); + atomic_set(&data.sample_open, 0); + + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); + + if (WARN(!ring_buffer, KERN_ERR BANNER + "failed to allocate ring buffer!\n")) + goto out; + + __reset_stats(); + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */ + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ + + ret = 0; + +out: + return ret; + +} + +/* + * simple_data_read - Wrapper read function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * @entry: The entry to read from + * + * This function provides a generic read implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_read directly, but we need to make sure that the data.lock + * is held during the actual read. + */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry) +{ + char buf[U64STR_SIZE]; + u64 val = 0; + int len = 0; + + memset(buf, 0, sizeof(buf)); + + if (!entry) + return -EFAULT; + + mutex_lock(&data.lock); + val = *entry; + mutex_unlock(&data.lock); + + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); + +} + +/* + * simple_data_write - Wrapper write function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to write value from + * @cnt: The maximum number of bytes to write + * @ppos: The current "file" position + * @entry: The entry to write to + * + * This function provides a generic write implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_write directly, but we need to make sure that the data.lock + * is held during the actual write. + */ +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (err) + return -EINVAL; + + mutex_lock(&data.lock); + *entry = val; + mutex_unlock(&data.lock); + + return csize; +} + +/** + * debug_count_fopen - Open function for "count" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "count" debugfs + * interface to the hardware latency detector. + */ +static int debug_count_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_count_fread - Read function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to read the + * number of latency readings exceeding the configured threshold since + * the detector was last reset (e.g. by writing a zero into "count"). + */ +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_count_fwrite - Write function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to write a + * desired value, especially to zero the total count. + */ +static ssize_t debug_count_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_enable_fopen - Dummy open function for "enable" debugfs interface + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "enable" debugfs + * interface to the hardware latency detector. + */ +static int debug_enable_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_enable_fread - Read function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to determine + * whether the detector is currently enabled ("0\n" or "1\n" returned). + */ +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[4]; + + if ((cnt < sizeof(buf)) || (*ppos)) + return 0; + + buf[0] = enabled ? '1' : '0'; + buf[1] = '\n'; + buf[2] = '\0'; + if (copy_to_user(ubuf, buf, strlen(buf))) + return -EFAULT; + return *ppos = strlen(buf); +} + +/** + * debug_enable_fwrite - Write function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to enable or + * disable the detector, which will have the side-effect of possibly + * also resetting the global stats and kicking off the measuring + * kthread (on an enable) or the converse (upon a disable). + */ +static ssize_t debug_enable_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[4]; + int csize = min(cnt, sizeof(buf)); + long val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[sizeof(buf)-1] = '\0'; /* just in case */ + err = kstrtoul(buf, 10, &val); + if (0 != err) + return -EINVAL; + + if (val) { + if (enabled) + goto unlock; + enabled = 1; + __reset_stats(); + if (start_kthread()) + return -EFAULT; + } else { + if (!enabled) + goto unlock; + enabled = 0; + err = stop_kthread(); + if (err) { + pr_err(BANNER "cannot stop kthread\n"); + return -EFAULT; + } + wake_up(&data.wq); /* reader(s) should return */ + } +unlock: + return csize; +} + +/** + * debug_max_fopen - Open function for "max" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "max" debugfs + * interface to the hardware latency detector. + */ +static int debug_max_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_max_fread - Read function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to determine + * the maximum latency value observed since it was last reset. + */ +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); +} + +/** + * debug_max_fwrite - Write function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to reset the + * maximum or set it to some other desired value - if, then, subsequent + * measurements exceed this value, the maximum will be updated. + */ +static ssize_t debug_max_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); +} + + +/** + * debug_sample_fopen - An open function for "sample" debugfs interface + * @inode: The in-kernel inode representation of this debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function handles opening the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. Can be opened blocking or non-blocking, + * affecting whether it behaves as a buffer read pipe, or does not. + * Implements simple locking to prevent multiple simultaneous use. + */ +static int debug_sample_fopen(struct inode *inode, struct file *filp) +{ + if (!atomic_add_unless(&data.sample_open, 1, 1)) + return -EBUSY; + else + return 0; +} + +/** + * debug_sample_fread - A read function for "sample" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that will contain the samples read + * @cnt: The maximum bytes to read from the debugfs "file" + * @ppos: The current position in the debugfs "file" + * + * This function handles reading from the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. By default this will block pending a new + * value written into the sample buffer, unless there are already a + * number of value(s) waiting in the buffer, or the sample file was + * previously opened in a non-blocking mode of operation. + */ +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int len = 0; + char buf[64]; + struct sample *sample = NULL; + + if (!enabled) + return 0; + + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); + if (!sample) + return -ENOMEM; + + while (!buffer_get_sample(sample)) { + + DEFINE_WAIT(wait); + + if (filp->f_flags & O_NONBLOCK) { + len = -EAGAIN; + goto out; + } + + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&data.wq, &wait); + + if (signal_pending(current)) { + len = -EINTR; + goto out; + } + + if (!enabled) { /* enable was toggled */ + len = 0; + goto out; + } + } + + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n", + sample->timestamp.tv_sec, + sample->timestamp.tv_nsec, + sample->duration, + sample->outer_duration); + + + /* handling partial reads is more trouble than it's worth */ + if (len > cnt) + goto out; + + if (copy_to_user(ubuf, buf, len)) + len = -EFAULT; + +out: + kfree(sample); + return len; +} + +/** + * debug_sample_release - Release function for "sample" debugfs interface + * @inode: The in-kernel inode represenation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function completes the close of the debugfs interface "sample" file. + * Frees the sample_open "lock" so that other users may open the interface. + */ +static int debug_sample_release(struct inode *inode, struct file *filp) +{ + atomic_dec(&data.sample_open); + + return 0; +} + +/** + * debug_threshold_fopen - Open function for "threshold" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "threshold" debugfs + * interface to the hardware latency detector. + */ +static int debug_threshold_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_threshold_fread - Read function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to determine + * the current threshold level at which a latency will be recorded in the + * global ring buffer, typically on the order of 10us. + */ +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); +} + +/** + * debug_threshold_fwrite - Write function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to configure + * the threshold level at which any subsequently detected latencies will + * be recorded into the global ring buffer. + */ +static ssize_t debug_threshold_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + int ret; + + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); + + if (enabled) + wake_up_process(kthread); + + return ret; +} + +/** + * debug_width_fopen - Open function for "width" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "width" debugfs + * interface to the hardware latency detector. + */ +static int debug_width_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_width_fread - Read function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to determine + * for how many us of the total window us we will actively sample for any + * hardware-induced latecy periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. + */ +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); +} + +/** + * debug_width_fwrite - Write function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to configure + * for how many us of the total window us we will actively sample for any + * hardware-induced latency periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. It + * is enforced that width is less that the total window size. + */ +static ssize_t debug_width_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (val < data.sample_window) + data.sample_width = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + if (enabled) + wake_up_process(kthread); + + return csize; +} + +/** + * debug_window_fopen - Open function for "window" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. + */ +static int debug_window_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_window_fread - Read function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to read the total window size. + */ +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); +} + +/** + * debug_window_fwrite - Write function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "window" debufds + * interface to the hardware latency detetector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to write a new total window size. It + * is enfoced that any value written must be greater than the sample width + * size, or an error results. + */ +static ssize_t debug_window_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (data.sample_width < val) + data.sample_window = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + return csize; +} + +/* + * Function pointers for the "count" debugfs file operations + */ +static const struct file_operations count_fops = { + .open = debug_count_fopen, + .read = debug_count_fread, + .write = debug_count_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "enable" debugfs file operations + */ +static const struct file_operations enable_fops = { + .open = debug_enable_fopen, + .read = debug_enable_fread, + .write = debug_enable_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "max" debugfs file operations + */ +static const struct file_operations max_fops = { + .open = debug_max_fopen, + .read = debug_max_fread, + .write = debug_max_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "sample" debugfs file operations + */ +static const struct file_operations sample_fops = { + .open = debug_sample_fopen, + .read = debug_sample_fread, + .release = debug_sample_release, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "threshold" debugfs file operations + */ +static const struct file_operations threshold_fops = { + .open = debug_threshold_fopen, + .read = debug_threshold_fread, + .write = debug_threshold_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "width" debugfs file operations + */ +static const struct file_operations width_fops = { + .open = debug_width_fopen, + .read = debug_width_fread, + .write = debug_width_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "window" debugfs file operations + */ +static const struct file_operations window_fops = { + .open = debug_window_fopen, + .read = debug_window_fread, + .write = debug_window_fwrite, + .owner = THIS_MODULE, +}; + +/** + * init_debugfs - A function to initialize the debugfs interface files + * + * This function creates entries in debugfs for "hwlat_detector", including + * files to read values from the detector, current samples, and the + * maximum sample that has been captured since the hardware latency + * dectector was started. + */ +static int init_debugfs(void) +{ + int ret = -ENOMEM; + + debug_dir = debugfs_create_dir(DRVNAME, NULL); + if (!debug_dir) + goto err_debug_dir; + + debug_sample = debugfs_create_file("sample", 0444, + debug_dir, NULL, + &sample_fops); + if (!debug_sample) + goto err_sample; + + debug_count = debugfs_create_file("count", 0444, + debug_dir, NULL, + &count_fops); + if (!debug_count) + goto err_count; + + debug_max = debugfs_create_file("max", 0444, + debug_dir, NULL, + &max_fops); + if (!debug_max) + goto err_max; + + debug_sample_window = debugfs_create_file("window", 0644, + debug_dir, NULL, + &window_fops); + if (!debug_sample_window) + goto err_window; + + debug_sample_width = debugfs_create_file("width", 0644, + debug_dir, NULL, + &width_fops); + if (!debug_sample_width) + goto err_width; + + debug_threshold = debugfs_create_file("threshold", 0644, + debug_dir, NULL, + &threshold_fops); + if (!debug_threshold) + goto err_threshold; + + debug_enable = debugfs_create_file("enable", 0644, + debug_dir, &enabled, + &enable_fops); + if (!debug_enable) + goto err_enable; + + else { + ret = 0; + goto out; + } + +err_enable: + debugfs_remove(debug_threshold); +err_threshold: + debugfs_remove(debug_sample_width); +err_width: + debugfs_remove(debug_sample_window); +err_window: + debugfs_remove(debug_max); +err_max: + debugfs_remove(debug_count); +err_count: + debugfs_remove(debug_sample); +err_sample: + debugfs_remove(debug_dir); +err_debug_dir: +out: + return ret; +} + +/** + * free_debugfs - A function to cleanup the debugfs file interface + */ +static void free_debugfs(void) +{ + /* could also use a debugfs_remove_recursive */ + debugfs_remove(debug_enable); + debugfs_remove(debug_threshold); + debugfs_remove(debug_sample_width); + debugfs_remove(debug_sample_window); + debugfs_remove(debug_max); + debugfs_remove(debug_count); + debugfs_remove(debug_sample); + debugfs_remove(debug_dir); +} + +/** + * detector_init - Standard module initialization code + */ +static int detector_init(void) +{ + int ret = -ENOMEM; + + pr_info(BANNER "version %s\n", VERSION); + + ret = init_stats(); + if (0 != ret) + goto out; + + ret = init_debugfs(); + if (0 != ret) + goto err_stats; + + if (enabled) + ret = start_kthread(); + + goto out; + +err_stats: + ring_buffer_free(ring_buffer); +out: + return ret; + +} + +/** + * detector_exit - Standard module cleanup code + */ +static void detector_exit(void) +{ + int err; + + if (enabled) { + enabled = 0; + err = stop_kthread(); + if (err) + pr_err(BANNER "cannot stop kthread\n"); + } + + free_debugfs(); + ring_buffer_free(ring_buffer); /* free up the ring buffer */ + +} + +module_init(detector_init); +module_exit(detector_exit); diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index fb266745f824..d42fc084d66f 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1155,15 +1155,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) struct sg_mapping_iter *sg_miter = &host->sg_miter; struct variant_data *variant = host->variant; void __iomem *base = host->base; - unsigned long flags; u32 status; status = readl(base + MMCISTATUS); dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); - local_irq_save(flags); - do { unsigned int remain, len; char *buffer; @@ -1203,8 +1200,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) sg_miter_stop(sg_miter); - local_irq_restore(flags); - /* * If we have less than the fifo 'half-full' threshold to transfer, * trigger a PIO interrupt as soon as any data is available. diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index bec8a307f8cd..acd8c620ec43 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2691,6 +2691,31 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id) return isr ? IRQ_HANDLED : IRQ_NONE; } +#ifdef CONFIG_PREEMPT_RT_BASE +static irqreturn_t sdhci_rt_irq(int irq, void *dev_id) +{ + irqreturn_t ret; + + local_bh_disable(); + ret = sdhci_irq(irq, dev_id); + local_bh_enable(); + if (ret == IRQ_WAKE_THREAD) + ret = sdhci_thread_irq(irq, dev_id); + return ret; +} +#endif + +static int sdhci_req_irq(struct sdhci_host *host) +{ +#ifdef CONFIG_PREEMPT_RT_BASE + return request_threaded_irq(host->irq, NULL, sdhci_rt_irq, + IRQF_SHARED, mmc_hostname(host->mmc), host); +#else + return request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq, + IRQF_SHARED, mmc_hostname(host->mmc), host); +#endif +} + /*****************************************************************************\ * * * Suspend/resume * @@ -2758,9 +2783,7 @@ int sdhci_resume_host(struct sdhci_host *host) } if (!device_may_wakeup(mmc_dev(host->mmc))) { - ret = request_threaded_irq(host->irq, sdhci_irq, - sdhci_thread_irq, IRQF_SHARED, - mmc_hostname(host->mmc), host); + ret = sdhci_req_irq(host); if (ret) return ret; } else { @@ -3421,8 +3444,7 @@ int sdhci_add_host(struct sdhci_host *host) sdhci_init(host, 0); - ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq, - IRQF_SHARED, mmc_hostname(mmc), host); + ret = sdhci_req_irq(host); if (ret) { pr_err("%s: Failed to request IRQ %d: %d\n", mmc_hostname(mmc), host->irq, ret); diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 41095ebad97f..b0a0cb22aec4 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) { struct vortex_private *vp = netdev_priv(dev); unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -1916,12 +1916,12 @@ static void vortex_tx_timeout(struct net_device *dev) * Block interrupts because vortex_interrupt does a bare spin_lock() */ unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev); else vortex_interrupt(dev->irq, dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } } diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 932bd1862f7a..a50e8d181450 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2213,11 +2213,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, } tpd_req = atl1c_cal_tpd_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) { - if (netif_msg_pktdata(adapter)) - dev_info(&adapter->pdev->dev, "tx locked\n"); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&adapter->tx_lock, flags); if (atl1c_tpd_avail(adapter, type) < tpd_req) { /* no enough descriptor, just stop queue */ diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 59a03a193e83..734f7a7ad2c3 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1880,8 +1880,7 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } tpd_req = atl1e_cal_tdp_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) - return NETDEV_TX_LOCKED; + spin_lock_irqsave(&adapter->tx_lock, flags); if (atl1e_tpd_avail(adapter) < tpd_req) { /* no enough descriptor, just stop queue */ diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 526ea74e82d9..86f467a2c485 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1664,8 +1664,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, struct cmdQ *q = &sge->cmdQ[qid]; unsigned int credits, pidx, genbit, count, use_sched_skb = 0; - if (!spin_trylock(&q->lock)) - return NETDEV_TX_LOCKED; + spin_lock(&q->lock); reclaim_completed_tx(sge, q); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4ee080d49bc0..e616b71d5014 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1540,7 +1540,7 @@ static int gfar_suspend(struct device *dev) if (netif_running(ndev)) { - local_irq_save(flags); + local_irq_save_nort(flags); lock_tx_qs(priv); gfar_halt_nodisable(priv); @@ -1556,7 +1556,7 @@ static int gfar_suspend(struct device *dev) gfar_write(®s->maccfg1, tempval); unlock_tx_qs(priv); - local_irq_restore(flags); + local_irq_restore_nort(flags); disable_napi(priv); @@ -1598,7 +1598,7 @@ static int gfar_resume(struct device *dev) /* Disable Magic Packet mode, in case something * else woke us up. */ - local_irq_save(flags); + local_irq_save_nort(flags); lock_tx_qs(priv); tempval = gfar_read(®s->maccfg2); @@ -1608,7 +1608,7 @@ static int gfar_resume(struct device *dev) gfar_start(priv); unlock_tx_qs(priv); - local_irq_restore(flags); + local_irq_restore_nort(flags); netif_device_attach(ndev); @@ -3418,14 +3418,14 @@ static irqreturn_t gfar_error(int irq, void *grp_id) dev->stats.tx_dropped++; atomic64_inc(&priv->extra_stats.tx_underrun); - local_irq_save(flags); + local_irq_save_nort(flags); lock_tx_qs(priv); /* Reactivate the Tx Queues */ gfar_write(®s->tstat, gfargrp->tstat); unlock_tx_qs(priv); - local_irq_restore(flags); + local_irq_restore_nort(flags); } netif_dbg(priv, tx_err, dev, "Transmit Error\n"); } diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 1e0f72b65459..bb5ced2b5194 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -4084,12 +4084,7 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) [skb->priority & (MAX_TX_FIFOS - 1)]; fifo = &mac_control->fifos[queue]; - if (do_spin_lock) - spin_lock_irqsave(&fifo->tx_lock, flags); - else { - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags))) - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&fifo->tx_lock, flags); if (sp->config.multiq) { if (__netif_subqueue_stopped(dev, fifo->fifo_no)) { diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3b98b263bad0..ca4add749410 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2137,10 +2137,8 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring; unsigned long flags; - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) { - /* Collision - tell upper layer to requeue */ - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&tx_ring->tx_lock, flags); + if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) { netif_stop_queue(netdev); spin_unlock_irqrestore(&tx_ring->tx_lock, flags); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 78bb4ceb1cdd..b5156963ca07 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2229,7 +2229,7 @@ static void rtl8139_poll_controller(struct net_device *dev) struct rtl8139_private *tp = netdev_priv(dev); const int irq = tp->pci_dev->irq; - disable_irq(irq); + disable_irq_nosync(irq); rtl8139_interrupt(irq, dev); enable_irq(irq); } diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index a9cac8413e49..bd70b848174d 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1629,13 +1629,8 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, unsigned long flags; ENTER; - local_irq_save(flags); - if (!spin_trylock(&priv->tx_lock)) { - local_irq_restore(flags); - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n", - BDX_DRV_NAME, ndev->name); - return NETDEV_TX_LOCKED; - } + + spin_lock_irqsave(&priv->tx_lock, flags); /* build tx descriptor */ BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */ diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index dac7a0d9bb46..e17f600ac9ac 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) unsigned long flags; int add_num = 1; - local_irq_save(flags); - if (!spin_trylock(&rnet->tx_lock)) { - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&rnet->tx_lock, flags); if (is_multicast_ether_addr(eth->h_dest)) add_num = nets[rnet->mport->id].nact; diff --git a/drivers/net/wireless/orinoco/orinoco_usb.c b/drivers/net/wireless/orinoco/orinoco_usb.c index 91f05442de28..8fb1c92724df 100644 --- a/drivers/net/wireless/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/orinoco/orinoco_usb.c @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, while (!ctx->done.done && msecs--) udelay(1000); } else { - wait_event_interruptible(ctx->done.wait, + swait_event_interruptible(ctx->done.wait, ctx->done.done); } break; diff --git a/drivers/pci/access.c b/drivers/pci/access.c index d9b64a175990..1d0f6f8d0a54 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -521,7 +521,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev) WARN_ON(!dev->block_cfg_access); dev->block_cfg_access = 0; - wake_up_all(&pci_cfg_wait); + wake_up_all_locked(&pci_cfg_wait); raw_spin_unlock_irqrestore(&pci_lock, flags); } EXPORT_SYMBOL_GPL(pci_cfg_access_unlock); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index ec193a8357d7..d6b6dde64fb9 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1287,7 +1287,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu) struct sk_buff *skb; #ifdef CONFIG_SMP struct fcoe_percpu_s *p0; - unsigned targ_cpu = get_cpu(); + unsigned targ_cpu = get_cpu_light(); #endif /* CONFIG_SMP */ FCOE_DBG("Destroying receive thread for CPU %d\n", cpu); @@ -1343,7 +1343,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu) kfree_skb(skb); spin_unlock_bh(&p->fcoe_rx_list.lock); } - put_cpu(); + put_cpu_light(); #else /* * This a non-SMP scenario where the singular Rx thread is @@ -1567,11 +1567,11 @@ err2: static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) { struct fcoe_percpu_s *fps; - int rc; + int rc, cpu = get_cpu_light(); - fps = &get_cpu_var(fcoe_percpu); + fps = &per_cpu(fcoe_percpu, cpu); rc = fcoe_get_paged_crc_eof(skb, tlen, fps); - put_cpu_var(fcoe_percpu); + put_cpu_light(); return rc; } @@ -1767,11 +1767,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, return 0; } - stats = per_cpu_ptr(lport->stats, get_cpu()); + stats = per_cpu_ptr(lport->stats, get_cpu_light()); stats->InvalidCRCCount++; if (stats->InvalidCRCCount < 5) printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); - put_cpu(); + put_cpu_light(); return -EINVAL; } @@ -1847,13 +1847,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) goto drop; if (!fcoe_filter_frames(lport, fp)) { - put_cpu(); + put_cpu_light(); fc_exch_recv(lport, fp); return; } drop: stats->ErrorFrames++; - put_cpu(); + put_cpu_light(); kfree_skb(skb); } diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 34a1b1f333b4..d91131210695 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -831,7 +831,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) INIT_LIST_HEAD(&del_list); - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; @@ -867,7 +867,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) sel_time = fcf->time; } } - put_cpu(); + put_cpu_light(); list_for_each_entry_safe(fcf, next, &del_list, list) { /* Removes fcf from current list */ diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 30f9ef0c0d4f..6c686bc01a82 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, } memset(ep, 0, sizeof(*ep)); - cpu = get_cpu(); + cpu = get_cpu_light(); pool = per_cpu_ptr(mp->pool, cpu); spin_lock_bh(&pool->lock); - put_cpu(); + put_cpu_light(); /* peek cache of free slot */ if (pool->left != FC_XID_UNKNOWN) { diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 9c706d8c1441..d968ffc79c08 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) /* TODO: audit callers to ensure they are ready for qc_issue to * unconditionally re-enable interrupts */ - local_irq_save(flags); + local_irq_save_nort(flags); spin_unlock(ap->lock); /* If the device fell off, no sense in issuing commands */ @@ -255,7 +255,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) out: spin_lock(ap->lock); - local_irq_restore(flags); + local_irq_restore_nort(flags); return ret; } diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index fee9eb7c8a60..b42d4adc42dc 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp) { unsigned long flags; struct qla_hw_data *ha = rsp->hw; - local_irq_save(flags); + local_irq_save_nort(flags); if (IS_P3P_TYPE(ha)) qla82xx_poll(0, rsp); else ha->isp_ops->intr_handler(0, rsp); - local_irq_restore(flags); + local_irq_restore_nort(flags); } static inline uint8_t * diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 9ea3d9d49ffc..9e68706ae5e2 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,6 +29,7 @@ #include <linux/pm.h> #include <linux/thermal.h> #include <linux/debugfs.h> +#include <linux/work-simple.h> #include <asm/cpu_device_id.h> #include <asm/mce.h> @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) } } -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +static void platform_thermal_notify_work(struct swork_event *event) { unsigned long flags; int cpu = smp_processor_id(); @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) pkg_work_scheduled[phy_id]) { disable_pkg_thres_interrupt(); spin_unlock_irqrestore(&pkg_work_lock, flags); - return -EINVAL; + return; } pkg_work_scheduled[phy_id] = 1; spin_unlock_irqrestore(&pkg_work_lock, flags); @@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) schedule_delayed_work_on(cpu, &per_cpu(pkg_temp_thermal_threshold_work, cpu), msecs_to_jiffies(notify_delay_ms)); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static struct swork_event notify_work; + +static int thermal_notify_work_init(void) +{ + int err; + + err = swork_get(); + if (err) + return err; + + INIT_SWORK(¬ify_work, platform_thermal_notify_work); return 0; } +static void thermal_notify_work_cleanup(void) +{ + swork_put(); +} + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + swork_queue(¬ify_work); + return 0; +} + +#else /* !CONFIG_PREEMPT_RT_FULL */ + +static int thermal_notify_work_init(void) { return 0; } + +static void thermal_notify_work_cleanup(void) { } + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + platform_thermal_notify_work(NULL); + + return 0; +} +#endif /* CONFIG_PREEMPT_RT_FULL */ + static int find_siblings_cpu(int cpu) { int i; @@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; + if (!thermal_notify_work_init()) + return -ENODEV; + spin_lock_init(&pkg_work_lock); platform_thermal_package_notify = pkg_temp_thermal_platform_thermal_notify; @@ -608,7 +651,7 @@ err_ret: kfree(pkg_work_scheduled); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; - + thermal_notify_work_cleanup(); return -ENODEV; } @@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void) mutex_unlock(&phy_dev_list_mutex); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; + thermal_notify_work_cleanup(); for_each_online_cpu(i) cancel_delayed_work_sync( &per_cpu(pkg_temp_thermal_threshold_work, i)); diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 4506e405c8f3..822a2e1975aa 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -36,6 +36,7 @@ #include <linux/nmi.h> #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/kdb.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> #ifdef CONFIG_SPARC @@ -80,7 +81,16 @@ static unsigned int skip_txen_test; /* force skip of txen test at init time */ #define DEBUG_INTR(fmt...) do { } while (0) #endif -#define PASS_LIMIT 512 +/* + * On -rt we can have a more delays, and legitimately + * so - so don't drop work spuriously and spam the + * syslog: + */ +#ifdef CONFIG_PREEMPT_RT_FULL +# define PASS_LIMIT 1000000 +#else +# define PASS_LIMIT 512 +#endif #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) @@ -3364,7 +3374,7 @@ static void serial8250_console_write(struct uart_8250_port *up, const char *s, if (port->sysrq) locked = 0; - else if (oops_in_progress) + else if (oops_in_progress || in_kdb_printk()) locked = spin_trylock_irqsave(&port->lock, flags); else spin_lock_irqsave(&port->lock, flags); diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 763eb20fe321..841c42ae19aa 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2000,13 +2000,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) clk_enable(uap->clk); - local_irq_save(flags); + /* + * local_irq_save(flags); + * + * This local_irq_save() is nonsense. If we come in via sysrq + * handling then interrupts are already disabled. Aside of + * that the port.sysrq check is racy on SMP regardless. + */ if (uap->port.sysrq) locked = 0; else if (oops_in_progress) - locked = spin_trylock(&uap->port.lock); + locked = spin_trylock_irqsave(&uap->port.lock, flags); else - spin_lock(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* * First save the CR then disable the interrupts @@ -2028,8 +2034,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) writew(old_cr, uap->port.membase + UART011_CR); if (locked) - spin_unlock(&uap->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&uap->port.lock, flags); clk_disable(uap->clk); } diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 7f49172ccd86..f9d192f67cbf 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1282,13 +1282,10 @@ serial_omap_console_write(struct console *co, const char *s, pm_runtime_get_sync(up->dev); - local_irq_save(flags); - if (up->port.sysrq) - locked = 0; - else if (oops_in_progress) - locked = spin_trylock(&up->port.lock); + if (up->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&up->port.lock, flags); else - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); /* * First save the IER then disable the interrupts @@ -1317,8 +1314,7 @@ serial_omap_console_write(struct console *co, const char *s, pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } static int __init diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 1c1385e3a824..ab14f8471eaf 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1684,9 +1684,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) * and no one may trigger the above deadlock situation when * running complete() in tasklet. */ - local_irq_save(flags); + local_irq_save_nort(flags); urb->complete(urb); - local_irq_restore(flags); + local_irq_restore_nort(flags); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 6e7be91e6097..f51bb89391bb 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1405,7 +1405,7 @@ static void ffs_data_put(struct ffs_data *ffs) pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || - waitqueue_active(&ffs->ep0req_completion.wait)); + swaitqueue_active(&ffs->ep0req_completion.wait)); kfree(ffs->dev_name); kfree(ffs); } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 2030565c6789..7b6da0fde247 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -345,7 +345,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) spin_unlock_irq (&epdata->dev->lock); if (likely (value == 0)) { - value = wait_event_interruptible (done.wait, done.done); + value = swait_event_interruptible (done.wait, done.done); if (value != 0) { spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { @@ -354,7 +354,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) usb_ep_dequeue (epdata->ep, epdata->req); spin_unlock_irq (&epdata->dev->lock); - wait_event (done.wait, done.done); + swait_event (done.wait, done.done); if (epdata->status == -ECONNRESET) epdata->status = -EINTR; } else { |