diff options
author | Steven Rostedt <srostedt@redhat.com> | 2012-08-03 00:16:45 -0400 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2012-08-03 00:16:45 -0400 |
commit | 660436ca79a9749f3cc66984dae066ced57c9d84 (patch) | |
tree | dc51b638651aaa67d172ac334c1bf06ac1ea855e | |
parent | 0a085d25fb3ae4f738a50c1e089be936e56e2a3a (diff) | |
parent | f351a1d7efda2edd52c23a150b07b8380c47b6c0 (diff) |
Merge tag 'v3.0.39' into v3.0-rt
This is the 3.0.39 stable release
Conflicts:
kernel/hrtimer.c
kernel/time/ntp.c
kernel/time/timekeeping.c
Required fixup:
diff --cc kernel/hrtimer.c
index 363965f,957869f..0000000
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@@ -1597,17 -1407,24 +1611,24 @@@ void hrtimer_peek_ahead_timers(void
local_irq_restore(flags);
}
+#else /* CONFIG_HIGH_RES_TIMERS */
+
+static inline void __hrtimer_peek_ahead_timers(void) { }
+
+#endif /* !CONFIG_HIGH_RES_TIMERS */
+
static void run_hrtimer_softirq(struct softirq_action *h)
{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+ if (cpu_base->clock_was_set) {
+ cpu_base->clock_was_set = 0;
+ clock_was_set();
+ }
+
- hrtimer_peek_ahead_timers();
+ hrtimer_rt_run_pending();
}
-#else /* CONFIG_HIGH_RES_TIMERS */
-
-static inline void __hrtimer_peek_ahead_timers(void) { }
-
-#endif /* !CONFIG_HIGH_RES_TIMERS */
-
/*
* Called from timer softirq every jiffy, expire hrtimers:
*
diff --cc kernel/time/ntp.c
index 419cbaa,61fc450..0000000
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@@ -389,24 -406,7 +406,6 @@@ int second_overflow(unsigned long secs
break;
}
- write_seqcount_end(&xtime_seq);
- raw_spin_unlock(&xtime_lock);
--
- return res;
- }
-
- /*
- * this routine handles the overflow of the microsecond field
- *
- * The tricky bits of code to handle the accurate clock support
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
- * They were originally developed for SUN and DEC kernels.
- * All the kudos should go to Dave for this stuff.
- */
- void second_overflow(void)
- {
- s64 delta;
-
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
if (time_maxerror > NTP_PHASE_LIMIT) {
diff --cc kernel/time/timekeeping.c
index 6ac6438,678ae31..0000000
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@@ -376,14 -395,9 +396,10 @@@ int do_settimeofday(const struct timesp
xtime = *tv;
- timekeeper.ntp_error = 0;
- ntp_clear();
-
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
- timekeeper.mult);
+ timekeeping_update(true);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock_irqrestore(&xtime_lock, flags);
/* signal hrtimers about time change */
clock_was_set();
@@@ -415,14 -428,9 +431,10 @@@ int timekeeping_inject_offset(struct ti
xtime = timespec_add(xtime, *ts);
wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
- timekeeper.ntp_error = 0;
- ntp_clear();
-
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
- timekeeper.mult);
+ timekeeping_update(true);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock_irqrestore(&xtime_lock, flags);
/* signal hrtimers about time change */
clock_was_set();
@@@ -596,10 -603,10 +608,11 @@@ void __init timekeeping_init(void
}
set_normalized_timespec(&wall_to_monotonic,
-boot.tv_sec, -boot.tv_nsec);
+ update_rt_offset();
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock_irqrestore(&xtime_lock, flags);
}
/* time in seconds when suspend began */
@@@ -646,13 -658,9 +665,10 @@@ void timekeeping_inject_sleeptime(struc
__timekeeping_inject_sleeptime(delta);
- timekeeper.ntp_error = 0;
- ntp_clear();
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
- timekeeper.mult);
+ timekeeping_update(true);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock_irqrestore(&xtime_lock, flags);
/* signal hrtimers about time change */
clock_was_set();
@@@ -686,8 -693,8 +702,9 @@@ static void timekeeping_resume(void
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0;
timekeeping_suspended = 0;
+ timekeeping_update(false);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock_irqrestore(&xtime_lock, flags);
touch_softlockup_watchdog();
@@@ -1111,9 -1124,43 +1136,43 @@@ void get_xtime_and_monotonic_and_sleep_
*xtim = xtime;
*wtom = wall_to_monotonic;
*sleep = total_sleep_time;
- } while (read_seqretry(&xtime_lock, seq));
+ } while (read_seqcount_retry(&xtime_seq, seq));
}
+ #ifdef CONFIG_HIGH_RES_TIMERS
+ /**
+ * ktime_get_update_offsets - hrtimer helper
+ * @real: pointer to storage for monotonic -> realtime offset
+ * @_boot: pointer to storage for monotonic -> boottime offset
+ *
+ * Returns current monotonic time and updates the offsets
+ * Called from hrtimer_interupt() or retrigger_next_event()
+ */
+ ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot)
+ {
+ ktime_t now;
+ unsigned int seq;
+ u64 secs, nsecs;
+
+ do {
- seq = read_seqbegin(&xtime_lock);
++ seq = read_seqcount_begin(&xtime_seq);
+
+ secs = xtime.tv_sec;
+ nsecs = xtime.tv_nsec;
+ nsecs += timekeeping_get_ns();
+ /* If arch requires, add in gettimeoffset() */
+ nsecs += arch_gettimeoffset();
+
+ *real = offs_real;
+ *boot = offs_boot;
- } while (read_seqretry(&xtime_lock, seq));
++ } while (read_seqcount_retry(&xtime_seq, seq));
+
+ now = ktime_add_ns(ktime_set(secs, 0), nsecs);
+ now = ktime_sub(now, *real);
+ return now;
+ }
+ #endif
+
/**
* ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
*/
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
122 files changed, 1532 insertions, 693 deletions
diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt index 21fd05c28e73..e1f856b1b456 100644 --- a/Documentation/stable_kernel_rules.txt +++ b/Documentation/stable_kernel_rules.txt @@ -12,6 +12,12 @@ Rules on what kind of patches are accepted, and which ones are not, into the marked CONFIG_BROKEN), an oops, a hang, data corruption, a real security issue, or some "oh, that's not good" issue. In short, something critical. + - Serious issues as reported by a user of a distribution kernel may also + be considered if they fix a notable performance or interactivity issue. + As these fixes are not as obvious and have a higher risk of a subtle + regression they should only be submitted by a distribution kernel + maintainer and include an addendum linking to a bugzilla entry if it + exists and additional information on the user-visible impact. - New device IDs and quirks are also accepted. - No "theoretical race condition" issues, unless an explanation of how the race can be exploited is also provided. diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 12cecc83cd91..4a37c4759cd2 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -379,10 +379,10 @@ EVENT_PROCESS: # To closer match vmstat scanning statistics, only count isolate_both # and isolate_inactive as scanning. isolate_active is rotation - # isolate_inactive == 0 - # isolate_active == 1 - # isolate_both == 2 - if ($isolate_mode != 1) { + # isolate_inactive == 1 + # isolate_active == 2 + # isolate_both == 3 + if ($isolate_mode != 2) { $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; } $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 0 -SUBLEVEL = 36 +SUBLEVEL = 39 EXTRAVERSION = NAME = Sneaky Weasel diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5d4f038929f2..6ca013d39aff 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -448,9 +448,7 @@ static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent); static void ipi_timer(void) { struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent); - irq_enter(); evt->event_handler(evt); - irq_exit(); } #ifdef CONFIG_LOCAL_TIMERS @@ -461,7 +459,9 @@ asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) if (local_timer_ack()) { __inc_irq_stat(cpu, local_timer_irqs); + irq_enter(); ipi_timer(); + irq_exit(); } set_irq_regs(old_regs); @@ -571,7 +571,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) switch (ipinr) { case IPI_TIMER: + irq_enter(); ipi_timer(); + irq_exit(); break; case IPI_RESCHEDULE: @@ -579,15 +581,21 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CALL_FUNC: + irq_enter(); generic_smp_call_function_interrupt(); + irq_exit(); break; case IPI_CALL_FUNC_SINGLE: + irq_enter(); generic_smp_call_function_single_interrupt(); + irq_exit(); break; case IPI_CPU_STOP: + irq_enter(); ipi_cpu_stop(cpu); + irq_exit(); break; default: diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c index e8f2be2d67f2..df14954aa1cc 100644 --- a/arch/arm/plat-samsung/adc.c +++ b/arch/arm/plat-samsung/adc.c @@ -143,11 +143,13 @@ int s3c_adc_start(struct s3c_adc_client *client, return -EINVAL; } - if (client->is_ts && adc->ts_pend) - return -EAGAIN; - spin_lock_irqsave(&adc->lock, flags); + if (client->is_ts && adc->ts_pend) { + spin_unlock_irqrestore(&adc->lock, flags); + return -EAGAIN; + } + client->channel = channel; client->nr_samples = nr_samples; diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 97f8bf6639e7..adda036bba17 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -60,6 +60,8 @@ struct thread_info { register struct thread_info *__current_thread_info __asm__("$28"); #define current_thread_info() __current_thread_info +#endif /* !__ASSEMBLY__ */ + /* thread information allocation */ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) #define THREAD_SIZE_ORDER (1) @@ -97,8 +99,6 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define free_thread_info(info) kfree(info) -#endif /* !__ASSEMBLY__ */ - #define PREEMPT_ACTIVE 0x10000000 /* diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index a81176f44c74..be281c69f6ca 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include <asm/asm-offsets.h> #include <asm/page.h> +#include <asm/thread_info.h> #include <asm-generic/vmlinux.lds.h> #undef mips @@ -73,7 +74,7 @@ SECTIONS .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ - INIT_TASK_DATA(PAGE_SIZE) + INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 42541bbcc7fa..ace178441918 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -975,7 +975,7 @@ static int cpu_cmd(void) /* print cpus waiting or in xmon */ printf("cpus stopped:"); count = 0; - for (cpu = 0; cpu < NR_CPUS; ++cpu) { + for_each_possible_cpu(cpu) { if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { if (count == 0) printf(" %x", cpu); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 71cc3800712c..c5d941f08bac 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ +#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4558f0d0822d..479d03c9c4c3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -416,12 +416,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, return 0; } - if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { + if (intsrc->source_irq == 0) { if (acpi_skip_timer_override) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + printk(PREFIX "BIOS IRQ0 override ignored.\n"); return 0; } - if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { + + if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity + && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); } @@ -1327,17 +1329,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) } /* - * Force ignoring BIOS IRQ0 pin2 override + * Force ignoring BIOS IRQ0 override */ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) { - /* - * The ati_ixp4x0_rev() early PCI quirk should have set - * the acpi_skip_timer_override flag already: - */ if (!acpi_skip_timer_override) { - WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); - pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", + pr_notice("%s detected: Ignoring BIOS IRQ0 override\n", d->ident); acpi_skip_timer_override = 1; } @@ -1431,7 +1428,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * is enabled. This input is incorrectly designated the * ISA IRQ 0 via an interrupt source override even though * it is wired to the output of the master 8259A and INTIN0 - * is not connected at all. Force ignoring BIOS IRQ0 pin2 + * is not connected at all. Force ignoring BIOS IRQ0 * override in that cases. */ { @@ -1466,6 +1463,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "FUJITSU SIEMENS", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), + }, + }, {} }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index c7f64e6f537a..ea6106c5ef70 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { - { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, + { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d4a705f22831..89d68777f73a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -452,6 +452,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), }, }, + { /* Handle problems with rebooting on the Precision M6600. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 990", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), + }, + }, { } }; diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index a43fa1a57d57..1502c50273b5 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -36,6 +36,7 @@ #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" #define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 static DEFINE_MUTEX(isolated_cpus_lock); +static DEFINE_MUTEX(round_robin_lock); static unsigned long power_saving_mwait_eax; @@ -107,7 +108,7 @@ static void round_robin_cpu(unsigned int tsk_index) if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) return; - mutex_lock(&isolated_cpus_lock); + mutex_lock(&round_robin_lock); cpumask_clear(tmp); for_each_cpu(cpu, pad_busy_cpus) cpumask_or(tmp, tmp, topology_thread_cpumask(cpu)); @@ -116,7 +117,7 @@ static void round_robin_cpu(unsigned int tsk_index) if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { - mutex_unlock(&isolated_cpus_lock); + mutex_unlock(&round_robin_lock); return; } for_each_cpu(cpu, tmp) { @@ -131,7 +132,7 @@ static void round_robin_cpu(unsigned int tsk_index) tsk_in_cpu[tsk_index] = preferred_cpu; cpumask_set_cpu(preferred_cpu, pad_busy_cpus); cpu_weight[preferred_cpu]++; - mutex_unlock(&isolated_cpus_lock); + mutex_unlock(&round_robin_lock); set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); } diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 77255f250dbb..0364b05fb7a2 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -173,7 +173,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp) { int result = 0; - if (!strncmp(val, "enable", strlen("enable") - 1)) { + if (!strncmp(val, "enable", strlen("enable"))) { result = acpi_debug_trace(trace_method_name, trace_debug_level, trace_debug_layer, 0); if (result) @@ -181,7 +181,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp) goto exit; } - if (!strncmp(val, "disable", strlen("disable") - 1)) { + if (!strncmp(val, "disable", strlen("disable"))) { int name = 0; result = acpi_debug_trace((char *)&name, trace_debug_level, trace_debug_layer, 0); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 45d7c8fc73bd..5fb6aaed2adc 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -224,13 +224,48 @@ int memory_isolate_notify(unsigned long val, void *v) } /* + * The probe routines leave the pages reserved, just as the bootmem code does. + * Make sure they're still that way. + */ +static bool pages_correctly_reserved(unsigned long start_pfn, + unsigned long nr_pages) +{ + int i, j; + struct page *page; + unsigned long pfn = start_pfn; + + /* + * memmap between sections is not contiguous except with + * SPARSEMEM_VMEMMAP. We lookup the page once per section + * and assume memmap is contiguous within each section + */ + for (i = 0; i < sections_per_block; i++, pfn += PAGES_PER_SECTION) { + if (WARN_ON_ONCE(!pfn_valid(pfn))) + return false; + page = pfn_to_page(pfn); + + for (j = 0; j < PAGES_PER_SECTION; j++) { + if (PageReserved(page + j)) + continue; + + printk(KERN_WARNING "section number %ld page number %d " + "not reserved, was it already online?\n", + pfn_to_section_nr(pfn), j); + + return false; + } + } + + return true; +} + +/* * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is * OK to have direct references to sparsemem variables in here. */ static int memory_block_action(unsigned long phys_index, unsigned long action) { - int i; unsigned long start_pfn, start_paddr; unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; struct page *first_page; @@ -238,26 +273,13 @@ memory_block_action(unsigned long phys_index, unsigned long action) first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT); - /* - * The probe routines leave the pages reserved, just - * as the bootmem code does. Make sure they're still - * that way. - */ - if (action == MEM_ONLINE) { - for (i = 0; i < nr_pages; i++) { - if (PageReserved(first_page+i)) - continue; - - printk(KERN_WARNING "section number %ld page number %d " - "not reserved, was it already online?\n", - phys_index, i); - return -EBUSY; - } - } - switch (action) { case MEM_ONLINE: start_pfn = page_to_pfn(first_page); + + if (!pages_correctly_reserved(start_pfn, nr_pages)) + return -EBUSY; + ret = online_pages(start_pfn, nr_pages); break; case MEM_OFFLINE: diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 031ca720d926..afa8463b2be7 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -513,6 +513,44 @@ static void process_page(unsigned long data) } } +struct mm_plug_cb { + struct blk_plug_cb cb; + struct cardinfo *card; +}; + +static void mm_unplug(struct blk_plug_cb *cb) +{ + struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb); + + spin_lock_irq(&mmcb->card->lock); + activate(mmcb->card); + spin_unlock_irq(&mmcb->card->lock); + kfree(mmcb); +} + +static int mm_check_plugged(struct cardinfo *card) +{ + struct blk_plug *plug = current->plug; + struct mm_plug_cb *mmcb; + + if (!plug) + return 0; + + list_for_each_entry(mmcb, &plug->cb_list, cb.list) { + if (mmcb->cb.callback == mm_unplug && mmcb->card == card) + return 1; + } + /* Not currently on the callback list */ + mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC); + if (!mmcb) + return 0; + + mmcb->card = card; + mmcb->cb.callback = mm_unplug; + list_add(&mmcb->cb.list, &plug->cb_list); + return 1; +} + static int mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; @@ -523,6 +561,8 @@ static int mm_make_request(struct request_queue *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; + if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card)) + activate(card); spin_unlock_irq(&card->lock); return 0; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 1bbb85b9ce4e..a303b613a170 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -584,7 +584,7 @@ static bool drm_monitor_supports_rb(struct edid *edid) { if (edid->revision >= 4) { - bool ret; + bool ret = false; drm_for_each_detailed_block((u8 *)edid, is_rb, &ret); return ret; } diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index bc7dcaa1c689..5ad0b5124b75 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -739,8 +739,11 @@ static void i915_restore_display(struct drm_device *dev) if (HAS_PCH_SPLIT(dev)) { I915_WRITE(BLC_PWM_PCH_CTL1, dev_priv->saveBLC_PWM_CTL); I915_WRITE(BLC_PWM_PCH_CTL2, dev_priv->saveBLC_PWM_CTL2); - I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL); + /* NOTE: BLC_PWM_CPU_CTL must be written after BLC_PWM_CPU_CTL2; + * otherwise we get blank eDP screen after S3 on some machines + */ I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->saveBLC_CPU_PWM_CTL2); + I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL); I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS); I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS); I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR); diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 39aee6d4daf8..ea71f78fb552 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -487,7 +487,7 @@ int nouveau_fbcon_init(struct drm_device *dev) nfbdev->helper.funcs = &nouveau_fbcon_helper_funcs; ret = drm_fb_helper_init(dev, &nfbdev->helper, - nv_two_heads(dev) ? 2 : 1, 4); + dev->mode_config.num_crtc, 4); if (ret) { kfree(nfbdev); return ret; diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 4c0743660e9c..d99aa8484777 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -215,7 +215,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) int i; if (send_command(cmd) || send_argument(key)) { - pr_warn("%s: read arg fail\n", key); + pr_warn("%.4s: read arg fail\n", key); return -EIO; } @@ -223,7 +223,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) for (i = 0; i < len; i++) { if (__wait_status(0x05)) { - pr_warn("%s: read data fail\n", key); + pr_warn("%.4s: read data fail\n", key); return -EIO; } buffer[i] = inb(APPLESMC_DATA_PORT); diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 3cf235385f89..252defdd2f13 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -709,7 +709,7 @@ static void __cpuinit get_core_online(unsigned int cpu) * sensors. We check this bit only, all the early CPUs * without thermal sensors will be filtered out. */ - if (!cpu_has(c, X86_FEATURE_DTS)) + if (!cpu_has(c, X86_FEATURE_DTHERM)) return; if (!pdev) { diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 5f5247750430..b358c87c3bca 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -2057,7 +2057,7 @@ static void __devinit it87_init_device(struct platform_device *pdev) /* Start monitoring */ it87_write_value(data, IT87_REG_CONFIG, - (it87_read_value(data, IT87_REG_CONFIG) & 0x36) + (it87_read_value(data, IT87_REG_CONFIG) & 0x3e) | (update_vbat ? 0x41 : 0x01)); } diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 56abf3d0e911..92c7be14bd4a 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -142,6 +142,7 @@ static const struct xpad_device { { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX }, { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, + { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX }, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9bfd057be686..42ef54f9260c 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1210,7 +1210,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * We need to dec pending if this was a write. */ if (rw == WRITE) { - if (!(bio->bi_rw & REQ_FLUSH)) + if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) dm_rh_dec(ms->rh, map_context->ll); return error; } diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 7771ed212182..69732e03eb34 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) return; } + if (bio->bi_rw & REQ_DISCARD) + return; + /* We must inform the log that the sync count has changed. */ log->type->set_region_sync(log, region, 0); @@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) struct bio *bio; for (bio = bios->head; bio; bio = bio->bi_next) { - if (bio->bi_rw & REQ_FLUSH) + if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)) continue; rh_inc(rh, dm_rh_bio_to_region(rh, bio)); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0d6c42f70a35..b65a7c50eb63 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1858,6 +1858,12 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, /* want to reconstruct this device */ rb2 = r10_bio; sect = raid10_find_virt(conf, sector_nr, i); + if (sect >= mddev->resync_max_sectors) { + /* last stripe is not complete - don't + * try to recover this sector. + */ + continue; + } /* Unless we are doing a full sync, we only need * to recover the block if it is set in the bitmap */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8f6f5d796409..02e786ecb4ee 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -199,12 +199,14 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) BUG_ON(!list_empty(&sh->lru)); BUG_ON(atomic_read(&conf->active_stripes)==0); if (test_bit(STRIPE_HANDLE, &sh->state)) { - if (test_bit(STRIPE_DELAYED, &sh->state)) + if (test_bit(STRIPE_DELAYED, &sh->state) && + !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) list_add_tail(&sh->lru, &conf->delayed_list); else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && sh->bm_seq - conf->seq_write > 0) list_add_tail(&sh->lru, &conf->bitmap_list); else { + clear_bit(STRIPE_DELAYED, &sh->state); clear_bit(STRIPE_BIT_DELAY, &sh->state); list_add_tail(&sh->lru, &conf->handle_list); } @@ -3848,7 +3850,6 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) raid_bio->bi_next = (void*)rdev; align_bi->bi_bdev = rdev->bdev; align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); - align_bi->bi_sector += rdev->data_offset; if (!bio_fits_rdev(align_bi)) { /* too big in some way */ @@ -3857,6 +3858,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) return 0; } + /* No reshape active, so we can trust rdev->data_offset */ + align_bi->bi_sector += rdev->data_offset; + spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, conf->quiesce == 0, diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c index f73287775953..d5cda35dcc7e 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.c +++ b/drivers/media/dvb/dvb-core/dvbdev.c @@ -243,6 +243,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, if (minor == MAX_DVB_MINORS) { kfree(dvbdevfops); kfree(dvbdev); + up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); return -EINVAL; } diff --git a/drivers/media/dvb/siano/smsusb.c b/drivers/media/dvb/siano/smsusb.c index d755407fb4f9..4e5719ee6eb1 100644 --- a/drivers/media/dvb/siano/smsusb.c +++ b/drivers/media/dvb/siano/smsusb.c @@ -543,6 +543,8 @@ static const struct usb_device_id smsusb_id_table[] __devinitconst = { .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, { USB_DEVICE(0x2040, 0xc0a0), .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, + { USB_DEVICE(0x2040, 0xf5a0), + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, { } /* Terminating entry */ }; diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index 87ebb4e5b0c3..f5cdc565b3ef 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -102,7 +102,7 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL }; static int cafe_device_ready(struct mtd_info *mtd) { struct cafe_priv *cafe = mtd->priv; - int result = !!(cafe_readl(cafe, NAND_STATUS) | 0x40000000); + int result = !!(cafe_readl(cafe, NAND_STATUS) & 0x40000000); uint32_t irqs = cafe_readl(cafe, NAND_IRQ); cafe_writel(cafe, irqs, NAND_IRQ); diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 357e8c5252a8..63c8048d70b7 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -28,7 +28,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/vmalloc.h> -#include <asm/div64.h> +#include <linux/math64.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> @@ -547,12 +547,6 @@ static char *get_partition_name(int i) return kstrdup(buf, GFP_KERNEL); } -static uint64_t divide(uint64_t n, uint32_t d) -{ - do_div(n, d); - return n; -} - /* * Initialize the nandsim structure. * @@ -581,7 +575,7 @@ static int init_nandsim(struct mtd_info *mtd) ns->geom.oobsz = mtd->oobsize; ns->geom.secsz = mtd->erasesize; ns->geom.pgszoob = ns->geom.pgsz + ns->geom.oobsz; - ns->geom.pgnum = divide(ns->geom.totsz, ns->geom.pgsz); + ns->geom.pgnum = div_u64(ns->geom.totsz, ns->geom.pgsz); ns->geom.totszoob = ns->geom.totsz + (uint64_t)ns->geom.pgnum * ns->geom.oobsz; ns->geom.secshift = ffs(ns->geom.secsz) - 1; ns->geom.pgshift = chip->page_shift; @@ -924,7 +918,7 @@ static int setup_wear_reporting(struct mtd_info *mtd) if (!rptwear) return 0; - wear_eb_count = divide(mtd->size, mtd->erasesize); + wear_eb_count = div_u64(mtd->size, mtd->erasesize); mem = wear_eb_count * sizeof(unsigned long); if (mem / sizeof(unsigned long) != wear_eb_count) { NS_ERR("Too many erase blocks for wear reporting\n"); diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index a485f7fdaf37..2ce5db5e9c60 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -763,6 +763,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, copied = make_tx_wrbs(adapter, skb, wrb_cnt, dummy_wrb); if (copied) { + int gso_segs = skb_shinfo(skb)->gso_segs; + /* record the sent skb in the sent_skb table */ BUG_ON(tx_obj->sent_skb_list[start]); tx_obj->sent_skb_list[start] = skb; @@ -780,8 +782,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, be_txq_notify(adapter, txq->id, wrb_cnt); - be_tx_stats_update(adapter, wrb_cnt, copied, - skb_shinfo(skb)->gso_segs, stopped); + be_tx_stats_update(adapter, wrb_cnt, copied, gso_segs, stopped); } else { txq->head = start; dev_kfree_skb_any(skb); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e6da842cb288..504e201f3fd7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -77,6 +77,7 @@ #include <net/route.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/pkt_sched.h> #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -388,8 +389,6 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) return next; } -#define bond_queue_mapping(skb) (*(u16 *)((skb)->cb)) - /** * bond_dev_queue_xmit - Prepare skb for xmit. * @@ -403,7 +402,9 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, skb->dev = slave_dev; skb->priority = 1; - skb->queue_mapping = bond_queue_mapping(skb); + BUILD_BUG_ON(sizeof(skb->queue_mapping) != + sizeof(qdisc_skb_cb(skb)->bond_queue_mapping)); + skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping; if (unlikely(netpoll_tx_running(slave_dev))) bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); @@ -4240,7 +4241,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) /* * Save the original txq to restore before passing to the driver */ - bond_queue_mapping(skb) = skb->queue_mapping; + qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping; if (unlikely(txq >= dev->real_num_tx_queues)) { do { diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 1f8a8244f23e..1bf80328cc69 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -592,8 +592,8 @@ static void c_can_chip_config(struct net_device *dev) priv->write_reg(priv, &priv->regs->control, CONTROL_ENABLE_AR); - if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY & - CAN_CTRLMODE_LOOPBACK)) { + if ((priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) && + (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)) { /* loopback + silent mode : useful for hot self-test */ priv->write_reg(priv, &priv->regs->control, CONTROL_EIE | CONTROL_SIE | CONTROL_IE | CONTROL_TEST); diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 3fa19c162112..098ff315694f 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -37,6 +37,7 @@ #include <linux/rtnetlink.h> #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> +#include <linux/sched.h> static int numdummies = 1; @@ -186,8 +187,10 @@ static int __init dummy_init_module(void) rtnl_lock(); err = __rtnl_link_register(&dummy_link_ops); - for (i = 0; i < numdummies && !err; i++) + for (i = 0; i < numdummies && !err; i++) { err = dummy_init_one(); + cond_resched(); + } if (err < 0) __rtnl_link_unregister(&dummy_link_ops); rtnl_unlock(); diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c index 8295f2192439..5278e8456b68 100644 --- a/drivers/net/e1000e/82571.c +++ b/drivers/net/e1000e/82571.c @@ -1573,6 +1573,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) ctrl = er32(CTRL); status = er32(STATUS); rxcw = er32(RXCW); + /* SYNCH bit and IV bit are sticky */ + udelay(10); + rxcw = er32(RXCW); if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index f2e31c87d9fe..9d4a2a38545a 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -4206,10 +4206,12 @@ static int sky2_set_features(struct net_device *dev, u32 features) struct sky2_port *sky2 = netdev_priv(dev); u32 changed = dev->features ^ features; - if (changed & NETIF_F_RXCSUM) { - u32 on = features & NETIF_F_RXCSUM; - sky2_write32(sky2->hw, Q_ADDR(rxqaddr[sky2->port], Q_CSR), - on ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); + if ((changed & NETIF_F_RXCSUM) && + !(sky2->hw->flags & SKY2_HW_NEW_LE)) { + sky2_write32(sky2->hw, + Q_ADDR(rxqaddr[sky2->port], Q_CSR), + (features & NETIF_F_RXCSUM) + ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); } if (changed & NETIF_F_RXHASH) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 9cf4e47e55bf..db9b2126fb9a 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -59,6 +59,7 @@ #define USB_PRODUCT_IPHONE_3G 0x1292 #define USB_PRODUCT_IPHONE_3GS 0x1294 #define USB_PRODUCT_IPHONE_4 0x1297 +#define USB_PRODUCT_IPAD 0x129a #define USB_PRODUCT_IPHONE_4_VZW 0x129c #define USB_PRODUCT_IPHONE_4S 0x12a0 @@ -101,6 +102,10 @@ static struct usb_device_id ipheth_table[] = { IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, IPHETH_USBINTF_PROTO) }, { USB_DEVICE_AND_INTERFACE_INFO( + USB_VENDOR_APPLE, USB_PRODUCT_IPAD, + IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, + IPHETH_USBINTF_PROTO) }, + { USB_DEVICE_AND_INTERFACE_INFO( USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW, IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, IPHETH_USBINTF_PROTO) }, diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 7c2f06ed7a1d..9130a5aa1c9e 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -530,7 +530,7 @@ static int __ath9k_hw_init(struct ath_hw *ah) if (ah->config.serialize_regmode == SER_REG_MODE_AUTO) { if (ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCI || - ((AR_SREV_9160(ah) || AR_SREV_9280(ah)) && + ((AR_SREV_9160(ah) || AR_SREV_9280(ah) || AR_SREV_9287(ah)) && !ah->is_pciexpress)) { ah->config.serialize_regmode = SER_REG_MODE_ON; @@ -682,13 +682,25 @@ static void ath9k_hw_init_qos(struct ath_hw *ah) u32 ar9003_get_pll_sqsum_dvc(struct ath_hw *ah) { + struct ath_common *common = ath9k_hw_common(ah); + int i = 0; + REG_CLR_BIT(ah, PLL3, PLL3_DO_MEAS_MASK); udelay(100); REG_SET_BIT(ah, PLL3, PLL3_DO_MEAS_MASK); - while ((REG_READ(ah, PLL4) & PLL4_MEAS_DONE) == 0) + while ((REG_READ(ah, PLL4) & PLL4_MEAS_DONE) == 0) { + udelay(100); + if (WARN_ON_ONCE(i >= 100)) { + ath_err(common, "PLL4 meaurement not done\n"); + break; + } + + i++; + } + return (REG_READ(ah, PLL3) & SQSUM_DVC_MASK) >> 3; } EXPORT_SYMBOL(ar9003_get_pll_sqsum_dvc); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a126a3e238f6..633f96203e2f 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -648,6 +648,15 @@ void ath_hw_pll_work(struct work_struct *work) hw_pll_work.work); u32 pll_sqsum; + /* + * ensure that the PLL WAR is executed only + * after the STA is associated (or) if the + * beaconing had started in interfaces that + * uses beacons. + */ + if (!(sc->sc_flags & SC_OP_BEACONS)) + return; + if (AR_SREV_9485(sc->sc_ah)) { ath9k_ps_wakeup(sc); diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.c b/drivers/net/wireless/mwifiex/11n_rxreorder.c index e5dfdc39a921..d2358cfcbe93 100644 --- a/drivers/net/wireless/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/mwifiex/11n_rxreorder.c @@ -267,7 +267,8 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta, else last_seq = priv->rx_seq[tid]; - if (last_seq >= new_node->start_win) + if (last_seq != MWIFIEX_DEF_11N_RX_SEQ_NUM && + last_seq >= new_node->start_win) new_node->start_win = last_seq + 1; new_node->win_size = win_size; @@ -612,5 +613,5 @@ void mwifiex_11n_cleanup_reorder_tbl(struct mwifiex_private *priv) spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); INIT_LIST_HEAD(&priv->rx_reorder_tbl_ptr); - memset(priv->rx_seq, 0, sizeof(priv->rx_seq)); + mwifiex_reset_11n_rx_seq_num(priv); } diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.h b/drivers/net/wireless/mwifiex/11n_rxreorder.h index f3ca8c8c18f9..7576c2ab93b5 100644 --- a/drivers/net/wireless/mwifiex/11n_rxreorder.h +++ b/drivers/net/wireless/mwifiex/11n_rxreorder.h @@ -37,6 +37,13 @@ #define ADDBA_RSP_STATUS_ACCEPT 0 +#define MWIFIEX_DEF_11N_RX_SEQ_NUM 0xffff + +static inline void mwifiex_reset_11n_rx_seq_num(struct mwifiex_private *priv) +{ + memset(priv->rx_seq, 0xff, sizeof(priv->rx_seq)); +} + int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *, u16 seqNum, u16 tid, u8 *ta, diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c index 91634daec306..2cdb41ac7439 100644 --- a/drivers/net/wireless/mwifiex/wmm.c +++ b/drivers/net/wireless/mwifiex/wmm.c @@ -406,6 +406,8 @@ mwifiex_wmm_init(struct mwifiex_adapter *adapter) priv->add_ba_param.tx_win_size = MWIFIEX_AMPDU_DEF_TXWINSIZE; priv->add_ba_param.rx_win_size = MWIFIEX_AMPDU_DEF_RXWINSIZE; + mwifiex_reset_11n_rx_seq_num(priv); + atomic_set(&priv->wmm.tx_pkts_queued, 0); atomic_set(&priv->wmm.highest_queued_prio, HIGH_PRIO_TID); } diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index 54f0b1345fc9..99fa41614403 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -426,8 +426,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue) case QID_RX: if (!rt2x00queue_full(queue)) rt2x00queue_for_each_entry(queue, - Q_INDEX_DONE, Q_INDEX, + Q_INDEX_DONE, NULL, rt2x00usb_kick_rx_entry); break; diff --git a/drivers/net/wireless/rtl818x/rtl8187/leds.c b/drivers/net/wireless/rtl818x/rtl8187/leds.c index 2e0de2f5f0f9..c2d5b495c179 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/leds.c +++ b/drivers/net/wireless/rtl818x/rtl8187/leds.c @@ -117,7 +117,7 @@ static void rtl8187_led_brightness_set(struct led_classdev *led_dev, radio_on = true; } else if (radio_on) { radio_on = false; - cancel_delayed_work_sync(&priv->led_on); + cancel_delayed_work(&priv->led_on); ieee80211_queue_delayed_work(hw, &priv->led_off, 0); } } else if (radio_on) { diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c index f088cdc563d0..20820d03b7c6 100644 --- a/drivers/oprofile/oprofile_perf.c +++ b/drivers/oprofile/oprofile_perf.c @@ -25,7 +25,7 @@ static int oprofile_perf_enabled; static DEFINE_MUTEX(oprofile_perf_mutex); static struct op_counter_config *counter_config; -static struct perf_event **perf_events[nr_cpumask_bits]; +static struct perf_event **perf_events[NR_CPUS]; static int num_counters; /* diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 46767c53917a..475a3407d7a1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -726,6 +726,18 @@ static int pci_pm_suspend_noirq(struct device *dev) pci_pm_set_unknown_state(pci_dev); + /* + * Some BIOSes from ASUS have a bug: If a USB EHCI host controller's + * PCI COMMAND register isn't 0, the BIOS assumes that the controller + * hasn't been quiesced and tries to turn it off. If the controller + * is already in D3, this can hang or cause memory corruption. + * + * Since the value of the COMMAND register doesn't matter once the + * device has been suspended, we can safely set it to 0 here. + */ + if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI) + pci_write_config_word(pci_dev, PCI_COMMAND, 0); + return 0; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index bf401aead873..d549bbc93cdd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1682,11 +1682,6 @@ int pci_prepare_to_sleep(struct pci_dev *dev) if (target_state == PCI_POWER_ERROR) return -EIO; - /* Some devices mustn't be in D3 during system sleep */ - if (target_state == PCI_D3hot && - (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)) - return 0; - pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); error = pci_set_power_state(dev, target_state); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 975af4353e73..a6b07ddad715 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2856,32 +2856,6 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); -/* - * The Intel 6 Series/C200 Series chipset's EHCI controllers on many - * ASUS motherboards will cause memory corruption or a system crash - * if they are in D3 while the system is put into S3 sleep. - */ -static void __devinit asus_ehci_no_d3(struct pci_dev *dev) -{ - const char *sys_info; - static const char good_Asus_board[] = "P8Z68-V"; - - if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP) - return; - if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK) - return; - sys_info = dmi_get_system_info(DMI_BOARD_NAME); - if (sys_info && memcmp(sys_info, good_Asus_board, - sizeof(good_Asus_board) - 1) == 0) - return; - - dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n"); - dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP; - device_set_wakeup_capable(&dev->dev, false); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3); - static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 5ffe7c398148..e66bbba9929d 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -72,6 +72,7 @@ #include <linux/string.h> #include <linux/tick.h> #include <linux/timer.h> +#include <linux/dmi.h> #include <drm/i915_drm.h> #include <asm/msr.h> #include <asm/processor.h> @@ -1505,6 +1506,24 @@ static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = { MODULE_DEVICE_TABLE(pci, ips_id_table); +static int ips_blacklist_callback(const struct dmi_system_id *id) +{ + pr_info("Blacklisted intel_ips for %s\n", id->ident); + return 1; +} + +static const struct dmi_system_id ips_blacklist[] = { + { + .callback = ips_blacklist_callback, + .ident = "HP ProBook", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"), + }, + }, + { } /* terminating entry */ +}; + static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) { u64 platform_info; @@ -1514,6 +1533,9 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) u16 htshi, trc, trc_required_mask; u8 tse; + if (dmi_check_system(ips_blacklist)) + return -ENODEV; + ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL); if (!ips) return -ENOMEM; diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 39e41fbdf08b..51603543def6 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -191,10 +191,11 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id) struct platform_device *pdev = dev_id; struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; + unsigned long flags; u32 status; u32 events = 0; - spin_lock_irq(&pdata->rtc->irq_lock); + spin_lock_irqsave(&pdata->rtc->irq_lock, flags); status = readw(ioaddr + RTC_RTCISR) & readw(ioaddr + RTC_RTCIENR); /* clear interrupt sources */ writew(status, ioaddr + RTC_RTCISR); @@ -217,7 +218,7 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id) rtc_update_alarm(&pdev->dev, &pdata->g_rtc_alarm); rtc_update_irq(pdata->rtc, 1, events); - spin_unlock_irq(&pdata->rtc->irq_lock); + spin_unlock_irqrestore(&pdata->rtc->irq_lock, flags); return IRQ_HANDLED; } diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 7491e21cc6ae..a40541c3df8c 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -64,7 +64,8 @@ static struct ft_tport *ft_tport_create(struct fc_lport *lport) struct ft_tport *tport; int i; - tport = rcu_dereference(lport->prov[FC_TYPE_FCP]); + tport = rcu_dereference_protected(lport->prov[FC_TYPE_FCP], + lockdep_is_held(&ft_lport_lock)); if (tport && tport->tpg) return tport; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 5a244cfbeb42..8a72e054fd10 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -457,6 +457,8 @@ retry: goto retry; } if (!desc->reslength) { /* zero length read */ + dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__); + clear_bit(WDM_READ, &desc->flags); spin_unlock_irq(&desc->iuspin); goto retry; } diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 0757b1934da5..9ab094c0cf3c 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -755,6 +755,7 @@ EXPORT_SYMBOL_GPL(usb_is_intel_switchable_xhci); */ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev) { +#if defined(CONFIG_USB_XHCI_HCD) || defined(CONFIG_USB_XHCI_HCD_MODULE) u32 ports_available; ports_available = 0xffffffff; @@ -782,6 +783,18 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev) &ports_available); dev_dbg(&xhci_pdev->dev, "USB 2.0 ports that are now switched over " "to xHCI: 0x%x\n", ports_available); +#else + /* Don't switchover the ports if the user hasn't compiled the xHCI + * driver. Otherwise they will see "dead" USB ports that don't power + * the devices. + */ + dev_warn(&xhci_pdev->dev, + "CONFIG_USB_XHCI_HCD is turned off, " + "defaulting to EHCI.\n"); + dev_warn(&xhci_pdev->dev, + "USB 3.0 devices will work at USB 2.0 speeds.\n"); +#endif /* CONFIG_USB_XHCI_HCD || CONFIG_USB_XHCI_HCD_MODULE */ + } EXPORT_SYMBOL_GPL(usb_enable_xhci_ports); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index aa0c43f1473a..35e6b5f6ac19 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -93,6 +93,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */ { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */ + { USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */ { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */ { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */ { USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */ @@ -134,7 +135,13 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ + { USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */ + { USB_DEVICE(0x166A, 0x0301) }, /* Clipsal 5800PC C-Bus Wireless PC Interface */ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ + { USB_DEVICE(0x166A, 0x0304) }, /* Clipsal 5000CT2 C-Bus Black and White Touchscreen */ + { USB_DEVICE(0x166A, 0x0305) }, /* Clipsal C-5000CT2 C-Bus Spectrum Colour Touchscreen */ + { USB_DEVICE(0x166A, 0x0401) }, /* Clipsal L51xx C-Bus Architectural Dimmer */ + { USB_DEVICE(0x166A, 0x0101) }, /* Clipsal 5560884 C-Bus Multi-room Audio Matrix Switcher */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */ @@ -146,7 +153,11 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ + { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ + { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ + { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ + { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */ { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */ { } /* Terminating Entry */ }; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ce024577400e..d2326230b54c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -236,6 +236,7 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_G1 0xA001 #define NOVATELWIRELESS_PRODUCT_G1_M 0xA002 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 +#define NOVATELWIRELESS_PRODUCT_MC551 0xB001 /* AMOI PRODUCTS */ #define AMOI_VENDOR_ID 0x1614 @@ -496,6 +497,19 @@ static void option_instat_callback(struct urb *urb); /* MediaTek products */ #define MEDIATEK_VENDOR_ID 0x0e8d +#define MEDIATEK_PRODUCT_DC_1COM 0x00a0 +#define MEDIATEK_PRODUCT_DC_4COM 0x00a5 +#define MEDIATEK_PRODUCT_DC_5COM 0x00a4 +#define MEDIATEK_PRODUCT_7208_1COM 0x7101 +#define MEDIATEK_PRODUCT_7208_2COM 0x7102 +#define MEDIATEK_PRODUCT_FP_1COM 0x0003 +#define MEDIATEK_PRODUCT_FP_2COM 0x0023 +#define MEDIATEK_PRODUCT_FPDC_1COM 0x0043 +#define MEDIATEK_PRODUCT_FPDC_2COM 0x0033 + +/* Cellient products */ +#define CELLIENT_VENDOR_ID 0x2692 +#define CELLIENT_PRODUCT_MEN200 0x9005 /* some devices interfaces need special handling due to a number of reasons */ enum option_blacklist_reason { @@ -549,6 +563,10 @@ static const struct option_blacklist_info net_intf1_blacklist = { .reserved = BIT(1), }; +static const struct option_blacklist_info net_intf2_blacklist = { + .reserved = BIT(2), +}; + static const struct option_blacklist_info net_intf3_blacklist = { .reserved = BIT(3), }; @@ -734,6 +752,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1_M) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) }, + /* Novatel Ovation MC551 a.k.a. Verizon USB551L */ + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, @@ -1090,6 +1110,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1298, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1299, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1300, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1402, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2002, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_k3765_z_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2003, 0xff, 0xff, 0xff) }, @@ -1231,6 +1253,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a1, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x02, 0x01) }, /* MediaTek MT6276M modem & app port */ + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_1COM, 0x0a, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x02, 0x01) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x02, 0x01) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_1COM, 0x02, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_2COM, 0x02, 0x02, 0x01) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_1COM, 0x0a, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_2COM, 0x0a, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_1COM, 0x0a, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_2COM, 0x0a, 0x00, 0x00) }, + { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ea966b356352..61047fe31206 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -217,6 +217,8 @@ static int vhost_worker(void *data) if (work) { __set_current_state(TASK_RUNNING); work->fn(work); + if (need_resched()) + schedule(); } else schedule(); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ac8db5dc0a3..57106a99b52d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -801,7 +801,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, #ifdef CONFIG_MIGRATION static int btree_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { /* * we can't safely write a btree page from here, @@ -816,7 +817,7 @@ static int btree_migratepage(struct address_space *mapping, if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7fa128d32f7a..faf7d0bc0a01 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -691,6 +691,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, kfree(name); iput(inode); + + btrfs_run_delayed_items(trans, root); return ret; } @@ -896,6 +898,7 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, victim_name, victim_name_len); + btrfs_run_delayed_items(trans, root); } kfree(victim_name); ptr = (unsigned long)(victim_ref + 1) + victim_name_len; @@ -1476,6 +1479,9 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); BUG_ON(ret); + + btrfs_run_delayed_items(trans, root); + kfree(name); iput(inode); diff --git a/fs/buffer.c b/fs/buffer.c index 45f85891b88e..cf9eb135a8d1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1076,6 +1076,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { + int ret; + struct buffer_head *bh; + /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1088,20 +1091,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } - for (;;) { - struct buffer_head * bh; - int ret; +retry: + bh = __find_get_block(bdev, block, size); + if (bh) + return bh; + ret = grow_buffers(bdev, block, size); + if (ret == 0) { + free_more_memory(); + goto retry; + } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; - - ret = grow_buffers(bdev, block, size); - if (ret < 0) - return NULL; - if (ret == 0) - free_more_memory(); } + return NULL; } /* diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 6751e745bbc6..c71032ba5b75 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -85,9 +85,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { - /* FIXME: check for inode number changes? */ - if (dentry->d_inode != NULL) + inode = dentry->d_inode; + /* update inode in place if i_ino didn't change */ + if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); return dentry; + } d_drop(dentry); dput(dentry); } diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 69f994a7d524..0dbe58a8b172 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file, (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; - if (flags & O_RDONLY) { + if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 0dc5a3d554a4..de42310cd4fb 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) goto out_unlock_daemon; } daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; + file->private_data = daemon; atomic_inc(&ecryptfs_num_miscdev_opens); out_unlock_daemon: mutex_unlock(&daemon->mux); @@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) mutex_lock(&ecryptfs_daemon_hash_mux); rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) + daemon = file->private_data; mutex_lock(&daemon->mux); - BUG_ON(daemon->pid != task_pid(current)); BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; atomic_dec(&ecryptfs_num_miscdev_opens); @@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, u16 msg_flags, struct ecryptfs_daemon *daemon) { - int rc = 0; + struct ecryptfs_message *msg; - mutex_lock(&msg_ctx->mux); - msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), - GFP_KERNEL); - if (!msg_ctx->msg) { - rc = -ENOMEM; + msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL); + if (!msg) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to kmalloc(%zd, GFP_KERNEL)\n", __func__, - (sizeof(*msg_ctx->msg) + data_size)); - goto out_unlock; + (sizeof(*msg) + data_size)); + return -ENOMEM; } + + mutex_lock(&msg_ctx->mux); + msg_ctx->msg = msg; msg_ctx->msg->index = msg_ctx->index; msg_ctx->msg->data_len = data_size; msg_ctx->type = msg_type; memcpy(msg_ctx->msg->data, data, data_size); msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); - mutex_lock(&daemon->mux); list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); + mutex_unlock(&msg_ctx->mux); + + mutex_lock(&daemon->mux); daemon->num_queued_msg_ctx++; wake_up_interruptible(&daemon->wait); mutex_unlock(&daemon->mux); -out_unlock: - mutex_unlock(&msg_ctx->mux); - return rc; + + return 0; } /** @@ -246,8 +251,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); + if (task_pid(current) != daemon->pid) { + mutex_unlock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EPERM; + } if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -284,9 +297,6 @@ check_list: * message from the queue; try again */ goto check_list; } - BUG_ON(euid != daemon->euid); - BUG_ON(current_user_ns() != daemon->user_ns); - BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); BUG_ON(!msg_ctx); diff --git a/fs/fifo.c b/fs/fifo.c index b1a524d798e7..cf6f4345ceb0 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -14,7 +14,7 @@ #include <linux/sched.h> #include <linux/pipe_fs_i.h> -static void wait_for_partner(struct inode* inode, unsigned int *cnt) +static int wait_for_partner(struct inode* inode, unsigned int *cnt) { int cur = *cnt; @@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt) if (signal_pending(current)) break; } + return cur == *cnt ? -ERESTARTSYS : 0; } static void wake_up_partner(struct inode* inode) @@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * seen a writer */ filp->f_version = pipe->w_counter; } else { - wait_for_partner(inode, &pipe->w_counter); - if(signal_pending(current)) + if (wait_for_partner(inode, &pipe->w_counter)) goto err_rd; } } @@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp) wake_up_partner(inode); if (!pipe->readers) { - wait_for_partner(inode, &pipe->r_counter); - if (signal_pending(current)) + if (wait_for_partner(inode, &pipe->r_counter)) goto err_wr; } break; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8b0c87530b04..6327a069d83d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -568,7 +568,8 @@ static int hugetlbfs_set_page_dirty(struct page *page) } static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { int rc; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347a2daa..4f10d8188ab6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, - struct page *, struct page *); + struct page *, struct page *, enum migrate_mode); #else #define nfs_migrate_page NULL #endif diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f2f80c005c02..58bb9994b947 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1662,7 +1662,7 @@ out_error: #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page) + struct page *page, enum migrate_mode mode) { /* * If PagePrivate is set, then the page is currently associated with @@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, nfs_fscache_release_page(page, GFP_KERNEL); - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } #endif diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 08a07a218d26..57ceaf33d177 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) while (!list_empty(head)) { ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index bb24ab6c282f..6f24e67162c0 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) continue; list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); iput(&ii->vfs_inode); } } diff --git a/fs/open.c b/fs/open.c index b52cf013ffa1..7e18c4d6e1f2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -396,10 +396,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct file *file; struct inode *inode; - int error; + int error, fput_needed; error = -EBADF; - file = fget(fd); + file = fget_raw_light(fd, &fput_needed); if (!file) goto out; @@ -413,7 +413,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!error) set_fs_pwd(current->fs, &file->f_path); out_putf: - fput(file); + fput_light(file, fput_needed); out: return error; } diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index fbb0b478a346..d5378d028589 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* prevent the page from being discarded on memory pressure */ SetPageDirty(page); + SetPageUptodate(page); unlock_page(page); put_page(page); diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index c606f010e8df..1250016bfb9f 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -715,8 +715,12 @@ static int fixup_free_space(struct ubifs_info *c) lnum = ubifs_next_log_lnum(c, lnum); } - /* Fixup the current log head */ - err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); + /* + * Fixup the log head which contains the only a CS node at the + * beginning. + */ + err = fixup_leb(c, c->lhead_lnum, + ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); if (err) goto out; diff --git a/fs/udf/super.c b/fs/udf/super.c index 7f0e18aa25d4..a8e867ae11c9 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -56,6 +56,7 @@ #include <linux/seq_file.h> #include <linux/bitmap.h> #include <linux/crc-itu-t.h> +#include <linux/log2.h> #include <asm/byteorder.h> #include "udf_sb.h" @@ -1244,16 +1245,65 @@ out_bh: return ret; } +static int udf_load_sparable_map(struct super_block *sb, + struct udf_part_map *map, + struct sparablePartitionMap *spm) +{ + uint32_t loc; + uint16_t ident; + struct sparingTable *st; + struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; + int i; + struct buffer_head *bh; + + map->s_partition_type = UDF_SPARABLE_MAP15; + sdata->s_packet_len = le16_to_cpu(spm->packetLength); + if (!is_power_of_2(sdata->s_packet_len)) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Invalid packet length %u\n", + (unsigned)sdata->s_packet_len); + return -EIO; + } + if (spm->numSparingTables > 4) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Too many sparing tables (%d)\n", + (int)spm->numSparingTables); + return -EIO; + } + + for (i = 0; i < spm->numSparingTables; i++) { + loc = le32_to_cpu(spm->locSparingTable[i]); + bh = udf_read_tagged(sb, loc, loc, &ident); + if (!bh) + continue; + + st = (struct sparingTable *)bh->b_data; + if (ident != 0 || + strncmp(st->sparingIdent.ident, UDF_ID_SPARING, + strlen(UDF_ID_SPARING)) || + sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > + sb->s_blocksize) { + brelse(bh); + continue; + } + + sdata->s_spar_map[i] = bh; + } + map->s_partition_func = udf_get_pblock_spar15; + return 0; +} + static int udf_load_logicalvol(struct super_block *sb, sector_t block, struct kernel_lb_addr *fileset) { struct logicalVolDesc *lvd; - int i, j, offset; + int i, offset; uint8_t type; struct udf_sb_info *sbi = UDF_SB(sb); struct genericPartitionMap *gpm; uint16_t ident; struct buffer_head *bh; + unsigned int table_len; int ret = 0; bh = udf_read_tagged(sb, block, block, &ident); @@ -1261,15 +1311,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, return 1; BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; - - i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); - if (i != 0) { - ret = i; + table_len = le32_to_cpu(lvd->mapTableLength); + if (sizeof(*lvd) + table_len > sb->s_blocksize) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Partition table too long (%u > %lu)\n", table_len, + sb->s_blocksize - sizeof(*lvd)); goto out_bh; } + ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); + if (ret) + goto out_bh; + for (i = 0, offset = 0; - i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); + i < sbi->s_partitions && offset < table_len; i++, offset += gpm->partitionMapLength) { struct udf_part_map *map = &sbi->s_partmaps[i]; gpm = (struct genericPartitionMap *) @@ -1304,38 +1359,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, } else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { - uint32_t loc; - struct sparingTable *st; - struct sparablePartitionMap *spm = - (struct sparablePartitionMap *)gpm; - - map->s_partition_type = UDF_SPARABLE_MAP15; - map->s_type_specific.s_sparing.s_packet_len = - le16_to_cpu(spm->packetLength); - for (j = 0; j < spm->numSparingTables; j++) { - struct buffer_head *bh2; - - loc = le32_to_cpu( - spm->locSparingTable[j]); - bh2 = udf_read_tagged(sb, loc, loc, - &ident); - map->s_type_specific.s_sparing. - s_spar_map[j] = bh2; - - if (bh2 == NULL) - continue; - - st = (struct sparingTable *)bh2->b_data; - if (ident != 0 || strncmp( - st->sparingIdent.ident, - UDF_ID_SPARING, - strlen(UDF_ID_SPARING))) { - brelse(bh2); - map->s_type_specific.s_sparing. - s_spar_map[j] = NULL; - } - } - map->s_partition_func = udf_get_pblock_spar15; + if (udf_load_sparable_map(sb, map, + (struct sparablePartitionMap *)gpm) < 0) + goto out_bh; } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index e9eaec522655..7a7e5fd2a277 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -89,42 +89,33 @@ extern void rebuild_sched_domains(void); extern void cpuset_print_task_mems_allowed(struct task_struct *p); /* - * reading current mems_allowed and mempolicy in the fastpath must protected - * by get_mems_allowed() + * get_mems_allowed is required when making decisions involving mems_allowed + * such as during page allocation. mems_allowed can be updated in parallel + * and depending on the new value an operation can fail potentially causing + * process failure. A retry loop with get_mems_allowed and put_mems_allowed + * prevents these artificial failures. */ -static inline void get_mems_allowed(void) +static inline unsigned int get_mems_allowed(void) { - current->mems_allowed_change_disable++; - - /* - * ensure that reading mems_allowed and mempolicy happens after the - * update of ->mems_allowed_change_disable. - * - * the write-side task finds ->mems_allowed_change_disable is not 0, - * and knows the read-side task is reading mems_allowed or mempolicy, - * so it will clear old bits lazily. - */ - smp_mb(); + return read_seqcount_begin(¤t->mems_allowed_seq); } -static inline void put_mems_allowed(void) +/* + * If this returns false, the operation that took place after get_mems_allowed + * may have failed. It is up to the caller to retry the operation if + * appropriate. + */ +static inline bool put_mems_allowed(unsigned int seq) { - /* - * ensure that reading mems_allowed and mempolicy before reducing - * mems_allowed_change_disable. - * - * the write-side task will know that the read-side task is still - * reading mems_allowed or mempolicy, don't clears old bits in the - * nodemask. - */ - smp_mb(); - --ACCESS_ONCE(current->mems_allowed_change_disable); + return !read_seqcount_retry(¤t->mems_allowed_seq, seq); } static inline void set_mems_allowed(nodemask_t nodemask) { task_lock(current); + write_seqcount_begin(¤t->mems_allowed_seq); current->mems_allowed = nodemask; + write_seqcount_end(¤t->mems_allowed_seq); task_unlock(current); } @@ -234,12 +225,14 @@ static inline void set_mems_allowed(nodemask_t nodemask) { } -static inline void get_mems_allowed(void) +static inline unsigned int get_mems_allowed(void) { + return 0; } -static inline void put_mems_allowed(void) +static inline bool put_mems_allowed(unsigned int seq) { + return true; } #endif /* !CONFIG_CPUSETS */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 82466c3d9573..aceb8af96c21 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -523,6 +523,7 @@ enum positive_aop_returns { struct page; struct address_space; struct writeback_control; +enum migrate_mode; struct iov_iter { const struct iovec *iov; @@ -607,9 +608,12 @@ struct address_space_operations { loff_t offset, unsigned long nr_segs); int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); - /* migrate the contents of a page to the specified target */ + /* + * migrate the contents of a page to the specified target. If sync + * is false, it must not block. + */ int (*migratepage) (struct address_space *, - struct page *, struct page *); + struct page *, struct page *, enum migrate_mode); int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); @@ -2480,7 +2484,8 @@ extern int generic_check_addressable(unsigned, u64); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *); + struct page *, struct page *, + enum migrate_mode); #else #define buffer_migrate_page NULL #endif diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 740876033baa..7259cd308281 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -171,6 +171,7 @@ enum hrtimer_base_type { * @lock: lock protecting the base and associated clock bases * and timers * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set: Indicates that clock was set from irq context. * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -183,7 +184,8 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - unsigned long active_bases; + unsigned int active_bases; + unsigned int clock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -295,6 +297,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC HIGH_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_HIGH_RES +extern void clock_was_set_delayed(void); + #else # define MONOTONIC_RES_NSEC LOW_RES_NSEC @@ -315,6 +319,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; } + +static inline void clock_was_set_delayed(void) { } + #endif extern void clock_was_set(void); @@ -329,6 +336,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); +extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7b7509616d29..e21dfa2428a5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -30,6 +30,13 @@ extern struct fs_struct init_fs; #define INIT_THREADGROUP_FORK_LOCK(sig) #endif +#ifdef CONFIG_CPUSETS +#define INIT_CPUSET_SEQ \ + .mems_allowed_seq = SEQCNT_ZERO, +#else +#define INIT_CPUSET_SEQ +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ @@ -202,6 +209,7 @@ extern struct cred init_cred; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ + INIT_CPUSET_SEQ \ } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 313a00eca40e..4a8da84487b7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,7 +35,8 @@ enum mem_cgroup_page_stat_item { extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, + isolate_mode_t mode, + struct zone *z, struct mem_cgroup *mem_cont, int active, int file); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index e39aeecfe9a2..eaf867412f7a 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -6,18 +6,31 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); +/* + * MIGRATE_ASYNC means never block + * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking + * on most operations but not ->writepage as the potential stall time + * is too significant + * MIGRATE_SYNC will block when migrating pages + */ +enum migrate_mode { + MIGRATE_ASYNC, + MIGRATE_SYNC_LIGHT, + MIGRATE_SYNC, +}; + #ifdef CONFIG_MIGRATION #define PAGE_MIGRATION 1 extern void putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, - struct page *, struct page *); + struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync); + enum migrate_mode mode); extern int migrate_huge_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync); + enum migrate_mode mode); extern int fail_migrate_page(struct address_space *, struct page *, struct page *); @@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, static inline void putback_lru_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync) { return -ENOSYS; } + enum migrate_mode mode) { return -ENOSYS; } static inline int migrate_huge_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync) { return -ENOSYS; } + enum migrate_mode mode) { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9f7c3ebcbbad..b32f3f9182ca 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -158,6 +158,20 @@ static inline int is_unevictable_lru(enum lru_list l) return (l == LRU_UNEVICTABLE); } +/* Isolate inactive pages */ +#define ISOLATE_INACTIVE ((__force isolate_mode_t)0x1) +/* Isolate active pages */ +#define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2) +/* Isolate clean file */ +#define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) +/* Isolate unmapped file */ +#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) +/* Isolate for asynchronous migration */ +#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10) + +/* LRU Isolation modes. */ +typedef unsigned __bitwise__ isolate_mode_t; + enum zone_watermarks { WMARK_MIN, WMARK_LOW, @@ -633,7 +647,7 @@ typedef struct pglist_data { range, including holes */ int node_id; wait_queue_head_t kswapd_wait; - struct task_struct *kswapd; + struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ int kswapd_max_order; enum zone_type classzone_idx; } pg_data_t; diff --git a/include/linux/pci.h b/include/linux/pci.h index ff5970b7a176..c446b5ca2d38 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -174,8 +174,6 @@ enum pci_dev_flags { PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, /* Device configuration is irrevocably lost if disabled into D3 */ PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, - /* Device causes system crash if in D3 during S3 sleep */ - PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { diff --git a/include/linux/sched.h b/include/linux/sched.h index d25892e56f4c..c973594dc95a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1503,7 +1503,7 @@ struct task_struct { #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; /* Protected by alloc_lock */ - int mems_allowed_change_disable; + seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index a273468f8285..e73799d3b1c5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -243,11 +243,6 @@ static inline void lru_cache_add_file(struct page *page) __lru_cache_add(page, LRU_INACTIVE_FILE); } -/* LRU Isolation modes. */ -#define ISOLATE_INACTIVE 0 /* Isolate inactive pages. */ -#define ISOLATE_ACTIVE 1 /* Isolate active pages. */ -#define ISOLATE_BOTH 2 /* Isolate both active and inactive pages. */ - /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); @@ -259,7 +254,7 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, unsigned int swappiness, struct zone *zone, unsigned long *nr_scanned); -extern int __isolate_lru_page(struct page *page, int mode, int file); +extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); diff --git a/include/linux/timex.h b/include/linux/timex.h index aa60fe7b6ed6..08e90fb81acc 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -266,7 +266,7 @@ static inline int ntp_synced(void) /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ extern u64 tick_length; -extern void second_overflow(void); +extern int second_overflow(unsigned long secs); extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); extern void hardpps(const struct timespec *, const struct timespec *); diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index abd443604c9f..4af554bb971e 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -42,6 +42,7 @@ #include <net/netlabel.h> #include <net/request_sock.h> #include <asm/atomic.h> +#include <asm/unaligned.h> /* known doi values */ #define CIPSO_V4_DOI_UNKNOWN 0x00000000 @@ -285,7 +286,33 @@ static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, static inline int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { - return -ENOSYS; + unsigned char *opt = *option; + unsigned char err_offset = 0; + u8 opt_len = opt[1]; + u8 opt_iter; + + if (opt_len < 8) { + err_offset = 1; + goto out; + } + + if (get_unaligned_be32(&opt[2]) == 0) { + err_offset = 2; + goto out; + } + + for (opt_iter = 6; opt_iter < opt_len;) { + if (opt[opt_iter + 1] > (opt_len - opt_iter)) { + err_offset = opt_iter + 1; + goto out; + } + opt_iter += opt[opt_iter + 1]; + } + +out: + *option = opt + err_offset; + return err_offset; + } #endif /* CONFIG_NETLABEL */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f1fbe2d5e055..af2e047fc06e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -219,13 +219,16 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; - unsigned char data[24]; + u16 bond_queue_mapping; + u16 _pad; + unsigned char data[20]; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz); + + BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz); BUILD_BUG_ON(sizeof(qcb->data) < sz); } diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index b2c33bd955fa..edc4b3d25a2d 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -179,6 +179,83 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re TP_ARGS(nr_reclaimed) ); +TRACE_EVENT(mm_shrink_slab_start, + TP_PROTO(struct shrinker *shr, struct shrink_control *sc, + long nr_objects_to_shrink, unsigned long pgs_scanned, + unsigned long lru_pgs, unsigned long cache_items, + unsigned long long delta, unsigned long total_scan), + + TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs, + cache_items, delta, total_scan), + + TP_STRUCT__entry( + __field(struct shrinker *, shr) + __field(void *, shrink) + __field(long, nr_objects_to_shrink) + __field(gfp_t, gfp_flags) + __field(unsigned long, pgs_scanned) + __field(unsigned long, lru_pgs) + __field(unsigned long, cache_items) + __field(unsigned long long, delta) + __field(unsigned long, total_scan) + ), + + TP_fast_assign( + __entry->shr = shr; + __entry->shrink = shr->shrink; + __entry->nr_objects_to_shrink = nr_objects_to_shrink; + __entry->gfp_flags = sc->gfp_mask; + __entry->pgs_scanned = pgs_scanned; + __entry->lru_pgs = lru_pgs; + __entry->cache_items = cache_items; + __entry->delta = delta; + __entry->total_scan = total_scan; + ), + + TP_printk("%pF %p: objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld", + __entry->shrink, + __entry->shr, + __entry->nr_objects_to_shrink, + show_gfp_flags(__entry->gfp_flags), + __entry->pgs_scanned, + __entry->lru_pgs, + __entry->cache_items, + __entry->delta, + __entry->total_scan) +); + +TRACE_EVENT(mm_shrink_slab_end, + TP_PROTO(struct shrinker *shr, int shrinker_retval, + long unused_scan_cnt, long new_scan_cnt), + + TP_ARGS(shr, shrinker_retval, unused_scan_cnt, new_scan_cnt), + + TP_STRUCT__entry( + __field(struct shrinker *, shr) + __field(void *, shrink) + __field(long, unused_scan) + __field(long, new_scan) + __field(int, retval) + __field(long, total_scan) + ), + + TP_fast_assign( + __entry->shr = shr; + __entry->shrink = shr->shrink; + __entry->unused_scan = unused_scan_cnt; + __entry->new_scan = new_scan_cnt; + __entry->retval = shrinker_retval; + __entry->total_scan = new_scan_cnt - unused_scan_cnt; + ), + + TP_printk("%pF %p: unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", + __entry->shrink, + __entry->shr, + __entry->unused_scan, + __entry->new_scan, + __entry->total_scan, + __entry->retval) +); DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, @@ -189,7 +266,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode), @@ -201,7 +278,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __field(unsigned long, nr_lumpy_taken) __field(unsigned long, nr_lumpy_dirty) __field(unsigned long, nr_lumpy_failed) - __field(int, isolate_mode) + __field(isolate_mode_t, isolate_mode) ), TP_fast_assign( @@ -235,7 +312,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) @@ -250,7 +327,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9c9b7545c810..b2e84bd3ceb9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -123,6 +123,19 @@ static inline struct cpuset *task_cs(struct task_struct *task) struct cpuset, css); } +#ifdef CONFIG_NUMA +static inline bool task_has_mempolicy(struct task_struct *task) +{ + return task->mempolicy; +} +#else +static inline bool task_has_mempolicy(struct task_struct *task) +{ + return false; +} +#endif + + /* bits in struct cpuset flags field */ typedef enum { CS_CPU_EXCLUSIVE, @@ -949,7 +962,8 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { -repeat: + bool need_loop; + /* * Allow tasks that have access to memory reserves because they have * been OOM killed to get memory anywhere. @@ -960,46 +974,27 @@ repeat: return; task_lock(tsk); - nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); - mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); - - /* - * ensure checking ->mems_allowed_change_disable after setting all new - * allowed nodes. - * - * the read-side task can see an nodemask with new allowed nodes and - * old allowed nodes. and if it allocates page when cpuset clears newly - * disallowed ones continuous, it can see the new allowed bits. - * - * And if setting all new allowed nodes is after the checking, setting - * all new allowed nodes and clearing newly disallowed ones will be done - * continuous, and the read-side task may find no node to alloc page. + * Determine if a loop is necessary if another thread is doing + * get_mems_allowed(). If at least one node remains unchanged and + * tsk does not have a mempolicy, then an empty nodemask will not be + * possible when mems_allowed is larger than a word. */ - smp_mb(); + need_loop = task_has_mempolicy(tsk) || + !nodes_intersects(*newmems, tsk->mems_allowed); - /* - * Allocation of memory is very fast, we needn't sleep when waiting - * for the read-side. - */ - while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { - task_unlock(tsk); - if (!task_curr(tsk)) - yield(); - goto repeat; - } + if (need_loop) + write_seqcount_begin(&tsk->mems_allowed_seq); - /* - * ensure checking ->mems_allowed_change_disable before clearing all new - * disallowed nodes. - * - * if clearing newly disallowed bits before the checking, the read-side - * task may find no node to alloc page. - */ - smp_mb(); + nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); + mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); tsk->mems_allowed = *newmems; + + if (need_loop) + write_seqcount_end(&tsk->mems_allowed_seq); + task_unlock(tsk); } diff --git a/kernel/fork.c b/kernel/fork.c index 895617c20687..086388f4b787 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1009,6 +1009,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) #ifdef CONFIG_CGROUPS init_rwsem(&sig->threadgroup_fork_lock); #endif +#ifdef CONFIG_CPUSETS + seqcount_init(&tsk->mems_allowed_seq); +#endif sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 363965fc9730..73278468fadc 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -669,6 +669,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } +static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) +{ + ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; + ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; + + return ktime_get_update_offsets(offs_real, offs_boot); +} + /* * Retrigger next event is called after clock was set * @@ -677,22 +685,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); - struct timespec realtime_offset, xtim, wtm, sleep; if (!hrtimer_hres_active()) return; - /* Optimized out for !HIGH_RES */ - get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - /* Adjust CLOCK_REALTIME offset */ raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - + hrtimer_update_base(base); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); } @@ -722,13 +720,25 @@ static int hrtimer_switch_to_hres(void) base->clock_base[i].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); - /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); local_irq_restore(flags); return 1; } +/* + * Called from timekeeping code to reprogramm the hrtimer interrupt + * device. If called from the timer interrupt context we defer it to + * softirq context. + */ +void clock_was_set_delayed(void) +{ + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + cpu_base->clock_was_set = 1; + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -1438,11 +1448,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) cpu_base->nr_events++; dev->next_event.tv64 = KTIME_MAX; - entry_time = now = ktime_get(); + raw_spin_lock(&cpu_base->lock); + entry_time = now = hrtimer_update_base(cpu_base); retry: expires_next.tv64 = KTIME_MAX; - - raw_spin_lock(&cpu_base->lock); /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via @@ -1534,8 +1543,12 @@ retry: * We need to prevent that we loop forever in the hrtimer * interrupt routine. We give it 3 attempts to avoid * overreacting on some spurious event. + * + * Acquire base lock for updating the offsets and retrieving + * the current time. */ - now = ktime_get(); + raw_spin_lock(&cpu_base->lock); + now = hrtimer_update_base(cpu_base); cpu_base->nr_retries++; if (++retries < 3) goto retry; @@ -1547,6 +1560,7 @@ retry: */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; + raw_spin_unlock(&cpu_base->lock); delta = ktime_sub(now, entry_time); if (delta.tv64 > cpu_base->max_hang_time.tv64) cpu_base->max_hang_time = delta; @@ -1605,6 +1619,13 @@ static inline void __hrtimer_peek_ahead_timers(void) { } static void run_hrtimer_softirq(struct softirq_action *h) { + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + if (cpu_base->clock_was_set) { + cpu_base->clock_was_set = 0; + clock_was_set(); + } + hrtimer_rt_run_pending(); } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 419cbaa2f5c0..8b3a185b3210 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -31,8 +31,6 @@ unsigned long tick_nsec; u64 tick_length; static u64 tick_length_base; -static struct hrtimer leap_timer; - #define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) @@ -350,63 +348,64 @@ void ntp_clear(void) } /* - * Leap second processing. If in leap-insert state at the end of the - * day, the system clock is set back one second; if in leap-delete - * state, the system clock is set ahead one second. + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + * + * Also handles leap second processing, and returns leap offset */ -static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) +int second_overflow(unsigned long secs) { - enum hrtimer_restart res = HRTIMER_NORESTART; - - raw_spin_lock(&xtime_lock); - write_seqcount_begin(&xtime_seq); + int leap = 0; + s64 delta; + /* + * Leap second processing. If in leap-insert state at the end of the + * day, the system clock is set back one second; if in leap-delete + * state, the system clock is set ahead one second. + */ switch (time_state) { case TIME_OK: + if (time_status & STA_INS) + time_state = TIME_INS; + else if (time_status & STA_DEL) + time_state = TIME_DEL; break; case TIME_INS: - timekeeping_leap_insert(-1); - time_state = TIME_OOP; - printk(KERN_NOTICE - "Clock: inserting leap second 23:59:60 UTC\n"); - hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); - res = HRTIMER_RESTART; + if (!(time_status & STA_INS)) + time_state = TIME_OK; + else if (secs % 86400 == 0) { + leap = -1; + time_state = TIME_OOP; + time_tai++; + printk(KERN_NOTICE + "Clock: inserting leap second 23:59:60 UTC\n"); + } break; case TIME_DEL: - timekeeping_leap_insert(1); - time_tai--; - time_state = TIME_WAIT; - printk(KERN_NOTICE - "Clock: deleting leap second 23:59:59 UTC\n"); + if (!(time_status & STA_DEL)) + time_state = TIME_OK; + else if ((secs + 1) % 86400 == 0) { + leap = 1; + time_tai--; + time_state = TIME_WAIT; + printk(KERN_NOTICE + "Clock: deleting leap second 23:59:59 UTC\n"); + } break; case TIME_OOP: - time_tai++; time_state = TIME_WAIT; - /* fall through */ + break; + case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; break; } - write_seqcount_end(&xtime_seq); - raw_spin_unlock(&xtime_lock); - - return res; -} - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - */ -void second_overflow(void) -{ - s64 delta; - /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; if (time_maxerror > NTP_PHASE_LIMIT) { @@ -425,23 +424,25 @@ void second_overflow(void) pps_dec_valid(); if (!time_adjust) - return; + goto out; if (time_adjust > MAX_TICKADJ) { time_adjust -= MAX_TICKADJ; tick_length += MAX_TICKADJ_SCALED; - return; + goto out; } if (time_adjust < -MAX_TICKADJ) { time_adjust += MAX_TICKADJ; tick_length -= MAX_TICKADJ_SCALED; - return; + goto out; } tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; time_adjust = 0; +out: + return leap; } #ifdef CONFIG_GENERIC_CMOS_UPDATE @@ -503,27 +504,6 @@ static void notify_cmos_timer(void) static inline void notify_cmos_timer(void) { } #endif -/* - * Start the leap seconds timer: - */ -static inline void ntp_start_leap_timer(struct timespec *ts) -{ - long now = ts->tv_sec; - - if (time_status & STA_INS) { - time_state = TIME_INS; - now += 86400 - now % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - - return; - } - - if (time_status & STA_DEL) { - time_state = TIME_DEL; - now += 86400 - (now + 1) % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - } -} /* * Propagate a new txc->status value into the NTP state: @@ -548,22 +528,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) time_status &= STA_RONLY; time_status |= txc->status & ~STA_RONLY; - switch (time_state) { - case TIME_OK: - ntp_start_leap_timer(ts); - break; - case TIME_INS: - case TIME_DEL: - time_state = TIME_OK; - ntp_start_leap_timer(ts); - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - break; - case TIME_OOP: - hrtimer_restart(&leap_timer); - break; - } } /* * Called with the xtime lock held, so we can access and modify @@ -645,9 +609,6 @@ int do_adjtimex(struct timex *txc) (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) return -EINVAL; - - if (txc->modes & ADJ_STATUS && time_state != TIME_OK) - hrtimer_cancel(&leap_timer); } if (txc->modes & ADJ_SETOFFSET) { @@ -975,6 +936,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { ntp_clear(); - hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - leap_timer.function = ntp_leap_second; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6ac64385b8b1..df92e132483b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -161,23 +161,43 @@ static struct timespec xtime __attribute__ ((aligned (16))); static struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static struct timespec total_sleep_time; +/* Offset clock monotonic -> clock realtime */ +static ktime_t offs_real; + +/* Offset clock monotonic -> clock boottime */ +static ktime_t offs_boot; + /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ static struct timespec raw_time; -/* flag for if timekeeping is suspended */ -int __read_mostly timekeeping_suspended; +/* must hold write on xtime_lock */ +static void update_rt_offset(void) +{ + struct timespec tmp, *wtm = &wall_to_monotonic; -/* must hold xtime_lock */ -void timekeeping_leap_insert(int leapsecond) + set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); + offs_real = timespec_to_ktime(tmp); +} + +/* must hold write on xtime_lock */ +static void timekeeping_update(bool clearntp) { - xtime.tv_sec += leapsecond; - wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + if (clearntp) { + timekeeper.ntp_error = 0; + ntp_clear(); + } + update_rt_offset(); + update_vsyscall(&xtime, &wall_to_monotonic, + timekeeper.clock, timekeeper.mult); } + + +/* flag for if timekeeping is suspended */ +int __read_mostly timekeeping_suspended; + /** * timekeeping_forward_now - update clock to the current time * @@ -376,11 +396,7 @@ int do_settimeofday(const struct timespec *tv) xtime = *tv; - timekeeper.ntp_error = 0; - ntp_clear(); - - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); write_seqcount_end(&xtime_seq); raw_spin_unlock_irqrestore(&xtime_lock, flags); @@ -415,11 +431,7 @@ int timekeeping_inject_offset(struct timespec *ts) xtime = timespec_add(xtime, *ts); wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts); - timekeeper.ntp_error = 0; - ntp_clear(); - - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); write_seqcount_end(&xtime_seq); raw_spin_unlock_irqrestore(&xtime_lock, flags); @@ -596,6 +608,7 @@ void __init timekeeping_init(void) } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); + update_rt_offset(); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_seqcount_end(&xtime_seq); @@ -605,6 +618,12 @@ void __init timekeeping_init(void) /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; +static void update_sleep_time(struct timespec t) +{ + total_sleep_time = t; + offs_boot = timespec_to_ktime(t); +} + /** * __timekeeping_inject_sleeptime - Internal function to add sleep interval * @delta: pointer to a timespec delta value @@ -616,7 +635,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) { xtime = timespec_add(xtime, *delta); wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); - total_sleep_time = timespec_add(total_sleep_time, *delta); + update_sleep_time(timespec_add(total_sleep_time, *delta)); } @@ -646,10 +665,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) __timekeeping_inject_sleeptime(delta); - timekeeper.ntp_error = 0; - ntp_clear(); - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); write_seqcount_end(&xtime_seq); raw_spin_unlock_irqrestore(&xtime_lock, flags); @@ -686,6 +702,7 @@ static void timekeeping_resume(void) timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; + timekeeping_update(false); write_seqcount_end(&xtime_seq); raw_spin_unlock_irqrestore(&xtime_lock, flags); @@ -840,9 +857,14 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; while (timekeeper.xtime_nsec >= nsecps) { + int leap; timekeeper.xtime_nsec -= nsecps; xtime.tv_sec++; - second_overflow(); + leap = second_overflow(xtime.tv_sec); + xtime.tv_sec += leap; + wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); } /* Accumulate raw time */ @@ -948,14 +970,17 @@ static void update_wall_time(void) * xtime.tv_nsec isn't larger then NSEC_PER_SEC */ if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { + int leap; xtime.tv_nsec -= NSEC_PER_SEC; xtime.tv_sec++; - second_overflow(); + leap = second_overflow(xtime.tv_sec); + xtime.tv_sec += leap; + wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); } - /* check to see if there is a new clocksource to use */ - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(false); } /** @@ -1114,6 +1139,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqcount_retry(&xtime_seq, seq)); } +#ifdef CONFIG_HIGH_RES_TIMERS +/** + * ktime_get_update_offsets - hrtimer helper + * @real: pointer to storage for monotonic -> realtime offset + * @_boot: pointer to storage for monotonic -> boottime offset + * + * Returns current monotonic time and updates the offsets + * Called from hrtimer_interupt() or retrigger_next_event() + */ +ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot) +{ + ktime_t now; + unsigned int seq; + u64 secs, nsecs; + + do { + seq = read_seqcount_begin(&xtime_seq); + + secs = xtime.tv_sec; + nsecs = xtime.tv_nsec; + nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); + + *real = offs_real; + *boot = offs_boot; + } while (read_seqcount_retry(&xtime_seq, seq)); + + now = ktime_add_ns(ktime_set(secs, 0), nsecs); + now = ktime_sub(now, *real); + return now; +} +#endif + /** * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0a42c28633ae..a21ab6aa3d32 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2443,10 +2443,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (cpumask_test_cpu(cpu, tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_inc(&global_trace.data[cpu]->disabled); + ring_buffer_record_disable_cpu(global_trace.buffer, cpu); } if (!cpumask_test_cpu(cpu, tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_dec(&global_trace.data[cpu]->disabled); + ring_buffer_record_enable_cpu(global_trace.buffer, cpu); } } arch_spin_unlock(&ftrace_max_lock); diff --git a/mm/compaction.c b/mm/compaction.c index c4bc5acf865d..8ea7308601bc 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -35,10 +35,6 @@ struct compact_control { unsigned long migrate_pfn; /* isolate_migratepages search base */ bool sync; /* Synchronous migration */ - /* Account for isolated anon and file pages */ - unsigned long nr_anon; - unsigned long nr_file; - unsigned int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; @@ -223,17 +219,13 @@ static void isolate_freepages(struct zone *zone, static void acct_isolated(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned int count[NR_LRU_LISTS] = { 0, }; + unsigned int count[2] = { 0, }; - list_for_each_entry(page, &cc->migratepages, lru) { - int lru = page_lru_base_type(page); - count[lru]++; - } + list_for_each_entry(page, &cc->migratepages, lru) + count[!!page_is_file_cache(page)]++; - cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; - cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - __mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file); + __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); } /* Similar to reclaim, but different enough that they don't share logic */ @@ -269,6 +261,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; + isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; /* Do not scan outside zone boundaries */ low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); @@ -378,8 +371,11 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, continue; } + if (!cc->sync) + mode |= ISOLATE_ASYNC_MIGRATE; + /* Try isolate the page */ - if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0) + if (__isolate_lru_page(page, mode, 0) != 0) continue; VM_BUG_ON(PageTransCompound(page)); @@ -581,7 +577,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, (unsigned long)cc, false, - cc->sync); + cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; @@ -596,8 +592,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) if (err) { putback_lru_pages(&cc->migratepages); cc->nr_migratepages = 0; + if (err == -ENOMEM) { + ret = COMPACT_PARTIAL; + goto out; + } } - } out: diff --git a/mm/filemap.c b/mm/filemap.c index b75ac2cb1945..09d5e27041fc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -516,10 +516,13 @@ struct page *__page_cache_alloc(gfp_t gfp) struct page *page; if (cpuset_do_page_mem_spread()) { - get_mems_allowed(); - n = cpuset_mem_spread_node(); - page = alloc_pages_exact_node(n, gfp, 0); - put_mems_allowed(); + unsigned int cpuset_mems_cookie; + do { + cpuset_mems_cookie = get_mems_allowed(); + n = cpuset_mem_spread_node(); + page = alloc_pages_exact_node(n, gfp, 0); + } while (!put_mems_allowed(cpuset_mems_cookie) && !page); + return page; } return alloc_pages(gfp, 0); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 05f8fd425f69..ae60a53f0506 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -460,8 +460,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, struct zonelist *zonelist; struct zone *zone; struct zoneref *z; + unsigned int cpuset_mems_cookie; - get_mems_allowed(); +retry_cpuset: + cpuset_mems_cookie = get_mems_allowed(); zonelist = huge_zonelist(vma, address, htlb_alloc_mask, &mpol, &nodemask); /* @@ -488,10 +490,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, } } } -err: + mpol_cond_put(mpol); - put_mems_allowed(); + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + goto retry_cpuset; return page; + +err: + mpol_cond_put(mpol); + return NULL; } static void update_and_free_page(struct hstate *h, struct page *page) diff --git a/mm/madvise.c b/mm/madvise.c index 2221491ed503..deabe5f603ad 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -13,6 +13,7 @@ #include <linux/hugetlb.h> #include <linux/sched.h> #include <linux/ksm.h> +#include <linux/file.h> /* * Any behaviour which results in changes to the vma->vm_flags needs to @@ -197,14 +198,16 @@ static long madvise_remove(struct vm_area_struct *vma, struct address_space *mapping; loff_t offset, endoff; int error; + struct file *f; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) return -EINVAL; - if (!vma->vm_file || !vma->vm_file->f_mapping - || !vma->vm_file->f_mapping->host) { + f = vma->vm_file; + + if (!f || !f->f_mapping || !f->f_mapping->host) { return -EINVAL; } @@ -218,9 +221,16 @@ static long madvise_remove(struct vm_area_struct *vma, endoff = (loff_t)(end - vma->vm_start - 1) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ + /* + * vmtruncate_range may need to take i_mutex and i_alloc_sem. + * We need to explicitly grab a reference because the vma (and + * hence the vma's reference to the file) can go away as soon as + * we drop mmap_sem. + */ + get_file(f); up_read(¤t->mm->mmap_sem); error = vmtruncate_range(mapping->host, offset, endoff); + fput(f); down_read(¤t->mm->mmap_sem); return error; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ffb99b4e7527..57cdf5ad6924 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1251,7 +1251,8 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, + isolate_mode_t mode, + struct zone *z, struct mem_cgroup *mem_cont, int active, int file) { diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 740c4f52059c..6496748df214 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1464,7 +1464,7 @@ int soft_offline_page(struct page *page, int flags) page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, - 0, true); + 0, MIGRATE_SYNC); if (ret) { putback_lru_pages(&pagelist); pr_info("soft offline: %#lx: migration failed %d, type %lx\n", diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c46887b5a11e..ae5a3f21010b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -747,7 +747,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) } /* this function returns # of failed pages */ ret = migrate_pages(&source, hotremove_migrate_alloc, 0, - true, true); + true, MIGRATE_SYNC); if (ret) putback_lru_pages(&source); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3dac2d168e47..cff919fe7025 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -926,7 +926,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_node_page, dest, - false, true); + false, MIGRATE_SYNC); if (err) putback_lru_pages(&pagelist); } @@ -1810,18 +1810,24 @@ struct page * alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, int node) { - struct mempolicy *pol = get_vma_policy(current, vma, addr); + struct mempolicy *pol; struct zonelist *zl; struct page *page; + unsigned int cpuset_mems_cookie; + +retry_cpuset: + pol = get_vma_policy(current, vma, addr); + cpuset_mems_cookie = get_mems_allowed(); - get_mems_allowed(); if (unlikely(pol->mode == MPOL_INTERLEAVE)) { unsigned nid; nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); mpol_cond_put(pol); page = alloc_page_interleave(gfp, order, nid); - put_mems_allowed(); + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + goto retry_cpuset; + return page; } zl = policy_zonelist(gfp, pol, node); @@ -1832,7 +1838,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, struct page *page = __alloc_pages_nodemask(gfp, order, zl, policy_nodemask(gfp, pol)); __mpol_put(pol); - put_mems_allowed(); + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + goto retry_cpuset; return page; } /* @@ -1840,7 +1847,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, */ page = __alloc_pages_nodemask(gfp, order, zl, policy_nodemask(gfp, pol)); - put_mems_allowed(); + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + goto retry_cpuset; return page; } @@ -1867,11 +1875,14 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) { struct mempolicy *pol = current->mempolicy; struct page *page; + unsigned int cpuset_mems_cookie; if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) pol = &default_policy; - get_mems_allowed(); +retry_cpuset: + cpuset_mems_cookie = get_mems_allowed(); + /* * No reference counting needed for current->mempolicy * nor system default_policy @@ -1882,7 +1893,10 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) page = __alloc_pages_nodemask(gfp, order, policy_zonelist(gfp, pol, numa_node_id()), policy_nodemask(gfp, pol)); - put_mems_allowed(); + + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + goto retry_cpuset; + return page; } EXPORT_SYMBOL(alloc_pages_current); diff --git a/mm/migrate.c b/mm/migrate.c index 14d0a6a632f6..480714b6f3fd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -220,6 +220,56 @@ out: pte_unmap_unlock(ptep, ptl); } +#ifdef CONFIG_BLOCK +/* Returns true if all buffers are successfully locked */ +static bool buffer_migrate_lock_buffers(struct buffer_head *head, + enum migrate_mode mode) +{ + struct buffer_head *bh = head; + + /* Simple case, sync compaction */ + if (mode != MIGRATE_ASYNC) { + do { + get_bh(bh); + lock_buffer(bh); + bh = bh->b_this_page; + + } while (bh != head); + + return true; + } + + /* async case, we cannot block on lock_buffer so use trylock_buffer */ + do { + get_bh(bh); + if (!trylock_buffer(bh)) { + /* + * We failed to lock the buffer and cannot stall in + * async migration. Release the taken locks + */ + struct buffer_head *failed_bh = bh; + put_bh(failed_bh); + bh = head; + while (bh != failed_bh) { + unlock_buffer(bh); + put_bh(bh); + bh = bh->b_this_page; + } + return false; + } + + bh = bh->b_this_page; + } while (bh != head); + return true; +} +#else +static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, + enum migrate_mode mode) +{ + return true; +} +#endif /* CONFIG_BLOCK */ + /* * Replace the page in the mapping. * @@ -229,7 +279,8 @@ out: * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ static int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + struct buffer_head *head, enum migrate_mode mode) { int expected_count; void **pslot; @@ -259,6 +310,20 @@ static int migrate_page_move_mapping(struct address_space *mapping, } /* + * In the async migration case of moving a page with buffers, lock the + * buffers using trylock before the mapping is moved. If the mapping + * was moved, we later failed to lock the buffers and could not move + * the mapping back due to an elevated page count, we would have to + * block waiting on other references to be dropped. + */ + if (mode == MIGRATE_ASYNC && head && + !buffer_migrate_lock_buffers(head, mode)) { + page_unfreeze_refs(page, expected_count); + spin_unlock_irq(&mapping->tree_lock); + return -EAGAIN; + } + + /* * Now we know that no one else is looking at the page. */ get_page(newpage); /* add cache reference */ @@ -415,13 +480,14 @@ EXPORT_SYMBOL(fail_migrate_page); * Pages are locked upon entry and exit. */ int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { int rc; BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page); + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); if (rc) return rc; @@ -438,28 +504,28 @@ EXPORT_SYMBOL(migrate_page); * exist. */ int buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, enum migrate_mode mode) { struct buffer_head *bh, *head; int rc; if (!page_has_buffers(page)) - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); head = page_buffers(page); - rc = migrate_page_move_mapping(mapping, newpage, page); + rc = migrate_page_move_mapping(mapping, newpage, page, head, mode); if (rc) return rc; - bh = head; - do { - get_bh(bh); - lock_buffer(bh); - bh = bh->b_this_page; - - } while (bh != head); + /* + * In the async case, migrate_page_move_mapping locked the buffers + * with an IRQ-safe spinlock held. In the sync case, the buffers + * need to be locked now + */ + if (mode != MIGRATE_ASYNC) + BUG_ON(!buffer_migrate_lock_buffers(head, mode)); ClearPagePrivate(page); set_page_private(newpage, page_private(page)); @@ -536,10 +602,14 @@ static int writeout(struct address_space *mapping, struct page *page) * Default handling if a filesystem does not provide a migration function. */ static int fallback_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, enum migrate_mode mode) { - if (PageDirty(page)) + if (PageDirty(page)) { + /* Only writeback pages in full synchronous migration */ + if (mode != MIGRATE_SYNC) + return -EBUSY; return writeout(mapping, page); + } /* * Buffers may be managed in a filesystem specific way. @@ -549,7 +619,7 @@ static int fallback_migrate_page(struct address_space *mapping, !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } /* @@ -564,7 +634,7 @@ static int fallback_migrate_page(struct address_space *mapping, * == 0 - success */ static int move_to_new_page(struct page *newpage, struct page *page, - int remap_swapcache, bool sync) + int remap_swapcache, enum migrate_mode mode) { struct address_space *mapping; int rc; @@ -585,29 +655,18 @@ static int move_to_new_page(struct page *newpage, struct page *page, mapping = page_mapping(page); if (!mapping) - rc = migrate_page(mapping, newpage, page); - else { + rc = migrate_page(mapping, newpage, page, mode); + else if (mapping->a_ops->migratepage) /* - * Do not writeback pages if !sync and migratepage is - * not pointing to migrate_page() which is nonblocking - * (swapcache/tmpfs uses migratepage = migrate_page). + * Most pages have a mapping and most filesystems provide a + * migratepage callback. Anonymous pages are part of swap + * space which also has its own migratepage callback. This + * is the most common path for page migration. */ - if (PageDirty(page) && !sync && - mapping->a_ops->migratepage != migrate_page) - rc = -EBUSY; - else if (mapping->a_ops->migratepage) - /* - * Most pages have a mapping and most filesystems - * should provide a migration function. Anonymous - * pages are part of swap space which also has its - * own migration function. This is the most common - * path for page migration. - */ - rc = mapping->a_ops->migratepage(mapping, - newpage, page); - else - rc = fallback_migrate_page(mapping, newpage, page); - } + rc = mapping->a_ops->migratepage(mapping, + newpage, page, mode); + else + rc = fallback_migrate_page(mapping, newpage, page, mode); if (rc) { newpage->mapping = NULL; @@ -621,38 +680,18 @@ static int move_to_new_page(struct page *newpage, struct page *page, return rc; } -/* - * Obtain the lock on page, remove all ptes and migrate the page - * to the newly allocated page in newpage. - */ -static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force, bool offlining, bool sync) +static int __unmap_and_move(struct page *page, struct page *newpage, + int force, bool offlining, enum migrate_mode mode) { - int rc = 0; - int *result = NULL; - struct page *newpage = get_new_page(page, private, &result); + int rc = -EAGAIN; int remap_swapcache = 1; int charge = 0; struct mem_cgroup *mem; struct anon_vma *anon_vma = NULL; - if (!newpage) - return -ENOMEM; - - if (page_count(page) == 1) { - /* page was freed from under us. So we are done. */ - goto move_newpage; - } - if (unlikely(PageTransHuge(page))) - if (unlikely(split_huge_page(page))) - goto move_newpage; - - /* prepare cgroup just returns 0 or -ENOMEM */ - rc = -EAGAIN; - if (!trylock_page(page)) { - if (!force || !sync) - goto move_newpage; + if (!force || mode == MIGRATE_ASYNC) + goto out; /* * It's not safe for direct compaction to call lock_page. @@ -668,7 +707,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, * altogether. */ if (current->flags & PF_MEMALLOC) - goto move_newpage; + goto out; lock_page(page); } @@ -697,10 +736,12 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, if (PageWriteback(page)) { /* - * For !sync, there is no point retrying as the retry loop - * is expected to be too short for PageWriteback to be cleared + * Only in the case of a full syncronous migration is it + * necessary to wait for PageWriteback. In the async case, + * the retry loop is too short and in the sync-light case, + * the overhead of stalling is too much */ - if (!sync) { + if (mode != MIGRATE_SYNC) { rc = -EBUSY; goto uncharge; } @@ -771,7 +812,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, skip_unmap: if (!page_mapped(page)) - rc = move_to_new_page(newpage, page, remap_swapcache, sync); + rc = move_to_new_page(newpage, page, remap_swapcache, mode); if (rc && remap_swapcache) remove_migration_ptes(page, page); @@ -785,27 +826,53 @@ uncharge: mem_cgroup_end_migration(mem, page, newpage, rc == 0); unlock: unlock_page(page); +out: + return rc; +} -move_newpage: +/* + * Obtain the lock on page, remove all ptes and migrate the page + * to the newly allocated page in newpage. + */ +static int unmap_and_move(new_page_t get_new_page, unsigned long private, + struct page *page, int force, bool offlining, + enum migrate_mode mode) +{ + int rc = 0; + int *result = NULL; + struct page *newpage = get_new_page(page, private, &result); + + if (!newpage) + return -ENOMEM; + + if (page_count(page) == 1) { + /* page was freed from under us. So we are done. */ + goto out; + } + + if (unlikely(PageTransHuge(page))) + if (unlikely(split_huge_page(page))) + goto out; + + rc = __unmap_and_move(page, newpage, force, offlining, mode); +out: if (rc != -EAGAIN) { - /* - * A page that has been migrated has all references - * removed and will be freed. A page that has not been - * migrated will have kepts its references and be - * restored. - */ - list_del(&page->lru); + /* + * A page that has been migrated has all references + * removed and will be freed. A page that has not been + * migrated will have kepts its references and be + * restored. + */ + list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); putback_lru_page(page); } - /* * Move the new page to the LRU. If migration was not successful * then this will free the page. */ putback_lru_page(newpage); - if (result) { if (rc) *result = rc; @@ -835,7 +902,8 @@ move_newpage: */ static int unmap_and_move_huge_page(new_page_t get_new_page, unsigned long private, struct page *hpage, - int force, bool offlining, bool sync) + int force, bool offlining, + enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -848,7 +916,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, rc = -EAGAIN; if (!trylock_page(hpage)) { - if (!force || !sync) + if (!force || mode != MIGRATE_SYNC) goto out; lock_page(hpage); } @@ -859,7 +927,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); if (!page_mapped(hpage)) - rc = move_to_new_page(new_hpage, hpage, 1, sync); + rc = move_to_new_page(new_hpage, hpage, 1, mode); if (rc) remove_migration_ptes(hpage, hpage); @@ -902,7 +970,7 @@ out: */ int migrate_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, bool offlining, - bool sync) + enum migrate_mode mode) { int retry = 1; int nr_failed = 0; @@ -923,7 +991,7 @@ int migrate_pages(struct list_head *from, rc = unmap_and_move(get_new_page, private, page, pass > 2, offlining, - sync); + mode); switch(rc) { case -ENOMEM: @@ -953,7 +1021,7 @@ out: int migrate_huge_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, bool offlining, - bool sync) + enum migrate_mode mode) { int retry = 1; int nr_failed = 0; @@ -970,7 +1038,7 @@ int migrate_huge_pages(struct list_head *from, rc = unmap_and_move_huge_page(get_new_page, private, page, pass > 2, offlining, - sync); + mode); switch(rc) { case -ENOMEM: @@ -1099,7 +1167,7 @@ set_status: err = 0; if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_page_node, - (unsigned long)pm, 0, true); + (unsigned long)pm, 0, MIGRATE_SYNC); if (err) putback_lru_pages(&pagelist); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3950dab34bf8..2de8eef09fc5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1956,14 +1956,20 @@ static struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress, - bool sync_migration) + int migratetype, bool sync_migration, + bool *deferred_compaction, + unsigned long *did_some_progress) { struct page *page; - if (!order || compaction_deferred(preferred_zone)) + if (!order) return NULL; + if (compaction_deferred(preferred_zone)) { + *deferred_compaction = true; + return NULL; + } + current->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, nodemask, sync_migration); @@ -1991,7 +1997,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, * but not enough to satisfy watermarks. */ count_vm_event(COMPACTFAIL); - defer_compaction(preferred_zone); + + /* + * As async compaction considers a subset of pageblocks, only + * defer if the failure was a sync compaction failure. + */ + if (sync_migration) + defer_compaction(preferred_zone); cond_resched(); } @@ -2003,8 +2015,9 @@ static inline struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress, - bool sync_migration) + int migratetype, bool sync_migration, + bool *deferred_compaction, + unsigned long *did_some_progress) { return NULL; } @@ -2154,6 +2167,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned long pages_reclaimed = 0; unsigned long did_some_progress; bool sync_migration = false; + bool deferred_compaction = false; /* * In the slowpath, we sanity check order to avoid ever trying to @@ -2234,12 +2248,22 @@ rebalance: zonelist, high_zoneidx, nodemask, alloc_flags, preferred_zone, - migratetype, &did_some_progress, - sync_migration); + migratetype, sync_migration, + &deferred_compaction, + &did_some_progress); if (page) goto got_pg; sync_migration = true; + /* + * If compaction is deferred for high-order allocations, it is because + * sync compaction recently failed. In this is the case and the caller + * has requested the system not be heavily disrupted, fail the + * allocation now instead of entering direct reclaim + */ + if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) + goto nopage; + /* Try direct reclaim and then allocating */ page = __alloc_pages_direct_reclaim(gfp_mask, order, zonelist, high_zoneidx, @@ -2302,8 +2326,9 @@ rebalance: zonelist, high_zoneidx, nodemask, alloc_flags, preferred_zone, - migratetype, &did_some_progress, - sync_migration); + migratetype, sync_migration, + &deferred_compaction, + &did_some_progress); if (page) goto got_pg; } @@ -2327,8 +2352,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, { enum zone_type high_zoneidx = gfp_zone(gfp_mask); struct zone *preferred_zone; - struct page *page; + struct page *page = NULL; int migratetype = allocflags_to_migratetype(gfp_mask); + unsigned int cpuset_mems_cookie; gfp_mask &= gfp_allowed_mask; @@ -2347,15 +2373,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (unlikely(!zonelist->_zonerefs->zone)) return NULL; - get_mems_allowed(); +retry_cpuset: + cpuset_mems_cookie = get_mems_allowed(); + /* The preferred zone is used for statistics later */ first_zones_zonelist(zonelist, high_zoneidx, nodemask ? : &cpuset_current_mems_allowed, &preferred_zone); - if (!preferred_zone) { - put_mems_allowed(); - return NULL; - } + if (!preferred_zone) + goto out; /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, @@ -2365,9 +2391,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); - put_mems_allowed(); trace_mm_page_alloc(page, order, gfp_mask, migratetype); + +out: + /* + * When updating a task's mems_allowed, it is possible to race with + * parallel threads in such a way that an allocation can fail while + * the mask is being updated. If a page allocation is about to fail, + * check if the cpuset changed during allocation and if so, retry. + */ + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + goto retry_cpuset; + return page; } EXPORT_SYMBOL(__alloc_pages_nodemask); @@ -2591,13 +2627,15 @@ void si_meminfo_node(struct sysinfo *val, int nid) bool skip_free_areas_node(unsigned int flags, int nid) { bool ret = false; + unsigned int cpuset_mems_cookie; if (!(flags & SHOW_MEM_FILTER_NODES)) goto out; - get_mems_allowed(); - ret = !node_isset(nid, cpuset_current_mems_allowed); - put_mems_allowed(); + do { + cpuset_mems_cookie = get_mems_allowed(); + ret = !node_isset(nid, cpuset_current_mems_allowed); + } while (!put_mems_allowed(cpuset_mems_cookie)); out: return ret; } @@ -3477,25 +3515,33 @@ static void setup_zone_migrate_reserve(struct zone *zone) if (page_to_nid(page) != zone_to_nid(zone)) continue; - /* Blocks with reserved pages will never free, skip them. */ - block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); - if (pageblock_is_reserved(pfn, block_end_pfn)) - continue; - block_migratetype = get_pageblock_migratetype(page); - /* If this block is reserved, account for it */ - if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) { - reserve--; - continue; - } + /* Only test what is necessary when the reserves are not met */ + if (reserve > 0) { + /* + * Blocks with reserved pages will never free, skip + * them. + */ + block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); + if (pageblock_is_reserved(pfn, block_end_pfn)) + continue; - /* Suitable for reserving if this block is movable */ - if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) { - set_pageblock_migratetype(page, MIGRATE_RESERVE); - move_freepages_block(zone, page, MIGRATE_RESERVE); - reserve--; - continue; + /* If this block is reserved, account for it */ + if (block_migratetype == MIGRATE_RESERVE) { + reserve--; + continue; + } + + /* Suitable for reserving if this block is movable */ + if (block_migratetype == MIGRATE_MOVABLE) { + set_pageblock_migratetype(page, + MIGRATE_RESERVE); + move_freepages_block(zone, page, + MIGRATE_RESERVE); + reserve--; + continue; + } } /* diff --git a/mm/slab.c b/mm/slab.c index 827e2d6429dc..f4516508cc47 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3379,12 +3379,10 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) if (in_interrupt() || (flags & __GFP_THISNODE)) return NULL; nid_alloc = nid_here = numa_mem_id(); - get_mems_allowed(); if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) nid_alloc = cpuset_slab_spread_node(); else if (current->mempolicy) nid_alloc = slab_node(current->mempolicy); - put_mems_allowed(); if (nid_alloc != nid_here) return ____cache_alloc_node(cachep, flags, nid_alloc); return NULL; @@ -3407,14 +3405,17 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) enum zone_type high_zoneidx = gfp_zone(flags); void *obj = NULL; int nid; + unsigned int cpuset_mems_cookie; if (flags & __GFP_THISNODE) return NULL; - get_mems_allowed(); - zonelist = node_zonelist(slab_node(current->mempolicy), flags); local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); +retry_cpuset: + cpuset_mems_cookie = get_mems_allowed(); + zonelist = node_zonelist(slab_node(current->mempolicy), flags); + retry: /* * Look through allowed nodes for objects available @@ -3467,7 +3468,9 @@ retry: } } } - put_mems_allowed(); + + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj)) + goto retry_cpuset; return obj; } diff --git a/mm/slub.c b/mm/slub.c index 10ab2335e2ea..ae6e80ed1e5c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1457,6 +1457,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) struct zone *zone; enum zone_type high_zoneidx = gfp_zone(flags); struct page *page; + unsigned int cpuset_mems_cookie; /* * The defrag ratio allows a configuration of the tradeoffs between @@ -1480,23 +1481,32 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) get_cycles() % 1024 > s->remote_node_defrag_ratio) return NULL; - get_mems_allowed(); - zonelist = node_zonelist(slab_node(current->mempolicy), flags); - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { - struct kmem_cache_node *n; - - n = get_node(s, zone_to_nid(zone)); - - if (n && cpuset_zone_allowed_hardwall(zone, flags) && - n->nr_partial > s->min_partial) { - page = get_partial_node(n); - if (page) { - put_mems_allowed(); - return page; + do { + cpuset_mems_cookie = get_mems_allowed(); + zonelist = node_zonelist(slab_node(current->mempolicy), flags); + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { + struct kmem_cache_node *n; + + n = get_node(s, zone_to_nid(zone)); + + if (n && cpuset_zone_allowed_hardwall(zone, flags) && + n->nr_partial > s->min_partial) { + page = get_partial_node(n); + if (page) { + /* + * Return the object even if + * put_mems_allowed indicated that + * the cpuset mems_allowed was + * updated in parallel. It's a + * harmless race between the alloc + * and the cpuset update. + */ + put_mems_allowed(cpuset_mems_cookie); + return page; + } } } - } - put_mems_allowed(); + } while (!put_mems_allowed(cpuset_mems_cookie)); #endif return NULL; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 47fc26e5831c..ee0d80af4bcc 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -248,35 +248,66 @@ unsigned long shrink_slab(struct shrink_control *shrink, list_for_each_entry(shrinker, &shrinker_list, list) { unsigned long long delta; - unsigned long total_scan; - unsigned long max_pass; + long total_scan; + long max_pass; + int shrink_ret = 0; + long nr; + long new_nr; max_pass = do_shrinker_shrink(shrinker, shrink, 0); + if (max_pass <= 0) + continue; + + /* + * copy the current shrinker scan count into a local variable + * and zero it so that other concurrent shrinker invocations + * don't also do this scanning work. + */ + do { + nr = shrinker->nr; + } while (cmpxchg(&shrinker->nr, nr, 0) != nr); + + total_scan = nr; delta = (4 * nr_pages_scanned) / shrinker->seeks; delta *= max_pass; do_div(delta, lru_pages + 1); - shrinker->nr += delta; - if (shrinker->nr < 0) { + total_scan += delta; + if (total_scan < 0) { printk(KERN_ERR "shrink_slab: %pF negative objects to " "delete nr=%ld\n", - shrinker->shrink, shrinker->nr); - shrinker->nr = max_pass; + shrinker->shrink, total_scan); + total_scan = max_pass; } /* + * We need to avoid excessive windup on filesystem shrinkers + * due to large numbers of GFP_NOFS allocations causing the + * shrinkers to return -1 all the time. This results in a large + * nr being built up so when a shrink that can do some work + * comes along it empties the entire cache due to nr >>> + * max_pass. This is bad for sustaining a working set in + * memory. + * + * Hence only allow the shrinker to scan the entire cache when + * a large delta change is calculated directly. + */ + if (delta < max_pass / 4) + total_scan = min(total_scan, max_pass / 2); + + /* * Avoid risking looping forever due to too large nr value: * never try to free more than twice the estimate number of * freeable entries. */ - if (shrinker->nr > max_pass * 2) - shrinker->nr = max_pass * 2; + if (total_scan > max_pass * 2) + total_scan = max_pass * 2; - total_scan = shrinker->nr; - shrinker->nr = 0; + trace_mm_shrink_slab_start(shrinker, shrink, nr, + nr_pages_scanned, lru_pages, + max_pass, delta, total_scan); while (total_scan >= SHRINK_BATCH) { long this_scan = SHRINK_BATCH; - int shrink_ret; int nr_before; nr_before = do_shrinker_shrink(shrinker, shrink, 0); @@ -292,7 +323,19 @@ unsigned long shrink_slab(struct shrink_control *shrink, cond_resched(); } - shrinker->nr += total_scan; + /* + * move the unused scan count back into the shrinker in a + * manner that handles concurrent updates. If we exhausted the + * scan, there is no need to do an update. + */ + do { + nr = shrinker->nr; + new_nr = total_scan + nr; + if (total_scan <= 0) + break; + } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr); + + trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); } up_read(&shrinker_rwsem); out: @@ -683,7 +726,13 @@ static enum page_references page_check_references(struct page *page, */ SetPageReferenced(page); - if (referenced_page) + if (referenced_page || referenced_ptes > 1) + return PAGEREF_ACTIVATE; + + /* + * Activate file-backed executable pages after first usage. + */ + if (vm_flags & VM_EXEC) return PAGEREF_ACTIVATE; return PAGEREF_KEEP; @@ -972,23 +1021,27 @@ keep_lumpy: * * returns 0 on success, -ve errno on failure. */ -int __isolate_lru_page(struct page *page, int mode, int file) +int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) { + bool all_lru_mode; int ret = -EINVAL; /* Only take pages on the LRU. */ if (!PageLRU(page)) return ret; + all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == + (ISOLATE_ACTIVE|ISOLATE_INACTIVE); + /* * When checking the active state, we need to be sure we are * dealing with comparible boolean values. Take the logical not * of each. */ - if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) + if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) return ret; - if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file) + if (!all_lru_mode && !!page_is_file_cache(page) != file) return ret; /* @@ -1001,6 +1054,43 @@ int __isolate_lru_page(struct page *page, int mode, int file) ret = -EBUSY; + /* + * To minimise LRU disruption, the caller can indicate that it only + * wants to isolate pages it will be able to operate on without + * blocking - clean pages for the most part. + * + * ISOLATE_CLEAN means that only clean pages should be isolated. This + * is used by reclaim when it is cannot write to backing storage + * + * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages + * that it is possible to migrate without blocking + */ + if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) { + /* All the caller can do on PageWriteback is block */ + if (PageWriteback(page)) + return ret; + + if (PageDirty(page)) { + struct address_space *mapping; + + /* ISOLATE_CLEAN means only clean pages */ + if (mode & ISOLATE_CLEAN) + return ret; + + /* + * Only pages without mappings or that have a + * ->migratepage callback are possible to migrate + * without blocking + */ + mapping = page_mapping(page); + if (mapping && !mapping->a_ops->migratepage) + return ret; + } + } + + if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) + return ret; + if (likely(get_page_unless_zero(page))) { /* * Be careful not to clear PageLRU until after we're @@ -1036,7 +1126,8 @@ int __isolate_lru_page(struct page *page, int mode, int file) */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, struct list_head *src, struct list_head *dst, - unsigned long *scanned, int order, int mode, int file) + unsigned long *scanned, int order, isolate_mode_t mode, + int file) { unsigned long nr_taken = 0; unsigned long nr_lumpy_taken = 0; @@ -1111,7 +1202,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, * anon page which don't already have a swap slot is * pointless. */ - if (nr_swap_pages <= 0 && PageAnon(cursor_page) && + if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) && !PageSwapCache(cursor_page)) break; @@ -1161,8 +1252,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, static unsigned long isolate_pages_global(unsigned long nr, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, - int active, int file) + isolate_mode_t mode, + struct zone *z, int active, int file) { int lru = LRU_BASE; if (active) @@ -1413,6 +1504,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, unsigned long nr_taken; unsigned long nr_anon; unsigned long nr_file; + isolate_mode_t reclaim_mode = ISOLATE_INACTIVE; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1423,15 +1515,21 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, } set_reclaim_mode(priority, sc, false); + if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) + reclaim_mode |= ISOLATE_ACTIVE; + lru_add_drain(); + + if (!sc->may_unmap) + reclaim_mode |= ISOLATE_UNMAPPED; + if (!sc->may_writepage) + reclaim_mode |= ISOLATE_CLEAN; + spin_lock_irq(&zone->lru_lock); if (scanning_global_lru(sc)) { - nr_taken = isolate_pages_global(nr_to_scan, - &page_list, &nr_scanned, sc->order, - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ? - ISOLATE_BOTH : ISOLATE_INACTIVE, - zone, 0, file); + nr_taken = isolate_pages_global(nr_to_scan, &page_list, + &nr_scanned, sc->order, reclaim_mode, zone, 0, file); zone->pages_scanned += nr_scanned; if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, @@ -1440,12 +1538,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); } else { - nr_taken = mem_cgroup_isolate_pages(nr_to_scan, - &page_list, &nr_scanned, sc->order, - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ? - ISOLATE_BOTH : ISOLATE_INACTIVE, - zone, sc->mem_cgroup, - 0, file); + nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list, + &nr_scanned, sc->order, reclaim_mode, zone, + sc->mem_cgroup, 0, file); /* * mem_cgroup_isolate_pages() keeps track of * scanned pages on its own. @@ -1542,19 +1637,26 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, struct page *page; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); unsigned long nr_rotated = 0; + isolate_mode_t reclaim_mode = ISOLATE_ACTIVE; lru_add_drain(); + + if (!sc->may_unmap) + reclaim_mode |= ISOLATE_UNMAPPED; + if (!sc->may_writepage) + reclaim_mode |= ISOLATE_CLEAN; + spin_lock_irq(&zone->lru_lock); if (scanning_global_lru(sc)) { nr_taken = isolate_pages_global(nr_pages, &l_hold, &pgscanned, sc->order, - ISOLATE_ACTIVE, zone, + reclaim_mode, zone, 1, file); zone->pages_scanned += pgscanned; } else { nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, - ISOLATE_ACTIVE, zone, + reclaim_mode, zone, sc->mem_cgroup, 1, file); /* * mem_cgroup_isolate_pages() keeps track of @@ -1747,23 +1849,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, u64 fraction[2], denominator; enum lru_list l; int noswap = 0; - int force_scan = 0; + bool force_scan = false; unsigned long nr_force_scan[2]; - - anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); - - if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) { - /* kswapd does zone balancing and need to scan this zone */ - if (scanning_global_lru(sc) && current_is_kswapd()) - force_scan = 1; - /* memcg may have small limit and need to avoid priority drop */ - if (!scanning_global_lru(sc)) - force_scan = 1; - } + /* kswapd does zone balancing and needs to scan this zone */ + if (scanning_global_lru(sc) && current_is_kswapd() && + zone->all_unreclaimable) + force_scan = true; + /* memcg may have small limit and need to avoid priority drop */ + if (!scanning_global_lru(sc)) + force_scan = true; /* If we have no swap space, do not bother scanning anon pages. */ if (!sc->may_swap || (nr_swap_pages <= 0)) { @@ -1776,6 +1871,11 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, goto out; } + anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); + file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + if (scanning_global_lru(sc)) { free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, @@ -1912,8 +2012,9 @@ static inline bool should_continue_reclaim(struct zone *zone, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + if (nr_swap_pages > 0) + inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; @@ -1985,6 +2086,42 @@ restart: throttle_vm_writeout(sc->gfp_mask); } +/* Returns true if compaction should go ahead for a high-order request */ +static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) +{ + unsigned long balance_gap, watermark; + bool watermark_ok; + + /* Do not consider compaction for orders reclaim is meant to satisfy */ + if (sc->order <= PAGE_ALLOC_COSTLY_ORDER) + return false; + + /* + * Compaction takes time to run and there are potentially other + * callers using the pages just freed. Continue reclaiming until + * there is a buffer of free pages available to give compaction + * a reasonable chance of completing and allocating the page + */ + balance_gap = min(low_wmark_pages(zone), + (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / + KSWAPD_ZONE_BALANCE_GAP_RATIO); + watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order); + watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); + + /* + * If compaction is deferred, reclaim up to a point where + * compaction will have a chance of success when re-enabled + */ + if (compaction_deferred(zone)) + return watermark_ok; + + /* If compaction is not ready to start, keep reclaiming */ + if (!compaction_suitable(zone, sc->order)) + return false; + + return watermark_ok; +} + /* * This is the direct reclaim path, for page-allocating processes. We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -2000,14 +2137,20 @@ restart: * * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. + * + * This function returns true if a zone is being reclaimed for a costly + * high-order allocation and compaction is ready to begin. This indicates to + * the caller that it should consider retrying the allocation instead of + * further reclaim. */ -static void shrink_zones(int priority, struct zonelist *zonelist, +static bool shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; + bool aborted_reclaim = false; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -2022,6 +2165,21 @@ static void shrink_zones(int priority, struct zonelist *zonelist, continue; if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ + if (COMPACTION_BUILD) { + /* + * If we already have plenty of memory free for + * compaction in this zone, don't free any more. + * Even though compaction is invoked for any + * non-zero order, only frequent costly order + * reclamation is disruptive enough to become a + * noticable problem, like transparent huge page + * allocations. + */ + if (compaction_ready(zone, sc)) { + aborted_reclaim = true; + continue; + } + } /* * This steals pages from memory cgroups over softlimit * and returns the number of reclaimed pages and @@ -2039,6 +2197,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist, shrink_zone(priority, zone, sc); } + + return aborted_reclaim; } static bool zone_reclaimable(struct zone *zone) @@ -2092,8 +2252,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct zoneref *z; struct zone *zone; unsigned long writeback_threshold; + bool aborted_reclaim; - get_mems_allowed(); delayacct_freepages_start(); if (scanning_global_lru(sc)) @@ -2103,7 +2263,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->nr_scanned = 0; if (!priority) disable_swap_token(sc->mem_cgroup); - shrink_zones(priority, zonelist, sc); + aborted_reclaim = shrink_zones(priority, zonelist, sc); + /* * Don't shrink slabs when reclaiming memory from * over limit cgroups @@ -2155,7 +2316,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, out: delayacct_freepages_end(); - put_mems_allowed(); if (sc->nr_reclaimed) return sc->nr_reclaimed; @@ -2168,6 +2328,10 @@ out: if (oom_killer_disabled) return 0; + /* Aborted reclaim to try compaction? don't OOM, then */ + if (aborted_reclaim) + return 1; + /* top priority shrink_zones still had more to do? don't OOM, then */ if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) return 1; @@ -2459,6 +2623,9 @@ loop_again: high_wmark_pages(zone), 0, 0)) { end_zone = i; break; + } else { + /* If balanced, clear the congested flag */ + zone_clear_flag(zone, ZONE_CONGESTED); } } if (i < 0) @@ -2695,7 +2862,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) * them before going back to sleep. */ set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); - schedule(); + + if (!kthread_should_stop()) + schedule(); + set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); } else { if (remaining) @@ -2722,7 +2892,9 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) static int kswapd(void *p) { unsigned long order, new_order; + unsigned balanced_order; int classzone_idx, new_classzone_idx; + int balanced_classzone_idx; pg_data_t *pgdat = (pg_data_t*)p; struct task_struct *tsk = current; @@ -2753,7 +2925,9 @@ static int kswapd(void *p) set_freezable(); order = new_order = 0; + balanced_order = 0; classzone_idx = new_classzone_idx = pgdat->nr_zones - 1; + balanced_classzone_idx = classzone_idx; for ( ; ; ) { int ret; @@ -2762,7 +2936,8 @@ static int kswapd(void *p) * new request of a similar or harder type will succeed soon * so consider going to sleep on the basis we reclaimed at */ - if (classzone_idx >= new_classzone_idx && order == new_order) { + if (balanced_classzone_idx >= new_classzone_idx && + balanced_order == new_order) { new_order = pgdat->kswapd_max_order; new_classzone_idx = pgdat->classzone_idx; pgdat->kswapd_max_order = 0; @@ -2777,9 +2952,12 @@ static int kswapd(void *p) order = new_order; classzone_idx = new_classzone_idx; } else { - kswapd_try_to_sleep(pgdat, order, classzone_idx); + kswapd_try_to_sleep(pgdat, balanced_order, + balanced_classzone_idx); order = pgdat->kswapd_max_order; classzone_idx = pgdat->classzone_idx; + new_order = order; + new_classzone_idx = classzone_idx; pgdat->kswapd_max_order = 0; pgdat->classzone_idx = pgdat->nr_zones - 1; } @@ -2794,7 +2972,9 @@ static int kswapd(void *p) */ if (!ret) { trace_mm_vmscan_kswapd_wake(pgdat->node_id, order); - order = balance_pgdat(pgdat, order, &classzone_idx); + balanced_classzone_idx = classzone_idx; + balanced_order = balance_pgdat(pgdat, order, + &balanced_classzone_idx); } } return 0; @@ -2952,14 +3132,17 @@ int kswapd_run(int nid) } /* - * Called by memory hotplug when all memory in a node is offlined. + * Called by memory hotplug when all memory in a node is offlined. Caller must + * hold lock_memory_hotplug(). */ void kswapd_stop(int nid) { struct task_struct *kswapd = NODE_DATA(nid)->kswapd; - if (kswapd) + if (kswapd) { kthread_stop(kswapd); + NODE_DATA(nid)->kswapd = NULL; + } } static int __init kswapd_init(void) diff --git a/mm/vmstat.c b/mm/vmstat.c index 8da80e99476f..aae9f56c8bc0 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -78,7 +78,7 @@ void vm_events_fold_cpu(int cpu) * * vm_stat contains the global counters */ -atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; +atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; EXPORT_SYMBOL(vm_stat); #ifdef CONFIG_SMP diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 449087373d88..eae6a4e9cbfc 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -241,6 +241,7 @@ int br_add_bridge(struct net *net, const char *name) return -ENOMEM; dev_net_set(dev, net); + dev->rtnl_link_ops = &br_link_ops; res = register_netdev(dev); if (res) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2c1605525680..71861a9c4008 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -203,7 +203,7 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static struct rtnl_link_ops br_link_ops __read_mostly = { +struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1ca1b1c7560e..7c1f3a09712c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -529,6 +529,7 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) #endif /* br_netlink.c */ +extern struct rtnl_link_ops br_link_ops; extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4fb77049e83d..891b19f2c003 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1964,6 +1964,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCSUM: case ETHTOOL_GTXCSUM: case ETHTOOL_GSG: + case ETHTOOL_GSSET_INFO: case ETHTOOL_GSTRINGS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 05db410fd135..207a178f73b2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -357,22 +357,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { - int total_len, eth_len, ip_len, udp_len; + int total_len, ip_len, udp_len; struct sk_buff *skb; struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; udp_len = len + sizeof(*udph); - ip_len = eth_len = udp_len + sizeof(*iph); - total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); - skb = find_skb(np, total_len, total_len - len); + skb = find_skb(np, total_len + np->dev->needed_tailroom, + total_len - len); if (!skb) return; skb_copy_to_linear_data(skb, msg, len); - skb->len += len; + skb_put(skb, len); skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); diff --git a/net/core/sock.c b/net/core/sock.c index ebbd3e8d4f1f..e276947777e2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1501,6 +1501,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, gfp_t gfp_mask; long timeo; int err; + int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + + err = -EMSGSIZE; + if (npages > MAX_SKB_FRAGS) + goto failure; gfp_mask = sk->sk_allocation; if (gfp_mask & __GFP_WAIT) @@ -1519,14 +1524,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { skb = alloc_skb(header_len, gfp_mask); if (skb) { - int npages; int i; /* No pages, we're done... */ if (!data_len) break; - npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; skb->truesize += data_len; skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7410a8c28e14..6e33b79cb688 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5761,6 +5761,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if (th->syn) { + if (th->fin) + goto discard; if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e600f827fe7..7c5b4cb88382 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2846,10 +2846,6 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; -#ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#endif net->ipv6.ip6_rt_gc_expire = 30*HZ; ret = 0; @@ -2870,10 +2866,6 @@ out_ip6_dst_ops: static void __net_exit ip6_route_net_exit(struct net *net) { -#ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); -#endif kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); @@ -2882,11 +2874,33 @@ static void __net_exit ip6_route_net_exit(struct net *net) dst_entries_destroy(&net->ipv6.ip6_dst_ops); } +static int __net_init ip6_route_net_init_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); + proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif + return 0; +} + +static void __net_exit ip6_route_net_exit_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +#endif +} + static struct pernet_operations ip6_route_net_ops = { .init = ip6_route_net_init, .exit = ip6_route_net_exit, }; +static struct pernet_operations ip6_route_net_late_ops = { + .init = ip6_route_net_init_late, + .exit = ip6_route_net_exit_late, +}; + static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, .priority = 0, @@ -2936,19 +2950,25 @@ int __init ip6_route_init(void) if (ret) goto xfrm6_init; + ret = register_pernet_subsys(&ip6_route_net_late_ops); + if (ret) + goto fib6_rules_init; + ret = -ENOBUFS; if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) - goto fib6_rules_init; + goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); if (ret) - goto fib6_rules_init; + goto out_register_late_subsys; out: return ret; +out_register_late_subsys: + unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: @@ -2967,6 +2987,7 @@ out_kmem_cache: void ip6_route_cleanup(void) { unregister_netdevice_notifier(&ip6_route_dev_notifier); + unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_cleanup(); xfrm6_fini(); fib6_gc_cleanup(); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d2726a74597d..3c55f633928e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -167,6 +167,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (dev) { unregister_netdev(dev); spriv->dev = NULL; + module_put(THIS_MODULE); } } } @@ -254,6 +255,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p if (rc < 0) goto out_del_dev; + __module_get(THIS_MODULE); /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 423b0fb7fc39..1909d990d8a1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2291,7 +2291,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) * frames that we didn't handle, including returning unknown * ones. For all other modes we will return them to the sender, * setting the 0x80 bit in the action category, as required by - * 802.11-2007 7.3.1.11. + * 802.11-2012 9.24.4. * Newer versions of hostapd shall also use the management frame * registration mechanisms, but older ones still use cooked * monitor interfaces so push all frames there. @@ -2301,6 +2301,9 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) return RX_DROP_MONITOR; + if (is_multicast_ether_addr(mgmt->da)) + return RX_DROP_MONITOR; + /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) return RX_DROP_UNUSABLE; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7457697016e3..90b73d1f902a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1358,7 +1358,7 @@ static void reg_set_request_processed(void) spin_unlock(®_requests_lock); if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) - cancel_delayed_work_sync(®_timeout); + cancel_delayed_work(®_timeout); if (need_more_processing) schedule_work(®_work); diff --git a/net/wireless/util.c b/net/wireless/util.c index 30f68dc76ac0..bbcb58e61ed5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -807,7 +807,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, ntype == NL80211_IFTYPE_P2P_CLIENT)) return -EBUSY; - if (ntype != otype) { + if (ntype != otype && netif_running(dev)) { err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, ntype); if (err) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 51412e1296f7..baa7a49acbb2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -20132,6 +20132,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0272, .name = "ALC272", .patch = patch_alc662 }, { .id = 0x10ec0275, .name = "ALC275", .patch = patch_alc269 }, { .id = 0x10ec0276, .name = "ALC276", .patch = patch_alc269 }, + { .id = 0x10ec0280, .name = "ALC280", .patch = patch_alc269 }, { .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660", .patch = patch_alc861 }, { .id = 0x10ec0660, .name = "ALC660-VD", .patch = patch_alc861vd }, diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 789453d44ec5..0b08bb7aa3ab 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -949,9 +949,7 @@ static int aic3x_hw_params(struct snd_pcm_substream *substream, } found: - data = snd_soc_read(codec, AIC3X_PLL_PROGA_REG); - snd_soc_write(codec, AIC3X_PLL_PROGA_REG, - data | (pll_p << PLLP_SHIFT)); + snd_soc_update_bits(codec, AIC3X_PLL_PROGA_REG, PLLP_MASK, pll_p); snd_soc_write(codec, AIC3X_OVRF_STATUS_AND_PLLR_REG, pll_r << PLLR_SHIFT); snd_soc_write(codec, AIC3X_PLL_PROGB_REG, pll_j << PLLJ_SHIFT); diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h index 06a19784b162..16d999957b89 100644 --- a/sound/soc/codecs/tlv320aic3x.h +++ b/sound/soc/codecs/tlv320aic3x.h @@ -166,6 +166,7 @@ /* PLL registers bitfields */ #define PLLP_SHIFT 0 +#define PLLP_MASK 7 #define PLLQ_SHIFT 3 #define PLLR_SHIFT 0 #define PLLJ_SHIFT 2 |