aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorGary S. Robertson <gary.robertson@linaro.org>2015-01-02 10:31:32 -0600
committerGary S. Robertson <gary.robertson@linaro.org>2015-01-02 10:31:32 -0600
commit2c245921406e568af3e35edc2539262c83d89098 (patch)
tree44c621f00542d3c9fe0cba4057a52be8e04b0035 /arch
parent0de06acbd437c51e725c7b7bd9b867b2656a8cfd (diff)
parentd72e78d889908d4ff8524a4a97378d3b646746cc (diff)
Merge tag 'lsk-v3.14-14.12' of http://git.linaro.org/kernel/linux-linaro-stable into linux-linaro-lng-v3.14
LSK 14.12 v3.14
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/thread_info.h11
-rw-r--r--arch/arm/kernel/traps.c31
-rw-r--r--arch/arm/mm/proc-v7.S2
-rw-r--r--arch/arm/mm/proc-xscale.S4
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/mips/loongson/common/Makefile3
-rw-r--r--arch/mips/oprofile/backtrace.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c3
-rw-r--r--arch/powerpc/platforms/powernv/pci.c4
-rw-r--r--arch/powerpc/platforms/pseries/msi.c2
-rw-r--r--arch/powerpc/xmon/xmon.c6
-rw-r--r--arch/sparc/include/uapi/asm/swab.h12
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/kvm_para.h10
-rw-r--r--arch/x86/include/asm/page_32_types.h1
-rw-r--r--arch/x86/include/asm/page_64_types.h11
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/entry_64.S81
-rw-r--r--arch/x86/kernel/traps.c71
-rw-r--r--arch/x86/mm/init_64.c11
-rw-r--r--arch/x86/tools/calc_run_size.pl11
25 files changed, 142 insertions, 150 deletions
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 71a06b293489..3e635eed9c6c 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -43,16 +43,6 @@ struct cpu_context_save {
__u32 extra[2]; /* Xscale 'acc' register, etc */
};
-struct arm_restart_block {
- union {
- /* For user cache flushing */
- struct {
- unsigned long start;
- unsigned long end;
- } cache;
- };
-};
-
/*
* low level task data that entry.S needs immediate access to.
* __switch_to() assumes cpu_context follows immediately after cpu_domain.
@@ -78,7 +68,6 @@ struct thread_info {
unsigned long thumbee_state; /* ThumbEE Handler Base register */
#endif
struct restart_block restart_block;
- struct arm_restart_block arm_restart_block;
};
#define INIT_THREAD_INFO(tsk) \
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 9265b8bb529a..3f314433d653 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -510,8 +510,6 @@ static int bad_syscall(int n, struct pt_regs *regs)
return regs->ARM_r0;
}
-static long do_cache_op_restart(struct restart_block *);
-
static inline int
__do_cache_op(unsigned long start, unsigned long end)
{
@@ -520,24 +518,8 @@ __do_cache_op(unsigned long start, unsigned long end)
do {
unsigned long chunk = min(PAGE_SIZE, end - start);
- if (signal_pending(current)) {
- struct thread_info *ti = current_thread_info();
-
- ti->restart_block = (struct restart_block) {
- .fn = do_cache_op_restart,
- };
-
- ti->arm_restart_block = (struct arm_restart_block) {
- {
- .cache = {
- .start = start,
- .end = end,
- },
- },
- };
-
- return -ERESTART_RESTARTBLOCK;
- }
+ if (fatal_signal_pending(current))
+ return 0;
ret = flush_cache_user_range(start, start + chunk);
if (ret)
@@ -550,15 +532,6 @@ __do_cache_op(unsigned long start, unsigned long end)
return 0;
}
-static long do_cache_op_restart(struct restart_block *unused)
-{
- struct arm_restart_block *restart_block;
-
- restart_block = &current_thread_info()->arm_restart_block;
- return __do_cache_op(restart_block->cache.start,
- restart_block->cache.end);
-}
-
static inline int
do_cache_op(unsigned long start, unsigned long end, int flags)
{
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 74f6033e76dd..fdedc31e0f40 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -211,7 +211,6 @@ __v7_pj4b_setup:
/* Auxiliary Debug Modes Control 1 Register */
#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */
#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */
-#define PJ4B_BCK_OFF_STREX (1 << 5) /* Enable the back off of STREX instr */
#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */
/* Auxiliary Debug Modes Control 2 Register */
@@ -234,7 +233,6 @@ __v7_pj4b_setup:
/* Auxiliary Debug Modes Control 1 Register */
mrc p15, 1, r0, c15, c1, 1
orr r0, r0, #PJ4B_CLEAN_LINE
- orr r0, r0, #PJ4B_BCK_OFF_STREX
orr r0, r0, #PJ4B_INTER_PARITY
bic r0, r0, #PJ4B_STATIC_BP
mcr p15, 1, r0, c15, c1, 1
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index d19b1cfcad91..b34b95f45cb3 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -535,7 +535,7 @@ ENTRY(cpu_xscale_do_suspend)
mrc p15, 0, r5, c15, c1, 0 @ CP access reg
mrc p15, 0, r6, c13, c0, 0 @ PID
mrc p15, 0, r7, c3, c0, 0 @ domain ID
- mrc p15, 0, r8, c1, c1, 0 @ auxiliary control reg
+ mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg
mrc p15, 0, r9, c1, c0, 0 @ control reg
bic r4, r4, #2 @ clear frequency change bit
stmia r0, {r4 - r9} @ store cp regs
@@ -552,7 +552,7 @@ ENTRY(cpu_xscale_do_resume)
mcr p15, 0, r6, c13, c0, 0 @ PID
mcr p15, 0, r7, c3, c0, 0 @ domain ID
mcr p15, 0, r1, c2, c0, 0 @ translation table base addr
- mcr p15, 0, r8, c1, c1, 0 @ auxiliary control reg
+ mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg
mov r0, r9 @ control register
b cpu_resume_mmu
ENDPROC(cpu_xscale_do_resume)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 33608f263ad0..e28747e15a36 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -271,7 +271,7 @@ static inline pmd_t pte_pmd(pte_t pte)
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
-#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd)
+#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
static inline int has_transparent_hugepage(void)
{
diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile
index 9e4484ccbb03..9005a8d60969 100644
--- a/arch/mips/loongson/common/Makefile
+++ b/arch/mips/loongson/common/Makefile
@@ -11,7 +11,8 @@ obj-$(CONFIG_PCI) += pci.o
# Serial port support
#
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_SERIAL_8250) += serial.o
+loongson-serial-$(CONFIG_SERIAL_8250) := serial.o
+obj-y += $(loongson-serial-m) $(loongson-serial-y)
obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c
index 6854ed5097d2..83a1dfd8f0e3 100644
--- a/arch/mips/oprofile/backtrace.c
+++ b/arch/mips/oprofile/backtrace.c
@@ -92,7 +92,7 @@ static inline int unwind_user_frame(struct stackframe *old_frame,
/* This marks the end of the previous function,
which means we overran. */
break;
- stack_size = (unsigned) stack_adjustment;
+ stack_size = (unsigned long) stack_adjustment;
} else if (is_ra_save_ins(&ip)) {
int ra_slot = ip.i_format.simmediate;
if (ra_slot < 0)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index beedaf0c5e75..d558b8595e6f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -902,7 +902,6 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int is_64, struct msi_msg *msg)
{
struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
- struct pci_dn *pdn = pci_get_pdn(dev);
struct irq_data *idata;
struct irq_chip *ichip;
unsigned int xive_num = hwirq - phb->msi_base;
@@ -918,7 +917,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
return -ENXIO;
/* Force 32-bit MSI on some broken devices */
- if (pdn && pdn->force_32bit_msi)
+ if (dev->no_64bit_msi)
is_64 = 0;
/* Assign XIVE to PE */
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 8518817dcdfd..52c1162bcee3 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1,3 +1,4 @@
+
/*
* Support PCI/PCIe on PowerNV platforms
*
@@ -50,9 +51,8 @@ static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(pdev);
- if (pdn && pdn->force_32bit_msi && !phb->msi32_support)
+ if (pdev->no_64bit_msi && !phb->msi32_support)
return -ENODEV;
return (phb && phb->msi_bmp.bitmap) ? 0 : -ENODEV;
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 0c882e83c4ce..6849d85ea0d5 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -428,7 +428,7 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
*/
again:
if (type == PCI_CAP_ID_MSI) {
- if (pdn->force_32bit_msi) {
+ if (pdev->no_64bit_msi) {
rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
if (rc < 0) {
/*
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index b07909850f77..bc5fbc201bcb 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -288,10 +288,10 @@ static inline void disable_surveillance(void)
args.token = rtas_token("set-indicator");
if (args.token == RTAS_UNKNOWN_SERVICE)
return;
- args.nargs = 3;
- args.nret = 1;
+ args.nargs = cpu_to_be32(3);
+ args.nret = cpu_to_be32(1);
args.rets = &args.args[3];
- args.args[0] = SURVEILLANCE_TOKEN;
+ args.args[0] = cpu_to_be32(SURVEILLANCE_TOKEN);
args.args[1] = 0;
args.args[2] = 0;
enter_rtas(__pa(&args));
diff --git a/arch/sparc/include/uapi/asm/swab.h b/arch/sparc/include/uapi/asm/swab.h
index a34ad079487e..4c7c12d69bea 100644
--- a/arch/sparc/include/uapi/asm/swab.h
+++ b/arch/sparc/include/uapi/asm/swab.h
@@ -9,9 +9,9 @@ static inline __u16 __arch_swab16p(const __u16 *addr)
{
__u16 ret;
- __asm__ __volatile__ ("lduha [%1] %2, %0"
+ __asm__ __volatile__ ("lduha [%2] %3, %0"
: "=r" (ret)
- : "r" (addr), "i" (ASI_PL));
+ : "m" (*addr), "r" (addr), "i" (ASI_PL));
return ret;
}
#define __arch_swab16p __arch_swab16p
@@ -20,9 +20,9 @@ static inline __u32 __arch_swab32p(const __u32 *addr)
{
__u32 ret;
- __asm__ __volatile__ ("lduwa [%1] %2, %0"
+ __asm__ __volatile__ ("lduwa [%2] %3, %0"
: "=r" (ret)
- : "r" (addr), "i" (ASI_PL));
+ : "m" (*addr), "r" (addr), "i" (ASI_PL));
return ret;
}
#define __arch_swab32p __arch_swab32p
@@ -31,9 +31,9 @@ static inline __u64 __arch_swab64p(const __u64 *addr)
{
__u64 ret;
- __asm__ __volatile__ ("ldxa [%1] %2, %0"
+ __asm__ __volatile__ ("ldxa [%2] %3, %0"
: "=r" (ret)
- : "r" (addr), "i" (ASI_PL));
+ : "m" (*addr), "r" (addr), "i" (ASI_PL));
return ret;
}
#define __arch_swab64p __arch_swab64p
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 5f1296872aed..1717156f4dd1 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -203,6 +203,7 @@
#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */
#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */
+#define X86_FEATURE_VMMCALL (8*32+15) /* Prefer vmmcall to vmcall */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c7678e43465b..e62cf897f781 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
#define _ASM_X86_KVM_PARA_H
#include <asm/processor.h>
+#include <asm/alternative.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
-/* This instruction is vmcall. On non-VT architectures, it will generate a
- * trap that we will then rewrite to the appropriate instruction.
+#ifdef CONFIG_DEBUG_RODATA
+#define KVM_HYPERCALL \
+ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
+#else
+/* On AMD processors, vmcall will generate a trap that we will
+ * then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
+#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f48b17df4224..3a52ee0e726d 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -20,7 +20,6 @@
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define STACKFAULT_STACK 0
#define DOUBLEFAULT_STACK 1
#define NMI_STACK 0
#define DEBUG_STACK 0
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8de6d9cf3b95..d54d1eebeffe 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,12 +14,11 @@
#define IRQ_STACK_ORDER 2
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 2
+#define DEBUG_STACK 3
+#define MCE_STACK 4
+#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e1940c06ed02..e870ea9232c3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -144,7 +144,7 @@ struct thread_info {
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
- _TIF_USER_RETURN_NOTIFY)
+ _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 58d66fe06b61..b409b17efb48 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void);
#ifdef CONFIG_TRACING
asmlinkage void trace_page_fault(void);
+#define trace_stack_segment stack_segment
#define trace_divide_error divide_error
#define trace_bounds bounds
#define trace_invalid_op invalid_op
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c67ffa686064..c005fdd52529 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -508,6 +508,13 @@ static void early_init_amd(struct cpuinfo_x86 *c)
}
#endif
+ /*
+ * This is only needed to tell the kernel whether to use VMCALL
+ * and VMMCALL. VMMCALL is never executed except under virt, so
+ * we can set it unconditionally.
+ */
+ set_cpu_cap(c, X86_FEATURE_VMMCALL);
+
/* F16h erratum 793, CVE-2013-6885 */
if (c->x86 == 0x16 && c->x86_model <= 0xf) {
u64 val;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3f27f5fd0847..e6bddd5b9da3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -144,6 +144,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
static int __init x86_xsave_setup(char *s)
{
+ if (strlen(s))
+ return 0;
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
setup_clear_cpu_cap(X86_FEATURE_AVX);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index addb207dab92..66e274a3d968 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
[ DEBUG_STACK-1 ] = "#DB",
[ NMI_STACK-1 ] = "NMI",
[ DOUBLEFAULT_STACK-1 ] = "#DF",
- [ STACKFAULT_STACK-1 ] = "#SS",
[ MCE_STACK-1 ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 03cd2a8f6009..02553d6d183d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1053,9 +1053,15 @@ ENTRY(native_iret)
jnz native_irq_return_ldt
#endif
+.global native_irq_return_iret
native_irq_return_iret:
+ /*
+ * This may fault. Non-paranoid faults on return to userspace are
+ * handled by fixup_bad_iret. These include #SS, #GP, and #NP.
+ * Double-faults due to espfix64 are handled in do_double_fault.
+ * Other faults here are fatal.
+ */
iretq
- _ASM_EXTABLE(native_irq_return_iret, bad_iret)
#ifdef CONFIG_X86_ESPFIX64
native_irq_return_ldt:
@@ -1083,25 +1089,6 @@ native_irq_return_ldt:
jmp native_irq_return_iret
#endif
- .section .fixup,"ax"
-bad_iret:
- /*
- * The iret traps when the %cs or %ss being restored is bogus.
- * We've lost the original trap vector and error code.
- * #GPF is the most likely one to get for an invalid selector.
- * So pretend we completed the iret and took the #GPF in user mode.
- *
- * We are now running with the kernel GS after exception recovery.
- * But error_entry expects us to have user GS to match the user %cs,
- * so swap back.
- */
- pushq $0
-
- SWAPGS
- jmp general_protection
-
- .previous
-
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
@@ -1147,37 +1134,6 @@ ENTRY(retint_kernel)
CFI_ENDPROC
END(common_interrupt)
- /*
- * If IRET takes a fault on the espfix stack, then we
- * end up promoting it to a doublefault. In that case,
- * modify the stack to make it look like we just entered
- * the #GP handler from user space, similar to bad_iret.
- */
-#ifdef CONFIG_X86_ESPFIX64
- ALIGN
-__do_double_fault:
- XCPT_FRAME 1 RDI+8
- movq RSP(%rdi),%rax /* Trap on the espfix stack? */
- sarq $PGDIR_SHIFT,%rax
- cmpl $ESPFIX_PGD_ENTRY,%eax
- jne do_double_fault /* No, just deliver the fault */
- cmpl $__KERNEL_CS,CS(%rdi)
- jne do_double_fault
- movq RIP(%rdi),%rax
- cmpq $native_irq_return_iret,%rax
- jne do_double_fault /* This shouldn't happen... */
- movq PER_CPU_VAR(kernel_stack),%rax
- subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
- movq %rax,RSP(%rdi)
- movq $0,(%rax) /* Missing (lost) #GP error code */
- movq $general_protection,RIP(%rdi)
- retq
- CFI_ENDPROC
-END(__do_double_fault)
-#else
-# define __do_double_fault do_double_fault
-#endif
-
/*
* End of kprobes section
*/
@@ -1379,7 +1335,7 @@ zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
-paranoiderrorentry double_fault __do_double_fault
+paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
@@ -1549,7 +1505,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
paranoidzeroentry_ist debug do_debug DEBUG_STACK
paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
-paranoiderrorentry stack_segment do_stack_segment
+errorentry stack_segment do_stack_segment
#ifdef CONFIG_XEN
zeroentry xen_debug do_debug
zeroentry xen_int3 do_int3
@@ -1659,16 +1615,15 @@ error_sti:
/*
* There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here. B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
*/
error_kernelspace:
incl %ebx
leaq native_irq_return_iret(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
- je error_swapgs
+ je error_bad_iret
movl %ecx,%eax /* zero extend */
cmpq %rax,RIP+8(%rsp)
je bstep_iret
@@ -1679,7 +1634,15 @@ error_kernelspace:
bstep_iret:
/* Fix truncated RIP */
movq %rcx,RIP+8(%rsp)
- jmp error_swapgs
+ /* fall through */
+
+error_bad_iret:
+ SWAPGS
+ mov %rsp,%rdi
+ call fixup_bad_iret
+ mov %rax,%rsp
+ decl %ebx /* Return to usergs */
+ jmp error_sti
CFI_ENDPROC
END(error_entry)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 57409f6b8c62..f9d976e0ae67 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -218,32 +218,40 @@ DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL
DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun )
DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS )
DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present )
-#ifdef CONFIG_X86_32
DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment )
-#endif
DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 )
#ifdef CONFIG_X86_64
/* Runs on IST stack */
-dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
- enum ctx_state prev_state;
-
- prev_state = exception_enter();
- if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
- X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
- preempt_conditional_sti(regs);
- do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
- }
- exception_exit(prev_state);
-}
-
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
+#ifdef CONFIG_X86_ESPFIX64
+ extern unsigned char native_irq_return_iret[];
+
+ /*
+ * If IRET takes a non-IST fault on the espfix64 stack, then we
+ * end up promoting it to a doublefault. In that case, modify
+ * the stack to make it look like we just entered the #GP
+ * handler from user space, similar to bad_iret.
+ */
+ if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+ regs->cs == __KERNEL_CS &&
+ regs->ip == (unsigned long)native_irq_return_iret)
+ {
+ struct pt_regs *normal_regs = task_pt_regs(current);
+
+ /* Fake a #GP(0) from userspace. */
+ memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+ normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
+ regs->ip = (unsigned long)general_protection;
+ regs->sp = (unsigned long)&normal_regs->orig_ax;
+ return;
+ }
+#endif
+
exception_enter();
/* Return not checked because double check cannot be ignored */
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -376,6 +384,35 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
*regs = *eregs;
return regs;
}
+
+struct bad_iret_stack {
+ void *error_entry_ret;
+ struct pt_regs regs;
+};
+
+asmlinkage __visible
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+{
+ /*
+ * This is called from entry_64.S early in handling a fault
+ * caused by a bad iret to user mode. To handle the fault
+ * correctly, we want move our stack frame to task_pt_regs
+ * and we want to pretend that the exception came from the
+ * iret target.
+ */
+ struct bad_iret_stack *new_stack =
+ container_of(task_pt_regs(current),
+ struct bad_iret_stack, regs);
+
+ /* Copy the IRET target to the new stack. */
+ memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+
+ /* Copy the remainder of the stack from the current stack. */
+ memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+
+ BUG_ON(!user_mode_vm(&new_stack->regs));
+ return new_stack;
+}
#endif
/*
@@ -748,7 +785,7 @@ void __init trap_init(void)
set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
set_intr_gate(X86_TRAP_TS, invalid_TSS);
set_intr_gate(X86_TRAP_NP, segment_not_present);
- set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
+ set_intr_gate(X86_TRAP_SS, stack_segment);
set_intr_gate(X86_TRAP_GP, general_protection);
set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
set_intr_gate(X86_TRAP_MF, coprocessor_error);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f35c66c5959a..2308a401a1c5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1110,7 +1110,7 @@ void mark_rodata_ro(void)
unsigned long end = (unsigned long) &__end_rodata_hpage_align;
unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
- unsigned long all_end = PFN_ALIGN(&_end);
+ unsigned long all_end;
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -1121,7 +1121,16 @@ void mark_rodata_ro(void)
/*
* The rodata/data/bss/brk section (but not the kernel text!)
* should also be not-executable.
+ *
+ * We align all_end to PMD_SIZE because the existing mapping
+ * is a full PMD. If we would align _brk_end to PAGE_SIZE we
+ * split the PMD and the reminder between _brk_end and the end
+ * of the PMD will remain mapped executable.
+ *
+ * Any PMD which was setup after the one which covers _brk_end
+ * has been zapped already via cleanup_highmem().
*/
+ all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
rodata_test();
diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl
index 0b0b124d3ece..23210baade2d 100644
--- a/arch/x86/tools/calc_run_size.pl
+++ b/arch/x86/tools/calc_run_size.pl
@@ -19,7 +19,16 @@ while (<>) {
if ($file_offset == 0) {
$file_offset = $offset;
} elsif ($file_offset != $offset) {
- die ".bss and .brk lack common file offset\n";
+ # BFD linker shows the same file offset in ELF.
+ # Gold linker shows them as consecutive.
+ next if ($file_offset + $mem_size == $offset + $size);
+
+ printf STDERR "file_offset: 0x%lx\n", $file_offset;
+ printf STDERR "mem_size: 0x%lx\n", $mem_size;
+ printf STDERR "offset: 0x%lx\n", $offset;
+ printf STDERR "size: 0x%lx\n", $size;
+
+ die ".bss and .brk are non-contiguous\n";
}
}
}