diff options
author | Mark Brown <broonie@kernel.org> | 2014-09-08 12:05:48 +0100 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2014-09-08 12:05:48 +0100 |
commit | bb0dddf6157bc679de9143507375fce3f13fcd00 (patch) | |
tree | de120f5da2b49f6c32fb71454a83111a974811a1 | |
parent | 4e65656dfa8cfd0660bac70cba7466d04e565ba6 (diff) | |
parent | b60edabe236826e659aa814326b0d9c9ecb294c1 (diff) |
Merge remote-tracking branch 'lsk/v3.10/topic/aosp' into linux-linaro-lsk-android
Conflicts:
arch/arm64/Kconfig
arch/arm64/crypto/Makefile
arch/arm64/crypto/aes-glue.c
arch/arm64/crypto/ghash-ce-core.S
arch/arm64/crypto/ghash-ce-glue.c
arch/arm64/include/asm/ptrace.h
arch/arm64/include/asm/thread_info.h
arch/arm64/kernel/fpsimd.c
crypto/ablk_helper.c
drivers/base/cpu.c
drivers/hid/hid-core.c
net/ipv4/raw.c
100 files changed, 1283 insertions, 302 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index b4f332475b7..ec0eff6c6c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7139,6 +7139,16 @@ S: Maintained F: drivers/mmc/host/sdhci.* F: drivers/mmc/host/sdhci-pltfm.[ch] +SECURE COMPUTING +M: Kees Cook <keescook@chromium.org> +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp +S: Supported +F: kernel/seccomp.c +F: include/uapi/linux/seccomp.h +F: include/linux/seccomp.h +K: \bsecure_computing +K: \bTIF_SECCOMP\b + SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF) M: Anton Vorontsov <avorontsov@ru.mvista.com> L: linuxppc-dev@lists.ozlabs.org diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 5b888487ede..bdc4749a195 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -24,6 +24,7 @@ CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y CONFIG_INET=y CONFIG_INET_ESP=y +CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MANGLE=y diff --git a/arch/Kconfig b/arch/Kconfig index 00e3702ec79..4c0a1d03ae0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. + - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index bd4e7c31e9e..993973b061d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,7 +22,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB - select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT @@ -1811,6 +1811,11 @@ config OABI_COMPAT in memory differs between the legacy ABI and the new ARM EABI (only for non "thumb" binaries). This option adds a tiny overhead to all syscalls and produces a slightly larger kernel. + + The seccomp filter system will not be available when this is + selected, since there is no way yet to sensibly distinguish + between calling conventions during filtering. + If you know you'll be using only pure EABI user space then you can say N here. If this option is not selected and you attempt to execute a legacy ABI binary then the result will be diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index cbd61977c99..43876245fc5 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -15,7 +15,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls (380) +#define __NR_syscalls (384) #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) #define __ARCH_WANT_STAT64 diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index af33b44990e..bbe80a7cba0 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -406,6 +406,12 @@ #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) #define __NR_kcmp (__NR_SYSCALL_BASE+378) #define __NR_finit_module (__NR_SYSCALL_BASE+379) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_SYSCALL_BASE+380) + * #define __NR_sched_getattr (__NR_SYSCALL_BASE+381) + * #define __NR_renameat2 (__NR_SYSCALL_BASE+382) + */ +#define __NR_seccomp (__NR_SYSCALL_BASE+383) /* * This may need to be greater than __NR_last_syscall+1 in order to diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index c6ca7e37677..1a2e529a134 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -389,6 +389,10 @@ CALL(sys_process_vm_writev) CALL(sys_kcmp) CALL(sys_finit_module) +/* 380 */ CALL(sys_ni_syscall) /* CALL(sys_sched_setattr) */ + CALL(sys_ni_syscall) /* CALL(sys_sched_getattr) */ + CALL(sys_ni_syscall) /* CALL(sys_renameat2) */ + CALL(sys_seccomp) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 2070a56ecc4..a3f935fde97 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -35,4 +35,4 @@ AFLAGS_aes-neon.o := -DINTERLEAVE=4 CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE - $(call if_changed_dep,cc_o_c) + $(call if_changed_rule,cc_o_c) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 60f2f4c1225..79cd911ef88 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -267,7 +267,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index b9e6eaf41c9..607928ad1fd 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -19,13 +19,15 @@ #include <linux/linkage.h> #include <asm/assembler.h> - DATA .req v0 - SHASH .req v1 - IN1 .req v2 + SHASH .req v0 + SHASH2 .req v1 T1 .req v2 T2 .req v3 - T3 .req v4 - VZR .req v5 + MASK .req v4 + XL .req v5 + XM .req v6 + XH .req v7 + IN1 .req v7 .text .arch armv8-a+crypto @@ -35,61 +37,51 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update) - ld1 {DATA.16b}, [x1] ld1 {SHASH.16b}, [x3] - eor VZR.16b, VZR.16b, VZR.16b + ld1 {XL.16b}, [x1] + movi MASK.16b, #0xe1 + ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 + shl MASK.2d, MASK.2d, #57 + eor SHASH2.16b, SHASH2.16b, SHASH.16b /* do the head block first, if supplied */ cbz x4, 0f - ld1 {IN1.2d}, [x4] + ld1 {T1.2d}, [x4] b 1f -0: ld1 {IN1.2d}, [x2], #16 +0: ld1 {T1.2d}, [x2], #16 sub w0, w0, #1 -1: ext IN1.16b, IN1.16b, IN1.16b, #8 -CPU_LE( rev64 IN1.16b, IN1.16b ) - eor DATA.16b, DATA.16b, IN1.16b - /* multiply DATA by SHASH in GF(2^128) */ - ext T2.16b, DATA.16b, DATA.16b, #8 - ext T3.16b, SHASH.16b, SHASH.16b, #8 - eor T2.16b, T2.16b, DATA.16b - eor T3.16b, T3.16b, SHASH.16b +1: /* multiply XL by SHASH in GF(2^128) */ +CPU_LE( rev64 T1.16b, T1.16b ) - pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1 - pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0 - pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0) - eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0) - eor T2.16b, T2.16b, DATA.16b + ext T2.16b, XL.16b, XL.16b, #8 + ext IN1.16b, T1.16b, T1.16b, #8 + eor T1.16b, T1.16b, T2.16b + eor XL.16b, XL.16b, IN1.16b - ext T3.16b, VZR.16b, T2.16b, #8 - ext T2.16b, T2.16b, VZR.16b, #8 - eor DATA.16b, DATA.16b, T3.16b - eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of - // carry-less multiplication + pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1 + eor T1.16b, T1.16b, XL.16b + pmull XL.1q, SHASH.1d, XL.1d // a0 * b0 + pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) - /* first phase of the reduction */ - shl T3.2d, DATA.2d, #1 - eor T3.16b, T3.16b, DATA.16b - shl T3.2d, T3.2d, #5 - eor T3.16b, T3.16b, DATA.16b - shl T3.2d, T3.2d, #57 - ext T2.16b, VZR.16b, T3.16b, #8 - ext T3.16b, T3.16b, VZR.16b, #8 - eor DATA.16b, DATA.16b, T2.16b - eor T1.16b, T1.16b, T3.16b + ext T1.16b, XL.16b, XH.16b, #8 + eor T2.16b, XL.16b, XH.16b + eor XM.16b, XM.16b, T1.16b + eor XM.16b, XM.16b, T2.16b + pmull T2.1q, XL.1d, MASK.1d - /* second phase of the reduction */ - ushr T2.2d, DATA.2d, #5 - eor T2.16b, T2.16b, DATA.16b - ushr T2.2d, T2.2d, #1 - eor T2.16b, T2.16b, DATA.16b - ushr T2.2d, T2.2d, #1 - eor T1.16b, T1.16b, T2.16b - eor DATA.16b, DATA.16b, T1.16b + mov XH.d[0], XM.d[1] + mov XM.d[1], XL.d[0] + + eor XL.16b, XM.16b, T2.16b + ext T2.16b, XL.16b, XL.16b, #8 + pmull XL.1q, XL.1d, MASK.1d + eor T2.16b, T2.16b, XH.16b + eor XL.16b, XL.16b, T2.16b cbnz w0, 0b - st1 {DATA.16b}, [x1] + st1 {XL.16b}, [x1] ret ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index b92baf3f68c..833ec1e3f3e 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -67,11 +67,12 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, blocks = len / GHASH_BLOCK_SIZE; len %= GHASH_BLOCK_SIZE; - kernel_neon_begin_partial(6); + kernel_neon_begin_partial(8); pmull_ghash_update(blocks, ctx->digest, src, key, partial ? ctx->buf : NULL); kernel_neon_end(); src += blocks * GHASH_BLOCK_SIZE; + partial = 0; } if (len) memcpy(ctx->buf + partial, src, len); @@ -88,7 +89,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); - kernel_neon_begin_partial(6); + kernel_neon_begin_partial(8); pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); kernel_neon_end(); } diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index c43b4ac1300..50f559f574f 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -37,8 +37,21 @@ struct fpsimd_state { u32 fpcr; }; }; + /* the id of the last cpu to have restored this state */ + unsigned int cpu; }; +/* + * Struct for stacking the bottom 'n' FP/SIMD registers. + */ +struct fpsimd_partial_state { + u32 fpsr; + u32 fpcr; + u32 num_regs; + __uint128_t vregs[32]; +}; + + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f @@ -58,6 +71,16 @@ extern void fpsimd_load_state(struct fpsimd_state *state); extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); +extern void fpsimd_preserve_current_state(void); +extern void fpsimd_restore_current_state(void); +extern void fpsimd_update_current_state(struct fpsimd_state *state); + +extern void fpsimd_flush_task_state(struct task_struct *target); + +extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, + u32 num_regs); +extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); + #endif #endif diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index bbec599c96b..768414d55e6 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -62,3 +62,38 @@ ldr w\tmpnr, [\state, #16 * 2 + 4] msr fpcr, x\tmpnr .endm + +.altmacro +.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 + mrs x\tmpnr1, fpsr + str w\numnr, [\state, #8] + mrs x\tmpnr2, fpcr + stp w\tmpnr1, w\tmpnr2, [\state] + adr x\tmpnr1, 0f + add \state, \state, x\numnr, lsl #4 + sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 + br x\tmpnr1 + .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 + .irp qb, %(qa + 1) + stp q\qa, q\qb, [\state, # -16 * \qa - 16] + .endr + .endr +0: +.endm + +.macro fpsimd_restore_partial state, tmpnr1, tmpnr2 + ldp w\tmpnr1, w\tmpnr2, [\state] + msr fpsr, x\tmpnr1 + msr fpcr, x\tmpnr2 + adr x\tmpnr1, 0f + ldr w\tmpnr2, [\state, #8] + add \state, \state, x\tmpnr2, lsl #4 + sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 + br x\tmpnr1 + .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 + .irp qb, %(qa + 1) + ldp q\qa, q\qb, [\state, # -16 * \qa - 16] + .endr + .endr +0: +.endm diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h new file mode 100644 index 00000000000..13ce4cc18e2 --- /dev/null +++ b/arch/arm64/include/asm/neon.h @@ -0,0 +1,18 @@ +/* + * linux/arch/arm64/include/asm/neon.h + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> + +#define cpu_has_neon() (1) + +#define kernel_neon_begin() kernel_neon_begin_partial(32) + +void kernel_neon_begin_partial(u32 num_regs); +void kernel_neon_end(void); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 45b20cd6cbc..001b764fffa 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -136,8 +136,8 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) -#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc -#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp +#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) +#define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk)) /* * Prefetching support diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a429b5940be..59bd5ad7a02 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -133,12 +133,7 @@ struct pt_regs { (!((regs)->pstate & PSR_F_BIT)) #define user_stack_pointer(regs) \ - (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) - -static inline unsigned long regs_return_value(struct pt_regs *regs) -{ - return regs->regs[0]; -} + (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) /* * Are the current registers suitable for user mode? (used to maintain diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 59f151f8241..215f935619e 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -109,6 +109,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 @@ -128,10 +129,11 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME) + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6a27cd6dbfa..d358ccacfc0 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Save the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_save_partial_state) + fpsimd_save_partial x0, 1, 8, 9 + ret +ENDPROC(fpsimd_load_partial_state) + +/* + * Load the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_load_partial_state) + fpsimd_restore_partial x0, 8, 9 + ret +ENDPROC(fpsimd_load_partial_state) + +#endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 56ef569b2b6..0437186e4c7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -590,7 +590,7 @@ fast_work_pending: str x0, [sp, #S_X0] // returned x0 work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched - /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ + /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' tst x2, #PSR_MODE_MASK // user mode regs? diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 522df9c7f3a..845fae3aeb2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -34,6 +34,60 @@ #define FPEXC_IDF (1 << 7) /* + * In order to reduce the number of times the FPSIMD state is needlessly saved + * and restored, we need to keep track of two things: + * (a) for each task, we need to remember which CPU was the last one to have + * the task's FPSIMD state loaded into its FPSIMD registers; + * (b) for each CPU, we need to remember which task's userland FPSIMD state has + * been loaded into its FPSIMD registers most recently, or whether it has + * been used to perform kernel mode NEON in the meantime. + * + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the + * address of the userland FPSIMD state of the task that was loaded onto the CPU + * the most recently, or NULL if kernel mode NEON has been performed after that. + * + * With this in place, we no longer have to restore the next FPSIMD state right + * when switching between tasks. Instead, we can defer this check to userland + * resume, at which time we verify whether the CPU's fpsimd_last_state and the + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * can omit the FPSIMD restore. + * + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to + * indicate whether or not the userland FPSIMD state of the current task is + * present in the registers. The flag is set unless the FPSIMD registers of this + * CPU currently contain the most recent userland FPSIMD state of the current + * task. + * + * For a certain task, the sequence may look something like this: + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu + * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is + * cleared, otherwise it is set; + * + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's + * userland FPSIMD state is copied from memory to the registers, the task's + * fpsimd_state.cpu field is set to the id of the current CPU, the current + * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the + * TIF_FOREIGN_FPSTATE flag is cleared; + * + * - the task executes an ordinary syscall; upon return to userland, the + * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is + * restored; + * + * - the task executes a syscall which executes some NEON instructions; this is + * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD + * register contents to memory, clears the fpsimd_last_state per-cpu variable + * and sets the TIF_FOREIGN_FPSTATE flag; + * + * - the task gets preempted after kernel_neon_end() is called; as we have not + * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so + * whatever is in the FPSIMD registers is not saved to memory, but discarded. + */ +static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); + +/* * Trapped FP/ASIMD access. */ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) @@ -71,43 +125,138 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) void fpsimd_thread_switch(struct task_struct *next) { - /* check if not kernel threads */ - if (current->mm) + /* + * Save the current FPSIMD state to memory, but only if whatever is in + * the registers is in fact the most recent userland FPSIMD state of + * 'current'. + */ + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - if (next->mm) - fpsimd_load_state(&next->thread.fpsimd_state); + + if (next->mm) { + /* + * If we are switching to a task whose most recent userland + * FPSIMD state is already in the registers of *this* cpu, + * we can skip loading the state from memory. Otherwise, set + * the TIF_FOREIGN_FPSTATE flag so the state will be loaded + * upon the next return to userland. + */ + struct fpsimd_state *st = &next->thread.fpsimd_state; + + if (__this_cpu_read(fpsimd_last_state) == st + && st->cpu == smp_processor_id()) + clear_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + else + set_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + } } void fpsimd_flush_thread(void) { preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); - fpsimd_load_state(¤t->thread.fpsimd_state); + set_thread_flag(TIF_FOREIGN_FPSTATE); +} + +/* + * Save the userland FPSIMD state of 'current' to memory, but only if the state + * currently held in the registers does in fact belong to 'current' + */ +void fpsimd_preserve_current_state(void) +{ + preempt_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); preempt_enable(); } +/* + * Load the userland FPSIMD state of 'current' from memory, but only if the + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD + * state of 'current' + */ +void fpsimd_restore_current_state(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + fpsimd_load_state(st); + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); +} + +/* + * Load an updated userland FPSIMD state for 'current' from memory and set the + * flag that indicates that the FPSIMD register contents are the most recent + * FPSIMD state of 'current' + */ +void fpsimd_update_current_state(struct fpsimd_state *state) +{ + preempt_disable(); + fpsimd_load_state(state); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); +} + +/* + * Invalidate live CPU copies of task t's FPSIMD state + */ +void fpsimd_flush_task_state(struct task_struct *t) +{ + t->thread.fpsimd_state.cpu = NR_CPUS; +} + #ifdef CONFIG_KERNEL_MODE_NEON +static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); +static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); + /* * Kernel-side NEON support functions */ -void kernel_neon_begin(void) +void kernel_neon_begin_partial(u32 num_regs) { - /* Avoid using the NEON in interrupt context */ - BUG_ON(in_interrupt()); - preempt_disable(); + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - if (current->mm) - fpsimd_save_state(¤t->thread.fpsimd_state); + BUG_ON(num_regs > 32); + fpsimd_save_partial_state(s, roundup(num_regs, 2)); + } else { + /* + * Save the userland FPSIMD state if we have one and if we + * haven't done so already. Clear fpsimd_last_state to indicate + * that there is no longer userland FPSIMD state in the + * registers. + */ + preempt_disable(); + if (current->mm && + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); + } } -EXPORT_SYMBOL(kernel_neon_begin); +EXPORT_SYMBOL(kernel_neon_begin_partial); void kernel_neon_end(void) { - if (current->mm) - fpsimd_load_state(¤t->thread.fpsimd_state); - - preempt_enable(); + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + fpsimd_load_partial_state(s); + } else { + preempt_enable(); + } } EXPORT_SYMBOL(kernel_neon_end); @@ -119,12 +268,13 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - if (current->mm) + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); break; case CPU_PM_EXIT: if (current->mm) - fpsimd_load_state(¤t->thread.fpsimd_state); + set_thread_flag(TIF_FOREIGN_FPSTATE); break; case CPU_PM_ENTER_FAILED: default: diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 3193bf35dbc..15c91e25d04 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -215,7 +215,7 @@ void release_thread(struct task_struct *dead_task) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpsimd_save_state(¤t->thread.fpsimd_state); + fpsimd_preserve_current_state(); *dst = *src; return 0; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 096a7ad5f00..993cdb79b70 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -521,6 +521,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, return ret; target->thread.fpsimd_state.user_fpsimd = newstate; + fpsimd_flush_task_state(target); return ret; } @@ -768,6 +769,7 @@ static int compat_vfp_set(struct task_struct *target, uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; } + fpsimd_flush_task_state(target); return ret; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e3cf0962624..bbc1aad21ce 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) int err; /* dump the hardware registers to the fpsimd_state structure */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* copy the FP and status/control registers */ err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); @@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); /* load the hardware registers from the fpsimd_state structure */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } @@ -423,4 +420,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e51bbe79f5b..02de4326033 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -219,7 +219,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) * Note that this also saves V16-31, which aren't visible * in AArch32. */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* Place structure header on the stack */ __put_user_error(magic, &frame->magic, err); @@ -282,11 +282,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) * We don't need to touch the exception register, so * reload the hardware state. */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 52806427e15..4ec14ef00b9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -110,9 +110,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) } #ifdef CONFIG_HAVE_ARCH_PFN_VALID +#define PFN_MASK ((1UL << (64 - PAGE_SHIFT)) - 1) + int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 1dee279f966..af4d5c0a2f0 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -369,16 +369,22 @@ #define __NR_process_vm_writev (__NR_Linux + 346) #define __NR_kcmp (__NR_Linux + 347) #define __NR_finit_module (__NR_Linux + 348) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_Linux + 349) + * #define __NR_sched_getattr (__NR_Linux + 350) + * #define __NR_renameat2 (__NR_Linux + 351) + */ +#define __NR_seccomp (__NR_Linux + 352) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 348 +#define __NR_Linux_syscalls 352 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 348 +#define __NR_O32_Linux_syscalls 352 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -695,16 +701,22 @@ #define __NR_kcmp (__NR_Linux + 306) #define __NR_finit_module (__NR_Linux + 307) #define __NR_getdents64 (__NR_Linux + 308) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_Linux + 309) + * #define __NR_sched_getattr (__NR_Linux + 310) + * #define __NR_renameat2 (__NR_Linux + 311) + */ +#define __NR_seccomp (__NR_Linux + 312) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 308 +#define __NR_Linux_syscalls 312 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 308 +#define __NR_64_Linux_syscalls 312 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1025,15 +1037,21 @@ #define __NR_process_vm_writev (__NR_Linux + 310) #define __NR_kcmp (__NR_Linux + 311) #define __NR_finit_module (__NR_Linux + 312) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_Linux + 313) + * #define __NR_sched_getattr (__NR_Linux + 314) + * #define __NR_renameat2 (__NR_Linux + 315) + */ +#define __NR_seccomp (__NR_Linux + 316) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 312 +#define __NR_Linux_syscalls 316 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 312 +#define __NR_N32_Linux_syscalls 316 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 9b36424b03c..bcb2184e8a4 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -593,6 +593,12 @@ einval: li v0, -ENOSYS sys sys_process_vm_writev 6 sys sys_kcmp 5 sys sys_finit_module 3 + /* Backporting seccomp, skip a few ... */ + sys sys_ni_syscall 0 /* sys_sched_setattr */ + sys sys_ni_syscall 0 /* sys_sched_getattr */ /* 4350 */ + sys sys_ni_syscall 0 /* sys_renameat2 */ + sys sys_seccomp 3 + .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 97a5909a61c..285872f9d6d 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -424,4 +424,8 @@ sys_call_table: PTR sys_kcmp PTR sys_finit_module PTR sys_getdents64 + sys sys_ni_syscall /* sys_sched_setattr */ + sys sys_ni_syscall /* sys_sched_getattr */ /* 5310 */ + sys sys_ni_syscall /* sys_renameat2 */ + sys sys_seccomp .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index edcb6594e7b..bdee1a1ed1c 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -417,4 +417,8 @@ EXPORT(sysn32_call_table) PTR compat_sys_process_vm_writev /* 6310 */ PTR sys_kcmp PTR sys_finit_module + sys sys_ni_syscall /* sys_sched_setattr */ + sys sys_ni_syscall /* sys_sched_getattr */ + sys sys_ni_syscall /* sys_renameat2 */ /* 6315 */ + sys sys_seccomp .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 74f485d3c0e..a1f826a2457 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -541,4 +541,8 @@ sys_call_table: PTR compat_sys_process_vm_writev PTR sys_kcmp PTR sys_finit_module - .size sys_call_table,.-sys_call_table + sys sys_ni_syscall /* sys_sched_setattr */ + sys sys_ni_syscall /* sys_sched_getattr */ /* 4350 */ + sys sys_ni_syscall /* sys_renameat2 */ + sys sys_seccomp + .size sys32_call_table,.-sys32_call_table diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index aabfb8380a1..de6d048d030 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -357,3 +357,8 @@ 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp 350 i386 finit_module sys_finit_module +# Backporting seccomp, skip a few ... +# 351 i386 sched_setattr sys_sched_setattr +# 352 i386 sched_getattr sys_sched_getattr +# 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 63a899304d2..960cbb025ca 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -320,6 +320,11 @@ 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp 313 common finit_module sys_finit_module +# Backporting seccomp, skip a few ... +# 314 common sched_setattr sys_sched_setattr +# 315 common sched_getattr sys_sched_getattr +# 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c index 62568b1fc88..ffe7278d4bd 100644 --- a/crypto/ablk_helper.c +++ b/crypto/ablk_helper.c @@ -75,7 +75,7 @@ int ablk_encrypt(struct ablkcipher_request *req) struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); + *cryptd_req = *req; ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); return crypto_ablkcipher_encrypt(cryptd_req); @@ -94,7 +94,7 @@ int ablk_decrypt(struct ablkcipher_request *req) struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); + *cryptd_req = *req; ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); return crypto_ablkcipher_decrypt(cryptd_req); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 607efe6b7dc..ac292eee83d 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -319,7 +319,7 @@ int __cpuinit register_cpu(struct cpu *cpu, int num) cpu->dev.id = num; cpu->dev.bus = &cpu_subsys; cpu->dev.release = cpu_device_release; -#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE cpu->dev.bus->uevent = cpu_uevent; #endif error = device_register(&cpu->dev); diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 01e21037d8f..d7872b96019 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -27,6 +27,7 @@ #include <linux/pm.h> #include <linux/suspend.h> #include <linux/syscore_ops.h> +#include <linux/reboot.h> #include <generated/utsrelease.h> @@ -130,6 +131,7 @@ struct firmware_buf { struct page **pages; int nr_pages; int page_array_size; + struct list_head pending_list; #endif char fw_id[]; }; @@ -171,6 +173,9 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name, strcpy(buf->fw_id, fw_name); buf->fwc = fwc; init_completion(&buf->completion); +#ifdef CONFIG_FW_LOADER_USER_HELPER + INIT_LIST_HEAD(&buf->pending_list); +#endif pr_debug("%s: fw-%s buf=%p\n", __func__, fw_name, buf); @@ -446,10 +451,8 @@ static struct firmware_priv *to_firmware_priv(struct device *dev) return container_of(dev, struct firmware_priv, dev); } -static void fw_load_abort(struct firmware_priv *fw_priv) +static void __fw_load_abort(struct firmware_buf *buf) { - struct firmware_buf *buf = fw_priv->buf; - /* * There is a small window in which user can write to 'loading' * between loading done and disappearance of 'loading' @@ -457,8 +460,16 @@ static void fw_load_abort(struct firmware_priv *fw_priv) if (test_bit(FW_STATUS_DONE, &buf->status)) return; + list_del_init(&buf->pending_list); set_bit(FW_STATUS_ABORT, &buf->status); complete_all(&buf->completion); +} + +static void fw_load_abort(struct firmware_priv *fw_priv) +{ + struct firmware_buf *buf = fw_priv->buf; + + __fw_load_abort(buf); /* avoid user action after loading abort */ fw_priv->buf = NULL; @@ -467,6 +478,25 @@ static void fw_load_abort(struct firmware_priv *fw_priv) #define is_fw_load_aborted(buf) \ test_bit(FW_STATUS_ABORT, &(buf)->status) +static LIST_HEAD(pending_fw_head); + +/* reboot notifier for avoid deadlock with usermode_lock */ +static int fw_shutdown_notify(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + mutex_lock(&fw_lock); + while (!list_empty(&pending_fw_head)) + __fw_load_abort(list_first_entry(&pending_fw_head, + struct firmware_buf, + pending_list)); + mutex_unlock(&fw_lock); + return NOTIFY_DONE; +} + +static struct notifier_block fw_shutdown_nb = { + .notifier_call = fw_shutdown_notify, +}; + static ssize_t firmware_timeout_show(struct class *class, struct class_attribute *attr, char *buf) @@ -619,6 +649,7 @@ static ssize_t firmware_loading_store(struct device *dev, * is completed. * */ fw_map_pages_buf(fw_buf); + list_del_init(&fw_buf->pending_list); complete_all(&fw_buf->completion); break; } @@ -853,8 +884,15 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent, goto err_del_dev; } + mutex_lock(&fw_lock); + list_add(&buf->pending_list, &pending_fw_head); + mutex_unlock(&fw_lock); + retval = device_create_file(f_dev, &dev_attr_loading); if (retval) { + mutex_lock(&fw_lock); + list_del_init(&buf->pending_list); + mutex_unlock(&fw_lock); dev_err(f_dev, "%s: device_create_file failed\n", __func__); goto err_del_bin_attr; } @@ -1526,6 +1564,7 @@ static int __init firmware_class_init(void) { fw_cache_init(); #ifdef CONFIG_FW_LOADER_USER_HELPER + register_reboot_notifier(&fw_shutdown_nb); return class_register(&firmware_class); #else return 0; @@ -1539,6 +1578,7 @@ static void __exit firmware_class_exit(void) unregister_pm_notifier(&fw_cache.pm_notify); #endif #ifdef CONFIG_FW_LOADER_USER_HELPER + unregister_reboot_notifier(&fw_shutdown_nb); class_unregister(&firmware_class); #endif } diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 4d2557d738c..3425fbbb173 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -116,7 +116,7 @@ struct cpufreq_interactive_tunables { }; /* For cases where we have single governor instance for system */ -struct cpufreq_interactive_tunables *common_tunables; +static struct cpufreq_interactive_tunables *common_tunables; static struct attribute_group *get_sysfs_attr(void); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 0aba4d75055..5fd66daa03b 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -427,7 +427,7 @@ static void add_all_freq_table(unsigned int freq) unsigned int size; size = sizeof(unsigned int) * (all_freq_table->table_size + 1); all_freq_table->freq_table = krealloc(all_freq_table->freq_table, - size, GFP_KERNEL); + size, GFP_ATOMIC); if (IS_ERR(all_freq_table->freq_table)) { pr_warn("Could not reallocate memory for freq_table\n"); all_freq_table->freq_table = NULL; diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c index eaaccde8221..7fbf7247e65 100644 --- a/drivers/input/keyreset.c +++ b/drivers/input/keyreset.c @@ -27,9 +27,10 @@ struct keyreset_state { int restart_requested; int (*reset_fn)(void); struct platform_device *pdev_child; + struct work_struct restart_work; }; -static void do_restart(void) +static void do_restart(struct work_struct *unused) { sys_sync(); kernel_restart(NULL); @@ -44,7 +45,7 @@ static void do_reset_fn(void *priv) state->restart_requested = state->reset_fn(); } else { pr_info("keyboard reset\n"); - do_restart(); + schedule_work(&state->restart_work); state->restart_requested = 1; } } @@ -69,6 +70,7 @@ static int keyreset_probe(struct platform_device *pdev) if (!state->pdev_child) return -ENOMEM; state->pdev_child->dev.parent = &pdev->dev; + INIT_WORK(&state->restart_work, do_restart); keyp = pdata->keys_down; while ((key = *keyp++)) { diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 3511b084036..ccaef8b48eb 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -363,7 +363,9 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) if (!sc->nr_to_scan) return lru_count; - mutex_lock(&ashmem_mutex); + if (!mutex_trylock(&ashmem_mutex)) + return -1; + list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { loff_t start = range->pgstart * PAGE_SIZE; loff_t end = (range->pgend + 1) * PAGE_SIZE; diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 53e50b5e861..a401acdceb4 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -662,10 +662,17 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, break; } - if (count > BULK_BUFFER_SIZE) + if (count > BULK_BUFFER_SIZE) { xfer = BULK_BUFFER_SIZE; - else + /* ZLP, They will be more TX requests so not yet. */ + req->zero = 0; + } else { xfer = count; + /* If the data length is a multple of the + * maxpacket size then send a zero length packet(ZLP). + */ + req->zero = ((xfer % dev->ep_in->maxpacket) == 0); + } if (copy_from_user(req->buf, buf, xfer)) { r = -EFAULT; break; diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index 56dcf217cfe..21ced13c83d 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -24,7 +24,7 @@ #define SAMPLE_RATE 44100 #define FRAMES_PER_MSEC (SAMPLE_RATE / 1000) -#define IN_EP_MAX_PACKET_SIZE 384 +#define IN_EP_MAX_PACKET_SIZE 256 /* Number of requests to allocate */ #define IN_EP_REQ_COUNT 4 @@ -580,12 +580,18 @@ audio_bind(struct usb_configuration *c, struct usb_function *f) goto fail; ac_interface_desc.bInterfaceNumber = status; + /* AUDIO_AC_INTERFACE */ + ac_header_desc.baInterfaceNr[0] = status; + status = usb_interface_id(c, f); if (status < 0) goto fail; as_interface_alt_0_desc.bInterfaceNumber = status; as_interface_alt_1_desc.bInterfaceNumber = status; + /* AUDIO_AS_INTERFACE */ + ac_header_desc.baInterfaceNr[1] = status; + status = -ENODEV; /* allocate our endpoint */ diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 21c5ee2482d..7646a564bfd 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -25,7 +25,6 @@ #include "u_ether.h" #include "rndis.h" - /* * This function is an RNDIS Ethernet port -- a Microsoft protocol that's * been promoted instead of the standard CDC Ethernet. The published RNDIS @@ -67,6 +66,16 @@ * - MS-Windows drivers sometimes emit undocumented requests. */ +static bool rndis_multipacket_dl_disable; +module_param(rndis_multipacket_dl_disable, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(rndis_multipacket_dl_disable, + "Disable RNDIS Multi-packet support in DownLink"); + +static unsigned int rndis_ul_max_pkt_per_xfer = 3; +module_param(rndis_ul_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer, + "Maximum packets per transfer for UL aggregation"); + struct f_rndis { struct gether port; u8 ctrl_id, data_id; @@ -748,6 +757,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) rndis_set_param_medium(rndis->config, RNDIS_MEDIUM_802_3, 0); rndis_set_host_mac(rndis->config, rndis->ethaddr); + rndis_set_max_pkt_xfer(rndis->config, rndis_ul_max_pkt_per_xfer); if (rndis->manufacturer && rndis->vendorID && rndis_set_param_vendor(rndis->config, rndis->vendorID, @@ -854,6 +864,7 @@ rndis_bind_config_vendor(struct usb_configuration *c, u8 ethaddr[ETH_ALEN], rndis->port.header_len = sizeof(struct rndis_packet_msg_type); rndis->port.wrap = rndis_add_header; rndis->port.unwrap = rndis_rm_hdr; + rndis->port.ul_max_pkts_per_xfer = rndis_ul_max_pkt_per_xfer; rndis->port.func.name = "rndis"; rndis->port.func.strings = rndis_strings; diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c index 693f0c24d51..cb2767df3fb 100644 --- a/drivers/usb/gadget/rndis.c +++ b/drivers/usb/gadget/rndis.c @@ -59,6 +59,16 @@ MODULE_PARM_DESC (rndis_debug, "enable debugging"); #define RNDIS_MAX_CONFIGS 1 +int rndis_ul_max_pkt_per_xfer_rcvd; +module_param(rndis_ul_max_pkt_per_xfer_rcvd, int, S_IRUGO); +MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer_rcvd, + "Max num of REMOTE_NDIS_PACKET_MSGs received in a single transfer"); + +int rndis_ul_max_xfer_size_rcvd; +module_param(rndis_ul_max_xfer_size_rcvd, int, S_IRUGO); +MODULE_PARM_DESC(rndis_ul_max_xfer_size_rcvd, + "Max size of bus transfer received"); + static rndis_params rndis_per_dev_params[RNDIS_MAX_CONFIGS]; @@ -585,12 +595,12 @@ static int rndis_init_response(int configNr, rndis_init_msg_type *buf) resp->MinorVersion = cpu_to_le32(RNDIS_MINOR_VERSION); resp->DeviceFlags = cpu_to_le32(RNDIS_DF_CONNECTIONLESS); resp->Medium = cpu_to_le32(RNDIS_MEDIUM_802_3); - resp->MaxPacketsPerTransfer = cpu_to_le32(1); - resp->MaxTransferSize = cpu_to_le32( - params->dev->mtu + resp->MaxPacketsPerTransfer = cpu_to_le32(params->max_pkt_per_xfer); + resp->MaxTransferSize = cpu_to_le32(params->max_pkt_per_xfer * + (params->dev->mtu + sizeof(struct ethhdr) + sizeof(struct rndis_packet_msg_type) - + 22); + + 22)); resp->PacketAlignmentFactor = cpu_to_le32(0); resp->AFListOffset = cpu_to_le32(0); resp->AFListSize = cpu_to_le32(0); @@ -686,6 +696,12 @@ static int rndis_reset_response(int configNr, rndis_reset_msg_type *buf) rndis_reset_cmplt_type *resp; rndis_resp_t *r; struct rndis_params *params = rndis_per_dev_params + configNr; + u32 length; + u8 *xbuf; + + /* drain the response queue */ + while ((xbuf = rndis_get_next_response(configNr, &length))) + rndis_free_response(configNr, xbuf); r = rndis_add_response(configNr, sizeof(rndis_reset_cmplt_type)); if (!r) @@ -910,6 +926,8 @@ int rndis_set_param_dev(u8 configNr, struct net_device *dev, u16 *cdc_filter) rndis_per_dev_params[configNr].dev = dev; rndis_per_dev_params[configNr].filter = cdc_filter; + rndis_ul_max_xfer_size_rcvd = 0; + rndis_ul_max_pkt_per_xfer_rcvd = 0; return 0; } @@ -936,6 +954,13 @@ int rndis_set_param_medium(u8 configNr, u32 medium, u32 speed) return 0; } +void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer) +{ + pr_debug("%s:\n", __func__); + + rndis_per_dev_params[configNr].max_pkt_per_xfer = max_pkt_per_xfer; +} + void rndis_add_hdr(struct sk_buff *skb) { struct rndis_packet_msg_type *header; @@ -1008,23 +1033,73 @@ int rndis_rm_hdr(struct gether *port, struct sk_buff *skb, struct sk_buff_head *list) { - /* tmp points to a struct rndis_packet_msg_type */ - __le32 *tmp = (void *)skb->data; + int num_pkts = 1; - /* MessageType, MessageLength */ - if (cpu_to_le32(RNDIS_MSG_PACKET) - != get_unaligned(tmp++)) { - dev_kfree_skb_any(skb); - return -EINVAL; - } - tmp++; + if (skb->len > rndis_ul_max_xfer_size_rcvd) + rndis_ul_max_xfer_size_rcvd = skb->len; + + while (skb->len) { + struct rndis_packet_msg_type *hdr; + struct sk_buff *skb2; + u32 msg_len, data_offset, data_len; + + /* some rndis hosts send extra byte to avoid zlp, ignore it */ + if (skb->len == 1) { + dev_kfree_skb_any(skb); + return 0; + } + + if (skb->len < sizeof *hdr) { + pr_err("invalid rndis pkt: skblen:%u hdr_len:%u", + skb->len, sizeof *hdr); + dev_kfree_skb_any(skb); + return -EINVAL; + } + + hdr = (void *)skb->data; + msg_len = le32_to_cpu(hdr->MessageLength); + data_offset = le32_to_cpu(hdr->DataOffset); + data_len = le32_to_cpu(hdr->DataLength); + + if (skb->len < msg_len || + ((data_offset + data_len + 8) > msg_len)) { + pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n", + le32_to_cpu(hdr->MessageType), + msg_len, data_offset, data_len, skb->len); + dev_kfree_skb_any(skb); + return -EOVERFLOW; + } + if (le32_to_cpu(hdr->MessageType) != RNDIS_MSG_PACKET) { + pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n", + le32_to_cpu(hdr->MessageType), + msg_len, data_offset, data_len, skb->len); + dev_kfree_skb_any(skb); + return -EINVAL; + } + + skb_pull(skb, data_offset + 8); - /* DataOffset, DataLength */ - if (!skb_pull(skb, get_unaligned_le32(tmp++) + 8)) { - dev_kfree_skb_any(skb); - return -EOVERFLOW; + if (msg_len == skb->len) { + skb_trim(skb, data_len); + break; + } + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) { + pr_err("%s:skb clone failed\n", __func__); + dev_kfree_skb_any(skb); + return -ENOMEM; + } + + skb_pull(skb, msg_len - sizeof *hdr); + skb_trim(skb2, data_len); + skb_queue_tail(list, skb2); + + num_pkts++; } - skb_trim(skb, get_unaligned_le32(tmp++)); + + if (num_pkts > rndis_ul_max_pkt_per_xfer_rcvd) + rndis_ul_max_pkt_per_xfer_rcvd = num_pkts; skb_queue_tail(list, skb); return 0; diff --git a/drivers/usb/gadget/rndis.h b/drivers/usb/gadget/rndis.h index 0647f2f34e8..12045b31a31 100644 --- a/drivers/usb/gadget/rndis.h +++ b/drivers/usb/gadget/rndis.h @@ -189,6 +189,7 @@ typedef struct rndis_params struct net_device *dev; u32 vendorID; + u8 max_pkt_per_xfer; const char *vendorDescr; void (*resp_avail)(void *v); void *v; @@ -204,6 +205,7 @@ int rndis_set_param_dev (u8 configNr, struct net_device *dev, int rndis_set_param_vendor (u8 configNr, u32 vendorID, const char *vendorDescr); int rndis_set_param_medium (u8 configNr, u32 medium, u32 speed); +void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer); void rndis_add_hdr (struct sk_buff *skb); int rndis_rm_hdr(struct gether *port, struct sk_buff *skb, struct sk_buff_head *list); diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 4b76124ce96..801e326e54c 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -64,6 +64,7 @@ struct eth_dev { struct sk_buff_head rx_frames; unsigned header_len; + unsigned ul_max_pkts_per_xfer; struct sk_buff *(*wrap)(struct gether *, struct sk_buff *skb); int (*unwrap)(struct gether *, struct sk_buff *skb, @@ -226,9 +227,13 @@ rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags) size += out->maxpacket - 1; size -= size % out->maxpacket; + if (dev->ul_max_pkts_per_xfer) + size *= dev->ul_max_pkts_per_xfer; + if (dev->port_usb->is_fixed) size = max_t(size_t, size, dev->port_usb->fixed_out_len); + DBG(dev, "%s: size: %d\n", __func__, size); skb = alloc_skb(size + NET_IP_ALIGN, gfp_flags); if (skb == NULL) { DBG(dev, "no rx skb\n"); @@ -882,6 +887,7 @@ struct net_device *gether_connect(struct gether *link) dev->header_len = link->header_len; dev->unwrap = link->unwrap; dev->wrap = link->wrap; + dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer; spin_lock(&dev->lock); dev->port_usb = link; diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h index 02522338a70..ce803d41588 100644 --- a/drivers/usb/gadget/u_ether.h +++ b/drivers/usb/gadget/u_ether.h @@ -54,6 +54,10 @@ struct gether { bool is_fixed; u32 fixed_out_len; u32 fixed_in_len; + unsigned ul_max_pkts_per_xfer; +/* Max number of SKB packets to be used to create Multi Packet RNDIS */ +#define TX_SKB_HOLD_THRESHOLD 3 + bool multi_pkt_xfer; struct sk_buff *(*wrap)(struct gether *port, struct sk_buff *skb); int (*unwrap)(struct gether *port, diff --git a/drivers/video/adf/adf.c b/drivers/video/adf/adf.c index 231881c2b35..42c30c05826 100644 --- a/drivers/video/adf/adf.c +++ b/drivers/video/adf/adf.c @@ -613,6 +613,10 @@ void adf_device_destroy(struct adf_device *dev) } mutex_destroy(&dev->post_lock); mutex_destroy(&dev->client_lock); + + if (dev->timeline) + sync_timeline_destroy(&dev->timeline->obj); + adf_obj_destroy(&dev->base, &adf_devices); } EXPORT_SYMBOL(adf_device_destroy); diff --git a/fs/exec.c b/fs/exec.c index dd6aa61c854..9a02fc256c8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against - * PTRACE_ATTACH + * PTRACE_ATTACH or seccomp thread-sync */ static int check_unsafe_exec(struct linux_binprm *bprm) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 790b14c5f26..87e24a2edee 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2062,7 +2062,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); -extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); +extern int ext4_trim_fs(struct super_block *, struct fstrim_range *, + unsigned long blkdev_flags); /* inode.c */ struct buffer_head *ext4_getblk(handle_t *, struct inode *, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 42624a995b0..744d3a753b0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -597,11 +597,13 @@ resizefs_out: return err; } + case FIDTRIM: case FITRIM: { struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; int ret = 0; + int flags = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -609,13 +611,15 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; + if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q)) + return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; range.minlen = max((unsigned int)range.minlen, q->limits.discard_granularity); - ret = ext4_trim_fs(sb, &range); + ret = ext4_trim_fs(sb, &range, flags); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 162b80d527a..576155cb0e4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2707,7 +2707,8 @@ int ext4_mb_release(struct super_block *sb) } static inline int ext4_issue_discard(struct super_block *sb, - ext4_group_t block_group, ext4_grpblk_t cluster, int count) + ext4_group_t block_group, ext4_grpblk_t cluster, int count, + unsigned long flags) { ext4_fsblk_t discard_block; @@ -2716,7 +2717,7 @@ static inline int ext4_issue_discard(struct super_block *sb, count = EXT4_C2B(EXT4_SB(sb), count); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); + return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags); } /* @@ -2738,7 +2739,7 @@ static void ext4_free_data_callback(struct super_block *sb, if (test_opt(sb, DISCARD)) { err = ext4_issue_discard(sb, entry->efd_group, entry->efd_start_cluster, - entry->efd_count); + entry->efd_count, 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%d failed" @@ -4789,7 +4790,8 @@ do_more: * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count); + err = ext4_issue_discard(sb, block_group, bit, count, + 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%lu failed" @@ -4984,13 +4986,15 @@ error_return: * @count: number of blocks to TRIM * @group: alloc. group we are working with * @e4b: ext4 buddy for the group + * @blkdev_flags: flags for the block device * * Trim "count" blocks starting at "start" in the "group". To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ static int ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) + ext4_group_t group, struct ext4_buddy *e4b, + unsigned long blkdev_flags) { struct ext4_free_extent ex; int ret = 0; @@ -5009,7 +5013,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); - ret = ext4_issue_discard(sb, group, start, count); + ret = ext4_issue_discard(sb, group, start, count, blkdev_flags); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); return ret; @@ -5022,6 +5026,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count + * @blkdev_flags: flags for the block device * * ext4_trim_all_free walks through group's buddy bitmap searching for free * extents. When the free block is found, ext4_trim_extent is called to TRIM @@ -5036,7 +5041,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks) + ext4_grpblk_t minblocks, unsigned long blkdev_flags) { void *bitmap; ext4_grpblk_t next, count = 0, free_count = 0; @@ -5069,7 +5074,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if ((next - start) >= minblocks) { ret = ext4_trim_extent(sb, start, - next - start, group, &e4b); + next - start, group, &e4b, + blkdev_flags); if (ret && ret != -EOPNOTSUPP) break; ret = 0; @@ -5111,6 +5117,7 @@ out: * ext4_trim_fs() -- trim ioctl handle function * @sb: superblock for filesystem * @range: fstrim_range structure + * @blkdev_flags: flags for the block device * * start: First Byte to trim * len: number of Bytes to trim from start @@ -5119,7 +5126,8 @@ out: * start to start+len. For each such a group ext4_trim_all_free function * is invoked to trim all free space. */ -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range, + unsigned long blkdev_flags) { struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; @@ -5175,7 +5183,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen, blkdev_flags); if (cnt < 0) { ret = cnt; break; diff --git a/fs/seq_file.c b/fs/seq_file.c index 3dd44db1465..d84ca5932c7 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -8,8 +8,10 @@ #include <linux/fs.h> #include <linux/export.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/cred.h> +#include <linux/mm.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m) m->count = m->size; } +static void *seq_buf_alloc(unsigned long size) +{ + void *buf; + + buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!buf && size > PAGE_SIZE) + buf = vmalloc(size); + return buf; +} + /** * seq_open - initialize sequential file * @file: file we initialize @@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset) return 0; } if (!m->buf) { - m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) return -ENOMEM; } @@ -135,8 +147,8 @@ static int traverse(struct seq_file *m, loff_t offset) Eoverflow: m->op->stop(m, p); - kfree(m->buf); - m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + kvfree(m->buf); + m->buf = seq_buf_alloc(m->size <<= 1); return !m->buf ? -ENOMEM : -EAGAIN; } @@ -191,7 +203,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) /* grab buffer if we didn't have one */ if (!m->buf) { - m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) goto Enomem; } @@ -231,8 +243,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (m->count < m->size) goto Fill; m->op->stop(m, p); - kfree(m->buf); - m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + kvfree(m->buf); + m->buf = seq_buf_alloc(m->size <<= 1); if (!m->buf) goto Enomem; m->count = 0; @@ -349,7 +361,7 @@ EXPORT_SYMBOL(seq_lseek); int seq_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; - kfree(m->buf); + kvfree(m->buf); kfree(m); return 0; } @@ -604,13 +616,13 @@ EXPORT_SYMBOL(single_open); int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), void *data, size_t size) { - char *buf = kmalloc(size, GFP_KERNEL); + char *buf = seq_buf_alloc(size); int ret; if (!buf) return -ENOMEM; ret = single_open(file, show, data); if (ret) { - kfree(buf); + kvfree(buf); return ret; } ((struct seq_file *)file->private_data)->buf = buf; diff --git a/include/linux/mm.h b/include/linux/mm.h index fc3883852f9..cbaa3130537 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -324,6 +324,8 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif +extern void kvfree(const void *addr); + static inline void compound_lock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 6f19cfd1840..9687691799f 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,8 @@ #include <uapi/linux/seccomp.h> +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) + #ifdef CONFIG_SECCOMP #include <linux/thread_info.h> @@ -14,11 +16,11 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. - * @filter: The metadata and ruleset for determining what system calls - * are allowed for a task. + * @filter: must always point to a valid seccomp-filter or NULL as it is + * accessed without locking during system call entry. * * @filter must only be accessed from the context of current as there - * is no locking. + * is no read locking. */ struct seccomp { int mode; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 84662ecc7b5..4e98d717413 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -846,4 +846,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h index 40ec3482d1e..8ad2dbd0c29 100644 --- a/include/linux/wlan_plat.h +++ b/include/linux/wlan_plat.h @@ -15,13 +15,15 @@ #ifndef _LINUX_WLAN_PLAT_H_ #define _LINUX_WLAN_PLAT_H_ +#define WLAN_PLAT_NODFS_FLAG 0x01 + struct wifi_platform_data { int (*set_power)(int val); int (*set_reset)(int val); int (*set_carddetect)(int val); void *(*mem_prealloc)(int section, unsigned long size); int (*get_mac_addr)(unsigned char *buf); - void *(*get_country_code)(char *ccode); + void *(*get_country_code)(char *ccode, u32 flags); }; #endif diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 304e41381a1..d9681a288ce 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2497,7 +2497,7 @@ struct wiphy_vendor_command { struct nl80211_vendor_cmd_info info; u32 flags; int (*doit)(struct wiphy *wiphy, struct wireless_dev *wdev, - void *data, int data_len); + const void *data, int data_len); }; /** @@ -3696,8 +3696,8 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp); static inline struct sk_buff * cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen) { - return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_TESTMODE, - NL80211_ATTR_TESTDATA, approxlen); + return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_VENDOR, + NL80211_ATTR_VENDOR_DATA, approxlen); } /** diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e361f488242..4ac12e14c6d 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -23,6 +23,8 @@ struct fib_rule { struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + kuid_t uid_start; + kuid_t uid_end; struct rcu_head rcu; struct net * fr_net; }; @@ -80,7 +82,9 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_UID_START] = { .type = NLA_U32 }, \ + [FRA_UID_END] = { .type = NLA_U32 } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 628e11b98c5..c91e2aae3fb 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -10,6 +10,7 @@ #include <linux/socket.h> #include <linux/in6.h> #include <linux/atomic.h> +#include <linux/uidgid.h> struct flowi_common { int flowic_oif; @@ -23,6 +24,7 @@ struct flowi_common { #define FLOWI_FLAG_CAN_SLEEP 0x02 #define FLOWI_FLAG_KNOWN_NH 0x04 __u32 flowic_secid; + kuid_t flowic_uid; }; union flowi_uli { @@ -59,6 +61,7 @@ struct flowi4 { #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -78,7 +81,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = 0; @@ -88,6 +92,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -115,6 +120,7 @@ struct flowi6 { #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -158,6 +164,7 @@ struct flowi { #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/ip.h b/include/net/ip.h index 67aa2d86615..1f6794b2cac 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -153,6 +153,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 8d977b34364..6be6debb536 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -136,7 +136,7 @@ extern int rt6_route_rcv(struct net_device *dev, const struct in6_addr *gwaddr); extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark); + int oif, u32 mark, kuid_t uid); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); diff --git a/include/net/route.h b/include/net/route.h index 2ea40c1b5e0..647bb2adbff 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -142,7 +142,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : 0); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -253,7 +253,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sock_i_uid(sk)); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 0cc74c4403e..b422ad5d238 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 __SYSCALL(__NR_finit_module, sys_finit_module) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr 274 +__SYSCALL(__NR_sched_setattr, sys_sched_setattr) + * #define __NR_sched_getattr 275 +__SYSCALL(__NR_sched_getattr, sys_sched_getattr) + * #define __NR_renameat2 276 +__SYSCALL(__NR_renameat2, sys_renameat2) + */ +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 274 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 51da65b68b8..9dcdb6251cb 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -49,6 +49,8 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_UID_START, /* UID range */ + FRA_UID_END, __FRA_MAX }; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a4ed56cf0ea..5014a5c472e 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -154,6 +154,8 @@ struct inodes_stat_t { #define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FIDTRIM _IOWR('f', 128, struct fstrim_range) /* Deep discard trim */ + #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a2144e1afa..07c1146c1f5 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -297,6 +297,7 @@ enum rtattr_type_t { RTA_TABLE, RTA_MARK, RTA_MFC_STATS, + RTA_UID, __RTA_MAX }; diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f7297..0f238a43ff1 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,13 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 + +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/fork.c b/kernel/fork.c index a0fbe527722..76665c42028 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto free_ti; tsk->stack = ti; +#ifdef CONFIG_SECCOMP + /* + * We must handle setting up seccomp filters once we're under + * the sighand lock in case orig has changed between now and + * then. Until then, filter must be NULL to avoid messing up + * the usage counts on the error path calling free_task. + */ + tsk->seccomp.filter = NULL; +#endif setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); @@ -1103,6 +1112,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) p->flags = new_flags; } +static void copy_seccomp(struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + /* + * Must be called with sighand->lock held, which is common to + * all threads in the group. Holding cred_guard_mutex is not + * needed because this new task is not yet running and cannot + * be racing exec. + */ + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Ref-count the new filter user, and assign it. */ + get_seccomp_filter(current); + p->seccomp = current->seccomp; + + /* + * Explicitly enable no_new_privs here in case it got set + * between the task_struct being duplicated and holding the + * sighand lock. The seccomp state and nnp must be in sync. + */ + if (task_no_new_privs(current)) + task_set_no_new_privs(p); + + /* + * If the parent gained a seccomp mode after copying thread + * flags and between before we held the sighand lock, we have + * to manually enable the seccomp thread flag here. + */ + if (p->seccomp.mode != SECCOMP_MODE_DISABLED) + set_tsk_thread_flag(p, TIF_SECCOMP); +#endif +} + SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -1207,7 +1249,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; ftrace_graph_init_task(p); - get_seccomp_filter(p); rt_mutex_init_task(p); @@ -1450,6 +1491,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock(¤t->sighand->siglock); /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + + /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b7a10048a32..2d13b264d85 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,15 +18,17 @@ #include <linux/compat.h> #include <linux/sched.h> #include <linux/seccomp.h> +#include <linux/slab.h> +#include <linux/syscalls.h> /* #define SECCOMP_DEBUG 1 */ #ifdef CONFIG_SECCOMP_FILTER #include <asm/syscall.h> #include <linux/filter.h> +#include <linux/pid.h> #include <linux/ptrace.h> #include <linux/security.h> -#include <linux/slab.h> #include <linux/tracehook.h> #include <linux/uaccess.h> @@ -201,45 +203,184 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) */ static u32 seccomp_run_filters(int syscall) { - struct seccomp_filter *f; + struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); + struct seccomp_data sd; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ - if (WARN_ON(current->seccomp.filter == NULL)) + if (unlikely(WARN_ON(f == NULL))) return SECCOMP_RET_KILL; + /* Make sure cross-thread synced filter points somewhere sane. */ + smp_read_barrier_depends(); + + populate_seccomp_data(&sd); + /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ - for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = sk_run_filter(NULL, f->insns); + for (; f; f = f->prev) { + u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; } return ret; } +#endif /* CONFIG_SECCOMP_FILTER */ + +static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) +{ + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) + return false; + + return true; +} + +static inline void seccomp_assign_mode(struct task_struct *task, + unsigned long seccomp_mode) +{ + BUG_ON(!spin_is_locked(&task->sighand->siglock)); + + task->seccomp.mode = seccomp_mode; + /* + * Make sure TIF_SECCOMP cannot be set before the mode (and + * filter) is set. + */ + smp_mb__before_atomic(); + set_tsk_thread_flag(task, TIF_SECCOMP); +} + +#ifdef CONFIG_SECCOMP_FILTER +/* Returns 1 if the parent is an ancestor of the child. */ +static int is_ancestor(struct seccomp_filter *parent, + struct seccomp_filter *child) +{ + /* NULL is the root ancestor. */ + if (parent == NULL) + return 1; + for (; child; child = child->prev) + if (child == parent) + return 1; + return 0; +} /** - * seccomp_attach_filter: Attaches a seccomp filter to current. + * seccomp_can_sync_threads: checks if all threads can be synchronized + * + * Expects sighand and cred_guard_mutex locks to be held. + * + * Returns 0 on success, -ve on error, or the pid of a thread which was + * either not in the correct seccomp mode or it did not have an ancestral + * seccomp filter. + */ +static inline pid_t seccomp_can_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Validate all threads being eligible for synchronization. */ + caller = current; + for_each_thread(caller, thread) { + pid_t failed; + + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || + (thread->seccomp.mode == SECCOMP_MODE_FILTER && + is_ancestor(thread->seccomp.filter, + caller->seccomp.filter))) + continue; + + /* Return the first thread that cannot be synchronized. */ + failed = task_pid_vnr(thread); + /* If the pid cannot be resolved, then return -ESRCH */ + if (unlikely(WARN_ON(failed == 0))) + failed = -ESRCH; + return failed; + } + + return 0; +} + +/** + * seccomp_sync_threads: sets all threads to use current's filter + * + * Expects sighand and cred_guard_mutex locks to be held, and for + * seccomp_can_sync_threads() to have returned success already + * without dropping the locks. + * + */ +static inline void seccomp_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Synchronize all threads. */ + caller = current; + for_each_thread(caller, thread) { + /* Skip current, since it needs no changes. */ + if (thread == caller) + continue; + + /* Get a task reference for the new leaf node. */ + get_seccomp_filter(caller); + /* + * Drop the task reference to the shared ancestor since + * current's path will hold a reference. (This also + * allows a put before the assignment.) + */ + put_seccomp_filter(thread); + smp_store_release(&thread->seccomp.filter, + caller->seccomp.filter); + /* + * Opt the other thread into seccomp if needed. + * As threads are considered to be trust-realm + * equivalent (see ptrace_may_access), it is safe to + * allow one thread to transition the other. + */ + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + } + } +} + +/** + * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install * - * Returns 0 on success or an errno on failure. + * Returns filter on success or an ERR_PTR on failure. */ -static long seccomp_attach_filter(struct sock_fprog *fprog) +static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *filter; - unsigned long fp_size = fprog->len * sizeof(struct sock_filter); - unsigned long total_insns = fprog->len; + unsigned long fp_size; + struct sock_filter *fp; + int new_len; long ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) - return -EINVAL; - - for (filter = current->seccomp.filter; filter; filter = filter->prev) - total_insns += filter->len + 4; /* include a 4 instr penalty */ - if (total_insns > MAX_INSNS_PER_PATH) - return -ENOMEM; + return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); + fp_size = fprog->len * sizeof(struct sock_filter); /* * Installing a seccomp filter requires that the task have @@ -250,15 +391,11 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!current->no_new_privs && security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) - return -EACCES; + return ERR_PTR(-EACCES); - /* Allocate a new seccomp_filter */ - filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, - GFP_KERNEL|__GFP_NOWARN); - if (!filter) - return -ENOMEM; - atomic_set(&filter->usage, 1); - filter->len = fprog->len; + fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); + if (!fp) + return ERR_PTR(-ENOMEM); /* Copy the instructions from fprog. */ ret = -EFAULT; @@ -273,30 +410,46 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) /* Check and rewrite the fprog for seccomp use */ ret = seccomp_check_filter(filter->insns, filter->len); if (ret) - goto fail; + goto free_prog; - /* - * If there is an existing filter, make it the prev and don't drop its - * task reference. - */ - filter->prev = current->seccomp.filter; - current->seccomp.filter = filter; - return 0; -fail: + /* Allocate a new seccomp_filter */ + ret = -ENOMEM; + filter = kzalloc(sizeof(struct seccomp_filter) + + sizeof(struct sock_filter_int) * new_len, + GFP_KERNEL|__GFP_NOWARN); + if (!filter) + goto free_prog; + + ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); + if (ret) + goto free_filter; + kfree(fp); + + atomic_set(&filter->usage, 1); + filter->len = new_len; + + return filter; + +free_filter_prog: + kfree(filter->prog); +free_filter: kfree(filter); - return ret; +free_prog: + kfree(fp); + return ERR_PTR(ret); } /** - * seccomp_attach_user_filter - attaches a user-supplied sock_fprog + * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog * @user_filter: pointer to the user data containing a sock_fprog. * * Returns 0 on success and non-zero otherwise. */ -long seccomp_attach_user_filter(char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; - long ret = -EFAULT; + struct seccomp_filter *filter = ERR_PTR(-EFAULT); #ifdef CONFIG_COMPAT if (is_compat_task()) { @@ -309,9 +462,56 @@ long seccomp_attach_user_filter(char __user *user_filter) #endif if (copy_from_user(&fprog, user_filter, sizeof(fprog))) goto out; - ret = seccomp_attach_filter(&fprog); + filter = seccomp_prepare_filter(&fprog); out: - return ret; + return filter; +} + +/** + * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior + * @filter: seccomp filter to add to the current process + * + * Caller must be holding current->sighand->siglock lock. + * + * Returns 0 on success, -ve on error. + */ +static long seccomp_attach_filter(unsigned int flags, + struct seccomp_filter *filter) +{ + unsigned long total_insns; + struct seccomp_filter *walker; + + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Validate resulting filter length. */ + total_insns = filter->prog->len; + for (walker = current->seccomp.filter; walker; walker = walker->prev) + total_insns += walker->prog->len + 4; /* 4 instr penalty */ + if (total_insns > MAX_INSNS_PER_PATH) + return -ENOMEM; + + /* If thread sync has been requested, check that it is possible. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) { + int ret; + + ret = seccomp_can_sync_threads(); + if (ret) + return ret; + } + + /* + * If there is an existing filter, make it the prev and don't drop its + * task reference. + */ + filter->prev = current->seccomp.filter; + current->seccomp.filter = filter; + + /* Now that the new filter is in place, synchronize to all threads. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + seccomp_sync_threads(); + + return 0; } /* get_seccomp_filter - increments the reference count of the filter on @tsk */ @@ -324,6 +524,14 @@ void get_seccomp_filter(struct task_struct *tsk) atomic_inc(&orig->usage); } +static inline void seccomp_filter_free(struct seccomp_filter *filter) +{ + if (filter) { + sk_filter_free(filter->prog); + kfree(filter); + } +} + /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ void put_seccomp_filter(struct task_struct *tsk) { @@ -332,7 +540,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - kfree(freeme); + seccomp_filter_free(freeme); } } @@ -376,12 +584,17 @@ static int mode1_syscalls_32[] = { int __secure_computing(int this_syscall) { - int mode = current->seccomp.mode; int exit_sig = 0; int *syscall; u32 ret; - switch (mode) { + /* + * Make sure that any changes to mode from another thread have + * been seen after TIF_SECCOMP was seen. + */ + rmb(); + + switch (current->seccomp.mode) { case SECCOMP_MODE_STRICT: syscall = mode1_syscalls; #ifdef CONFIG_COMPAT @@ -467,47 +680,152 @@ long prctl_get_seccomp(void) } /** - * prctl_set_seccomp: configures current->seccomp.mode - * @seccomp_mode: requested mode to use - * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER + * seccomp_set_mode_strict: internal function for setting strict seccomp * - * This function may be called repeatedly with a @seccomp_mode of - * SECCOMP_MODE_FILTER to install additional filters. Every filter - * successfully installed will be evaluated (in reverse order) for each system - * call the task makes. + * Once current->seccomp.mode is non-zero, it may not be changed. + * + * Returns 0 on success or -EINVAL on failure. + */ +static long seccomp_set_mode_strict(void) +{ + const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; + long ret = -EINVAL; + + spin_lock_irq(¤t->sighand->siglock); + + if (!seccomp_may_assign_mode(seccomp_mode)) + goto out; + +#ifdef TIF_NOTSC + disable_TSC(); +#endif + seccomp_assign_mode(current, seccomp_mode); + ret = 0; + +out: + spin_unlock_irq(¤t->sighand->siglock); + + return ret; +} + +#ifdef CONFIG_SECCOMP_FILTER +/** + * seccomp_set_mode_filter: internal function for setting seccomp filter + * @flags: flags to change filter behavior + * @filter: struct sock_fprog containing filter + * + * This function may be called repeatedly to install additional filters. + * Every filter successfully installed will be evaluated (in reverse order) + * for each system call the task makes. * * Once current->seccomp.mode is non-zero, it may not be changed. * * Returns 0 on success or -EINVAL on failure. */ -long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +static long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { + const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; + struct seccomp_filter *prepared = NULL; long ret = -EINVAL; - if (current->seccomp.mode && - current->seccomp.mode != seccomp_mode) + /* Validate flags. */ + if (flags & ~SECCOMP_FILTER_FLAG_MASK) + return -EINVAL; + + /* Prepare the new filter before holding any locks. */ + prepared = seccomp_prepare_user_filter(filter); + if (IS_ERR(prepared)) + return PTR_ERR(prepared); + + /* + * Make sure we cannot change seccomp or nnp state via TSYNC + * while another thread is in the middle of calling exec. + */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC && + mutex_lock_killable(¤t->signal->cred_guard_mutex)) + goto out_free; + + spin_lock_irq(¤t->sighand->siglock); + + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; + ret = seccomp_attach_filter(flags, prepared); + if (ret) + goto out; + /* Do not free the successfully attached filter. */ + prepared = NULL; + + seccomp_assign_mode(current, seccomp_mode); +out: + spin_unlock_irq(¤t->sighand->siglock); + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + mutex_unlock(¤t->signal->cred_guard_mutex); +out_free: + seccomp_filter_free(prepared); + return ret; +} +#else +static inline long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) +{ + return -EINVAL; +} +#endif + +/* Common entry point for both prctl and syscall. */ +static long do_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs) +{ + switch (op) { + case SECCOMP_SET_MODE_STRICT: + if (flags != 0 || uargs != NULL) + return -EINVAL; + return seccomp_set_mode_strict(); + case SECCOMP_SET_MODE_FILTER: + return seccomp_set_mode_filter(flags, uargs); + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, + const char __user *, uargs) +{ + return do_seccomp(op, flags, uargs); +} + +/** + * prctl_set_seccomp: configures current->seccomp.mode + * @seccomp_mode: requested mode to use + * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER + * + * Returns 0 on success or -EINVAL on failure. + */ +long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +{ + unsigned int op; + char __user *uargs; + switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - ret = 0; -#ifdef TIF_NOTSC - disable_TSC(); -#endif + op = SECCOMP_SET_MODE_STRICT; + /* + * Setting strict mode through prctl always ignored filter, + * so make sure it is always NULL here to pass the internal + * check in do_seccomp(). + */ + uargs = NULL; break; -#ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: - ret = seccomp_attach_user_filter(filter); - if (ret) - goto out; + op = SECCOMP_SET_MODE_FILTER; + uargs = filter; break; -#endif default: - goto out; + return -EINVAL; } - current->seccomp.mode = seccomp_mode; - set_thread_flag(TIF_SECCOMP); -out: - return ret; + /* prctl interface doesn't have flags, so they are always zero. */ + return do_seccomp(op, 0, uargs); } diff --git a/kernel/sys.c b/kernel/sys.c index ab7fda5fbe1..65d3e55bd28 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2187,7 +2187,7 @@ static int prctl_set_vma_anon_name(unsigned long start, unsigned long end, tmp = end; /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = prctl_update_vma_anon_name(vma, &prev, start, end, + error = prctl_update_vma_anon_name(vma, &prev, start, tmp, (const char __user *)arg); if (error) return error; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7078052284f..7e7fc0a082c 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); + +/* operate on Secure Computing state */ +cond_syscall(sys_seccomp); diff --git a/mm/util.c b/mm/util.c index ab1424dbe2e..36aa4815e3d 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,6 +7,7 @@ #include <linux/security.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> #include "internal.h" @@ -384,6 +385,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, } EXPORT_SYMBOL(vm_mmap); +void kvfree(const void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} +EXPORT_SYMBOL(kvfree); + struct address_space *page_mapping(struct page *page) { struct address_space *mapping = page->mapping; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 55e08e2de3a..da78f5c6d29 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -31,6 +31,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->uid_start = INVALID_UID; + r->uid_end = INVALID_UID; r->fr_net = hold_net(ops->fro_net); /* The lock is not required here, the list in unreacheable @@ -179,6 +181,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static inline kuid_t fib_nl_uid(struct nlattr *nla) +{ + return make_kuid(current_user_ns(), nla_get_u32(nla)); +} + +static int nla_put_uid(struct sk_buff *skb, int idx, kuid_t uid) +{ + return nla_put_u32(skb, idx, from_kuid_munged(current_user_ns(), uid)); +} + +static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) +{ + return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || + (uid_gte(fl->flowi_uid, rule->uid_start) && + uid_lte(fl->flowi_uid, rule->uid_end)); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -193,6 +212,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; + if (!fib_uid_range_match(fl, rule)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -363,6 +385,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + /* UID start and end must either both be valid or both unspecified. */ + rule->uid_start = rule->uid_end = INVALID_UID; + if (tb[FRA_UID_START] || tb[FRA_UID_END]) { + if (tb[FRA_UID_START] && tb[FRA_UID_END]) { + rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); + rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); + } + if (!uid_valid(rule->uid_start) || + !uid_valid(rule->uid_end) || + !uid_lte(rule->uid_start, rule->uid_end)) + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -469,6 +504,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) continue; + if (tb[FRA_UID_START] && + !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) + continue; + + if (tb[FRA_UID_END] && + !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -525,7 +568,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ - + nla_total_size(4); /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_UID_START */ + + nla_total_size(4); /* FRA_UID_END */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -579,7 +624,11 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, ((rule->mark_mask || rule->mark) && nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) || (rule->target && - nla_put_u32(skb, FRA_GOTO, rule->target))) + nla_put_u32(skb, FRA_GOTO, rule->target)) || + (uid_valid(rule->uid_start) && + nla_put_uid(skb, FRA_UID_START, rule->uid_start)) || + (uid_valid(rule->uid_end) && + nla_put_uid(skb, FRA_UID_END, rule->uid_end))) goto nla_put_failure; if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4556cd25acd..ea47f2fc3ea 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -531,6 +531,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 442087d371f..6dfec2f1821 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -422,7 +422,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, sk->sk_protocol, flags, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -458,7 +459,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 66431e0c243..a5e529663be 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1502,7 +1502,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index b61b28a2121..64d9d4345cb 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -768,7 +768,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index b4a1c42a627..2dfe804fd66 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -571,9 +571,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | - (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0); + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + daddr, saddr, 0, 0, + sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 43ef33d5b91..271732355ee 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -515,7 +515,7 @@ void __ip_select_ident(struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -531,11 +531,12 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sk ? sock_i_uid(sk) : 0); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, - const struct sock *sk) + struct sock *sk) { const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; @@ -546,7 +547,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); } -static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) +static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; @@ -560,11 +561,12 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, + sock_i_uid(sk)); rcu_read_unlock(); } -static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk, const struct sk_buff *skb) { if (skb) @@ -2307,6 +2309,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2356,6 +2363,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int err; int mark; struct sk_buff *skb; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2383,6 +2391,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2390,6 +2402,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5abb45e281b..c94032b95c6 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -353,7 +353,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + ireq->loc_addr, th->source, th->dest, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c3075b55224..58c5dbd5fb9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -963,7 +963,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a4cfde67fcb..d29ae19ae69 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -694,6 +694,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index bb02e176cb7..b903e19463c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -630,7 +630,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); xfrm_state_put(x); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 1aef8b22ba7..b58e3f24643 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -162,6 +162,7 @@ ipv4_connected: fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 40ffd72243a..fdc81cb29e8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -449,7 +449,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); xfrm_state_put(x); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 84bdcd06dd3..c1b611cc55a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -90,7 +90,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index f1493138d21..65a46058c85 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -84,6 +84,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, fl6->flowi6_mark = inet_rsk(req)->ir_mark; fl6->fl6_dport = inet_rsk(req)->rmt_port; fl6->fl6_sport = inet_rsk(req)->loc_port; + fl6->flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); @@ -211,6 +212,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); final_p = fl6_update_dst(fl6, np->opt, &final); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 7af5aee75d9..a1beb59a841 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -78,7 +78,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); xfrm_state_put(x); } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8d1c2064056..5f0d294b36c 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -159,6 +159,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.saddr = np->saddr; fl6.daddr = *daddr; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dff1f4b2c66..a4bf16d0d3d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -761,6 +761,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 18e27efa1bc..03b1e6fde27 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1149,7 +1149,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1162,6 +1162,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1173,7 +1174,7 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk)); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); @@ -2250,6 +2251,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2636,6 +2638,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 1efbc6f44a6..ba8622daffd 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -243,6 +243,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 747374c9887..9c2f2b9c17e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -252,6 +252,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c46539a1df5..e06772c4bcc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1151,6 +1151,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (msg->msg_controllen) { opt = &opt_space; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 95d93678bf5..9abf1969c11 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9153,7 +9153,8 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_vendor_cmd, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, }, }; @@ -10973,11 +10974,11 @@ int nl80211_init(void) err = genl_register_mc_group(&nl80211_fam, &nl80211_testmode_mcgrp); if (err) goto err_out; -#endif err = genl_register_mc_group(&nl80211_fam, &nl80211_vendor_mcgrp); if (err) goto err_out; +#endif err = netlink_register_notifier(&nl80211_netlink_notifier); if (err) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4db2177a69e..ab406d0462b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -55,7 +55,7 @@ * also linked into the probe response struct. */ -#define IEEE80211_SCAN_RESULT_EXPIRE (3 * HZ) +#define IEEE80211_SCAN_RESULT_EXPIRE (7 * HZ) static void bss_free(struct cfg80211_internal_bss *bss) { diff --git a/security/Kconfig b/security/Kconfig index e9c6ac724fe..beb86b500ad 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -103,7 +103,7 @@ config INTEL_TXT config LSM_MMAP_MIN_ADDR int "Low address space for LSM to protect from user allocation" depends on SECURITY && SECURITY_SELINUX - default 32768 if ARM + default 32768 if ARM || (ARM64 && COMPAT) default 65536 help This is the portion of low virtual memory which should be protected diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 40aedd9f73e..4a8cbfeef8b 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -65,7 +65,6 @@ extern int apparmor_initialized __initdata; char *aa_split_fqname(char *args, char **ns_name); void aa_info_message(const char *str); void *kvmalloc(size_t size); -void kvfree(void *buffer); /** diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7430298116d..ce8d9a84ab2 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -103,35 +103,3 @@ void *kvmalloc(size_t size) } return buffer; } - -/** - * do_vfree - workqueue routine for freeing vmalloced memory - * @work: data to be freed - * - * The work_struct is overlaid to the data being freed, as at the point - * the work is scheduled the data is no longer valid, be its freeing - * needs to be delayed until safe. - */ -static void do_vfree(struct work_struct *work) -{ - vfree(work); -} - -/** - * kvfree - free an allocation do by kvmalloc - * @buffer: buffer to free (MAYBE_NULL) - * - * Free a buffer allocated by kvmalloc - */ -void kvfree(void *buffer) -{ - if (is_vmalloc_addr(buffer)) { - /* Data is no longer valid so just use the allocated space - * as the work_struct - */ - struct work_struct *work = (struct work_struct *) buffer; - INIT_WORK(work, do_vfree); - schedule_work(work); - } else - kfree(buffer); -} diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e0058526653..a8485c1fc3a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -423,6 +423,13 @@ static int sb_finish_set_opts(struct super_block *sb) if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) sbsec->flags |= SE_SBLABELSUPP; + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + sbsec->flags |= SE_SBLABELSUPP; + /* Initialize the root inode. */ rc = inode_doinit_with_dentry(root_inode, root); |