From dacbe1f2be8f8d84a39a7926c21a2ac0d1bd5263 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Tue, 15 Mar 2016 14:55:12 -0700
Subject: mm/slub: support left redzone

SLUB already has a redzone debugging feature, but the redzone is placed
only at the end of the object (a right redzone), so it cannot catch
out-of-bounds accesses to the left of an object.  Although the current
object's right redzone acts as the left redzone of the next object, the
first object in a slab cannot take advantage of this effect.  This patch
explicitly adds a left redzone to each object to detect left
out-of-bounds accesses more precisely.

Background:

Someone complained to me that a left out-of-bounds access is not caught
even when KASAN is enabled together with page allocation debugging.  The
page to the left of the slab is outside our control, so it may already
be allocated when the left OOB happens, and in that case the access
cannot be detected.  Moreover, the SLUB debugging feature can be enabled
without page allocator debugging, and in that case the OOB is missed as
well.

Before trying to implement this I expected the changes to be quite
complex, but they turned out not to be.  Almost all of the changes are
confined to the debug-specific functions, so I feel okay about it.

Signed-off-by: Joonsoo Kim
Cc: Christoph Lameter
Cc: Pekka Enberg
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
(cherry picked from commit d86bd1bece6fc41d59253002db5441fe960a37f6)
Signed-off-by: Alex Shi
---
 include/linux/slub_def.h |   1 +
 mm/slub.c                | 100 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 33885118523c..f4e857e920cd 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -81,6 +81,7 @@ struct kmem_cache {
 	int reserved;		/* Reserved bytes at the end of slabs */
 	const char *name;	/* Name (only for display!)
*/ struct list_head list; /* List of slab caches */ + int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif diff --git a/mm/slub.c b/mm/slub.c index fbadb3753d4d..41f7cae64a49 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s) #endif } +static inline void *fixup_red_left(struct kmem_cache *s, void *p) +{ + if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) + p += s->red_left_pad; + + return p; +} + static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) { #ifdef CONFIG_SLUB_CPU_PARTIAL @@ -224,24 +232,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) * Core slab cache functions *******************************************************************/ -/* Verify that a pointer has an address that is valid within a slab page */ -static inline int check_valid_pointer(struct kmem_cache *s, - struct page *page, const void *object) -{ - void *base; - - if (!object) - return 1; - - base = page_address(page); - if (object < base || object >= base + page->objects * s->size || - (object - base) % s->size) { - return 0; - } - - return 1; -} - static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); @@ -271,12 +261,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ - for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ - __p += (__s)->size) + for (__p = fixup_red_left(__s, __addr); \ + __p < (__addr) + (__objects) * (__s)->size; \ + __p += (__s)->size) #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ - for (__p = (__addr), __idx = 1; __idx <= __objects;\ - __p += (__s)->size, __idx++) + for (__p = fixup_red_left(__s, __addr), __idx = 1; \ + __idx <= __objects; \ + __p += (__s)->size, __idx++) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) @@ -456,6 +448,22 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) set_bit(slab_index(p, s, addr), map); } +static inline int size_from_object(struct kmem_cache *s) +{ + if (s->flags & SLAB_RED_ZONE) + return s->size - s->red_left_pad; + + return s->size; +} + +static inline void *restore_red_left(struct kmem_cache *s, void *p) +{ + if (s->flags & SLAB_RED_ZONE) + p -= s->red_left_pad; + + return p; +} + /* * Debug settings: */ @@ -489,6 +497,26 @@ static inline void metadata_access_disable(void) /* * Object debugging */ + +/* Verify that a pointer has an address that is valid within a slab page */ +static inline int check_valid_pointer(struct kmem_cache *s, + struct page *page, void *object) +{ + void *base; + + if (!object) + return 1; + + base = page_address(page); + object = restore_red_left(s, object); + if (object < base || object >= base + page->objects * s->size || + (object - base) % s->size) { + return 0; + } + + return 1; +} + static void print_section(char *text, u8 *addr, unsigned int length) { metadata_access_enable(); @@ -628,7 +656,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", p, p - addr, get_freepointer(s, p)); - if (p > addr + 16) + if (s->flags & SLAB_RED_ZONE) + print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); + else if (p > addr + 16) print_section("Bytes b4 ", p - 16, 
16); print_section("Object ", p, min_t(unsigned long, s->object_size, @@ -645,9 +675,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); - if (off != s->size) + if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ - print_section("Padding ", p + off, s->size - off); + print_section("Padding ", p + off, size_from_object(s) - off); dump_stack(); } @@ -677,6 +707,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) { u8 *p = object; + if (s->flags & SLAB_RED_ZONE) + memset(p - s->red_left_pad, val, s->red_left_pad); + if (s->flags & __OBJECT_POISON) { memset(p, POISON_FREE, s->object_size - 1); p[s->object_size - 1] = POISON_END; @@ -769,11 +802,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) /* We also have user information there */ off += 2 * sizeof(struct track); - if (s->size == off) + if (size_from_object(s) == off) return 1; return check_bytes_and_report(s, page, p, "Object padding", - p + off, POISON_INUSE, s->size - off); + p + off, POISON_INUSE, size_from_object(s) - off); } /* Check the pad bytes at the end of a slab page */ @@ -817,6 +850,10 @@ static int check_object(struct kmem_cache *s, struct page *page, u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { + if (!check_bytes_and_report(s, page, object, "Redzone", + object - s->red_left_pad, val, s->red_left_pad)) + return 0; + if (!check_bytes_and_report(s, page, object, "Redzone", endobject, val, s->inuse - s->object_size)) return 0; @@ -1468,7 +1505,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, p, NULL); } - page->freelist = start; + page->freelist = fixup_red_left(s, start); page->inuse = page->objects; page->frozen = 1; @@ -3283,7 +3320,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ size += 2 * sizeof(struct track); - if (flags & SLAB_RED_ZONE) + if (flags & SLAB_RED_ZONE) { /* * Add some empty padding so that we can catch * overwrites from earlier objects rather than let @@ -3292,6 +3329,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * of the object. */ size += sizeof(void *); + + s->red_left_pad = sizeof(void *); + s->red_left_pad = ALIGN(s->red_left_pad, s->align); + size += s->red_left_pad; + } #endif /* -- cgit v1.2.3 From 455003e59e7981b5fc2e660b3d93b74b85a1a55f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 22 Aug 2016 11:53:59 -0500 Subject: usercopy: fix overlap check for kernel text When running with a local patch which moves the '_stext' symbol to the very beginning of the kernel text area, I got the following panic with CONFIG_HARDENED_USERCOPY: usercopy: kernel memory exposure attempt detected from ffff88103dfff000 () (4096 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:79! invalid opcode: 0000 [#1] SMP ... CPU: 0 PID: 4800 Comm: cp Not tainted 4.8.0-rc3.after+ #1 Hardware name: Dell Inc. 
PowerEdge R720/0X3D66, BIOS 2.5.4 01/22/2016 task: ffff880817444140 task.stack: ffff880816274000 RIP: 0010:[] __check_object_size+0x76/0x413 RSP: 0018:ffff880816277c40 EFLAGS: 00010246 RAX: 000000000000006b RBX: ffff88103dfff000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff88081f80dfa8 RDI: ffff88081f80dfa8 RBP: ffff880816277c90 R08: 000000000000054c R09: 0000000000000000 R10: 0000000000000005 R11: 0000000000000006 R12: 0000000000001000 R13: ffff88103e000000 R14: ffff88103dffffff R15: 0000000000000001 FS: 00007fb9d1750800(0000) GS:ffff88081f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000021d2000 CR3: 000000081a08f000 CR4: 00000000001406f0 Stack: ffff880816277cc8 0000000000010000 000000043de07000 0000000000000000 0000000000001000 ffff880816277e60 0000000000001000 ffff880816277e28 000000000000c000 0000000000001000 ffff880816277ce8 ffffffff8136c3a6 Call Trace: [] copy_page_to_iter_iovec+0xa6/0x1c0 [] copy_page_to_iter+0x16/0x90 [] generic_file_read_iter+0x3e3/0x7c0 [] ? xfs_file_buffered_aio_write+0xad/0x260 [xfs] [] ? down_read+0x12/0x40 [] xfs_file_buffered_aio_read+0x51/0xc0 [xfs] [] xfs_file_read_iter+0x62/0xb0 [xfs] [] __vfs_read+0xdf/0x130 [] vfs_read+0x8e/0x140 [] SyS_read+0x55/0xc0 [] do_syscall_64+0x67/0x160 [] entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:[<00007fb9d0c33c00>] 0x7fb9d0c33c00 RSP: 002b:00007ffc9c262f28 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: fffffffffff8ffff RCX: 00007fb9d0c33c00 RDX: 0000000000010000 RSI: 00000000021c3000 RDI: 0000000000000004 RBP: 00000000021c3000 R08: 0000000000000000 R09: 00007ffc9c264d6c R10: 00007ffc9c262c50 R11: 0000000000000246 R12: 0000000000010000 R13: 00007ffc9c2630b0 R14: 0000000000000004 R15: 0000000000010000 Code: 81 48 0f 44 d0 48 c7 c6 90 4d a3 81 48 c7 c0 bb b3 a2 81 48 0f 44 f0 4d 89 e1 48 89 d9 48 c7 c7 68 16 a3 81 31 c0 e8 f4 57 f7 ff <0f> 0b 48 8d 90 00 40 00 00 48 39 d3 0f 83 22 01 00 00 48 39 c3 RIP [] __check_object_size+0x76/0x413 RSP The checked object's range [ffff88103dfff000, ffff88103e000000) is valid, so there shouldn't have been a BUG. The hardened usercopy code got confused because the range's ending address is the same as the kernel's text starting address at 0xffff88103e000000. The overlap check is slightly off. Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Josh Poimboeuf Signed-off-by: Kees Cook (cherry picked from commit 94cd97af690dd9537818dc9841d0ec68bb1dd877) Signed-off-by: Alex Shi --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 8ebae91a6b55..6b1c20f9d486 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -83,7 +83,7 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low, unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ - if (check_low >= high || check_high < low) + if (check_low >= high || check_high <= low) return false; return true; -- cgit v1.2.3 From 4663a70283d6b3bb1270627d2dd3fc3ba94cd723 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Sep 2016 09:54:34 -0700 Subject: usercopy: remove page-spanning test for now A custom allocator without __GFP_COMP that copies to userspace has been found in vmw_execbuf_process[1], so this disables the page-span checker by placing it behind a CONFIG for future work where such things can be tracked down later. 
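For context on why __GFP_COMP matters to this checker, here is a hedged
illustration (not code from vmwgfx or from this patch): a multi-page
buffer only passes the "same compound page" test added below if it was
allocated as a compound page in the first place.

	#include <linux/gfp.h>
	#include <linux/mm.h>

	/*
	 * Illustrative sketch: an order-2 (four page) buffer.  With
	 * __GFP_COMP the four pages form one compound page, so
	 * virt_to_head_page() returns the same head page for every byte
	 * of the buffer and a usercopy spanning the pages is allowed.
	 * Without __GFP_COMP each page stands alone, the head-page
	 * comparison fails, and the copy is rejected unless all the
	 * pages are Reserved or CMA.
	 */
	static void *alloc_usercopy_friendly_buffer(void)
	{
		struct page *page = alloc_pages(GFP_KERNEL | __GFP_COMP, 2);

		return page ? page_address(page) : NULL;
	}
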
[1] https://bugzilla.redhat.com/show_bug.cgi?id=1373326 Reported-by: Vinson Lee Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Kees Cook (cherry picked from commit 8e1f74ea02cf4562404c48c6882214821552c13f) Signed-off-by: Alex Shi --- mm/usercopy.c | 61 ++++++++++++++++++++++++++++++++------------------------ security/Kconfig | 11 ++++++++++ 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 6b1c20f9d486..c56b97b7c49c 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -134,30 +134,15 @@ static inline const char *check_bogus_address(const void *ptr, unsigned long n) return NULL; } -static inline const char *check_heap_object(const void *ptr, unsigned long n, - bool to_user) +/* Checks for allocs that are marked in some way as spanning multiple pages. */ +static inline const char *check_page_span(const void *ptr, unsigned long n, + struct page *page, bool to_user) { - struct page *page, *endpage; +#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; + struct page *endpage; bool is_reserved, is_cma; - /* - * Some architectures (arm64) return true for virt_addr_valid() on - * vmalloced addresses. Work around this by checking for vmalloc - * first. - */ - if (is_vmalloc_addr(ptr)) - return NULL; - - if (!virt_addr_valid(ptr)) - return NULL; - - page = virt_to_head_page(ptr); - - /* Check slab allocator for flags and size. */ - if (PageSlab(page)) - return __check_heap_object(ptr, n, page); - /* * Sometimes the kernel data regions are not marked Reserved (see * check below). And sometimes [_sdata,_edata) does not cover @@ -186,7 +171,7 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, ((unsigned long)end & (unsigned long)PAGE_MASK))) return NULL; - /* Allow if start and end are inside the same compound page. */ + /* Allow if fully inside the same compound (__GFP_COMP) page. */ endpage = virt_to_head_page(end); if (likely(endpage == page)) return NULL; @@ -199,20 +184,44 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, is_reserved = PageReserved(page); is_cma = is_migrate_cma_page(page); if (!is_reserved && !is_cma) - goto reject; + return ""; for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { page = virt_to_head_page(ptr); if (is_reserved && !PageReserved(page)) - goto reject; + return ""; if (is_cma && !is_migrate_cma_page(page)) - goto reject; + return ""; } +#endif return NULL; +} + +static inline const char *check_heap_object(const void *ptr, unsigned long n, + bool to_user) +{ + struct page *page; + + /* + * Some architectures (arm64) return true for virt_addr_valid() on + * vmalloced addresses. Work around this by checking for vmalloc + * first. + */ + if (is_vmalloc_addr(ptr)) + return NULL; + + if (!virt_addr_valid(ptr)) + return NULL; + + page = virt_to_head_page(ptr); + + /* Check slab allocator for flags and size. */ + if (PageSlab(page)) + return __check_heap_object(ptr, n, page); -reject: - return ""; + /* Verify object does not incorrectly span multiple pages. */ + return check_page_span(ptr, n, page, to_user); } /* diff --git a/security/Kconfig b/security/Kconfig index 46c00a674eec..ddb3e8a8d9bd 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -146,6 +146,17 @@ config HARDENED_USERCOPY or are part of the kernel text. This kills entire classes of heap overflow exploits and similar kernel memory exposures. 
+config HARDENED_USERCOPY_PAGESPAN + bool "Refuse to copy allocations that span multiple pages" + depends on HARDENED_USERCOPY + depends on !COMPILE_TEST + help + When a multi-page allocation is done without __GFP_COMP, + hardened usercopy will reject attempts to copy it. There are, + however, several cases of this in the kernel that have not all + been removed. This config is intended to be used only while + trying to find such users. + source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig -- cgit v1.2.3 From ed67fb82b17db1f5e3c7818e10560814c7d2e019 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Aug 2016 13:02:01 -0700 Subject: unsafe_[get|put]_user: change interface to use a error target label When I initially added the unsafe_[get|put]_user() helpers in commit 5b24a7a2aa20 ("Add 'unsafe' user access functions for batched accesses"), I made the mistake of modeling the interface on our traditional __[get|put]_user() functions, which return zero on success, or -EFAULT on failure. That interface is fairly easy to use, but it's actually fairly nasty for good code generation, since it essentially forces the caller to check the error value for each access. In particular, since the error handling is already internally implemented with an exception handler, and we already use "asm goto" for various other things, we could fairly easily make the error cases just jump directly to an error label instead, and avoid the need for explicit checking after each operation. So switch the interface to pass in an error label, rather than checking the error value in the caller. Best do it now before we start growing more users (the signal handling code in particular would be a good place to use the new interface). So rather than if (unsafe_get_user(x, ptr)) ... handle error .. the interface is now unsafe_get_user(x, ptr, label); where an error during the user mode fetch will now just cause a jump to 'label' in the caller. Right now the actual _implementation_ of this all still ends up being a "if (err) goto label", and does not take advantage of any exception label tricks, but for "unsafe_put_user()" in particular it should be fairly straightforward to convert to using the exception table model. Note that "unsafe_get_user()" is much harder to convert to a clever exception table model, because current versions of gcc do not allow the use of "asm goto" (for the exception) with output values (for the actual value to be fetched). But that is hopefully not a limitation in the long term. 
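As a concrete sketch of the new convention (the function, types and
label name below are hypothetical, not code from this patch), a caller
that fetches two words with a single error path would look something
like this:

	#include <linux/uaccess.h>

	/*
	 * Hypothetical caller: a faulting access jumps straight to the
	 * 'efault' label instead of being tested after every call.
	 */
	static int read_pair_from_user(const unsigned long __user *uptr,
				       unsigned long *a, unsigned long *b)
	{
		user_access_begin();
		unsafe_get_user(*a, uptr, efault);
		unsafe_get_user(*b, uptr + 1, efault);
		user_access_end();
		return 0;

	efault:
		user_access_end();
		return -EFAULT;
	}
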
[ Also note that it might be a good idea to switch unsafe_get_user() to actually _return_ the value it fetches from user space, but this commit only changes the error handling semantics ] Signed-off-by: Linus Torvalds (cherry picked from commit 1bd4403d86a1c06cb6cc9ac87664a0c9d3413d51) Signed-off-by: Alex Shi --- arch/x86/include/asm/uaccess.h | 16 ++++++++-------- include/linux/uaccess.h | 4 ++-- lib/strncpy_from_user.c | 8 ++++---- lib/strnlen_user.c | 7 +++---- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index dd73cf90fb18..be439e246d91 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -773,21 +773,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n) #define user_access_begin() __uaccess_begin() #define user_access_end() __uaccess_end() -#define unsafe_put_user(x, ptr) \ -({ \ +#define unsafe_put_user(x, ptr, err_label) \ +do { \ int __pu_err; \ __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ - __builtin_expect(__pu_err, 0); \ -}) + if (unlikely(__pu_err)) goto err_label; \ +} while (0) -#define unsafe_get_user(x, ptr) \ -({ \ +#define unsafe_get_user(x, ptr, err_label) \ +do { \ int __gu_err; \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __builtin_expect(__gu_err, 0); \ -}) + if (unlikely(__gu_err)) goto err_label; \ +} while (0) #endif /* _ASM_X86_UACCESS_H */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 349557825428..f30c187ed785 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -114,8 +114,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #ifndef user_access_begin #define user_access_begin() do { } while (0) #define user_access_end() do { } while (0) -#define unsafe_get_user(x, ptr) __get_user(x, ptr) -#define unsafe_put_user(x, ptr) __put_user(x, ptr) +#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0) +#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) #endif #endif /* __LINUX_UACCESS_H__ */ diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 33840324138c..5a003a2ebd96 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -39,8 +39,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long unsigned long c, data; /* Fall back to byte-at-a-time if we get a page fault */ - if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res)))) - break; + unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time); + *(unsigned long *)(dst+res) = c; if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); @@ -55,8 +55,7 @@ byte_at_a_time: while (max) { char c; - if (unlikely(unsafe_get_user(c,src+res))) - return -EFAULT; + unsafe_get_user(c,src+res, efault); dst[res] = c; if (!c) return res; @@ -75,6 +74,7 @@ byte_at_a_time: * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's an EFAULT. 
*/ +efault: return -EFAULT; } diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 2625943625d7..8e105ed4df12 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, src -= align; max += align; - if (unlikely(unsafe_get_user(c,(unsigned long __user *)src))) - return 0; + unsafe_get_user(c, (unsigned long __user *)src, efault); c |= aligned_byte_mask(align); for (;;) { @@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, if (unlikely(max <= sizeof(unsigned long))) break; max -= sizeof(unsigned long); - if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res)))) - return 0; + unsafe_get_user(c, (unsigned long __user *)(src+res), efault); } res -= align; @@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's 0. */ +efault: return 0; } -- cgit v1.2.3
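The unsafe_put_user() side follows the same pattern.  As a hedged sketch
(the structure and function below are invented for illustration; this is
not the signal-handling conversion the commit message mentions as a
likely future user), a batched copy-out with one error label might look
like:

	#include <linux/uaccess.h>

	struct three_words {
		unsigned long a, b, c;
	};

	/*
	 * Hypothetical batched copy-out: all three stores share a single
	 * error label, so the caller does not test a return value after
	 * each unsafe_put_user(), which is exactly the code-generation
	 * win the commit message describes.
	 */
	static int put_three_words(struct three_words __user *dst,
				   const struct three_words *src)
	{
		user_access_begin();
		unsafe_put_user(src->a, &dst->a, efault);
		unsafe_put_user(src->b, &dst->b, efault);
		unsafe_put_user(src->c, &dst->c, efault);
		user_access_end();
		return 0;

	efault:
		user_access_end();
		return -EFAULT;
	}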