Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile     |   4
-rw-r--r--  mm/madvise.c    |   3
-rw-r--r--  mm/mempolicy.c  |   2
-rw-r--r--  mm/mlock.c      |   3
-rw-r--r--  mm/mmap.c       |  58
-rw-r--r--  mm/mprotect.c   |   3
-rw-r--r--  mm/page_alloc.c |  35
-rw-r--r--  mm/shmem.c      |  13
-rw-r--r--  mm/slab.c       |  30
-rw-r--r--  mm/slub.c       | 140
-rw-r--r--  mm/swapfile.c   |  42
-rw-r--r--  mm/usercopy.c   | 278
-rw-r--r--  mm/vmscan.c     |  43
13 files changed, 590 insertions(+), 64 deletions(-)
diff --git a/mm/Makefile b/mm/Makefile
index 98c4eaeabdcb..38f9ccc6afa1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,6 +5,9 @@
KASAN_SANITIZE_slab_common.o := n
KASAN_SANITIZE_slub.o := n
+# Since __builtin_frame_address does work as used, disable the warning.
+CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
+
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
@@ -78,3 +81,4 @@ obj-$(CONFIG_CMA) += cma.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
+obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
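
The warning silenced above is gcc's -Wframe-address, which fires whenever __builtin_frame_address() is called with a nonzero level; the arch-provided stack-frame walking that usercopy.o pulls in does exactly that on purpose. A minimal sketch of the offending construct (illustrative only, not part of this patch):

	/* Walking one frame up trips -Wframe-address even though it is
	 * well defined on architectures that opt into frame checking. */
	static __always_inline void *caller_frame(void)
	{
		return __builtin_frame_address(1);
	}
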
diff --git a/mm/madvise.c b/mm/madvise.c
index 64bb8a22110c..13f762e52d0b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -103,7 +103,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
- vma->vm_file, pgoff, vma_policy(vma));
+ vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*prev) {
vma = *prev;
goto success;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 18c63b754e49..b8acd0da8b60 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -723,7 +723,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
((vmstart - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff,
- new_pol);
+ new_pol, vma_get_anon_name(vma));
if (prev) {
vma = prev;
next = vma->vm_next;
diff --git a/mm/mlock.c b/mm/mlock.c
index 3d3ee6cad776..91aed2ae7eca 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -510,7 +510,8 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
- vma->vm_file, pgoff, vma_policy(vma));
+ vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*prev) {
vma = *prev;
goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index d30b8f8f02b1..ae2c6440a702 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -57,6 +57,18 @@
#define arch_rebalance_pgtables(addr, len) (addr)
#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
+const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
+int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
+const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
+int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
+#endif
+
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
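
The mmap_rnd_bits knobs above bound how many address bits the per-architecture ASLR code may randomize. A hedged sketch of the consuming side (the real arch_mmap_rnd() implementations live under arch/, and the RNG helper shown here is an assumption):

	/* Illustrative only: derive an ASLR offset from mmap_rnd_bits. */
	static unsigned long example_mmap_rnd(void)
	{
		unsigned long rnd;

		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
		return rnd << PAGE_SHIFT;
	}
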
@@ -923,7 +935,8 @@ again: remove_next = 1 + (end > next->vm_end);
* per-vma resources, so we don't attempt to merge those.
*/
static inline int is_mergeable_vma(struct vm_area_struct *vma,
- struct file *file, unsigned long vm_flags)
+ struct file *file, unsigned long vm_flags,
+ const char __user *anon_name)
{
/*
* VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -939,6 +952,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
return 0;
if (vma->vm_ops && vma->vm_ops->close)
return 0;
+ if (vma_get_anon_name(vma) != anon_name)
+ return 0;
return 1;
}
@@ -969,9 +984,10 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
*/
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+ struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+ const char __user *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags) &&
+ if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
if (vma->vm_pgoff == vm_pgoff)
return 1;
@@ -988,9 +1004,10 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
*/
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+ struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+ const char __user *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags) &&
+ if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
pgoff_t vm_pglen;
vm_pglen = vma_pages(vma);
@@ -1001,9 +1018,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
}
/*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor. Or both (it neatly fills a hole).
*
* In most cases - when called for mmap, brk or mremap - [addr,end) is
* certain not to be mapped by the time vma_merge is called; but when
@@ -1032,8 +1049,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
struct vm_area_struct *vma_merge(struct mm_struct *mm,
struct vm_area_struct *prev, unsigned long addr,
unsigned long end, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file,
- pgoff_t pgoff, struct mempolicy *policy)
+ struct anon_vma *anon_vma, struct file *file,
+ pgoff_t pgoff, struct mempolicy *policy,
+ const char __user *anon_name)
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
struct vm_area_struct *area, *next;
@@ -1059,15 +1077,15 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/
if (prev && prev->vm_end == addr &&
mpol_equal(vma_policy(prev), policy) &&
- can_vma_merge_after(prev, vm_flags,
- anon_vma, file, pgoff)) {
+ can_vma_merge_after(prev, vm_flags, anon_vma,
+ file, pgoff, anon_name)) {
/*
* OK, it can. Can we now merge in the successor as well?
*/
if (next && end == next->vm_start &&
mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen) &&
+ can_vma_merge_before(next, vm_flags, anon_vma,
+ file, pgoff+pglen, anon_name) &&
is_mergeable_anon_vma(prev->anon_vma,
next->anon_vma, NULL)) {
/* cases 1, 6 */
@@ -1087,8 +1105,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/
if (next && end == next->vm_start &&
mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen)) {
+ can_vma_merge_before(next, vm_flags, anon_vma,
+ file, pgoff+pglen, anon_name)) {
if (prev && addr < prev->vm_end) /* case 4 */
err = vma_adjust(prev, prev->vm_start,
addr, prev->vm_pgoff, NULL);
@@ -1575,7 +1593,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* Can we just expand an old mapping?
*/
vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
- NULL);
+ NULL, NULL);
if (vma)
goto out;
@@ -2599,6 +2617,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
return 0;
}
+EXPORT_SYMBOL(do_munmap);
int vm_munmap(unsigned long start, size_t len)
{
@@ -2776,7 +2795,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
/* Can we just expand an old private anonymous mapping? */
vma = vma_merge(mm, prev, addr, addr + len, flags,
- NULL, NULL, pgoff, NULL);
+ NULL, NULL, pgoff, NULL, NULL);
if (vma)
goto out;
@@ -2932,7 +2951,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
return NULL; /* should never get here */
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (new_vma) {
/*
* Source vma may have been merged into new_vma
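
Note that the new anon_name argument is compared by pointer, not by string contents: vma_get_anon_name() yields the userspace pointer that named the VMA, so two mappings merge only when both are unnamed or were named through the same pointer. A sketch restating the rule added to is_mergeable_vma():

	/* NULL matches NULL; otherwise the stored user pointers must be
	 * identical for the two VMAs to be merge candidates. */
	static int names_allow_merge(struct vm_area_struct *vma,
				     const char __user *anon_name)
	{
		return vma_get_anon_name(vma) == anon_name;
	}
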
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 88584838e704..769deb085e00 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -290,7 +290,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
*/
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*pprev = vma_merge(mm, *pprev, start, end, newflags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*pprev) {
vma = *pprev;
goto success;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3a57d216fd60..f320d4436eee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -200,8 +200,21 @@ static char * const zone_names[MAX_NR_ZONES] = {
"Movable",
};
+/*
+ * Try to keep at least this much lowmem free. Do not allow normal
+ * allocations below this point, only high priority ones. Automatically
+ * tuned according to the amount of memory in the system.
+ */
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
+int min_free_order_shift = 1;
+
+/*
+ * Extra memory for the system to try freeing. Used to temporarily
+ * free memory to make space for new workloads. Anyone can allocate
+ * down to the min watermarks controlled by min_free_kbytes above.
+ */
+int extra_free_kbytes = 0;
static unsigned long __meminitdata nr_kernel_pages;
static unsigned long __meminitdata nr_all_pages;
@@ -1894,7 +1907,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
free_pages -= z->free_area[o].nr_free << o;
/* Require fewer higher order pages to be free */
- min >>= 1;
+ min >>= min_free_order_shift;
if (free_pages <= min)
return false;
@@ -5752,6 +5765,7 @@ static void setup_per_zone_lowmem_reserve(void)
static void __setup_per_zone_wmarks(void)
{
unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+ unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
unsigned long lowmem_pages = 0;
struct zone *zone;
unsigned long flags;
@@ -5763,11 +5777,14 @@ static void __setup_per_zone_wmarks(void)
}
for_each_zone(zone) {
- u64 tmp;
+ u64 min, low;
spin_lock_irqsave(&zone->lock, flags);
- tmp = (u64)pages_min * zone->managed_pages;
- do_div(tmp, lowmem_pages);
+ min = (u64)pages_min * zone->managed_pages;
+ do_div(min, lowmem_pages);
+ low = (u64)pages_low * zone->managed_pages;
+ do_div(low, vm_total_pages);
+
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -5788,11 +5805,13 @@ static void __setup_per_zone_wmarks(void)
* If it's a lowmem zone, reserve a number of pages
* proportionate to the zone's size.
*/
- zone->watermark[WMARK_MIN] = tmp;
+ zone->watermark[WMARK_MIN] = min;
}
- zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
- zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
+ zone->watermark[WMARK_LOW] = min_wmark_pages(zone) +
+ low + (min >> 2);
+ zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+ low + (min >> 1);
__mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) - low_wmark_pages(zone) -
@@ -5916,7 +5935,7 @@ core_initcall(init_per_zone_wmark_min)
/*
* min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
* that we can call two helper functions whenever min_free_kbytes
- * changes.
+ * or extra_free_kbytes changes.
*/
int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
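
With this change the per-zone watermarks become WMARK_MIN = min, WMARK_LOW = min + low + min/4 and WMARK_HIGH = min + low + min/2, where low is the zone's share of extra_free_kbytes. Worked through with made-up numbers for one zone:

	/* Illustrative arithmetic only; every value is hypothetical. */
	unsigned long wmark_min  = 256;				/* 256 pages */
	unsigned long wmark_low  = 256 + 128 + (256 >> 2);	/* 448 pages */
	unsigned long wmark_high = 256 + 128 + (256 >> 1);	/* 512 pages */

kswapd therefore wakes earlier and reclaims longer, while normal allocations can still proceed down to the unchanged minimum watermark.
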
diff --git a/mm/shmem.c b/mm/shmem.c
index feaaf6ea1b86..4e7d116bb51d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3394,6 +3394,14 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
}
EXPORT_SYMBOL_GPL(shmem_file_setup);
+void shmem_set_file(struct vm_area_struct *vma, struct file *file)
+{
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ vma->vm_file = file;
+ vma->vm_ops = &shmem_vm_ops;
+}
+
/**
* shmem_zero_setup - setup a shared anonymous mapping
* @vma: the vma to be mmapped is prepared by do_mmap_pgoff
@@ -3413,10 +3421,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
if (IS_ERR(file))
return PTR_ERR(file);
- if (vma->vm_file)
- fput(vma->vm_file);
- vma->vm_file = file;
- vma->vm_ops = &shmem_vm_ops;
+ shmem_set_file(vma, file);
return 0;
}
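
Factoring this out lets other in-kernel users (Android's ashmem driver is the presumable consumer) attach a shmem file to a VMA from their own mmap handlers. A hedged sketch under that assumption; the driver, region name and sizing are invented:

	static int my_driver_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct file *shm;

		shm = shmem_file_setup("my-region",
				       vma->vm_end - vma->vm_start,
				       vma->vm_flags);
		if (IS_ERR(shm))
			return PTR_ERR(shm);
		/* shmem_set_file() drops any previous vma->vm_file and
		 * takes over this reference. */
		shmem_set_file(vma, shm);
		return 0;
	}
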
diff --git a/mm/slab.c b/mm/slab.c
index 330039fdcf18..c8d4de0f2e89 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4224,6 +4224,36 @@ static int __init slab_proc_init(void)
module_init(slab_proc_init);
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if the check passes; otherwise returns the name of the
+ * cache to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+ struct page *page)
+{
+ struct kmem_cache *cachep;
+ unsigned int objnr;
+ unsigned long offset;
+
+ /* Find and validate object. */
+ cachep = page->slab_cache;
+ objnr = obj_to_index(cachep, page, (void *)ptr);
+ BUG_ON(objnr >= cachep->num);
+
+ /* Find offset within object. */
+ offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+
+ /* Allow address range falling entirely within object size. */
+ if (offset <= cachep->object_size && n <= cachep->object_size - offset)
+ return NULL;
+
+ return cachep->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
/**
* ksize - get the actual amount of memory allocated for a given object
* @objp: Pointer to the object
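
The acceptance test is the final comparison pair: the copy must start inside the object and must not run past object_size. Worked through with made-up numbers:

	/* Hypothetical 192-byte object: a copy starting 64 bytes in may
	 * span at most 192 - 64 = 128 bytes; anything longer makes
	 * __check_heap_object() return the cache name instead of NULL. */
	unsigned long object_size = 192, offset = 64, n = 128;
	bool ok = offset <= object_size && n <= object_size - offset; /* true */
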
diff --git a/mm/slub.c b/mm/slub.c
index 993483e5f993..b7f531d6ccba 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
#endif
}
+static inline void *fixup_red_left(struct kmem_cache *s, void *p)
+{
+ if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
+ p += s->red_left_pad;
+
+ return p;
+}
+
static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
@@ -224,24 +232,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
* Core slab cache functions
*******************************************************************/
-/* Verify that a pointer has an address that is valid within a slab page */
-static inline int check_valid_pointer(struct kmem_cache *s,
- struct page *page, const void *object)
-{
- void *base;
-
- if (!object)
- return 1;
-
- base = page_address(page);
- if (object < base || object >= base + page->objects * s->size ||
- (object - base) % s->size) {
- return 0;
- }
-
- return 1;
-}
-
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
return *(void **)(object + s->offset);
@@ -271,12 +261,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
- for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
- __p += (__s)->size)
+ for (__p = fixup_red_left(__s, __addr); \
+ __p < (__addr) + (__objects) * (__s)->size; \
+ __p += (__s)->size)
#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
- for (__p = (__addr), __idx = 1; __idx <= __objects;\
- __p += (__s)->size, __idx++)
+ for (__p = fixup_red_left(__s, __addr), __idx = 1; \
+ __idx <= __objects; \
+ __p += (__s)->size, __idx++)
/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -456,6 +448,22 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
set_bit(slab_index(p, s, addr), map);
}
+static inline int size_from_object(struct kmem_cache *s)
+{
+ if (s->flags & SLAB_RED_ZONE)
+ return s->size - s->red_left_pad;
+
+ return s->size;
+}
+
+static inline void *restore_red_left(struct kmem_cache *s, void *p)
+{
+ if (s->flags & SLAB_RED_ZONE)
+ p -= s->red_left_pad;
+
+ return p;
+}
+
/*
* Debug settings:
*/
@@ -489,6 +497,26 @@ static inline void metadata_access_disable(void)
/*
* Object debugging
*/
+
+/* Verify that a pointer has an address that is valid within a slab page */
+static inline int check_valid_pointer(struct kmem_cache *s,
+ struct page *page, void *object)
+{
+ void *base;
+
+ if (!object)
+ return 1;
+
+ base = page_address(page);
+ object = restore_red_left(s, object);
+ if (object < base || object >= base + page->objects * s->size ||
+ (object - base) % s->size) {
+ return 0;
+ }
+
+ return 1;
+}
+
static void print_section(char *text, u8 *addr, unsigned int length)
{
metadata_access_enable();
@@ -628,7 +656,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
p, p - addr, get_freepointer(s, p));
- if (p > addr + 16)
+ if (s->flags & SLAB_RED_ZONE)
+ print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
+ else if (p > addr + 16)
print_section("Bytes b4 ", p - 16, 16);
print_section("Object ", p, min_t(unsigned long, s->object_size,
@@ -645,9 +675,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
if (s->flags & SLAB_STORE_USER)
off += 2 * sizeof(struct track);
- if (off != s->size)
+ if (off != size_from_object(s))
/* Beginning of the filler is the free pointer */
- print_section("Padding ", p + off, s->size - off);
+ print_section("Padding ", p + off, size_from_object(s) - off);
dump_stack();
}
@@ -677,6 +707,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
{
u8 *p = object;
+ if (s->flags & SLAB_RED_ZONE)
+ memset(p - s->red_left_pad, val, s->red_left_pad);
+
if (s->flags & __OBJECT_POISON) {
memset(p, POISON_FREE, s->object_size - 1);
p[s->object_size - 1] = POISON_END;
@@ -769,11 +802,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
/* We also have user information there */
off += 2 * sizeof(struct track);
- if (s->size == off)
+ if (size_from_object(s) == off)
return 1;
return check_bytes_and_report(s, page, p, "Object padding",
- p + off, POISON_INUSE, s->size - off);
+ p + off, POISON_INUSE, size_from_object(s) - off);
}
/* Check the pad bytes at the end of a slab page */
@@ -818,6 +851,10 @@ static int check_object(struct kmem_cache *s, struct page *page,
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, page, object, "Redzone",
+ object - s->red_left_pad, val, s->red_left_pad))
+ return 0;
+
+ if (!check_bytes_and_report(s, page, object, "Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
} else {
@@ -1447,7 +1484,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
set_freepointer(s, p, NULL);
}
- page->freelist = start;
+ page->freelist = fixup_red_left(s, start);
page->inuse = page->objects;
page->frozen = 1;
out:
@@ -3071,7 +3108,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*/
size += 2 * sizeof(struct track);
- if (flags & SLAB_RED_ZONE)
+ if (flags & SLAB_RED_ZONE) {
/*
* Add some empty padding so that we can catch
* overwrites from earlier objects rather than let
@@ -3080,6 +3117,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
* of the object.
*/
size += sizeof(void *);
+
+ s->red_left_pad = sizeof(void *);
+ s->red_left_pad = ALIGN(s->red_left_pad, s->align);
+ size += s->red_left_pad;
+ }
#endif
/*
@@ -3373,6 +3415,46 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
EXPORT_SYMBOL(__kmalloc_node);
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if the check passes; otherwise returns the name of the
+ * cache to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+ struct page *page)
+{
+ struct kmem_cache *s;
+ unsigned long offset;
+ size_t object_size;
+
+ /* Find object and usable object size. */
+ s = page->slab_cache;
+ object_size = slab_ksize(s);
+
+ /* Reject impossible pointers. */
+ if (ptr < page_address(page))
+ return s->name;
+
+ /* Find offset within object. */
+ offset = (ptr - page_address(page)) % s->size;
+
+ /* Adjust for redzone and reject if within the redzone. */
+ if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
+ if (offset < s->red_left_pad)
+ return s->name;
+ offset -= s->red_left_pad;
+ }
+
+ /* Allow address range falling entirely within object size. */
+ if (offset <= object_size && n <= object_size - offset)
+ return NULL;
+
+ return s->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
static size_t __ksize(const void *object)
{
struct page *page;
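
Taken together, a debug slab slot with SLAB_RED_ZONE now carries a redzone on both sides of the object, and the freelist hands out pointers that skip the left one. A rough sketch of the per-slot layout these helpers assume (field order abbreviated):

	/*
	 *  slot start                                        slot end
	 *  | left redzone | object       | right rz | free ptr | ... |
	 *  |<red_left_pad>|<object_size >|
	 *
	 * fixup_red_left() advances past the left redzone when handing
	 * out an object; restore_red_left() undoes that so
	 * check_valid_pointer() still sees slot-aligned addresses.
	 */
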
diff --git a/mm/swapfile.c b/mm/swapfile.c
index a7e72103f23b..738c2662209e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -875,6 +875,48 @@ int page_swapcount(struct page *page)
}
/*
+ * How many references to @entry are currently swapped out?
+ * This considers COUNT_CONTINUED so it returns the exact answer.
+ */
+int swp_swapcount(swp_entry_t entry)
+{
+ int count, tmp_count, n;
+ struct swap_info_struct *p;
+ struct page *page;
+ pgoff_t offset;
+ unsigned char *map;
+
+ p = swap_info_get(entry);
+ if (!p)
+ return 0;
+
+ count = swap_count(p->swap_map[swp_offset(entry)]);
+ if (!(count & COUNT_CONTINUED))
+ goto out;
+
+ count &= ~COUNT_CONTINUED;
+ n = SWAP_MAP_MAX + 1;
+
+ offset = swp_offset(entry);
+ page = vmalloc_to_page(p->swap_map + offset);
+ offset &= ~PAGE_MASK;
+ VM_BUG_ON(page_private(page) != SWP_CONTINUED);
+
+ do {
+ page = list_entry(page->lru.next, struct page, lru);
+ map = kmap_atomic(page);
+ tmp_count = map[offset];
+ kunmap_atomic(map);
+
+ count += (tmp_count & ~COUNT_CONTINUED) * n;
+ n *= (SWAP_CONT_MAX + 1);
+ } while (tmp_count & COUNT_CONTINUED);
+out:
+ spin_unlock(&p->lock);
+ return count;
+}
+
+/*
* We can write to an anon page without COW if there are no other references
* to it. And as a side-effect, free up its swap: because the old content
* on disk will never be read, and seeking back there to write new content
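
Counts beyond SWAP_MAP_MAX spill into continuation pages, each continuation byte weighted by an increasing radix, so the loop reconstructs count = base + sum(tmp_count_i * n_i), with n starting at SWAP_MAP_MAX + 1 and multiplied by SWAP_CONT_MAX + 1 per level. A worked example using the usual <linux/swap.h> constants (SWAP_MAP_MAX = 0x3e, SWAP_CONT_MAX = 0x7f):

	/* If the base map byte decodes to 0x3e with COUNT_CONTINUED set
	 * and the first continuation byte holds 3, the loop computes
	 * 0x3e + 3 * (0x3e + 1) = 62 + 189 = 251 references. */
	int count = 0x3e + 3 * (0x3e + 1);
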
diff --git a/mm/usercopy.c b/mm/usercopy.c
new file mode 100644
index 000000000000..b34996a3860b
--- /dev/null
+++ b/mm/usercopy.c
@@ -0,0 +1,278 @@
+/*
+ * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
+ * which are designed to protect kernel memory from needless exposure
+ * and overwrite under many unintended conditions. This code is based
+ * on PAX_USERCOPY, which is:
+ *
+ * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
+ * Security Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/sections.h>
+
+enum {
+ BAD_STACK = -1,
+ NOT_STACK = 0,
+ GOOD_FRAME,
+ GOOD_STACK,
+};
+
+/*
+ * Checks if a given pointer and length is contained by the current
+ * stack frame (if possible).
+ *
+ * Returns:
+ * NOT_STACK: not at all on the stack
+ * GOOD_FRAME: fully within a valid stack frame
+ * GOOD_STACK: fully on the stack (when frame-checking is unavailable)
+ * BAD_STACK: error condition (invalid stack position or bad stack frame)
+ */
+static noinline int check_stack_object(const void *obj, unsigned long len)
+{
+ const void * const stack = task_stack_page(current);
+ const void * const stackend = stack + THREAD_SIZE;
+ int ret;
+
+ /* Object is not on the stack at all. */
+ if (obj + len <= stack || stackend <= obj)
+ return NOT_STACK;
+
+ /*
+ * Reject: object partially overlaps the stack (passing the
+ * check above means at least one end is within the stack,
+ * so if this check fails, the other end is outside the stack).
+ */
+ if (obj < stack || stackend < obj + len)
+ return BAD_STACK;
+
+ /* Check if object is safely within a valid frame. */
+ ret = arch_within_stack_frames(stack, stackend, obj, len);
+ if (ret)
+ return ret;
+
+ return GOOD_STACK;
+}
+
+static void report_usercopy(const void *ptr, unsigned long len,
+ bool to_user, const char *type)
+{
+ pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
+ to_user ? "exposure" : "overwrite",
+ to_user ? "from" : "to", ptr, type ? : "unknown", len);
+ /*
+ * For greater effect, it would be nice to do do_group_exit(),
+ * but BUG() actually hooks all the lock-breaking and per-arch
+ * Oops code, so that is used here instead.
+ */
+ BUG();
+}
+
+/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */
+static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
+ unsigned long high)
+{
+ unsigned long check_low = (uintptr_t)ptr;
+ unsigned long check_high = check_low + n;
+
+ /* Does not overlap if entirely above or entirely below. */
+ if (check_low >= high || check_high <= low)
+ return false;
+
+ return true;
+}
+
+/* Is this address range in the kernel text area? */
+static inline const char *check_kernel_text_object(const void *ptr,
+ unsigned long n)
+{
+ unsigned long textlow = (unsigned long)_stext;
+ unsigned long texthigh = (unsigned long)_etext;
+ unsigned long textlow_linear, texthigh_linear;
+
+ if (overlaps(ptr, n, textlow, texthigh))
+ return "<kernel text>";
+
+ /*
+ * Some architectures have virtual memory mappings with a secondary
+ * mapping of the kernel text, i.e. there is more than one virtual
+ * kernel address that points to the kernel image. It is usually
+ * when there is a separate linear physical memory mapping, in that
+ * __pa() is not just the reverse of __va(). This can be detected
+ * and checked:
+ */
+ textlow_linear = (unsigned long)__va(__pa(textlow));
+ /* No different mapping: we're done. */
+ if (textlow_linear == textlow)
+ return NULL;
+
+ /* Check the secondary mapping... */
+ texthigh_linear = (unsigned long)__va(__pa(texthigh));
+ if (overlaps(ptr, n, textlow_linear, texthigh_linear))
+ return "<linear kernel text>";
+
+ return NULL;
+}
+
+static inline const char *check_bogus_address(const void *ptr, unsigned long n)
+{
+ /* Reject if object wraps past end of memory. */
+ if ((unsigned long)ptr + n < (unsigned long)ptr)
+ return "<wrapped address>";
+
+ /* Reject if NULL or ZERO-allocation. */
+ if (ZERO_OR_NULL_PTR(ptr))
+ return "<null>";
+
+ return NULL;
+}
+
+/* Checks for allocs that are marked in some way as spanning multiple pages. */
+static inline const char *check_page_span(const void *ptr, unsigned long n,
+ struct page *page, bool to_user)
+{
+#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
+ const void *end = ptr + n - 1;
+ struct page *endpage;
+ bool is_reserved, is_cma;
+
+ /*
+ * Sometimes the kernel data regions are not marked Reserved (see
+ * check below). And sometimes [_sdata,_edata) does not cover
+ * rodata and/or bss, so check each range explicitly.
+ */
+
+ /* Allow reads of kernel rodata region (if not marked as Reserved). */
+ if (ptr >= (const void *)__start_rodata &&
+ end <= (const void *)__end_rodata) {
+ if (!to_user)
+ return "<rodata>";
+ return NULL;
+ }
+
+ /* Allow kernel data region (if not marked as Reserved). */
+ if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
+ return NULL;
+
+ /* Allow kernel bss region (if not marked as Reserved). */
+ if (ptr >= (const void *)__bss_start &&
+ end <= (const void *)__bss_stop)
+ return NULL;
+
+ /* Is the object wholly within one base page? */
+ if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
+ ((unsigned long)end & (unsigned long)PAGE_MASK)))
+ return NULL;
+
+ /* Allow if fully inside the same compound (__GFP_COMP) page. */
+ endpage = virt_to_head_page(end);
+ if (likely(endpage == page))
+ return NULL;
+
+ /*
+ * Reject if range is entirely either Reserved (i.e. special or
+ * device memory), or CMA. Otherwise, reject since the object spans
+ * several independently allocated pages.
+ */
+ is_reserved = PageReserved(page);
+ is_cma = is_migrate_cma_page(page);
+ if (!is_reserved && !is_cma)
+ return "<spans multiple pages>";
+
+ for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
+ page = virt_to_head_page(ptr);
+ if (is_reserved && !PageReserved(page))
+ return "<spans Reserved and non-Reserved pages>";
+ if (is_cma && !is_migrate_cma_page(page))
+ return "<spans CMA and non-CMA pages>";
+ }
+#endif
+
+ return NULL;
+}
+
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+ bool to_user)
+{
+ struct page *page;
+
+ /*
+ * Some architectures (arm64) return true for virt_addr_valid() on
+ * vmalloced addresses. Work around this by checking for vmalloc
+ * first.
+ */
+ if (is_vmalloc_addr(ptr))
+ return NULL;
+
+ if (!virt_addr_valid(ptr))
+ return NULL;
+
+ page = virt_to_head_page(ptr);
+
+ /* Check slab allocator for flags and size. */
+ if (PageSlab(page))
+ return __check_heap_object(ptr, n, page);
+
+ /* Verify object does not incorrectly span multiple pages. */
+ return check_page_span(ptr, n, page, to_user);
+}
+
+/*
+ * Validates that the given object is:
+ * - not bogus address
+ * - known-safe heap or stack object
+ * - not in kernel text
+ */
+void __check_object_size(const void *ptr, unsigned long n, bool to_user)
+{
+ const char *err;
+
+ /* Skip all tests if size is zero. */
+ if (!n)
+ return;
+
+ /* Check for invalid addresses. */
+ err = check_bogus_address(ptr, n);
+ if (err)
+ goto report;
+
+ /* Check for bad heap object. */
+ err = check_heap_object(ptr, n, to_user);
+ if (err)
+ goto report;
+
+ /* Check for bad stack object. */
+ switch (check_stack_object(ptr, n)) {
+ case NOT_STACK:
+ /* Object is not touching the current process stack. */
+ break;
+ case GOOD_FRAME:
+ case GOOD_STACK:
+ /*
+ * Object is either in the correct frame (when it
+ * is possible to check) or just generally on the
+ * process stack (when frame checking is not available).
+ */
+ return;
+ default:
+ err = "<process stack>";
+ goto report;
+ }
+
+ /* Check for object in kernel to avoid text exposure. */
+ err = check_kernel_text_object(ptr, n);
+ if (!err)
+ return;
+
+report:
+ report_usercopy(ptr, n, to_user, err);
+}
+EXPORT_SYMBOL(__check_object_size);
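
The intended entry point is the uaccess fast path: a wrapper in front of copy_{to,from}_user() calls __check_object_size() only for runtime-sized copies, on the theory that constant sizes can be checked at compile time. A sketch of that wrapper (its actual home in the headers is assumed here, not shown by this diff):

	static __always_inline void check_object_size(const void *ptr,
						      unsigned long n,
						      bool to_user)
	{
		/* Only runtime-sized copies need the runtime check. */
		if (!__builtin_constant_p(n))
			__check_object_size(ptr, n, to_user);
	}
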
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e6d12d02bea2..2f7aa08fc3eb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,6 +46,7 @@
#include <linux/oom.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -220,6 +221,39 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
}
+struct dentry *debug_file;
+
+static int debug_shrinker_show(struct seq_file *s, void *unused)
+{
+ struct shrinker *shrinker;
+ struct shrink_control sc;
+
+ sc.gfp_mask = -1;
+ sc.nr_to_scan = 0;
+
+ down_read(&shrinker_rwsem);
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ int num_objs;
+
+ num_objs = shrinker->count_objects(shrinker, &sc);
+ seq_printf(s, "%pf %d\n", shrinker->scan_objects, num_objs);
+ }
+ up_read(&shrinker_rwsem);
+ return 0;
+}
+
+static int debug_shrinker_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, debug_shrinker_show, inode->i_private);
+}
+
+static const struct file_operations debug_shrinker_fops = {
+ .open = debug_shrinker_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
/*
* Add a shrinker callback to be called from the vm.
*/
@@ -249,6 +283,15 @@ int register_shrinker(struct shrinker *shrinker)
}
EXPORT_SYMBOL(register_shrinker);
+static int __init add_shrinker_debug(void)
+{
+ debugfs_create_file("shrinker", 0644, NULL, NULL,
+ &debug_shrinker_fops);
+ return 0;
+}
+
+late_initcall(add_shrinker_debug);
+
/*
* Remove one
*/