From df50da335684932e7b51029c7dff5406ecd1a3f4 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 4 Dec 2015 11:55:32 -0800 Subject: Revert "mmc: block: Remove call to mmc_blk_set_blksize" This reverts commit 25a6b9535be00c88385fad39bbd2d10899578492. Doesn't related to CONFIG_MMC_BLOCK_DEFERRED_RESUME, which hasn't built for awhile. Change-Id: Ic52d1465ca5d1fce6687ca40c898fcc18e11682d Signed-off-by: John Stultz (cherry picked from commit c52d1465ca5d1fce6687ca40c898fcc18e11682d) --- drivers/mmc/card/block.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 774dae656210..7343014afe74 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2011,6 +2011,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) return 0; } +static int +mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card); + static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) { int ret; @@ -2021,8 +2024,10 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) unsigned int cmd_flags = req ? req->cmd_flags : 0; #ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME - if (mmc_bus_needs_resume(card->host)) + if (mmc_bus_needs_resume(card->host)) { mmc_resume_bus(card->host); + mmc_blk_set_blksize(md, card); + } #endif if (req && !mq->mqrq_prev->req) -- cgit v1.2.3 From 7af2e7d7eec767cebd71bad8df84d60d03a760b7 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 4 Dec 2015 11:56:16 -0800 Subject: Revert "mmc: mmcblk: Add support for deferred SD bus resume" This reverts commit bc4dc52f178828e0000ce857f9a97fd8912c8e85. Adds which hasn't built for awhile, so remove it. Change-Id: Ife20b1b155ffe9afb3f0c5223727f7b8bd799d15 Signed-off-by: John Stultz (cherry picked from commit fe20b1b155ffe9afb3f0c5223727f7b8bd799d15) --- drivers/mmc/card/Kconfig | 9 --------- drivers/mmc/card/block.c | 16 ---------------- drivers/mmc/core/core.c | 6 ------ 3 files changed, 31 deletions(-) diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig index 79d82124413f..5562308699bc 100644 --- a/drivers/mmc/card/Kconfig +++ b/drivers/mmc/card/Kconfig @@ -50,15 +50,6 @@ config MMC_BLOCK_BOUNCE If unsure, say Y here. -config MMC_BLOCK_DEFERRED_RESUME - bool "Deferr MMC layer resume until I/O is requested" - depends on MMC_BLOCK - default n - help - Say Y here to enable deferred MMC resume until I/O - is requested. This will reduce overall resume latency and - save power when theres an SD card inserted but not being used. - config SDIO_UART tristate "SDIO UART/GPS class support" depends on TTY diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 7343014afe74..30a4ac58e8c8 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2011,9 +2011,6 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) return 0; } -static int -mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card); - static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) { int ret; @@ -2023,13 +2020,6 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) unsigned long flags; unsigned int cmd_flags = req ? req->cmd_flags : 0; -#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME - if (mmc_bus_needs_resume(card->host)) { - mmc_resume_bus(card->host); - mmc_blk_set_blksize(md, card); - } -#endif - if (req && !mq->mqrq_prev->req) /* claim host only for the first request */ mmc_get_card(card); @@ -2469,9 +2459,6 @@ static int mmc_blk_probe(struct mmc_card *card) mmc_set_drvdata(card, md); -#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME - mmc_set_bus_resume_policy(card->host, 1); -#endif if (mmc_add_disk(md)) goto out; @@ -2514,9 +2501,6 @@ static void mmc_blk_remove(struct mmc_card *card) pm_runtime_put_noidle(&card->dev); mmc_blk_remove_req(md); mmc_set_drvdata(card, NULL); -#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME - mmc_set_bus_resume_policy(card->host, 0); -#endif } static int _mmc_blk_suspend(struct mmc_card *card) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 9166ae24ca90..0cba35121de5 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2455,12 +2455,6 @@ void mmc_rescan(struct work_struct *work) host->detect_change = 0; - /* If the card was removed the bus will be marked - * as dead - extend the wakelock so userspace - * can respond */ - if (host->bus_dead) - extend_wakelock = 1; - /* * Let mmc_bus_put() free the bus/bus_ops if we've found that * the card is no longer present. -- cgit v1.2.3 From 2902b20e5db6e90ab9d5cb8ce98254a6c1e4c3f1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 4 Dec 2015 12:13:13 -0800 Subject: mmc: Extend wakelock if bus is dead This patch sets extend_wakelock if the host's bus is marked dead due to card removal. This change was originally made in bc4dc52f178828 (mmc: mmcblk: Add support for deferred SD bus resume), which was reverted due to the majority of the patch no longer building or making sense. However one small part of that patch ought to be saved. Change-Id: Idde72f9e313fc52d467ef0aad41cecd2c9f9f212 Signed-off-by: John Stultz (cherry picked from commit dde72f9e313fc52d467ef0aad41cecd2c9f9f212) --- drivers/mmc/core/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 0cba35121de5..9166ae24ca90 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2455,6 +2455,12 @@ void mmc_rescan(struct work_struct *work) host->detect_change = 0; + /* If the card was removed the bus will be marked + * as dead - extend the wakelock so userspace + * can respond */ + if (host->bus_dead) + extend_wakelock = 1; + /* * Let mmc_bus_put() free the bus/bus_ops if we've found that * the card is no longer present. -- cgit v1.2.3 From 552b578f4d89d7794611a584c75b83e7f2241dc9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 8 Sep 2015 15:00:24 -0700 Subject: BACKPORT: mm: /proc/pid/smaps:: show proportional swap share of the mapping We want to know per-process workingset size for smart memory management on userland and we use swap(ex, zram) heavily to maximize memory efficiency so workingset includes swap as well as RSS. On such system, if there are lots of shared anonymous pages, it's really hard to figure out exactly how many each process consumes memory(ie, rss + wap) if the system has lots of shared anonymous memory(e.g, android). This patch introduces SwapPss field on /proc//smaps so we can get more exact workingset size per process. Bongkyu tested it. Result is below. 1. 50M used swap SwapTotal: 461976 kB SwapFree: 411192 kB $ adb shell cat /proc/*/smaps | grep "SwapPss:" | awk '{sum += $2} END {print sum}'; 48236 $ adb shell cat /proc/*/smaps | grep "Swap:" | awk '{sum += $2} END {print sum}'; 141184 2. 240M used swap SwapTotal: 461976 kB SwapFree: 216808 kB $ adb shell cat /proc/*/smaps | grep "SwapPss:" | awk '{sum += $2} END {print sum}'; 230315 $ adb shell cat /proc/*/smaps | grep "Swap:" | awk '{sum += $2} END {print sum}'; 1387744 [akpm@linux-foundation.org: simplify kunmap_atomic() call] Signed-off-by: Minchan Kim Reported-by: Bongkyu Kim Tested-by: Bongkyu Kim Cc: Hugh Dickins Cc: Sergey Senozhatsky Cc: Jonathan Corbet Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 26190646 Change-Id: Idf92d682fdef432bdd66e530a7e7cdff8f375db1 Signed-off-by: Thierry Strudel --- Documentation/filesystems/proc.txt | 18 +++++++++++----- fs/proc/task_mmu.c | 20 +++++++++++++++--- include/linux/swap.h | 6 ++++++ mm/swapfile.c | 42 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 8 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e2def60b3465..cf6b64a65179 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -417,6 +417,7 @@ Private_Dirty: 0 kB Referenced: 892 kB Anonymous: 0 kB Swap: 0 kB +SwapPss: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Locked: 374 kB @@ -427,16 +428,23 @@ the first of these lines shows the same information as is displayed for the mapping in /proc/PID/maps. The remaining lines show the size of the mapping (size), the amount of the mapping that is currently resident in RAM (RSS), the process' proportional share of this mapping (PSS), the number of clean and -dirty private pages in the mapping. Note that even a page which is part of a -MAP_SHARED mapping, but has only a single pte mapped, i.e. is currently used -by only one process, is accounted as private and not as shared. "Referenced" -indicates the amount of memory currently marked as referenced or accessed. +dirty private pages in the mapping. + +The "proportional set size" (PSS) of a process is the count of pages it has +in memory, where each page is divided by the number of processes sharing it. +So if a process has 1000 pages all to itself, and 1000 shared with one other +process, its PSS will be 1500. +Note that even a page which is part of a MAP_SHARED mapping, but has only +a single pte mapped, i.e. is currently used by only one process, is accounted +as private and not as shared. +"Referenced" indicates the amount of memory currently marked as referenced or +accessed. "Anonymous" shows the amount of memory that does not belong to any file. Even a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE and a page is modified, the file page is replaced by a private anonymous copy. "Swap" shows how much would-be-anonymous memory is also used, but out on swap. - +"SwapPss" shows proportional swap share of this mapping. "VmFlags" field deserves a separate description. This member represents the kernel flags associated with the particular virtual memory area in two letter encoded manner. The codes are the following: diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index cd55bf71906d..4bdb216f3e23 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -501,6 +501,7 @@ struct mem_size_stats { unsigned long swap; unsigned long nonlinear; u64 pss; + u64 swap_pss; }; @@ -518,9 +519,20 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr, } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); - if (!non_swap_entry(swpent)) - mss->swap += ptent_size; - else if (is_migration_entry(swpent)) + if (!non_swap_entry(swpent)) { + int mapcount; + + mss->swap += PAGE_SIZE; + mapcount = swp_swapcount(swpent); + if (mapcount >= 2) { + u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT; + + do_div(pss_delta, mapcount); + mss->swap_pss += pss_delta; + } else { + mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; + } + } else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); } else if (pte_file(ptent)) { if (pte_to_pgoff(ptent) != pgoff) @@ -676,6 +688,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) "Anonymous: %8lu kB\n" "AnonHugePages: %8lu kB\n" "Swap: %8lu kB\n" + "SwapPss: %8lu kB\n" "KernelPageSize: %8lu kB\n" "MMUPageSize: %8lu kB\n" "Locked: %8lu kB\n", @@ -690,6 +703,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) mss.anonymous >> 10, mss.anonymous_thp >> 10, mss.swap >> 10, + (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)), vma_kernel_pagesize(vma) >> 10, vma_mmu_pagesize(vma) >> 10, (vma->vm_flags & VM_LOCKED) ? diff --git a/include/linux/swap.h b/include/linux/swap.h index 37a585beef5c..5acebabba477 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -440,6 +440,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); +extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); @@ -541,6 +542,11 @@ static inline int page_swapcount(struct page *page) return 0; } +static inline int swp_swapcount(swp_entry_t entry) +{ + return 0; +} + #define reuse_swap_page(page) (page_mapcount(page) == 1) static inline int try_to_free_swap(struct page *page) diff --git a/mm/swapfile.c b/mm/swapfile.c index 8798b2e0ac59..e7c0f024c625 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -874,6 +874,48 @@ int page_swapcount(struct page *page) return count; } +/* + * How many references to @entry are currently swapped out? + * This considers COUNT_CONTINUED so it returns exact answer. + */ +int swp_swapcount(swp_entry_t entry) +{ + int count, tmp_count, n; + struct swap_info_struct *p; + struct page *page; + pgoff_t offset; + unsigned char *map; + + p = swap_info_get(entry); + if (!p) + return 0; + + count = swap_count(p->swap_map[swp_offset(entry)]); + if (!(count & COUNT_CONTINUED)) + goto out; + + count &= ~COUNT_CONTINUED; + n = SWAP_MAP_MAX + 1; + + offset = swp_offset(entry); + page = vmalloc_to_page(p->swap_map + offset); + offset &= ~PAGE_MASK; + VM_BUG_ON(page_private(page) != SWP_CONTINUED); + + do { + page = list_entry(page->lru.next, struct page, lru); + map = kmap_atomic(page); + tmp_count = map[offset]; + kunmap_atomic(map); + + count += (tmp_count & ~COUNT_CONTINUED) * n; + n *= (SWAP_CONT_MAX + 1); + } while (tmp_count & COUNT_CONTINUED); +out: + spin_unlock(&p->lock); + return count; +} + /* * We can write to an anon page without COW if there are no other references * to it. And as a side-effect, free up its swap: because the old content -- cgit v1.2.3 From af00c100aa6b3a0e3138884bced716949777414f Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 13:21:24 +0000 Subject: ANDROID: dm verity: port upstream changes to 3.18 Upstream dm-verity has different optional parameters. Port back the relevant changes. Bug: 21893453 Change-Id: I0ddadfcc4104b9d38554003203f6e65191ffe995 Signed-off-by: Sami Tolvanen --- drivers/md/dm-verity.c | 108 +++++++++++++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 40 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 32a6a8130557..f60991a10fdf 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -24,23 +24,24 @@ #define DM_MSG_PREFIX "verity" #define DM_VERITY_ENV_LENGTH 42 -#define DM_VERITY_ENV_VAR_NAME "VERITY_ERR_BLOCK_NR" +#define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" -#define DM_VERITY_IO_VEC_INLINE 16 -#define DM_VERITY_MEMPOOL_SIZE 4 #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 #define DM_VERITY_MAX_LEVELS 63 #define DM_VERITY_MAX_CORRUPTED_ERRS 100 +#define DM_VERITY_OPT_LOGGING "ignore_corruption" +#define DM_VERITY_OPT_RESTART "restart_on_corruption" + static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); enum verity_mode { - DM_VERITY_MODE_EIO = 0, - DM_VERITY_MODE_LOGGING = 1, - DM_VERITY_MODE_RESTART = 2 + DM_VERITY_MODE_EIO, + DM_VERITY_MODE_LOGGING, + DM_VERITY_MODE_RESTART }; enum verity_block_type { @@ -73,8 +74,6 @@ struct dm_verity { enum verity_mode mode; /* mode for handling verification errors */ unsigned corrupted_errs;/* Number of errors for corrupted blocks */ - mempool_t *vec_mempool; /* mempool of bio vector */ - struct workqueue_struct *verify_wq; /* starting blocks for each tree level. 0 is the lowest level. */ @@ -196,17 +195,20 @@ static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, * Handle verification errors. */ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, - unsigned long long block) + unsigned long long block) { char verity_env[DM_VERITY_ENV_LENGTH]; char *envp[] = { verity_env, NULL }; const char *type_str = ""; struct mapped_device *md = dm_table_get_md(v->ti->table); + /* Corruption should be visible in device status in all modes */ + v->hash_failed = 1; + if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS) goto out; - ++v->corrupted_errs; + v->corrupted_errs++; switch (type) { case DM_VERITY_BLOCK_TYPE_DATA: @@ -219,8 +221,8 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, BUG(); } - DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name, - type_str, block); + DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str, + block); if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) DMERR("%s: reached maximum errors", v->data_dev->name); @@ -265,7 +267,7 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, verity_hash_at_level(v, block, level, &hash_block, &offset); data = dm_bufio_read(v->bufio, hash_block, &buf); - if (unlikely(IS_ERR(data))) + if (IS_ERR(data)) return PTR_ERR(data); aux = dm_bufio_get_aux_data(buf); @@ -317,8 +319,6 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, goto release_ret_r; } if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - v->hash_failed = 1; - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, hash_block)) { r = -EIO; @@ -435,8 +435,6 @@ test_block_hash: return r; } if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - v->hash_failed = 1; - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, io->block + b)) return -EIO; @@ -615,6 +613,19 @@ static void verity_status(struct dm_target *ti, status_type_t type, else for (x = 0; x < v->salt_size; x++) DMEMIT("%02x", v->salt[x]); + if (v->mode != DM_VERITY_MODE_EIO) { + DMEMIT(" 1 "); + switch (v->mode) { + case DM_VERITY_MODE_LOGGING: + DMEMIT(DM_VERITY_OPT_LOGGING); + break; + case DM_VERITY_MODE_RESTART: + DMEMIT(DM_VERITY_OPT_RESTART); + break; + default: + BUG(); + } + } break; } } @@ -676,9 +687,6 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); - if (v->vec_mempool) - mempool_destroy(v->vec_mempool); - if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -716,13 +724,19 @@ static void verity_dtr(struct dm_target *ti) static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) { struct dm_verity *v; - unsigned num; + struct dm_arg_set as; + const char *opt_string; + unsigned int num, opt_params; unsigned long long num_ll; int r; int i; sector_t hash_position; char dummy; + static struct dm_arg _args[] = { + {0, 1, "Invalid number of feature args"}, + }; + v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); if (!v) { ti->error = "Cannot allocate verity structure"; @@ -737,14 +751,14 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - if (argc < 10 || argc > 11) { - ti->error = "Invalid argument count: 10-11 arguments required"; + if (argc < 10) { + ti->error = "Not enough arguments"; r = -EINVAL; goto bad; } - if (sscanf(argv[0], "%d%c", &num, &dummy) != 1 || - num < 0 || num > 1) { + if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 || + num > 1) { ti->error = "Invalid version"; r = -EINVAL; goto bad; @@ -859,15 +873,37 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } } - if (argc > 10) { - if (sscanf(argv[10], "%d%c", &num, &dummy) != 1 || - num < DM_VERITY_MODE_EIO || - num > DM_VERITY_MODE_RESTART) { - ti->error = "Invalid mode"; - r = -EINVAL; + argv += 10; + argc -= 10; + + /* Optional parameters */ + if (argc) { + as.argc = argc; + as.argv = argv; + + r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); + if (r) goto bad; + + while (opt_params) { + opt_params--; + opt_string = dm_shift_arg(&as); + if (!opt_string) { + ti->error = "Not enough feature arguments"; + r = -EINVAL; + goto bad; + } + + if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) + v->mode = DM_VERITY_MODE_LOGGING; + else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) + v->mode = DM_VERITY_MODE_RESTART; + else { + ti->error = "Invalid feature arguments"; + r = -EINVAL; + goto bad; + } } - v->mode = num; } v->hash_per_block_bits = @@ -919,14 +955,6 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); - v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - BIO_MAX_PAGES * sizeof(struct bio_vec)); - if (!v->vec_mempool) { - ti->error = "Cannot allocate vector mempool"; - r = -ENOMEM; - goto bad; - } - /* WQ_UNBOUND greatly improves performance when running on ramdisk */ v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); if (!v->verify_wq) { -- cgit v1.2.3 From cfb1f868294a85319c5c641a65a2b98452e037be Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 13:33:29 +0000 Subject: UPSTREAM: dm verity: clean up duplicate hashing code (cherry-picked from 6dbeda3469ced777bc3138ed5918f7ae79670b7b) Handle dm-verity salting in one place to simplify the code. Bug: 21893453 Change-Id: Ic7fd1c714554509d5b1583630de45f378c3360d6 Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Mike Snitzer --- drivers/md/dm-verity.c | 262 +++++++++++++++++++++++++++---------------------- 1 file changed, 147 insertions(+), 115 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index f60991a10fdf..9e87babca406 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -173,6 +173,84 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, return block >> (level * v->hash_per_block_bits); } +/* + * Wrapper for crypto_shash_init, which handles verity salting. + */ +static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc) +{ + int r; + + desc->tfm = v->tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + r = crypto_shash_init(desc); + + if (unlikely(r < 0)) { + DMERR("crypto_shash_init failed: %d", r); + return r; + } + + if (likely(v->version >= 1)) { + r = crypto_shash_update(desc, v->salt, v->salt_size); + + if (unlikely(r < 0)) { + DMERR("crypto_shash_update failed: %d", r); + return r; + } + } + + return 0; +} + +static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len) +{ + int r = crypto_shash_update(desc, data, len); + + if (unlikely(r < 0)) + DMERR("crypto_shash_update failed: %d", r); + + return r; +} + +static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc, + u8 *digest) +{ + int r; + + if (unlikely(!v->version)) { + r = crypto_shash_update(desc, v->salt, v->salt_size); + + if (r < 0) { + DMERR("crypto_shash_update failed: %d", r); + return r; + } + } + + r = crypto_shash_final(desc, digest); + + if (unlikely(r < 0)) + DMERR("crypto_shash_final failed: %d", r); + + return r; +} + +static int verity_hash(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len, u8 *digest) +{ + int r; + + r = verity_hash_init(v, desc); + if (unlikely(r < 0)) + return r; + + r = verity_hash_update(v, desc, data, len); + if (unlikely(r < 0)) + return r; + + return verity_hash_final(v, desc, digest); +} + static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, sector_t *hash_block, unsigned *offset) { @@ -253,10 +331,10 @@ out: * If "skip_unverified" is false, unverified buffer is hashed and verified * against current value of io_want_digest(v, io). */ -static int verity_verify_level(struct dm_verity_io *io, sector_t block, - int level, bool skip_unverified) +static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, int level, bool skip_unverified, + u8 *want_digest) { - struct dm_verity *v = io->v; struct dm_buffer *buf; struct buffer_aux *aux; u8 *data; @@ -273,74 +351,71 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, aux = dm_bufio_get_aux_data(buf); if (!aux->hash_verified) { - struct shash_desc *desc; - u8 *result; - if (skip_unverified) { r = 1; goto release_ret_r; } - desc = io_hash_desc(v, io); - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - r = crypto_shash_init(desc); - if (r < 0) { - DMERR("crypto_shash_init failed: %d", r); + r = verity_hash(v, io_hash_desc(v, io), + data, 1 << v->hash_dev_block_bits, + io_real_digest(v, io)); + if (unlikely(r < 0)) goto release_ret_r; - } - - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - } - r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - - if (!v->version) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - } - - result = io_real_digest(v, io); - r = crypto_shash_final(desc, result); - if (r < 0) { - DMERR("crypto_shash_final failed: %d", r); + if (likely(memcmp(io_real_digest(v, io), want_digest, + v->digest_size) == 0)) + aux->hash_verified = 1; + else if (verity_handle_err(v, + DM_VERITY_BLOCK_TYPE_METADATA, + hash_block)) { + r = -EIO; goto release_ret_r; } - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, - hash_block)) { - r = -EIO; - goto release_ret_r; - } - } else - aux->hash_verified = 1; } data += offset; - - memcpy(io_want_digest(v, io), data, v->digest_size); - - dm_bufio_release(buf); - return 0; + memcpy(want_digest, data, v->digest_size); + r = 0; release_ret_r: dm_bufio_release(buf); - return r; } +/* + * Find a hash for a given block, write it to digest and verify the integrity + * of the hash tree if necessary. + */ +static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, u8 *digest) +{ + int i; + int r; + + if (likely(v->levels)) { + /* + * First, we try to get the requested hash for + * the current block. If the hash block itself is + * verified, zero is returned. If it isn't, this + * function returns 1 and we fall back to whole + * chain verification. + */ + r = verity_verify_level(v, io, block, 0, true, digest); + if (likely(r <= 0)) + return r; + } + + memcpy(digest, v->root_digest, v->digest_size); + + for (i = v->levels - 1; i >= 0; i--) { + r = verity_verify_level(v, io, block, i, false, digest); + if (unlikely(r)) + return r; + } + + return 0; +} + /* * Verify one "dm_verity_io" structure. */ @@ -350,54 +425,21 @@ static int verity_verify_io(struct dm_verity_io *io) struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); unsigned b; - int i; for (b = 0; b < io->n_blocks; b++) { - struct shash_desc *desc; - u8 *result; int r; unsigned todo; + struct shash_desc *desc = io_hash_desc(v, io); - if (likely(v->levels)) { - /* - * First, we try to get the requested hash for - * the current block. If the hash block itself is - * verified, zero is returned. If it isn't, this - * function returns 0 and we fall back to whole - * chain verification. - */ - int r = verity_verify_level(io, io->block + b, 0, true); - if (likely(!r)) - goto test_block_hash; - if (r < 0) - return r; - } - - memcpy(io_want_digest(v, io), v->root_digest, v->digest_size); - - for (i = v->levels - 1; i >= 0; i--) { - int r = verity_verify_level(io, io->block + b, i, false); - if (unlikely(r)) - return r; - } + r = verity_hash_for_block(v, io, io->block + b, + io_want_digest(v, io)); + if (unlikely(r < 0)) + return r; -test_block_hash: - desc = io_hash_desc(v, io); - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - r = crypto_shash_init(desc); - if (r < 0) { - DMERR("crypto_shash_init failed: %d", r); + r = verity_hash_init(v, desc); + if (unlikely(r < 0)) return r; - } - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } todo = 1 << v->data_dev_block_bits; do { u8 *page; @@ -408,37 +450,27 @@ test_block_hash: len = bv.bv_len; if (likely(len >= todo)) len = todo; - r = crypto_shash_update(desc, page + bv.bv_offset, len); + r = verity_hash_update(v, desc, page + bv.bv_offset, + len); kunmap_atomic(page); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); + if (unlikely(r < 0)) return r; - } bio_advance_iter(bio, &io->iter, len); todo -= len; } while (todo); - if (!v->version) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } - - result = io_real_digest(v, io); - r = crypto_shash_final(desc, result); - if (r < 0) { - DMERR("crypto_shash_final failed: %d", r); + r = verity_hash_final(v, desc, io_real_digest(v, io)); + if (unlikely(r < 0)) return r; - } - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) - return -EIO; - } + + if (likely(memcmp(io_real_digest(v, io), + io_want_digest(v, io), v->digest_size) == 0)) + continue; + else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + io->block + b)) + return -EIO; } return 0; -- cgit v1.2.3 From 32df92dbd3a75d4ebf3cd3767c833598d5f8faf6 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 13:37:41 +0000 Subject: UPSTREAM: dm verity: separate function for parsing opt args (cherry-picked from 753c1fd02807cb43a1c5d01d75d454054d46bdad) Move optional argument parsing into a separate function to make it easier to add more of them without making verity_ctr even longer. Bug: 21893453 Change-Id: I61ab0a659fdbd67acb8f04bc43f14b9613f7a0a8 Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Mike Snitzer --- drivers/md/dm-verity.c | 71 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 9e87babca406..00f9e619f9ff 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -34,6 +34,8 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" +#define DM_VERITY_OPTS_MAX 1 + static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); @@ -739,6 +741,44 @@ static void verity_dtr(struct dm_target *ti) kfree(v); } +static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) +{ + int r; + unsigned argc; + struct dm_target *ti = v->ti; + const char *arg_name; + + static struct dm_arg _args[] = { + {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"}, + }; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return -EINVAL; + + if (!argc) + return 0; + + do { + arg_name = dm_shift_arg(as); + argc--; + + if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) { + v->mode = DM_VERITY_MODE_LOGGING; + continue; + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { + v->mode = DM_VERITY_MODE_RESTART; + continue; + } + + ti->error = "Unrecognized verity feature request"; + return -EINVAL; + } while (argc && !r); + + return r; +} + /* * Target parameters: * The current format is version 1. @@ -757,18 +797,13 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) { struct dm_verity *v; struct dm_arg_set as; - const char *opt_string; - unsigned int num, opt_params; + unsigned int num; unsigned long long num_ll; int r; int i; sector_t hash_position; char dummy; - static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, - }; - v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); if (!v) { ti->error = "Cannot allocate verity structure"; @@ -913,29 +948,9 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) as.argc = argc; as.argv = argv; - r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); - if (r) + r = verity_parse_opt_args(&as, v); + if (r < 0) goto bad; - - while (opt_params) { - opt_params--; - opt_string = dm_shift_arg(&as); - if (!opt_string) { - ti->error = "Not enough feature arguments"; - r = -EINVAL; - goto bad; - } - - if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) - v->mode = DM_VERITY_MODE_LOGGING; - else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) - v->mode = DM_VERITY_MODE_RESTART; - else { - ti->error = "Invalid feature arguments"; - r = -EINVAL; - goto bad; - } - } } v->hash_per_block_bits = -- cgit v1.2.3 From 2e27c838fddcf01b8575d008ff9afbe51c29b9e0 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 13:39:32 +0000 Subject: UPSTREAM: dm verity: move dm-verity.c to dm-verity-target.c (cherry-pick from 03045cbafa2d663ad8d0a583ac219d202d824344) Prepare for extending dm-verity with an optional object. Follows the naming convention used by other DM targets (e.g. dm-cache and dm-era). Bug: 21893453 Change-Id: I5c7c5aa219859c563846ca6e21eab57ab9d2def3 Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- drivers/md/Makefile | 1 + drivers/md/dm-verity-target.c | 1058 +++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-verity.c | 1058 ----------------------------------------- 3 files changed, 1059 insertions(+), 1058 deletions(-) create mode 100644 drivers/md/dm-verity-target.c delete mode 100644 drivers/md/dm-verity.c diff --git a/drivers/md/Makefile b/drivers/md/Makefile index a2da532b1c2b..2095ce7bf9ab 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -15,6 +15,7 @@ dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o dm-cache-mq-y += dm-cache-policy-mq.o dm-cache-cleaner-y += dm-cache-policy-cleaner.o dm-era-y += dm-era-target.o +dm-verity-y += dm-verity-target.o md-mod-y += md.o bitmap.o raid456-y += raid5.o diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c new file mode 100644 index 000000000000..00f9e619f9ff --- /dev/null +++ b/drivers/md/dm-verity-target.c @@ -0,0 +1,1058 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * Author: Mikulas Patocka + * + * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors + * + * This file is released under the GPLv2. + * + * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set + * default prefetch value. Data are read in "prefetch_cluster" chunks from the + * hash device. Setting this greatly improves performance when data and hash + * are on the same disk on different partitions on devices with poor random + * access behavior. + */ + +#include "dm-bufio.h" + +#include +#include +#include +#include + +#define DM_MSG_PREFIX "verity" + +#define DM_VERITY_ENV_LENGTH 42 +#define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" + +#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 + +#define DM_VERITY_MAX_LEVELS 63 +#define DM_VERITY_MAX_CORRUPTED_ERRS 100 + +#define DM_VERITY_OPT_LOGGING "ignore_corruption" +#define DM_VERITY_OPT_RESTART "restart_on_corruption" + +#define DM_VERITY_OPTS_MAX 1 + +static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; + +module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); + +enum verity_mode { + DM_VERITY_MODE_EIO, + DM_VERITY_MODE_LOGGING, + DM_VERITY_MODE_RESTART +}; + +enum verity_block_type { + DM_VERITY_BLOCK_TYPE_DATA, + DM_VERITY_BLOCK_TYPE_METADATA +}; + +struct dm_verity { + struct dm_dev *data_dev; + struct dm_dev *hash_dev; + struct dm_target *ti; + struct dm_bufio_client *bufio; + char *alg_name; + struct crypto_shash *tfm; + u8 *root_digest; /* digest of the root block */ + u8 *salt; /* salt: its size is salt_size */ + unsigned salt_size; + sector_t data_start; /* data offset in 512-byte sectors */ + sector_t hash_start; /* hash start in blocks */ + sector_t data_blocks; /* the number of data blocks */ + sector_t hash_blocks; /* the number of hash blocks */ + unsigned char data_dev_block_bits; /* log2(data blocksize) */ + unsigned char hash_dev_block_bits; /* log2(hash blocksize) */ + unsigned char hash_per_block_bits; /* log2(hashes in hash block) */ + unsigned char levels; /* the number of tree levels */ + unsigned char version; + unsigned digest_size; /* digest size for the current hash algorithm */ + unsigned shash_descsize;/* the size of temporary space for crypto */ + int hash_failed; /* set to 1 if hash of any block failed */ + enum verity_mode mode; /* mode for handling verification errors */ + unsigned corrupted_errs;/* Number of errors for corrupted blocks */ + + struct workqueue_struct *verify_wq; + + /* starting blocks for each tree level. 0 is the lowest level. */ + sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; +}; + +struct dm_verity_io { + struct dm_verity *v; + + /* original values of bio->bi_end_io and bio->bi_private */ + bio_end_io_t *orig_bi_end_io; + void *orig_bi_private; + + sector_t block; + unsigned n_blocks; + + struct bvec_iter iter; + + struct work_struct work; + + /* + * Three variably-size fields follow this struct: + * + * u8 hash_desc[v->shash_descsize]; + * u8 real_digest[v->digest_size]; + * u8 want_digest[v->digest_size]; + * + * To access them use: io_hash_desc(), io_real_digest() and io_want_digest(). + */ +}; + +struct dm_verity_prefetch_work { + struct work_struct work; + struct dm_verity *v; + sector_t block; + unsigned n_blocks; +}; + +static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) +{ + return (struct shash_desc *)(io + 1); +} + +static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io) +{ + return (u8 *)(io + 1) + v->shash_descsize; +} + +static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io) +{ + return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; +} + +/* + * Auxiliary structure appended to each dm-bufio buffer. If the value + * hash_verified is nonzero, hash of the block has been verified. + * + * The variable hash_verified is set to 0 when allocating the buffer, then + * it can be changed to 1 and it is never reset to 0 again. + * + * There is no lock around this value, a race condition can at worst cause + * that multiple processes verify the hash of the same buffer simultaneously + * and write 1 to hash_verified simultaneously. + * This condition is harmless, so we don't need locking. + */ +struct buffer_aux { + int hash_verified; +}; + +/* + * Initialize struct buffer_aux for a freshly created buffer. + */ +static void dm_bufio_alloc_callback(struct dm_buffer *buf) +{ + struct buffer_aux *aux = dm_bufio_get_aux_data(buf); + + aux->hash_verified = 0; +} + +/* + * Translate input sector number to the sector number on the target device. + */ +static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector) +{ + return v->data_start + dm_target_offset(v->ti, bi_sector); +} + +/* + * Return hash position of a specified block at a specified tree level + * (0 is the lowest level). + * The lowest "hash_per_block_bits"-bits of the result denote hash position + * inside a hash block. The remaining bits denote location of the hash block. + */ +static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, + int level) +{ + return block >> (level * v->hash_per_block_bits); +} + +/* + * Wrapper for crypto_shash_init, which handles verity salting. + */ +static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc) +{ + int r; + + desc->tfm = v->tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + r = crypto_shash_init(desc); + + if (unlikely(r < 0)) { + DMERR("crypto_shash_init failed: %d", r); + return r; + } + + if (likely(v->version >= 1)) { + r = crypto_shash_update(desc, v->salt, v->salt_size); + + if (unlikely(r < 0)) { + DMERR("crypto_shash_update failed: %d", r); + return r; + } + } + + return 0; +} + +static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len) +{ + int r = crypto_shash_update(desc, data, len); + + if (unlikely(r < 0)) + DMERR("crypto_shash_update failed: %d", r); + + return r; +} + +static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc, + u8 *digest) +{ + int r; + + if (unlikely(!v->version)) { + r = crypto_shash_update(desc, v->salt, v->salt_size); + + if (r < 0) { + DMERR("crypto_shash_update failed: %d", r); + return r; + } + } + + r = crypto_shash_final(desc, digest); + + if (unlikely(r < 0)) + DMERR("crypto_shash_final failed: %d", r); + + return r; +} + +static int verity_hash(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len, u8 *digest) +{ + int r; + + r = verity_hash_init(v, desc); + if (unlikely(r < 0)) + return r; + + r = verity_hash_update(v, desc, data, len); + if (unlikely(r < 0)) + return r; + + return verity_hash_final(v, desc, digest); +} + +static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, + sector_t *hash_block, unsigned *offset) +{ + sector_t position = verity_position_at_level(v, block, level); + unsigned idx; + + *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits); + + if (!offset) + return; + + idx = position & ((1 << v->hash_per_block_bits) - 1); + if (!v->version) + *offset = idx * v->digest_size; + else + *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits); +} + +/* + * Handle verification errors. + */ +static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, + unsigned long long block) +{ + char verity_env[DM_VERITY_ENV_LENGTH]; + char *envp[] = { verity_env, NULL }; + const char *type_str = ""; + struct mapped_device *md = dm_table_get_md(v->ti->table); + + /* Corruption should be visible in device status in all modes */ + v->hash_failed = 1; + + if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS) + goto out; + + v->corrupted_errs++; + + switch (type) { + case DM_VERITY_BLOCK_TYPE_DATA: + type_str = "data"; + break; + case DM_VERITY_BLOCK_TYPE_METADATA: + type_str = "metadata"; + break; + default: + BUG(); + } + + DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str, + block); + + if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) + DMERR("%s: reached maximum errors", v->data_dev->name); + + snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu", + DM_VERITY_ENV_VAR_NAME, type, block); + + kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp); + +out: + if (v->mode == DM_VERITY_MODE_LOGGING) + return 0; + + if (v->mode == DM_VERITY_MODE_RESTART) + kernel_restart("dm-verity device corrupted"); + + return 1; +} + +/* + * Verify hash of a metadata block pertaining to the specified data block + * ("block" argument) at a specified level ("level" argument). + * + * On successful return, io_want_digest(v, io) contains the hash value for + * a lower tree level or for the data block (if we're at the lowest leve). + * + * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned. + * If "skip_unverified" is false, unverified buffer is hashed and verified + * against current value of io_want_digest(v, io). + */ +static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, int level, bool skip_unverified, + u8 *want_digest) +{ + struct dm_buffer *buf; + struct buffer_aux *aux; + u8 *data; + int r; + sector_t hash_block; + unsigned offset; + + verity_hash_at_level(v, block, level, &hash_block, &offset); + + data = dm_bufio_read(v->bufio, hash_block, &buf); + if (IS_ERR(data)) + return PTR_ERR(data); + + aux = dm_bufio_get_aux_data(buf); + + if (!aux->hash_verified) { + if (skip_unverified) { + r = 1; + goto release_ret_r; + } + + r = verity_hash(v, io_hash_desc(v, io), + data, 1 << v->hash_dev_block_bits, + io_real_digest(v, io)); + if (unlikely(r < 0)) + goto release_ret_r; + + if (likely(memcmp(io_real_digest(v, io), want_digest, + v->digest_size) == 0)) + aux->hash_verified = 1; + else if (verity_handle_err(v, + DM_VERITY_BLOCK_TYPE_METADATA, + hash_block)) { + r = -EIO; + goto release_ret_r; + } + } + + data += offset; + memcpy(want_digest, data, v->digest_size); + r = 0; + +release_ret_r: + dm_bufio_release(buf); + return r; +} + +/* + * Find a hash for a given block, write it to digest and verify the integrity + * of the hash tree if necessary. + */ +static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, u8 *digest) +{ + int i; + int r; + + if (likely(v->levels)) { + /* + * First, we try to get the requested hash for + * the current block. If the hash block itself is + * verified, zero is returned. If it isn't, this + * function returns 1 and we fall back to whole + * chain verification. + */ + r = verity_verify_level(v, io, block, 0, true, digest); + if (likely(r <= 0)) + return r; + } + + memcpy(digest, v->root_digest, v->digest_size); + + for (i = v->levels - 1; i >= 0; i--) { + r = verity_verify_level(v, io, block, i, false, digest); + if (unlikely(r)) + return r; + } + + return 0; +} + +/* + * Verify one "dm_verity_io" structure. + */ +static int verity_verify_io(struct dm_verity_io *io) +{ + struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, + v->ti->per_bio_data_size); + unsigned b; + + for (b = 0; b < io->n_blocks; b++) { + int r; + unsigned todo; + struct shash_desc *desc = io_hash_desc(v, io); + + r = verity_hash_for_block(v, io, io->block + b, + io_want_digest(v, io)); + if (unlikely(r < 0)) + return r; + + r = verity_hash_init(v, desc); + if (unlikely(r < 0)) + return r; + + todo = 1 << v->data_dev_block_bits; + do { + u8 *page; + unsigned len; + struct bio_vec bv = bio_iter_iovec(bio, io->iter); + + page = kmap_atomic(bv.bv_page); + len = bv.bv_len; + if (likely(len >= todo)) + len = todo; + r = verity_hash_update(v, desc, page + bv.bv_offset, + len); + kunmap_atomic(page); + + if (unlikely(r < 0)) + return r; + + bio_advance_iter(bio, &io->iter, len); + todo -= len; + } while (todo); + + r = verity_hash_final(v, desc, io_real_digest(v, io)); + if (unlikely(r < 0)) + return r; + + if (likely(memcmp(io_real_digest(v, io), + io_want_digest(v, io), v->digest_size) == 0)) + continue; + else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + io->block + b)) + return -EIO; + } + + return 0; +} + +/* + * End one "io" structure with a given error. + */ +static void verity_finish_io(struct dm_verity_io *io, int error) +{ + struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); + + bio->bi_end_io = io->orig_bi_end_io; + bio->bi_private = io->orig_bi_private; + + bio_endio_nodec(bio, error); +} + +static void verity_work(struct work_struct *w) +{ + struct dm_verity_io *io = container_of(w, struct dm_verity_io, work); + + verity_finish_io(io, verity_verify_io(io)); +} + +static void verity_end_io(struct bio *bio, int error) +{ + struct dm_verity_io *io = bio->bi_private; + + if (error) { + verity_finish_io(io, error); + return; + } + + INIT_WORK(&io->work, verity_work); + queue_work(io->v->verify_wq, &io->work); +} + +/* + * Prefetch buffers for the specified io. + * The root buffer is not prefetched, it is assumed that it will be cached + * all the time. + */ +static void verity_prefetch_io(struct work_struct *work) +{ + struct dm_verity_prefetch_work *pw = + container_of(work, struct dm_verity_prefetch_work, work); + struct dm_verity *v = pw->v; + int i; + + for (i = v->levels - 2; i >= 0; i--) { + sector_t hash_block_start; + sector_t hash_block_end; + verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL); + verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL); + if (!i) { + unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); + + cluster >>= v->data_dev_block_bits; + if (unlikely(!cluster)) + goto no_prefetch_cluster; + + if (unlikely(cluster & (cluster - 1))) + cluster = 1 << __fls(cluster); + + hash_block_start &= ~(sector_t)(cluster - 1); + hash_block_end |= cluster - 1; + if (unlikely(hash_block_end >= v->hash_blocks)) + hash_block_end = v->hash_blocks - 1; + } +no_prefetch_cluster: + dm_bufio_prefetch(v->bufio, hash_block_start, + hash_block_end - hash_block_start + 1); + } + + kfree(pw); +} + +static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io) +{ + struct dm_verity_prefetch_work *pw; + + pw = kmalloc(sizeof(struct dm_verity_prefetch_work), + GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!pw) + return; + + INIT_WORK(&pw->work, verity_prefetch_io); + pw->v = v; + pw->block = io->block; + pw->n_blocks = io->n_blocks; + queue_work(v->verify_wq, &pw->work); +} + +/* + * Bio map function. It allocates dm_verity_io structure and bio vector and + * fills them. Then it issues prefetches and the I/O. + */ +static int verity_map(struct dm_target *ti, struct bio *bio) +{ + struct dm_verity *v = ti->private; + struct dm_verity_io *io; + + bio->bi_bdev = v->data_dev->bdev; + bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector); + + if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & + ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) { + DMERR_LIMIT("unaligned io"); + return -EIO; + } + + if (bio_end_sector(bio) >> + (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { + DMERR_LIMIT("io out of range"); + return -EIO; + } + + if (bio_data_dir(bio) == WRITE) + return -EIO; + + io = dm_per_bio_data(bio, ti->per_bio_data_size); + io->v = v; + io->orig_bi_end_io = bio->bi_end_io; + io->orig_bi_private = bio->bi_private; + io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); + io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits; + + bio->bi_end_io = verity_end_io; + bio->bi_private = io; + io->iter = bio->bi_iter; + + verity_submit_prefetch(v, io); + + generic_make_request(bio); + + return DM_MAPIO_SUBMITTED; +} + +/* + * Status: V (valid) or C (corruption found) + */ +static void verity_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) +{ + struct dm_verity *v = ti->private; + unsigned sz = 0; + unsigned x; + + switch (type) { + case STATUSTYPE_INFO: + DMEMIT("%c", v->hash_failed ? 'C' : 'V'); + break; + case STATUSTYPE_TABLE: + DMEMIT("%u %s %s %u %u %llu %llu %s ", + v->version, + v->data_dev->name, + v->hash_dev->name, + 1 << v->data_dev_block_bits, + 1 << v->hash_dev_block_bits, + (unsigned long long)v->data_blocks, + (unsigned long long)v->hash_start, + v->alg_name + ); + for (x = 0; x < v->digest_size; x++) + DMEMIT("%02x", v->root_digest[x]); + DMEMIT(" "); + if (!v->salt_size) + DMEMIT("-"); + else + for (x = 0; x < v->salt_size; x++) + DMEMIT("%02x", v->salt[x]); + if (v->mode != DM_VERITY_MODE_EIO) { + DMEMIT(" 1 "); + switch (v->mode) { + case DM_VERITY_MODE_LOGGING: + DMEMIT(DM_VERITY_OPT_LOGGING); + break; + case DM_VERITY_MODE_RESTART: + DMEMIT(DM_VERITY_OPT_RESTART); + break; + default: + BUG(); + } + } + break; + } +} + +static int verity_ioctl(struct dm_target *ti, unsigned cmd, + unsigned long arg) +{ + struct dm_verity *v = ti->private; + int r = 0; + + if (v->data_start || + ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT) + r = scsi_verify_blk_ioctl(NULL, cmd); + + return r ? : __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode, + cmd, arg); +} + +static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct dm_verity *v = ti->private; + struct request_queue *q = bdev_get_queue(v->data_dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = v->data_dev->bdev; + bvm->bi_sector = verity_map_sector(v, bvm->bi_sector); + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + +static int verity_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct dm_verity *v = ti->private; + + return fn(ti, v->data_dev, v->data_start, ti->len, data); +} + +static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct dm_verity *v = ti->private; + + if (limits->logical_block_size < 1 << v->data_dev_block_bits) + limits->logical_block_size = 1 << v->data_dev_block_bits; + + if (limits->physical_block_size < 1 << v->data_dev_block_bits) + limits->physical_block_size = 1 << v->data_dev_block_bits; + + blk_limits_io_min(limits, limits->logical_block_size); +} + +static void verity_dtr(struct dm_target *ti) +{ + struct dm_verity *v = ti->private; + + if (v->verify_wq) + destroy_workqueue(v->verify_wq); + + if (v->bufio) + dm_bufio_client_destroy(v->bufio); + + kfree(v->salt); + kfree(v->root_digest); + + if (v->tfm) + crypto_free_shash(v->tfm); + + kfree(v->alg_name); + + if (v->hash_dev) + dm_put_device(ti, v->hash_dev); + + if (v->data_dev) + dm_put_device(ti, v->data_dev); + + kfree(v); +} + +static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) +{ + int r; + unsigned argc; + struct dm_target *ti = v->ti; + const char *arg_name; + + static struct dm_arg _args[] = { + {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"}, + }; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return -EINVAL; + + if (!argc) + return 0; + + do { + arg_name = dm_shift_arg(as); + argc--; + + if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) { + v->mode = DM_VERITY_MODE_LOGGING; + continue; + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { + v->mode = DM_VERITY_MODE_RESTART; + continue; + } + + ti->error = "Unrecognized verity feature request"; + return -EINVAL; + } while (argc && !r); + + return r; +} + +/* + * Target parameters: + * The current format is version 1. + * Vsn 0 is compatible with original Chromium OS releases. + * + * + * + * + * + * + * + * + * Hex string or "-" if no salt. + */ +static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + struct dm_verity *v; + struct dm_arg_set as; + unsigned int num; + unsigned long long num_ll; + int r; + int i; + sector_t hash_position; + char dummy; + + v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); + if (!v) { + ti->error = "Cannot allocate verity structure"; + return -ENOMEM; + } + ti->private = v; + v->ti = ti; + + if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) { + ti->error = "Device must be readonly"; + r = -EINVAL; + goto bad; + } + + if (argc < 10) { + ti->error = "Not enough arguments"; + r = -EINVAL; + goto bad; + } + + if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 || + num > 1) { + ti->error = "Invalid version"; + r = -EINVAL; + goto bad; + } + v->version = num; + + r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev); + if (r) { + ti->error = "Data device lookup failed"; + goto bad; + } + + r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev); + if (r) { + ti->error = "Data device lookup failed"; + goto bad; + } + + if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 || + !num || (num & (num - 1)) || + num < bdev_logical_block_size(v->data_dev->bdev) || + num > PAGE_SIZE) { + ti->error = "Invalid data device block size"; + r = -EINVAL; + goto bad; + } + v->data_dev_block_bits = __ffs(num); + + if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 || + !num || (num & (num - 1)) || + num < bdev_logical_block_size(v->hash_dev->bdev) || + num > INT_MAX) { + ti->error = "Invalid hash device block size"; + r = -EINVAL; + goto bad; + } + v->hash_dev_block_bits = __ffs(num); + + if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || + (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) { + ti->error = "Invalid data blocks"; + r = -EINVAL; + goto bad; + } + v->data_blocks = num_ll; + + if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) { + ti->error = "Data device is too small"; + r = -EINVAL; + goto bad; + } + + if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || + (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) + >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) { + ti->error = "Invalid hash start"; + r = -EINVAL; + goto bad; + } + v->hash_start = num_ll; + + v->alg_name = kstrdup(argv[7], GFP_KERNEL); + if (!v->alg_name) { + ti->error = "Cannot allocate algorithm name"; + r = -ENOMEM; + goto bad; + } + + v->tfm = crypto_alloc_shash(v->alg_name, 0, 0); + if (IS_ERR(v->tfm)) { + ti->error = "Cannot initialize hash function"; + r = PTR_ERR(v->tfm); + v->tfm = NULL; + goto bad; + } + v->digest_size = crypto_shash_digestsize(v->tfm); + if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { + ti->error = "Digest size too big"; + r = -EINVAL; + goto bad; + } + v->shash_descsize = + sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm); + + v->root_digest = kmalloc(v->digest_size, GFP_KERNEL); + if (!v->root_digest) { + ti->error = "Cannot allocate root digest"; + r = -ENOMEM; + goto bad; + } + if (strlen(argv[8]) != v->digest_size * 2 || + hex2bin(v->root_digest, argv[8], v->digest_size)) { + ti->error = "Invalid root digest"; + r = -EINVAL; + goto bad; + } + + if (strcmp(argv[9], "-")) { + v->salt_size = strlen(argv[9]) / 2; + v->salt = kmalloc(v->salt_size, GFP_KERNEL); + if (!v->salt) { + ti->error = "Cannot allocate salt"; + r = -ENOMEM; + goto bad; + } + if (strlen(argv[9]) != v->salt_size * 2 || + hex2bin(v->salt, argv[9], v->salt_size)) { + ti->error = "Invalid salt"; + r = -EINVAL; + goto bad; + } + } + + argv += 10; + argc -= 10; + + /* Optional parameters */ + if (argc) { + as.argc = argc; + as.argv = argv; + + r = verity_parse_opt_args(&as, v); + if (r < 0) + goto bad; + } + + v->hash_per_block_bits = + __fls((1 << v->hash_dev_block_bits) / v->digest_size); + + v->levels = 0; + if (v->data_blocks) + while (v->hash_per_block_bits * v->levels < 64 && + (unsigned long long)(v->data_blocks - 1) >> + (v->hash_per_block_bits * v->levels)) + v->levels++; + + if (v->levels > DM_VERITY_MAX_LEVELS) { + ti->error = "Too many tree levels"; + r = -E2BIG; + goto bad; + } + + hash_position = v->hash_start; + for (i = v->levels - 1; i >= 0; i--) { + sector_t s; + v->hash_level_block[i] = hash_position; + s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1) + >> ((i + 1) * v->hash_per_block_bits); + if (hash_position + s < hash_position) { + ti->error = "Hash device offset overflow"; + r = -E2BIG; + goto bad; + } + hash_position += s; + } + v->hash_blocks = hash_position; + + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, + 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), + dm_bufio_alloc_callback, NULL); + if (IS_ERR(v->bufio)) { + ti->error = "Cannot initialize dm-bufio"; + r = PTR_ERR(v->bufio); + v->bufio = NULL; + goto bad; + } + + if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) { + ti->error = "Hash device is too small"; + r = -E2BIG; + goto bad; + } + + ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); + + /* WQ_UNBOUND greatly improves performance when running on ramdisk */ + v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); + if (!v->verify_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + + return 0; + +bad: + verity_dtr(ti); + + return r; +} + +static struct target_type verity_target = { + .name = "verity", + .version = {1, 2, 0}, + .module = THIS_MODULE, + .ctr = verity_ctr, + .dtr = verity_dtr, + .map = verity_map, + .status = verity_status, + .ioctl = verity_ioctl, + .merge = verity_merge, + .iterate_devices = verity_iterate_devices, + .io_hints = verity_io_hints, +}; + +static int __init dm_verity_init(void) +{ + int r; + + r = dm_register_target(&verity_target); + if (r < 0) + DMERR("register failed %d", r); + + return r; +} + +static void __exit dm_verity_exit(void) +{ + dm_unregister_target(&verity_target); +} + +module_init(dm_verity_init); +module_exit(dm_verity_exit); + +MODULE_AUTHOR("Mikulas Patocka "); +MODULE_AUTHOR("Mandeep Baines "); +MODULE_AUTHOR("Will Drewry "); +MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c deleted file mode 100644 index 00f9e619f9ff..000000000000 --- a/drivers/md/dm-verity.c +++ /dev/null @@ -1,1058 +0,0 @@ -/* - * Copyright (C) 2012 Red Hat, Inc. - * - * Author: Mikulas Patocka - * - * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors - * - * This file is released under the GPLv2. - * - * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set - * default prefetch value. Data are read in "prefetch_cluster" chunks from the - * hash device. Setting this greatly improves performance when data and hash - * are on the same disk on different partitions on devices with poor random - * access behavior. - */ - -#include "dm-bufio.h" - -#include -#include -#include -#include - -#define DM_MSG_PREFIX "verity" - -#define DM_VERITY_ENV_LENGTH 42 -#define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" - -#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 - -#define DM_VERITY_MAX_LEVELS 63 -#define DM_VERITY_MAX_CORRUPTED_ERRS 100 - -#define DM_VERITY_OPT_LOGGING "ignore_corruption" -#define DM_VERITY_OPT_RESTART "restart_on_corruption" - -#define DM_VERITY_OPTS_MAX 1 - -static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; - -module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); - -enum verity_mode { - DM_VERITY_MODE_EIO, - DM_VERITY_MODE_LOGGING, - DM_VERITY_MODE_RESTART -}; - -enum verity_block_type { - DM_VERITY_BLOCK_TYPE_DATA, - DM_VERITY_BLOCK_TYPE_METADATA -}; - -struct dm_verity { - struct dm_dev *data_dev; - struct dm_dev *hash_dev; - struct dm_target *ti; - struct dm_bufio_client *bufio; - char *alg_name; - struct crypto_shash *tfm; - u8 *root_digest; /* digest of the root block */ - u8 *salt; /* salt: its size is salt_size */ - unsigned salt_size; - sector_t data_start; /* data offset in 512-byte sectors */ - sector_t hash_start; /* hash start in blocks */ - sector_t data_blocks; /* the number of data blocks */ - sector_t hash_blocks; /* the number of hash blocks */ - unsigned char data_dev_block_bits; /* log2(data blocksize) */ - unsigned char hash_dev_block_bits; /* log2(hash blocksize) */ - unsigned char hash_per_block_bits; /* log2(hashes in hash block) */ - unsigned char levels; /* the number of tree levels */ - unsigned char version; - unsigned digest_size; /* digest size for the current hash algorithm */ - unsigned shash_descsize;/* the size of temporary space for crypto */ - int hash_failed; /* set to 1 if hash of any block failed */ - enum verity_mode mode; /* mode for handling verification errors */ - unsigned corrupted_errs;/* Number of errors for corrupted blocks */ - - struct workqueue_struct *verify_wq; - - /* starting blocks for each tree level. 0 is the lowest level. */ - sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; -}; - -struct dm_verity_io { - struct dm_verity *v; - - /* original values of bio->bi_end_io and bio->bi_private */ - bio_end_io_t *orig_bi_end_io; - void *orig_bi_private; - - sector_t block; - unsigned n_blocks; - - struct bvec_iter iter; - - struct work_struct work; - - /* - * Three variably-size fields follow this struct: - * - * u8 hash_desc[v->shash_descsize]; - * u8 real_digest[v->digest_size]; - * u8 want_digest[v->digest_size]; - * - * To access them use: io_hash_desc(), io_real_digest() and io_want_digest(). - */ -}; - -struct dm_verity_prefetch_work { - struct work_struct work; - struct dm_verity *v; - sector_t block; - unsigned n_blocks; -}; - -static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) -{ - return (struct shash_desc *)(io + 1); -} - -static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize; -} - -static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; -} - -/* - * Auxiliary structure appended to each dm-bufio buffer. If the value - * hash_verified is nonzero, hash of the block has been verified. - * - * The variable hash_verified is set to 0 when allocating the buffer, then - * it can be changed to 1 and it is never reset to 0 again. - * - * There is no lock around this value, a race condition can at worst cause - * that multiple processes verify the hash of the same buffer simultaneously - * and write 1 to hash_verified simultaneously. - * This condition is harmless, so we don't need locking. - */ -struct buffer_aux { - int hash_verified; -}; - -/* - * Initialize struct buffer_aux for a freshly created buffer. - */ -static void dm_bufio_alloc_callback(struct dm_buffer *buf) -{ - struct buffer_aux *aux = dm_bufio_get_aux_data(buf); - - aux->hash_verified = 0; -} - -/* - * Translate input sector number to the sector number on the target device. - */ -static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector) -{ - return v->data_start + dm_target_offset(v->ti, bi_sector); -} - -/* - * Return hash position of a specified block at a specified tree level - * (0 is the lowest level). - * The lowest "hash_per_block_bits"-bits of the result denote hash position - * inside a hash block. The remaining bits denote location of the hash block. - */ -static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, - int level) -{ - return block >> (level * v->hash_per_block_bits); -} - -/* - * Wrapper for crypto_shash_init, which handles verity salting. - */ -static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc) -{ - int r; - - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - r = crypto_shash_init(desc); - - if (unlikely(r < 0)) { - DMERR("crypto_shash_init failed: %d", r); - return r; - } - - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - - if (unlikely(r < 0)) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } - - return 0; -} - -static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc, - const u8 *data, size_t len) -{ - int r = crypto_shash_update(desc, data, len); - - if (unlikely(r < 0)) - DMERR("crypto_shash_update failed: %d", r); - - return r; -} - -static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc, - u8 *digest) -{ - int r; - - if (unlikely(!v->version)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } - - r = crypto_shash_final(desc, digest); - - if (unlikely(r < 0)) - DMERR("crypto_shash_final failed: %d", r); - - return r; -} - -static int verity_hash(struct dm_verity *v, struct shash_desc *desc, - const u8 *data, size_t len, u8 *digest) -{ - int r; - - r = verity_hash_init(v, desc); - if (unlikely(r < 0)) - return r; - - r = verity_hash_update(v, desc, data, len); - if (unlikely(r < 0)) - return r; - - return verity_hash_final(v, desc, digest); -} - -static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, - sector_t *hash_block, unsigned *offset) -{ - sector_t position = verity_position_at_level(v, block, level); - unsigned idx; - - *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits); - - if (!offset) - return; - - idx = position & ((1 << v->hash_per_block_bits) - 1); - if (!v->version) - *offset = idx * v->digest_size; - else - *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits); -} - -/* - * Handle verification errors. - */ -static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, - unsigned long long block) -{ - char verity_env[DM_VERITY_ENV_LENGTH]; - char *envp[] = { verity_env, NULL }; - const char *type_str = ""; - struct mapped_device *md = dm_table_get_md(v->ti->table); - - /* Corruption should be visible in device status in all modes */ - v->hash_failed = 1; - - if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS) - goto out; - - v->corrupted_errs++; - - switch (type) { - case DM_VERITY_BLOCK_TYPE_DATA: - type_str = "data"; - break; - case DM_VERITY_BLOCK_TYPE_METADATA: - type_str = "metadata"; - break; - default: - BUG(); - } - - DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str, - block); - - if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) - DMERR("%s: reached maximum errors", v->data_dev->name); - - snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu", - DM_VERITY_ENV_VAR_NAME, type, block); - - kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp); - -out: - if (v->mode == DM_VERITY_MODE_LOGGING) - return 0; - - if (v->mode == DM_VERITY_MODE_RESTART) - kernel_restart("dm-verity device corrupted"); - - return 1; -} - -/* - * Verify hash of a metadata block pertaining to the specified data block - * ("block" argument) at a specified level ("level" argument). - * - * On successful return, io_want_digest(v, io) contains the hash value for - * a lower tree level or for the data block (if we're at the lowest leve). - * - * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned. - * If "skip_unverified" is false, unverified buffer is hashed and verified - * against current value of io_want_digest(v, io). - */ -static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, int level, bool skip_unverified, - u8 *want_digest) -{ - struct dm_buffer *buf; - struct buffer_aux *aux; - u8 *data; - int r; - sector_t hash_block; - unsigned offset; - - verity_hash_at_level(v, block, level, &hash_block, &offset); - - data = dm_bufio_read(v->bufio, hash_block, &buf); - if (IS_ERR(data)) - return PTR_ERR(data); - - aux = dm_bufio_get_aux_data(buf); - - if (!aux->hash_verified) { - if (skip_unverified) { - r = 1; - goto release_ret_r; - } - - r = verity_hash(v, io_hash_desc(v, io), - data, 1 << v->hash_dev_block_bits, - io_real_digest(v, io)); - if (unlikely(r < 0)) - goto release_ret_r; - - if (likely(memcmp(io_real_digest(v, io), want_digest, - v->digest_size) == 0)) - aux->hash_verified = 1; - else if (verity_handle_err(v, - DM_VERITY_BLOCK_TYPE_METADATA, - hash_block)) { - r = -EIO; - goto release_ret_r; - } - } - - data += offset; - memcpy(want_digest, data, v->digest_size); - r = 0; - -release_ret_r: - dm_bufio_release(buf); - return r; -} - -/* - * Find a hash for a given block, write it to digest and verify the integrity - * of the hash tree if necessary. - */ -static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, u8 *digest) -{ - int i; - int r; - - if (likely(v->levels)) { - /* - * First, we try to get the requested hash for - * the current block. If the hash block itself is - * verified, zero is returned. If it isn't, this - * function returns 1 and we fall back to whole - * chain verification. - */ - r = verity_verify_level(v, io, block, 0, true, digest); - if (likely(r <= 0)) - return r; - } - - memcpy(digest, v->root_digest, v->digest_size); - - for (i = v->levels - 1; i >= 0; i--) { - r = verity_verify_level(v, io, block, i, false, digest); - if (unlikely(r)) - return r; - } - - return 0; -} - -/* - * Verify one "dm_verity_io" structure. - */ -static int verity_verify_io(struct dm_verity_io *io) -{ - struct dm_verity *v = io->v; - struct bio *bio = dm_bio_from_per_bio_data(io, - v->ti->per_bio_data_size); - unsigned b; - - for (b = 0; b < io->n_blocks; b++) { - int r; - unsigned todo; - struct shash_desc *desc = io_hash_desc(v, io); - - r = verity_hash_for_block(v, io, io->block + b, - io_want_digest(v, io)); - if (unlikely(r < 0)) - return r; - - r = verity_hash_init(v, desc); - if (unlikely(r < 0)) - return r; - - todo = 1 << v->data_dev_block_bits; - do { - u8 *page; - unsigned len; - struct bio_vec bv = bio_iter_iovec(bio, io->iter); - - page = kmap_atomic(bv.bv_page); - len = bv.bv_len; - if (likely(len >= todo)) - len = todo; - r = verity_hash_update(v, desc, page + bv.bv_offset, - len); - kunmap_atomic(page); - - if (unlikely(r < 0)) - return r; - - bio_advance_iter(bio, &io->iter, len); - todo -= len; - } while (todo); - - r = verity_hash_final(v, desc, io_real_digest(v, io)); - if (unlikely(r < 0)) - return r; - - if (likely(memcmp(io_real_digest(v, io), - io_want_digest(v, io), v->digest_size) == 0)) - continue; - else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) - return -EIO; - } - - return 0; -} - -/* - * End one "io" structure with a given error. - */ -static void verity_finish_io(struct dm_verity_io *io, int error) -{ - struct dm_verity *v = io->v; - struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); - - bio->bi_end_io = io->orig_bi_end_io; - bio->bi_private = io->orig_bi_private; - - bio_endio_nodec(bio, error); -} - -static void verity_work(struct work_struct *w) -{ - struct dm_verity_io *io = container_of(w, struct dm_verity_io, work); - - verity_finish_io(io, verity_verify_io(io)); -} - -static void verity_end_io(struct bio *bio, int error) -{ - struct dm_verity_io *io = bio->bi_private; - - if (error) { - verity_finish_io(io, error); - return; - } - - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); -} - -/* - * Prefetch buffers for the specified io. - * The root buffer is not prefetched, it is assumed that it will be cached - * all the time. - */ -static void verity_prefetch_io(struct work_struct *work) -{ - struct dm_verity_prefetch_work *pw = - container_of(work, struct dm_verity_prefetch_work, work); - struct dm_verity *v = pw->v; - int i; - - for (i = v->levels - 2; i >= 0; i--) { - sector_t hash_block_start; - sector_t hash_block_end; - verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL); - verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL); - if (!i) { - unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); - - cluster >>= v->data_dev_block_bits; - if (unlikely(!cluster)) - goto no_prefetch_cluster; - - if (unlikely(cluster & (cluster - 1))) - cluster = 1 << __fls(cluster); - - hash_block_start &= ~(sector_t)(cluster - 1); - hash_block_end |= cluster - 1; - if (unlikely(hash_block_end >= v->hash_blocks)) - hash_block_end = v->hash_blocks - 1; - } -no_prefetch_cluster: - dm_bufio_prefetch(v->bufio, hash_block_start, - hash_block_end - hash_block_start + 1); - } - - kfree(pw); -} - -static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io) -{ - struct dm_verity_prefetch_work *pw; - - pw = kmalloc(sizeof(struct dm_verity_prefetch_work), - GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - - if (!pw) - return; - - INIT_WORK(&pw->work, verity_prefetch_io); - pw->v = v; - pw->block = io->block; - pw->n_blocks = io->n_blocks; - queue_work(v->verify_wq, &pw->work); -} - -/* - * Bio map function. It allocates dm_verity_io structure and bio vector and - * fills them. Then it issues prefetches and the I/O. - */ -static int verity_map(struct dm_target *ti, struct bio *bio) -{ - struct dm_verity *v = ti->private; - struct dm_verity_io *io; - - bio->bi_bdev = v->data_dev->bdev; - bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector); - - if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & - ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) { - DMERR_LIMIT("unaligned io"); - return -EIO; - } - - if (bio_end_sector(bio) >> - (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { - DMERR_LIMIT("io out of range"); - return -EIO; - } - - if (bio_data_dir(bio) == WRITE) - return -EIO; - - io = dm_per_bio_data(bio, ti->per_bio_data_size); - io->v = v; - io->orig_bi_end_io = bio->bi_end_io; - io->orig_bi_private = bio->bi_private; - io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); - io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits; - - bio->bi_end_io = verity_end_io; - bio->bi_private = io; - io->iter = bio->bi_iter; - - verity_submit_prefetch(v, io); - - generic_make_request(bio); - - return DM_MAPIO_SUBMITTED; -} - -/* - * Status: V (valid) or C (corruption found) - */ -static void verity_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) -{ - struct dm_verity *v = ti->private; - unsigned sz = 0; - unsigned x; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%c", v->hash_failed ? 'C' : 'V'); - break; - case STATUSTYPE_TABLE: - DMEMIT("%u %s %s %u %u %llu %llu %s ", - v->version, - v->data_dev->name, - v->hash_dev->name, - 1 << v->data_dev_block_bits, - 1 << v->hash_dev_block_bits, - (unsigned long long)v->data_blocks, - (unsigned long long)v->hash_start, - v->alg_name - ); - for (x = 0; x < v->digest_size; x++) - DMEMIT("%02x", v->root_digest[x]); - DMEMIT(" "); - if (!v->salt_size) - DMEMIT("-"); - else - for (x = 0; x < v->salt_size; x++) - DMEMIT("%02x", v->salt[x]); - if (v->mode != DM_VERITY_MODE_EIO) { - DMEMIT(" 1 "); - switch (v->mode) { - case DM_VERITY_MODE_LOGGING: - DMEMIT(DM_VERITY_OPT_LOGGING); - break; - case DM_VERITY_MODE_RESTART: - DMEMIT(DM_VERITY_OPT_RESTART); - break; - default: - BUG(); - } - } - break; - } -} - -static int verity_ioctl(struct dm_target *ti, unsigned cmd, - unsigned long arg) -{ - struct dm_verity *v = ti->private; - int r = 0; - - if (v->data_start || - ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? : __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode, - cmd, arg); -} - -static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct dm_verity *v = ti->private; - struct request_queue *q = bdev_get_queue(v->data_dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = v->data_dev->bdev; - bvm->bi_sector = verity_map_sector(v, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static int verity_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct dm_verity *v = ti->private; - - return fn(ti, v->data_dev, v->data_start, ti->len, data); -} - -static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) -{ - struct dm_verity *v = ti->private; - - if (limits->logical_block_size < 1 << v->data_dev_block_bits) - limits->logical_block_size = 1 << v->data_dev_block_bits; - - if (limits->physical_block_size < 1 << v->data_dev_block_bits) - limits->physical_block_size = 1 << v->data_dev_block_bits; - - blk_limits_io_min(limits, limits->logical_block_size); -} - -static void verity_dtr(struct dm_target *ti) -{ - struct dm_verity *v = ti->private; - - if (v->verify_wq) - destroy_workqueue(v->verify_wq); - - if (v->bufio) - dm_bufio_client_destroy(v->bufio); - - kfree(v->salt); - kfree(v->root_digest); - - if (v->tfm) - crypto_free_shash(v->tfm); - - kfree(v->alg_name); - - if (v->hash_dev) - dm_put_device(ti, v->hash_dev); - - if (v->data_dev) - dm_put_device(ti, v->data_dev); - - kfree(v); -} - -static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) -{ - int r; - unsigned argc; - struct dm_target *ti = v->ti; - const char *arg_name; - - static struct dm_arg _args[] = { - {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"}, - }; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) - return -EINVAL; - - if (!argc) - return 0; - - do { - arg_name = dm_shift_arg(as); - argc--; - - if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) { - v->mode = DM_VERITY_MODE_LOGGING; - continue; - - } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { - v->mode = DM_VERITY_MODE_RESTART; - continue; - } - - ti->error = "Unrecognized verity feature request"; - return -EINVAL; - } while (argc && !r); - - return r; -} - -/* - * Target parameters: - * The current format is version 1. - * Vsn 0 is compatible with original Chromium OS releases. - * - * - * - * - * - * - * - * - * Hex string or "-" if no salt. - */ -static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) -{ - struct dm_verity *v; - struct dm_arg_set as; - unsigned int num; - unsigned long long num_ll; - int r; - int i; - sector_t hash_position; - char dummy; - - v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); - if (!v) { - ti->error = "Cannot allocate verity structure"; - return -ENOMEM; - } - ti->private = v; - v->ti = ti; - - if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) { - ti->error = "Device must be readonly"; - r = -EINVAL; - goto bad; - } - - if (argc < 10) { - ti->error = "Not enough arguments"; - r = -EINVAL; - goto bad; - } - - if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 || - num > 1) { - ti->error = "Invalid version"; - r = -EINVAL; - goto bad; - } - v->version = num; - - r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev); - if (r) { - ti->error = "Data device lookup failed"; - goto bad; - } - - r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev); - if (r) { - ti->error = "Data device lookup failed"; - goto bad; - } - - if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 || - !num || (num & (num - 1)) || - num < bdev_logical_block_size(v->data_dev->bdev) || - num > PAGE_SIZE) { - ti->error = "Invalid data device block size"; - r = -EINVAL; - goto bad; - } - v->data_dev_block_bits = __ffs(num); - - if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 || - !num || (num & (num - 1)) || - num < bdev_logical_block_size(v->hash_dev->bdev) || - num > INT_MAX) { - ti->error = "Invalid hash device block size"; - r = -EINVAL; - goto bad; - } - v->hash_dev_block_bits = __ffs(num); - - if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || - (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) - >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) { - ti->error = "Invalid data blocks"; - r = -EINVAL; - goto bad; - } - v->data_blocks = num_ll; - - if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) { - ti->error = "Data device is too small"; - r = -EINVAL; - goto bad; - } - - if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || - (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) - >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) { - ti->error = "Invalid hash start"; - r = -EINVAL; - goto bad; - } - v->hash_start = num_ll; - - v->alg_name = kstrdup(argv[7], GFP_KERNEL); - if (!v->alg_name) { - ti->error = "Cannot allocate algorithm name"; - r = -ENOMEM; - goto bad; - } - - v->tfm = crypto_alloc_shash(v->alg_name, 0, 0); - if (IS_ERR(v->tfm)) { - ti->error = "Cannot initialize hash function"; - r = PTR_ERR(v->tfm); - v->tfm = NULL; - goto bad; - } - v->digest_size = crypto_shash_digestsize(v->tfm); - if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { - ti->error = "Digest size too big"; - r = -EINVAL; - goto bad; - } - v->shash_descsize = - sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm); - - v->root_digest = kmalloc(v->digest_size, GFP_KERNEL); - if (!v->root_digest) { - ti->error = "Cannot allocate root digest"; - r = -ENOMEM; - goto bad; - } - if (strlen(argv[8]) != v->digest_size * 2 || - hex2bin(v->root_digest, argv[8], v->digest_size)) { - ti->error = "Invalid root digest"; - r = -EINVAL; - goto bad; - } - - if (strcmp(argv[9], "-")) { - v->salt_size = strlen(argv[9]) / 2; - v->salt = kmalloc(v->salt_size, GFP_KERNEL); - if (!v->salt) { - ti->error = "Cannot allocate salt"; - r = -ENOMEM; - goto bad; - } - if (strlen(argv[9]) != v->salt_size * 2 || - hex2bin(v->salt, argv[9], v->salt_size)) { - ti->error = "Invalid salt"; - r = -EINVAL; - goto bad; - } - } - - argv += 10; - argc -= 10; - - /* Optional parameters */ - if (argc) { - as.argc = argc; - as.argv = argv; - - r = verity_parse_opt_args(&as, v); - if (r < 0) - goto bad; - } - - v->hash_per_block_bits = - __fls((1 << v->hash_dev_block_bits) / v->digest_size); - - v->levels = 0; - if (v->data_blocks) - while (v->hash_per_block_bits * v->levels < 64 && - (unsigned long long)(v->data_blocks - 1) >> - (v->hash_per_block_bits * v->levels)) - v->levels++; - - if (v->levels > DM_VERITY_MAX_LEVELS) { - ti->error = "Too many tree levels"; - r = -E2BIG; - goto bad; - } - - hash_position = v->hash_start; - for (i = v->levels - 1; i >= 0; i--) { - sector_t s; - v->hash_level_block[i] = hash_position; - s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1) - >> ((i + 1) * v->hash_per_block_bits); - if (hash_position + s < hash_position) { - ti->error = "Hash device offset overflow"; - r = -E2BIG; - goto bad; - } - hash_position += s; - } - v->hash_blocks = hash_position; - - v->bufio = dm_bufio_client_create(v->hash_dev->bdev, - 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), - dm_bufio_alloc_callback, NULL); - if (IS_ERR(v->bufio)) { - ti->error = "Cannot initialize dm-bufio"; - r = PTR_ERR(v->bufio); - v->bufio = NULL; - goto bad; - } - - if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) { - ti->error = "Hash device is too small"; - r = -E2BIG; - goto bad; - } - - ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); - - /* WQ_UNBOUND greatly improves performance when running on ramdisk */ - v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); - if (!v->verify_wq) { - ti->error = "Cannot allocate workqueue"; - r = -ENOMEM; - goto bad; - } - - return 0; - -bad: - verity_dtr(ti); - - return r; -} - -static struct target_type verity_target = { - .name = "verity", - .version = {1, 2, 0}, - .module = THIS_MODULE, - .ctr = verity_ctr, - .dtr = verity_dtr, - .map = verity_map, - .status = verity_status, - .ioctl = verity_ioctl, - .merge = verity_merge, - .iterate_devices = verity_iterate_devices, - .io_hints = verity_io_hints, -}; - -static int __init dm_verity_init(void) -{ - int r; - - r = dm_register_target(&verity_target); - if (r < 0) - DMERR("register failed %d", r); - - return r; -} - -static void __exit dm_verity_exit(void) -{ - dm_unregister_target(&verity_target); -} - -module_init(dm_verity_init); -module_exit(dm_verity_exit); - -MODULE_AUTHOR("Mikulas Patocka "); -MODULE_AUTHOR("Mandeep Baines "); -MODULE_AUTHOR("Will Drewry "); -MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 83ab4db900a55e93d584448d235ae21f82305f08 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 14:27:20 +0000 Subject: UPSTREAM: dm verity: factor out structures and functions useful to separate object (cherry-picked from ffa393807cd69656d5b6bc9d9622e205071cbab8) Prepare for an optional verity object to make use of existing dm-verity structures and functions. Bug: 21893453 Change-Id: I102463e88089eb047596d4acccc424e42c3abca5 Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 117 ++++++------------------------------------ drivers/md/dm-verity.h | 113 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 101 deletions(-) create mode 100644 drivers/md/dm-verity.h diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 00f9e619f9ff..82d8e71fec26 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -14,12 +14,10 @@ * access behavior. */ -#include "dm-bufio.h" +#include "dm-verity.h" #include -#include #include -#include #define DM_MSG_PREFIX "verity" @@ -28,7 +26,6 @@ #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 -#define DM_VERITY_MAX_LEVELS 63 #define DM_VERITY_MAX_CORRUPTED_ERRS 100 #define DM_VERITY_OPT_LOGGING "ignore_corruption" @@ -40,73 +37,6 @@ static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); -enum verity_mode { - DM_VERITY_MODE_EIO, - DM_VERITY_MODE_LOGGING, - DM_VERITY_MODE_RESTART -}; - -enum verity_block_type { - DM_VERITY_BLOCK_TYPE_DATA, - DM_VERITY_BLOCK_TYPE_METADATA -}; - -struct dm_verity { - struct dm_dev *data_dev; - struct dm_dev *hash_dev; - struct dm_target *ti; - struct dm_bufio_client *bufio; - char *alg_name; - struct crypto_shash *tfm; - u8 *root_digest; /* digest of the root block */ - u8 *salt; /* salt: its size is salt_size */ - unsigned salt_size; - sector_t data_start; /* data offset in 512-byte sectors */ - sector_t hash_start; /* hash start in blocks */ - sector_t data_blocks; /* the number of data blocks */ - sector_t hash_blocks; /* the number of hash blocks */ - unsigned char data_dev_block_bits; /* log2(data blocksize) */ - unsigned char hash_dev_block_bits; /* log2(hash blocksize) */ - unsigned char hash_per_block_bits; /* log2(hashes in hash block) */ - unsigned char levels; /* the number of tree levels */ - unsigned char version; - unsigned digest_size; /* digest size for the current hash algorithm */ - unsigned shash_descsize;/* the size of temporary space for crypto */ - int hash_failed; /* set to 1 if hash of any block failed */ - enum verity_mode mode; /* mode for handling verification errors */ - unsigned corrupted_errs;/* Number of errors for corrupted blocks */ - - struct workqueue_struct *verify_wq; - - /* starting blocks for each tree level. 0 is the lowest level. */ - sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; -}; - -struct dm_verity_io { - struct dm_verity *v; - - /* original values of bio->bi_end_io and bio->bi_private */ - bio_end_io_t *orig_bi_end_io; - void *orig_bi_private; - - sector_t block; - unsigned n_blocks; - - struct bvec_iter iter; - - struct work_struct work; - - /* - * Three variably-size fields follow this struct: - * - * u8 hash_desc[v->shash_descsize]; - * u8 real_digest[v->digest_size]; - * u8 want_digest[v->digest_size]; - * - * To access them use: io_hash_desc(), io_real_digest() and io_want_digest(). - */ -}; - struct dm_verity_prefetch_work { struct work_struct work; struct dm_verity *v; @@ -114,21 +44,6 @@ struct dm_verity_prefetch_work { unsigned n_blocks; }; -static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) -{ - return (struct shash_desc *)(io + 1); -} - -static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize; -} - -static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; -} - /* * Auxiliary structure appended to each dm-bufio buffer. If the value * hash_verified is nonzero, hash of the block has been verified. @@ -237,8 +152,8 @@ static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc, return r; } -static int verity_hash(struct dm_verity *v, struct shash_desc *desc, - const u8 *data, size_t len, u8 *digest) +int verity_hash(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len, u8 *digest) { int r; @@ -326,12 +241,12 @@ out: * Verify hash of a metadata block pertaining to the specified data block * ("block" argument) at a specified level ("level" argument). * - * On successful return, io_want_digest(v, io) contains the hash value for - * a lower tree level or for the data block (if we're at the lowest leve). + * On successful return, verity_io_want_digest(v, io) contains the hash value + * for a lower tree level or for the data block (if we're at the lowest level). * * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned. * If "skip_unverified" is false, unverified buffer is hashed and verified - * against current value of io_want_digest(v, io). + * against current value of verity_io_want_digest(v, io). */ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, sector_t block, int level, bool skip_unverified, @@ -358,13 +273,13 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, goto release_ret_r; } - r = verity_hash(v, io_hash_desc(v, io), + r = verity_hash(v, verity_io_hash_desc(v, io), data, 1 << v->hash_dev_block_bits, - io_real_digest(v, io)); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) goto release_ret_r; - if (likely(memcmp(io_real_digest(v, io), want_digest, + if (likely(memcmp(verity_io_real_digest(v, io), want_digest, v->digest_size) == 0)) aux->hash_verified = 1; else if (verity_handle_err(v, @@ -388,8 +303,8 @@ release_ret_r: * Find a hash for a given block, write it to digest and verify the integrity * of the hash tree if necessary. */ -static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, u8 *digest) +int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, u8 *digest) { int i; int r; @@ -431,10 +346,10 @@ static int verity_verify_io(struct dm_verity_io *io) for (b = 0; b < io->n_blocks; b++) { int r; unsigned todo; - struct shash_desc *desc = io_hash_desc(v, io); + struct shash_desc *desc = verity_io_hash_desc(v, io); r = verity_hash_for_block(v, io, io->block + b, - io_want_digest(v, io)); + verity_io_want_digest(v, io)); if (unlikely(r < 0)) return r; @@ -463,12 +378,12 @@ static int verity_verify_io(struct dm_verity_io *io) todo -= len; } while (todo); - r = verity_hash_final(v, desc, io_real_digest(v, io)); + r = verity_hash_final(v, desc, verity_io_real_digest(v, io)); if (unlikely(r < 0)) return r; - if (likely(memcmp(io_real_digest(v, io), - io_want_digest(v, io), v->digest_size) == 0)) + if (likely(memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size) == 0)) continue; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, io->block + b)) diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h new file mode 100644 index 000000000000..ecf4f05add40 --- /dev/null +++ b/drivers/md/dm-verity.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * Copyright (C) 2015 Google, Inc. + * + * Author: Mikulas Patocka + * + * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors + * + * This file is released under the GPLv2. + */ + +#ifndef DM_VERITY_H +#define DM_VERITY_H + +#include "dm-bufio.h" +#include +#include + +#define DM_VERITY_MAX_LEVELS 63 + +enum verity_mode { + DM_VERITY_MODE_EIO, + DM_VERITY_MODE_LOGGING, + DM_VERITY_MODE_RESTART +}; + +enum verity_block_type { + DM_VERITY_BLOCK_TYPE_DATA, + DM_VERITY_BLOCK_TYPE_METADATA +}; + +struct dm_verity { + struct dm_dev *data_dev; + struct dm_dev *hash_dev; + struct dm_target *ti; + struct dm_bufio_client *bufio; + char *alg_name; + struct crypto_shash *tfm; + u8 *root_digest; /* digest of the root block */ + u8 *salt; /* salt: its size is salt_size */ + unsigned salt_size; + sector_t data_start; /* data offset in 512-byte sectors */ + sector_t hash_start; /* hash start in blocks */ + sector_t data_blocks; /* the number of data blocks */ + sector_t hash_blocks; /* the number of hash blocks */ + unsigned char data_dev_block_bits; /* log2(data blocksize) */ + unsigned char hash_dev_block_bits; /* log2(hash blocksize) */ + unsigned char hash_per_block_bits; /* log2(hashes in hash block) */ + unsigned char levels; /* the number of tree levels */ + unsigned char version; + unsigned digest_size; /* digest size for the current hash algorithm */ + unsigned shash_descsize;/* the size of temporary space for crypto */ + int hash_failed; /* set to 1 if hash of any block failed */ + enum verity_mode mode; /* mode for handling verification errors */ + unsigned corrupted_errs;/* Number of errors for corrupted blocks */ + + struct workqueue_struct *verify_wq; + + /* starting blocks for each tree level. 0 is the lowest level. */ + sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; +}; + +struct dm_verity_io { + struct dm_verity *v; + + /* original values of bio->bi_end_io and bio->bi_private */ + bio_end_io_t *orig_bi_end_io; + void *orig_bi_private; + + sector_t block; + unsigned n_blocks; + + struct bvec_iter iter; + + struct work_struct work; + + /* + * Three variably-size fields follow this struct: + * + * u8 hash_desc[v->shash_descsize]; + * u8 real_digest[v->digest_size]; + * u8 want_digest[v->digest_size]; + * + * To access them use: verity_io_hash_desc(), verity_io_real_digest() + * and verity_io_want_digest(). + */ +}; + +static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v, + struct dm_verity_io *io) +{ + return (struct shash_desc *)(io + 1); +} + +static inline u8 *verity_io_real_digest(struct dm_verity *v, + struct dm_verity_io *io) +{ + return (u8 *)(io + 1) + v->shash_descsize; +} + +static inline u8 *verity_io_want_digest(struct dm_verity *v, + struct dm_verity_io *io) +{ + return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; +} + +extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len, u8 *digest); + +extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, u8 *digest); + +#endif /* DM_VERITY_H */ -- cgit v1.2.3 From 42e8a0d21296f099fd75063b7e5f585379b3c1a7 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 14:28:16 +0000 Subject: UPSTREAM: dm verity: factor out verity_for_bv_block() (cherry-picked from bb4d73ac5e4f0a6c4853f35824f6cb2d396a2f9c) verity_for_bv_block() will be re-used by optional dm-verity object. Bug: 21893453 Change-Id: Ia7782db8077bcc46fd000f5be1919942c28b03db Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 72 +++++++++++++++++++++++++++++-------------- drivers/md/dm-verity.h | 6 ++++ 2 files changed, 55 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 82d8e71fec26..748ef75c938d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -333,19 +333,61 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +/* + * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec + * starting from iter. + */ +int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter *iter, + int (*process)(struct dm_verity *v, + struct dm_verity_io *io, u8 *data, + size_t len)) +{ + unsigned todo = 1 << v->data_dev_block_bits; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); + + do { + int r; + u8 *page; + unsigned len; + struct bio_vec bv = bio_iter_iovec(bio, *iter); + + page = kmap_atomic(bv.bv_page); + len = bv.bv_len; + + if (likely(len >= todo)) + len = todo; + + r = process(v, io, page + bv.bv_offset, len); + kunmap_atomic(page); + + if (r < 0) + return r; + + bio_advance_iter(bio, iter, len); + todo -= len; + } while (todo); + + return 0; +} + +static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + return verity_hash_update(v, verity_io_hash_desc(v, io), data, len); +} + /* * Verify one "dm_verity_io" structure. */ static int verity_verify_io(struct dm_verity_io *io) { struct dm_verity *v = io->v; - struct bio *bio = dm_bio_from_per_bio_data(io, - v->ti->per_bio_data_size); + struct bvec_iter start; unsigned b; for (b = 0; b < io->n_blocks; b++) { int r; - unsigned todo; struct shash_desc *desc = verity_io_hash_desc(v, io); r = verity_hash_for_block(v, io, io->block + b, @@ -357,26 +399,10 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; - todo = 1 << v->data_dev_block_bits; - do { - u8 *page; - unsigned len; - struct bio_vec bv = bio_iter_iovec(bio, io->iter); - - page = kmap_atomic(bv.bv_page); - len = bv.bv_len; - if (likely(len >= todo)) - len = todo; - r = verity_hash_update(v, desc, page + bv.bv_offset, - len); - kunmap_atomic(page); - - if (unlikely(r < 0)) - return r; - - bio_advance_iter(bio, &io->iter, len); - todo -= len; - } while (todo); + start = io->iter; + r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update); + if (unlikely(r < 0)) + return r; r = verity_hash_final(v, desc, verity_io_real_digest(v, io)); if (unlikely(r < 0)) diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index ecf4f05add40..4c2d3d87db28 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -104,6 +104,12 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; } +extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter *iter, + int (*process)(struct dm_verity *v, + struct dm_verity_io *io, + u8 *data, size_t len)); + extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, const u8 *data, size_t len, u8 *digest); -- cgit v1.2.3 From 656b900ff4c23a749d4392a92c0007eb086c322e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 14:31:20 +0000 Subject: UPSTREAM: dm verity: add support for forward error correction (cherry-picked from a739ff3f543afbb4a041c16cd0182c8e8d366e70) Add support for correcting corrupted blocks using Reed-Solomon. This code uses RS(255, N) interleaved across data and hash blocks. Each error-correcting block covers N bytes evenly distributed across the combined total data, so that each byte is a maximum distance away from the others. This makes it possible to recover from several consecutive corrupted blocks with relatively small space overhead. In addition, using verity hashes to locate erasures nearly doubles the effectiveness of error correction. Being able to detect corrupted blocks also improves performance, because only corrupted blocks need to corrected. For a 2 GiB partition, RS(255, 253) (two parity bytes for each 253-byte block) can correct up to 16 MiB of consecutive corrupted blocks if erasures can be located, and 8 MiB if they cannot, with 16 MiB space overhead. Bug: 21893453 Change-Id: Ice5afb0a1fc44fe1865c30ce8a95b9fe30144fac Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- Documentation/device-mapper/verity.txt | 35 +- drivers/md/Kconfig | 12 + drivers/md/Makefile | 4 + drivers/md/dm-verity-fec.c | 812 +++++++++++++++++++++++++++++++++ drivers/md/dm-verity-fec.h | 152 ++++++ drivers/md/dm-verity-target.c | 55 ++- drivers/md/dm-verity.h | 10 + 7 files changed, 1071 insertions(+), 9 deletions(-) create mode 100644 drivers/md/dm-verity-fec.c create mode 100644 drivers/md/dm-verity-fec.h diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 2929f6b1ccf1..83c7c99db0be 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -17,11 +17,11 @@ Construction Parameters 0 is the original format used in the Chromium OS. The salt is appended when hashing, digests are stored continuously and - the rest of the block is padded with zeros. + the rest of the block is padded with zeroes. 1 is the current format that should be used for new devices. The salt is prepended when hashing and each digest is - padded with zeros to the power of two. + padded with zeroes to the power of two. This is the device containing data, the integrity of which needs to be @@ -72,6 +72,32 @@ Construction Parameters notify user space. +use_fec_from_device + Use forward error correction (FEC) to recover from corruption if hash + verification fails. Use encoding data from the specified device. This + may be the same device where data and hash blocks reside, in which case + fec_start must be outside data and hash areas. + + If the encoding data covers additional metadata, it must be accessible + on the hash device after the hash blocks. + + Note: block sizes for data and hash devices must match. Also, if the + verity is encrypted the should be too. + +fec_roots + Number of generator roots. This equals to the number of parity bytes in + the encoding data. For example, in RS(M, N) encoding, the number of roots + is M-N. + +fec_blocks + The number of encoding data blocks on the FEC device. The block size for + the FEC device is . + +fec_start + This is the offset, in blocks, from the start of the + FEC device to the beginning of the encoding data. + + Theory of operation =================== @@ -91,6 +117,11 @@ per-block basis. This allows for a lightweight hash computation on first read into the page cache. Block hashes are stored linearly, aligned to the nearest block size. +If forward error correction (FEC) support is enabled any recovery of +corrupted data will be verified using the cryptographic hash of the +corresponding data. This is why combining error correction with +integrity checking is essential. + Hash Tree --------- diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 5bdedf6df153..33c3109f104e 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -418,6 +418,18 @@ config DM_VERITY If unsure, say N. +config DM_VERITY_FEC + bool "Verity forward error correction support" + depends on DM_VERITY + select REED_SOLOMON + select REED_SOLOMON_DEC8 + ---help--- + Add forward error correction support to dm-verity. This option + makes it possible to use pre-generated error correction data to + recover from corrupted blocks. + + If unsure, say N. + config DM_SWITCH tristate "Switch target support (EXPERIMENTAL)" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 2095ce7bf9ab..cb2a8800f1c8 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -60,3 +60,7 @@ obj-$(CONFIG_DM_ERA) += dm-era.o ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o endif + +ifeq ($(CONFIG_DM_VERITY_FEC),y) +dm-verity-objs += dm-verity-fec.o +endif diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c new file mode 100644 index 000000000000..88143d36a1d2 --- /dev/null +++ b/drivers/md/dm-verity-fec.c @@ -0,0 +1,812 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * Author: Sami Tolvanen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include "dm-verity-fec.h" +#include + +#define DM_MSG_PREFIX "verity-fec" + +/* + * If error correction has been configured, returns true. + */ +bool verity_fec_is_enabled(struct dm_verity *v) +{ + return v->fec && v->fec->dev; +} + +/* + * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable + * length fields. + */ +static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io) +{ + return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io); +} + +/* + * Return an interleaved offset for a byte in RS block. + */ +static inline u64 fec_interleave(struct dm_verity *v, u64 offset) +{ + u32 mod; + + mod = do_div(offset, v->fec->rsn); + return offset + mod * (v->fec->rounds << v->data_dev_block_bits); +} + +/* + * Decode an RS block using Reed-Solomon. + */ +static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio, + u8 *data, u8 *fec, int neras) +{ + int i; + uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN]; + + for (i = 0; i < v->fec->roots; i++) + par[i] = fec[i]; + + return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras, + fio->erasures, 0, NULL); +} + +/* + * Read error-correcting codes for the requested RS block. Returns a pointer + * to the data block. Caller is responsible for releasing buf. + */ +static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, + unsigned *offset, struct dm_buffer **buf) +{ + u64 position, block; + u8 *res; + + position = (index + rsb) * v->fec->roots; + block = position >> v->data_dev_block_bits; + *offset = (unsigned)(position - (block << v->data_dev_block_bits)); + + res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); + if (unlikely(IS_ERR(res))) { + DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", + v->data_dev->name, (unsigned long long)rsb, + (unsigned long long)(v->fec->start + block), + PTR_ERR(res)); + *buf = NULL; + } + + return res; +} + +/* Loop over each preallocated buffer slot. */ +#define fec_for_each_prealloc_buffer(__i) \ + for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++) + +/* Loop over each extra buffer slot. */ +#define fec_for_each_extra_buffer(io, __i) \ + for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++) + +/* Loop over each allocated buffer. */ +#define fec_for_each_buffer(io, __i) \ + for (__i = 0; __i < (io)->nbufs; __i++) + +/* Loop over each RS block in each allocated buffer. */ +#define fec_for_each_buffer_rs_block(io, __i, __j) \ + fec_for_each_buffer(io, __i) \ + for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++) + +/* + * Return a pointer to the current RS block when called inside + * fec_for_each_buffer_rs_block. + */ +static inline u8 *fec_buffer_rs_block(struct dm_verity *v, + struct dm_verity_fec_io *fio, + unsigned i, unsigned j) +{ + return &fio->bufs[i][j * v->fec->rsn]; +} + +/* + * Return an index to the current RS block when called inside + * fec_for_each_buffer_rs_block. + */ +static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j) +{ + return (i << DM_VERITY_FEC_BUF_RS_BITS) + j; +} + +/* + * Decode all RS blocks from buffers and copy corrected bytes into fio->output + * starting from block_offset. + */ +static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, + u64 rsb, int byte_index, unsigned block_offset, + int neras) +{ + int r, corrected = 0, res; + struct dm_buffer *buf; + unsigned n, i, offset; + u8 *par, *block; + + par = fec_read_parity(v, rsb, block_offset, &offset, &buf); + if (IS_ERR(par)) + return PTR_ERR(par); + + /* + * Decode the RS blocks we have in bufs. Each RS block results in + * one corrected target byte and consumes fec->roots parity bytes. + */ + fec_for_each_buffer_rs_block(fio, n, i) { + block = fec_buffer_rs_block(v, fio, n, i); + res = fec_decode_rs8(v, fio, block, &par[offset], neras); + if (res < 0) { + dm_bufio_release(buf); + + r = res; + goto error; + } + + corrected += res; + fio->output[block_offset] = block[byte_index]; + + block_offset++; + if (block_offset >= 1 << v->data_dev_block_bits) + goto done; + + /* read the next block when we run out of parity bytes */ + offset += v->fec->roots; + if (offset >= 1 << v->data_dev_block_bits) { + dm_bufio_release(buf); + + par = fec_read_parity(v, rsb, block_offset, &offset, &buf); + if (unlikely(IS_ERR(par))) + return PTR_ERR(par); + } + } +done: + r = corrected; +error: + if (r < 0 && neras) + DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", + v->data_dev->name, (unsigned long long)rsb, r); + else if (r > 0) + DMWARN_LIMIT("%s: FEC %llu: corrected %d errors", + v->data_dev->name, (unsigned long long)rsb, r); + + return r; +} + +/* + * Locate data block erasures using verity hashes. + */ +static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, + u8 *want_digest, u8 *data) +{ + if (unlikely(verity_hash(v, verity_io_hash_desc(v, io), + data, 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io)))) + return 0; + + return memcmp(verity_io_real_digest(v, io), want_digest, + v->digest_size) != 0; +} + +/* + * Read data blocks that are part of the RS block and deinterleave as much as + * fits into buffers. Check for erasure locations if @neras is non-NULL. + */ +static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, + u64 rsb, u64 target, unsigned block_offset, + int *neras) +{ + int i, j, target_index = -1; + struct dm_buffer *buf; + struct dm_bufio_client *bufio; + struct dm_verity_fec_io *fio = fec_io(io); + u64 block, ileaved; + u8 *bbuf, *rs_block; + u8 want_digest[v->digest_size]; + unsigned n, k; + + if (neras) + *neras = 0; + + /* + * read each of the rsn data blocks that are part of the RS block, and + * interleave contents to available bufs + */ + for (i = 0; i < v->fec->rsn; i++) { + ileaved = fec_interleave(v, rsb * v->fec->rsn + i); + + /* + * target is the data block we want to correct, target_index is + * the index of this block within the rsn RS blocks + */ + if (ileaved == target) + target_index = i; + + block = ileaved >> v->data_dev_block_bits; + bufio = v->fec->data_bufio; + + if (block >= v->data_blocks) { + block -= v->data_blocks; + + /* + * blocks outside the area were assumed to contain + * zeros when encoding data was generated + */ + if (unlikely(block >= v->fec->hash_blocks)) + continue; + + block += v->hash_start; + bufio = v->bufio; + } + + bbuf = dm_bufio_read(bufio, block, &buf); + if (unlikely(IS_ERR(bbuf))) { + DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld", + v->data_dev->name, + (unsigned long long)rsb, + (unsigned long long)block, PTR_ERR(bbuf)); + + /* assume the block is corrupted */ + if (neras && *neras <= v->fec->roots) + fio->erasures[(*neras)++] = i; + + continue; + } + + /* locate erasures if the block is on the data device */ + if (bufio == v->fec->data_bufio && + verity_hash_for_block(v, io, block, want_digest) == 0) { + /* + * skip if we have already found the theoretical + * maximum number (i.e. fec->roots) of erasures + */ + if (neras && *neras <= v->fec->roots && + fec_is_erasure(v, io, want_digest, bbuf)) + fio->erasures[(*neras)++] = i; + } + + /* + * deinterleave and copy the bytes that fit into bufs, + * starting from block_offset + */ + fec_for_each_buffer_rs_block(fio, n, j) { + k = fec_buffer_rs_index(n, j) + block_offset; + + if (k >= 1 << v->data_dev_block_bits) + goto done; + + rs_block = fec_buffer_rs_block(v, fio, n, j); + rs_block[i] = bbuf[k]; + } +done: + dm_bufio_release(buf); + } + + return target_index; +} + +/* + * Allocate RS control structure and FEC buffers from preallocated mempools, + * and attempt to allocate as many extra buffers as available. + */ +static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) +{ + unsigned n; + + if (!fio->rs) { + fio->rs = mempool_alloc(v->fec->rs_pool, 0); + if (unlikely(!fio->rs)) { + DMERR("failed to allocate RS"); + return -ENOMEM; + } + } + + fec_for_each_prealloc_buffer(n) { + if (fio->bufs[n]) + continue; + + fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO); + if (unlikely(!fio->bufs[n])) { + DMERR("failed to allocate FEC buffer"); + return -ENOMEM; + } + } + + /* try to allocate the maximum number of buffers */ + fec_for_each_extra_buffer(fio, n) { + if (fio->bufs[n]) + continue; + + fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO); + /* we can manage with even one buffer if necessary */ + if (unlikely(!fio->bufs[n])) + break; + } + fio->nbufs = n; + + if (!fio->output) { + fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO); + + if (!fio->output) { + DMERR("failed to allocate FEC page"); + return -ENOMEM; + } + } + + return 0; +} + +/* + * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are + * zeroed before deinterleaving. + */ +static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) +{ + unsigned n; + + fec_for_each_buffer(fio, n) + memset(fio->bufs[n], 0, v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS); + + memset(fio->erasures, 0, sizeof(fio->erasures)); +} + +/* + * Decode all RS blocks in a single data block and return the target block + * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses + * hashes to locate erasures. + */ +static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, + struct dm_verity_fec_io *fio, u64 rsb, u64 offset, + bool use_erasures) +{ + int r, neras = 0; + unsigned pos; + + r = fec_alloc_bufs(v, fio); + if (unlikely(r < 0)) + return r; + + for (pos = 0; pos < 1 << v->data_dev_block_bits; ) { + fec_init_bufs(v, fio); + + r = fec_read_bufs(v, io, rsb, offset, pos, + use_erasures ? &neras : NULL); + if (unlikely(r < 0)) + return r; + + r = fec_decode_bufs(v, fio, rsb, r, pos, neras); + if (r < 0) + return r; + + pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS; + } + + /* Always re-validate the corrected block against the expected hash */ + r = verity_hash(v, verity_io_hash_desc(v, io), fio->output, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io)); + if (unlikely(r < 0)) + return r; + + if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io), + v->digest_size)) { + DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)", + v->data_dev->name, (unsigned long long)rsb, neras); + return -EILSEQ; + } + + return 0; +} + +static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data, + size_t len) +{ + struct dm_verity_fec_io *fio = fec_io(io); + + memcpy(data, &fio->output[fio->output_pos], len); + fio->output_pos += len; + + return 0; +} + +/* + * Correct errors in a block. Copies corrected block to dest if non-NULL, + * otherwise to a bio_vec starting from iter. + */ +int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, + enum verity_block_type type, sector_t block, u8 *dest, + struct bvec_iter *iter) +{ + int r; + struct dm_verity_fec_io *fio = fec_io(io); + u64 offset, res, rsb; + + if (!verity_fec_is_enabled(v)) + return -EOPNOTSUPP; + + if (type == DM_VERITY_BLOCK_TYPE_METADATA) + block += v->data_blocks; + + /* + * For RS(M, N), the continuous FEC data is divided into blocks of N + * bytes. Since block size may not be divisible by N, the last block + * is zero padded when decoding. + * + * Each byte of the block is covered by a different RS(M, N) code, + * and each code is interleaved over N blocks to make it less likely + * that bursty corruption will leave us in unrecoverable state. + */ + + offset = block << v->data_dev_block_bits; + + res = offset; + div64_u64(res, v->fec->rounds << v->data_dev_block_bits); + + /* + * The base RS block we can feed to the interleaver to find out all + * blocks required for decoding. + */ + rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits); + + /* + * Locating erasures is slow, so attempt to recover the block without + * them first. Do a second attempt with erasures if the corruption is + * bad enough. + */ + r = fec_decode_rsb(v, io, fio, rsb, offset, false); + if (r < 0) { + r = fec_decode_rsb(v, io, fio, rsb, offset, true); + if (r < 0) + return r; + } + + if (dest) + memcpy(dest, fio->output, 1 << v->data_dev_block_bits); + else if (iter) { + fio->output_pos = 0; + r = verity_for_bv_block(v, io, iter, fec_bv_copy); + } + + return r; +} + +/* + * Clean up per-bio data. + */ +void verity_fec_finish_io(struct dm_verity_io *io) +{ + unsigned n; + struct dm_verity_fec *f = io->v->fec; + struct dm_verity_fec_io *fio = fec_io(io); + + if (!verity_fec_is_enabled(io->v)) + return; + + mempool_free(fio->rs, f->rs_pool); + + fec_for_each_prealloc_buffer(n) + mempool_free(fio->bufs[n], f->prealloc_pool); + + fec_for_each_extra_buffer(fio, n) + mempool_free(fio->bufs[n], f->extra_pool); + + mempool_free(fio->output, f->output_pool); +} + +/* + * Initialize per-bio data. + */ +void verity_fec_init_io(struct dm_verity_io *io) +{ + struct dm_verity_fec_io *fio = fec_io(io); + + if (!verity_fec_is_enabled(io->v)) + return; + + fio->rs = NULL; + memset(fio->bufs, 0, sizeof(fio->bufs)); + fio->nbufs = 0; + fio->output = NULL; +} + +/* + * Append feature arguments and values to the status table. + */ +unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, + char *result, unsigned maxlen) +{ + if (!verity_fec_is_enabled(v)) + return sz; + + DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s " + DM_VERITY_OPT_FEC_BLOCKS " %llu " + DM_VERITY_OPT_FEC_START " %llu " + DM_VERITY_OPT_FEC_ROOTS " %d", + v->fec->dev->name, + (unsigned long long)v->fec->blocks, + (unsigned long long)v->fec->start, + v->fec->roots); + + return sz; +} + +void verity_fec_dtr(struct dm_verity *v) +{ + struct dm_verity_fec *f = v->fec; + + if (!verity_fec_is_enabled(v)) + goto out; + + mempool_destroy(f->rs_pool); + mempool_destroy(f->prealloc_pool); + mempool_destroy(f->extra_pool); + kmem_cache_destroy(f->cache); + + if (f->data_bufio) + dm_bufio_client_destroy(f->data_bufio); + if (f->bufio) + dm_bufio_client_destroy(f->bufio); + + if (f->dev) + dm_put_device(v->ti, f->dev); +out: + kfree(f); + v->fec = NULL; +} + +static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data) +{ + struct dm_verity *v = (struct dm_verity *)pool_data; + + return init_rs(8, 0x11d, 0, 1, v->fec->roots); +} + +static void fec_rs_free(void *element, void *pool_data) +{ + struct rs_control *rs = (struct rs_control *)element; + + if (rs) + free_rs(rs); +} + +bool verity_is_fec_opt_arg(const char *arg_name) +{ + return (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV) || + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS) || + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_START) || + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)); +} + +int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, + unsigned *argc, const char *arg_name) +{ + int r; + struct dm_target *ti = v->ti; + const char *arg_value; + unsigned long long num_ll; + unsigned char num_c; + char dummy; + + if (!*argc) { + ti->error = "FEC feature arguments require a value"; + return -EINVAL; + } + + arg_value = dm_shift_arg(as); + (*argc)--; + + if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) { + r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev); + if (r) { + ti->error = "FEC device lookup failed"; + return r; + } + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) { + if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 || + ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS; + return -EINVAL; + } + v->fec->blocks = num_ll; + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) { + if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 || + ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) >> + (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_START; + return -EINVAL; + } + v->fec->start = num_ll; + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) { + if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c || + num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) || + num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS; + return -EINVAL; + } + v->fec->roots = num_c; + + } else { + ti->error = "Unrecognized verity FEC feature request"; + return -EINVAL; + } + + return 0; +} + +/* + * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr. + */ +int verity_fec_ctr_alloc(struct dm_verity *v) +{ + struct dm_verity_fec *f; + + f = kzalloc(sizeof(struct dm_verity_fec), GFP_KERNEL); + if (!f) { + v->ti->error = "Cannot allocate FEC structure"; + return -ENOMEM; + } + v->fec = f; + + return 0; +} + +/* + * Validate arguments and preallocate memory. Must be called after arguments + * have been parsed using verity_fec_parse_opt_args. + */ +int verity_fec_ctr(struct dm_verity *v) +{ + struct dm_verity_fec *f = v->fec; + struct dm_target *ti = v->ti; + u64 hash_blocks; + + if (!verity_fec_is_enabled(v)) { + verity_fec_dtr(v); + return 0; + } + + /* + * FEC is computed over data blocks, possible metadata, and + * hash blocks. In other words, FEC covers total of fec_blocks + * blocks consisting of the following: + * + * data blocks | hash blocks | metadata (optional) + * + * We allow metadata after hash blocks to support a use case + * where all data is stored on the same device and FEC covers + * the entire area. + * + * If metadata is included, we require it to be available on the + * hash device after the hash blocks. + */ + + hash_blocks = v->hash_blocks - v->hash_start; + + /* + * Require matching block sizes for data and hash devices for + * simplicity. + */ + if (v->data_dev_block_bits != v->hash_dev_block_bits) { + ti->error = "Block sizes must match to use FEC"; + return -EINVAL; + } + + if (!f->roots) { + ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS; + return -EINVAL; + } + f->rsn = DM_VERITY_FEC_RSM - f->roots; + + if (!f->blocks) { + ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS; + return -EINVAL; + } + + f->rounds = f->blocks; + if (sector_div(f->rounds, f->rsn)) + f->rounds++; + + /* + * Due to optional metadata, f->blocks can be larger than + * data_blocks and hash_blocks combined. + */ + if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS; + return -EINVAL; + } + + /* + * Metadata is accessed through the hash device, so we require + * it to be large enough. + */ + f->hash_blocks = f->blocks - v->data_blocks; + if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) { + ti->error = "Hash device is too small for " + DM_VERITY_OPT_FEC_BLOCKS; + return -E2BIG; + } + + f->bufio = dm_bufio_client_create(f->dev->bdev, + 1 << v->data_dev_block_bits, + 1, 0, NULL, NULL); + if (IS_ERR(f->bufio)) { + ti->error = "Cannot initialize FEC bufio client"; + return PTR_ERR(f->bufio); + } + + if (dm_bufio_get_device_size(f->bufio) < + ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) { + ti->error = "FEC device is too small"; + return -E2BIG; + } + + f->data_bufio = dm_bufio_client_create(v->data_dev->bdev, + 1 << v->data_dev_block_bits, + 1, 0, NULL, NULL); + if (IS_ERR(f->data_bufio)) { + ti->error = "Cannot initialize FEC data bufio client"; + return PTR_ERR(f->data_bufio); + } + + if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) { + ti->error = "Data device is too small"; + return -E2BIG; + } + + /* Preallocate an rs_control structure for each worker thread */ + f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc, + fec_rs_free, (void *) v); + if (!f->rs_pool) { + ti->error = "Cannot allocate RS pool"; + return -ENOMEM; + } + + f->cache = kmem_cache_create("dm_verity_fec_buffers", + f->rsn << DM_VERITY_FEC_BUF_RS_BITS, + 0, 0, NULL); + if (!f->cache) { + ti->error = "Cannot create FEC buffer cache"; + return -ENOMEM; + } + + /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */ + f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() * + DM_VERITY_FEC_BUF_PREALLOC, + f->cache); + if (!f->prealloc_pool) { + ti->error = "Cannot allocate FEC buffer prealloc pool"; + return -ENOMEM; + } + + f->extra_pool = mempool_create_slab_pool(0, f->cache); + if (!f->extra_pool) { + ti->error = "Cannot allocate FEC buffer extra pool"; + return -ENOMEM; + } + + /* Preallocate an output buffer for each thread */ + f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(), + 1 << v->data_dev_block_bits); + if (!f->output_pool) { + ti->error = "Cannot allocate FEC output pool"; + return -ENOMEM; + } + + /* Reserve space for our per-bio data */ + ti->per_bio_data_size += sizeof(struct dm_verity_fec_io); + + return 0; +} diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h new file mode 100644 index 000000000000..7fa0298b995e --- /dev/null +++ b/drivers/md/dm-verity-fec.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * Author: Sami Tolvanen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#ifndef DM_VERITY_FEC_H +#define DM_VERITY_FEC_H + +#include "dm-verity.h" +#include + +/* Reed-Solomon(M, N) parameters */ +#define DM_VERITY_FEC_RSM 255 +#define DM_VERITY_FEC_MAX_RSN 253 +#define DM_VERITY_FEC_MIN_RSN 231 /* ~10% space overhead */ + +/* buffers for deinterleaving and decoding */ +#define DM_VERITY_FEC_BUF_PREALLOC 1 /* buffers to preallocate */ +#define DM_VERITY_FEC_BUF_RS_BITS 4 /* 1 << RS blocks per buffer */ +/* we need buffers for at most 1 << block size RS blocks */ +#define DM_VERITY_FEC_BUF_MAX \ + (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) + +#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" +#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" +#define DM_VERITY_OPT_FEC_START "fec_start" +#define DM_VERITY_OPT_FEC_ROOTS "fec_roots" + +/* configuration */ +struct dm_verity_fec { + struct dm_dev *dev; /* parity data device */ + struct dm_bufio_client *data_bufio; /* for data dev access */ + struct dm_bufio_client *bufio; /* for parity data access */ + sector_t start; /* parity data start in blocks */ + sector_t blocks; /* number of blocks covered */ + sector_t rounds; /* number of interleaving rounds */ + sector_t hash_blocks; /* blocks covered after v->hash_start */ + unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */ + unsigned char rsn; /* N of RS(M, N) */ + mempool_t *rs_pool; /* mempool for fio->rs */ + mempool_t *prealloc_pool; /* mempool for preallocated buffers */ + mempool_t *extra_pool; /* mempool for extra buffers */ + mempool_t *output_pool; /* mempool for output */ + struct kmem_cache *cache; /* cache for buffers */ +}; + +/* per-bio data */ +struct dm_verity_fec_io { + struct rs_control *rs; /* Reed-Solomon state */ + int erasures[DM_VERITY_FEC_MAX_RSN]; /* erasures for decode_rs8 */ + u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */ + unsigned nbufs; /* number of buffers allocated */ + u8 *output; /* buffer for corrected output */ + size_t output_pos; +}; + +#ifdef CONFIG_DM_VERITY_FEC + +/* each feature parameter requires a value */ +#define DM_VERITY_OPTS_FEC 8 + +extern bool verity_fec_is_enabled(struct dm_verity *v); + +extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, + enum verity_block_type type, sector_t block, + u8 *dest, struct bvec_iter *iter); + +extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, + char *result, unsigned maxlen); + +extern void verity_fec_finish_io(struct dm_verity_io *io); +extern void verity_fec_init_io(struct dm_verity_io *io); + +extern bool verity_is_fec_opt_arg(const char *arg_name); +extern int verity_fec_parse_opt_args(struct dm_arg_set *as, + struct dm_verity *v, unsigned *argc, + const char *arg_name); + +extern void verity_fec_dtr(struct dm_verity *v); + +extern int verity_fec_ctr_alloc(struct dm_verity *v); +extern int verity_fec_ctr(struct dm_verity *v); + +#else /* !CONFIG_DM_VERITY_FEC */ + +#define DM_VERITY_OPTS_FEC 0 + +static inline bool verity_fec_is_enabled(struct dm_verity *v) +{ + return false; +} + +static inline int verity_fec_decode(struct dm_verity *v, + struct dm_verity_io *io, + enum verity_block_type type, + sector_t block, u8 *dest, + struct bvec_iter *iter) +{ + return -EOPNOTSUPP; +} + +static inline unsigned verity_fec_status_table(struct dm_verity *v, + unsigned sz, char *result, + unsigned maxlen) +{ + return sz; +} + +static inline void verity_fec_finish_io(struct dm_verity_io *io) +{ +} + +static inline void verity_fec_init_io(struct dm_verity_io *io) +{ +} + +static inline bool verity_is_fec_opt_arg(const char *arg_name) +{ + return false; +} + +static inline int verity_fec_parse_opt_args(struct dm_arg_set *as, + struct dm_verity *v, + unsigned *argc, + const char *arg_name) +{ + return -EINVAL; +} + +static inline void verity_fec_dtr(struct dm_verity *v) +{ +} + +static inline int verity_fec_ctr_alloc(struct dm_verity *v) +{ + return 0; +} + +static inline int verity_fec_ctr(struct dm_verity *v) +{ + return 0; +} + +#endif /* CONFIG_DM_VERITY_FEC */ + +#endif /* DM_VERITY_FEC_H */ diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 748ef75c938d..0b927cf54e8b 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -15,6 +15,7 @@ */ #include "dm-verity.h" +#include "dm-verity-fec.h" #include #include @@ -31,7 +32,7 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" -#define DM_VERITY_OPTS_MAX 1 +#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC) static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; @@ -282,6 +283,10 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, if (likely(memcmp(verity_io_real_digest(v, io), want_digest, v->digest_size) == 0)) aux->hash_verified = 1; + else if (verity_fec_decode(v, io, + DM_VERITY_BLOCK_TYPE_METADATA, + hash_block, data, NULL) == 0) + aux->hash_verified = 1; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, hash_block)) { @@ -411,8 +416,11 @@ static int verity_verify_io(struct dm_verity_io *io) if (likely(memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io), v->digest_size) == 0)) continue; + else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, + io->block + b, NULL, &start) == 0) + continue; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) + io->block + b)) return -EIO; } @@ -430,6 +438,8 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; + verity_fec_finish_io(io); + bio_endio_nodec(bio, error); } @@ -444,7 +454,7 @@ static void verity_end_io(struct bio *bio, int error) { struct dm_verity_io *io = bio->bi_private; - if (error) { + if (error && !verity_fec_is_enabled(io->v)) { verity_finish_io(io, error); return; } @@ -548,6 +558,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio) bio->bi_private = io; io->iter = bio->bi_iter; + verity_fec_init_io(io); + verity_submit_prefetch(v, io); generic_make_request(bio); @@ -562,6 +574,7 @@ static void verity_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; + unsigned args = 0; unsigned sz = 0; unsigned x; @@ -588,8 +601,15 @@ static void verity_status(struct dm_target *ti, status_type_t type, else for (x = 0; x < v->salt_size; x++) DMEMIT("%02x", v->salt[x]); + if (v->mode != DM_VERITY_MODE_EIO) + args++; + if (verity_fec_is_enabled(v)) + args += DM_VERITY_OPTS_FEC; + if (!args) + return; + DMEMIT(" %u", args); if (v->mode != DM_VERITY_MODE_EIO) { - DMEMIT(" 1 "); + DMEMIT(" "); switch (v->mode) { case DM_VERITY_MODE_LOGGING: DMEMIT(DM_VERITY_OPT_LOGGING); @@ -601,6 +621,7 @@ static void verity_status(struct dm_target *ti, status_type_t type, BUG(); } } + sz = verity_fec_status_table(v, sz, result, maxlen); break; } } @@ -679,6 +700,8 @@ static void verity_dtr(struct dm_target *ti) if (v->data_dev) dm_put_device(ti, v->data_dev); + verity_fec_dtr(v); + kfree(v); } @@ -711,6 +734,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { v->mode = DM_VERITY_MODE_RESTART; continue; + + } else if (verity_is_fec_opt_arg(arg_name)) { + r = verity_fec_parse_opt_args(as, v, &argc, arg_name); + if (r) + return r; + continue; } ti->error = "Unrecognized verity feature request"; @@ -753,6 +782,10 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->private = v; v->ti = ti; + r = verity_fec_ctr_alloc(v); + if (r) + goto bad; + if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) { ti->error = "Device must be readonly"; r = -EINVAL; @@ -941,8 +974,6 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); - /* WQ_UNBOUND greatly improves performance when running on ramdisk */ v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); if (!v->verify_wq) { @@ -951,6 +982,16 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + ti->per_bio_data_size = sizeof(struct dm_verity_io) + + v->shash_descsize + v->digest_size * 2; + + r = verity_fec_ctr(v); + if (r) + goto bad; + + ti->per_bio_data_size = roundup(ti->per_bio_data_size, + __alignof__(struct dm_verity_io)); + return 0; bad: @@ -961,7 +1002,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 4c2d3d87db28..daf2172497e6 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -29,6 +29,8 @@ enum verity_block_type { DM_VERITY_BLOCK_TYPE_METADATA }; +struct dm_verity_fec; + struct dm_verity { struct dm_dev *data_dev; struct dm_dev *hash_dev; @@ -58,6 +60,8 @@ struct dm_verity { /* starting blocks for each tree level. 0 is the lowest level. */ sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; + + struct dm_verity_fec *fec; /* forward error correction */ }; struct dm_verity_io { @@ -104,6 +108,12 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; } +static inline u8 *verity_io_digest_end(struct dm_verity *v, + struct dm_verity_io *io) +{ + return verity_io_want_digest(v, io) + v->digest_size; +} + extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, struct bvec_iter *iter, int (*process)(struct dm_verity *v, -- cgit v1.2.3 From 7112f730a3061f3c726651eef8bc298cf7a2e4ce Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 14:32:37 +0000 Subject: UPSTREAM: dm verity: add ignore_zero_blocks feature (cherry-picked from 0cc37c2df4fa0aa702f9662edce4b7ce12c86b7a) If ignore_zero_blocks is enabled dm-verity will return zeroes for blocks matching a zero hash without validating the content. Bug: 21893453 Change-Id: Ief4cc7c07d6166fa33fab5faf7de8b9a4a470656 Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- Documentation/device-mapper/verity.txt | 5 ++ drivers/md/dm-verity-fec.c | 8 +++- drivers/md/dm-verity-target.c | 87 ++++++++++++++++++++++++++++++---- drivers/md/dm-verity.h | 3 +- 4 files changed, 93 insertions(+), 10 deletions(-) diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 83c7c99db0be..59a66abd62c0 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -72,6 +72,11 @@ Construction Parameters notify user space. +ignore_zero_blocks + Do not verify blocks that are expected to contain zeroes and always return + zeroes instead. This may be useful if the partition contains unused blocks + that are not guaranteed to contain zeroes. + use_fec_from_device Use forward error correction (FEC) to recover from corruption if hash verification fails. Use encoding data from the specified device. This diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 88143d36a1d2..1cc10c4de701 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -205,6 +205,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, u64 rsb, u64 target, unsigned block_offset, int *neras) { + bool is_zero; int i, j, target_index = -1; struct dm_buffer *buf; struct dm_bufio_client *bufio; @@ -264,7 +265,12 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, /* locate erasures if the block is on the data device */ if (bufio == v->fec->data_bufio && - verity_hash_for_block(v, io, block, want_digest) == 0) { + verity_hash_for_block(v, io, block, want_digest, + &is_zero) == 0) { + /* skip known zero blocks entirely */ + if (is_zero) + continue; + /* * skip if we have already found the theoretical * maximum number (i.e. fec->roots) of erasures diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 0b927cf54e8b..4258e175513d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -31,8 +31,9 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" +#define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" -#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC) +#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC) static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; @@ -309,10 +310,9 @@ release_ret_r: * of the hash tree if necessary. */ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, u8 *digest) + sector_t block, u8 *digest, bool *is_zero) { - int i; - int r; + int r = 0, i; if (likely(v->levels)) { /* @@ -324,7 +324,7 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, */ r = verity_verify_level(v, io, block, 0, true, digest); if (likely(r <= 0)) - return r; + goto out; } memcpy(digest, v->root_digest, v->digest_size); @@ -332,10 +332,15 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, for (i = v->levels - 1; i >= 0; i--) { r = verity_verify_level(v, io, block, i, false, digest); if (unlikely(r)) - return r; + goto out; } +out: + if (!r && v->zero_digest) + *is_zero = !memcmp(v->zero_digest, digest, v->digest_size); + else + *is_zero = false; - return 0; + return r; } /* @@ -382,11 +387,19 @@ static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io, return verity_hash_update(v, verity_io_hash_desc(v, io), data, len); } +static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memset(data, 0, len); + return 0; +} + /* * Verify one "dm_verity_io" structure. */ static int verity_verify_io(struct dm_verity_io *io) { + bool is_zero; struct dm_verity *v = io->v; struct bvec_iter start; unsigned b; @@ -396,10 +409,24 @@ static int verity_verify_io(struct dm_verity_io *io) struct shash_desc *desc = verity_io_hash_desc(v, io); r = verity_hash_for_block(v, io, io->block + b, - verity_io_want_digest(v, io)); + verity_io_want_digest(v, io), + &is_zero); if (unlikely(r < 0)) return r; + if (is_zero) { + /* + * If we expect a zero block, don't validate, just + * return zeros. + */ + r = verity_for_bv_block(v, io, &io->iter, + verity_bv_zero); + if (unlikely(r < 0)) + return r; + + continue; + } + r = verity_hash_init(v, desc); if (unlikely(r < 0)) return r; @@ -605,6 +632,8 @@ static void verity_status(struct dm_target *ti, status_type_t type, args++; if (verity_fec_is_enabled(v)) args += DM_VERITY_OPTS_FEC; + if (v->zero_digest) + args++; if (!args) return; DMEMIT(" %u", args); @@ -621,6 +650,8 @@ static void verity_status(struct dm_target *ti, status_type_t type, BUG(); } } + if (v->zero_digest) + DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES); sz = verity_fec_status_table(v, sz, result, maxlen); break; } @@ -688,6 +719,7 @@ static void verity_dtr(struct dm_target *ti) kfree(v->salt); kfree(v->root_digest); + kfree(v->zero_digest); if (v->tfm) crypto_free_shash(v->tfm); @@ -705,6 +737,37 @@ static void verity_dtr(struct dm_target *ti) kfree(v); } +static int verity_alloc_zero_digest(struct dm_verity *v) +{ + int r = -ENOMEM; + struct shash_desc *desc; + u8 *zero_data; + + v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL); + + if (!v->zero_digest) + return r; + + desc = kmalloc(v->shash_descsize, GFP_KERNEL); + + if (!desc) + return r; /* verity_dtr will free zero_digest */ + + zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL); + + if (!zero_data) + goto out; + + r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits, + v->zero_digest); + +out: + kfree(desc); + kfree(zero_data); + + return r; +} + static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) { int r; @@ -735,6 +798,14 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) v->mode = DM_VERITY_MODE_RESTART; continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) { + r = verity_alloc_zero_digest(v); + if (r) { + ti->error = "Cannot allocate zero digest"; + return r; + } + continue; + } else if (verity_is_fec_opt_arg(arg_name)) { r = verity_fec_parse_opt_args(as, v, &argc, arg_name); if (r) diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index daf2172497e6..85735eb85647 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -40,6 +40,7 @@ struct dm_verity { struct crypto_shash *tfm; u8 *root_digest; /* digest of the root block */ u8 *salt; /* salt: its size is salt_size */ + u8 *zero_digest; /* digest for a zero block */ unsigned salt_size; sector_t data_start; /* data offset in 512-byte sectors */ sector_t hash_start; /* hash start in blocks */ @@ -124,6 +125,6 @@ extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, const u8 *data, size_t len, u8 *digest); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, u8 *digest); + sector_t block, u8 *digest, bool *is_zero); #endif /* DM_VERITY_H */ -- cgit v1.2.3 From 9408350ed80005174918ce5147490035b2cf451b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 16:23:49 +0000 Subject: ANDROID: android: base-cfg: enable CONFIG_DM_VERITY_FEC Bug: 21893453 Change-Id: Idd0dfe4e3e527df2eff2f0d734effc40dce294c7 Signed-off-by: Sami Tolvanen --- android/configs/android-base.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 930f28d539ab..89a3f9338994 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -21,6 +21,7 @@ CONFIG_CGROUP_SCHED=y CONFIG_CP15_BARRIER_EMULATION=y CONFIG_DM_CRYPT=y CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y CONFIG_EMBEDDED=y CONFIG_FB=y CONFIG_HIGH_RES_TIMERS=y -- cgit v1.2.3 From c8247d3644af7b5fde1129bcc667452e9d8c3965 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:30:06 -0800 Subject: UPSTREAM: proc: actually make proc_fd_permission() thread-friendly (cherry pick from commit 54708d2858e79a2bdda10bf8a20c80eb96c20613) The commit 96d0df79f264 ("proc: make proc_fd_permission() thread-friendly") fixed the access to /proc/self/fd from sub-threads, but introduced another problem: a sub-thread can't access /proc//fd/ or /proc/thread-self/fd if generic_permission() fails. Change proc_fd_permission() to check same_thread_group(pid_task(), current). Fixes: 96d0df79f264 ("proc: make proc_fd_permission() thread-friendly") Reported-by: "Jin, Yihua" Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 26016905 Change-Id: I786b4bdca2e32c100f7df40cb26804da2f579b6f --- fs/proc/fd.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/proc/fd.c b/fs/proc/fd.c index e11d7c590bb0..3e5f0eff02f9 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -283,11 +283,19 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, */ int proc_fd_permission(struct inode *inode, int mask) { - int rv = generic_permission(inode, mask); + struct task_struct *p; + int rv; + + rv = generic_permission(inode, mask); if (rv == 0) - return 0; - if (task_tgid(current) == proc_pid(inode)) + return rv; + + rcu_read_lock(); + p = pid_task(proc_pid(inode), PIDTYPE_PID); + if (p && same_thread_group(p, current)) rv = 0; + rcu_read_unlock(); + return rv; } -- cgit v1.2.3 From 39e4c69f0a8724592916edaff175a37d6b12131c Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Wed, 30 Dec 2015 09:26:15 -0800 Subject: ANDROID: rtc-palmas: correct for bcd year Replace bcd2bin and bin2bcd with one that maps years 1970 to 2129 in a pattern that works with the underlying hardware. The only transition that does not work correctly for this rtc clock is the transition from 2099 to 2100, it proceeds to 2000. The rtc clock retains and transitions the year correctly in all other circumstances. Signed-off-by: Mark Salyzyn Bug: 26346842 Change-Id: I99f784d1f2b8ce063b3713df0914c7d8814a4e35 --- drivers/rtc/rtc-palmas.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 4dfe2d793fa3..d2346e0b4d29 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c @@ -45,6 +45,42 @@ struct palmas_rtc { /* Total number of RTC registers needed to set time*/ #define PALMAS_NUM_TIME_REGS (PALMAS_YEARS_REG - PALMAS_SECONDS_REG + 1) +/* + * Special bin2bcd mapping to deal with bcd storage of year. + * + * 0-69 -> 0xD0 + * 70-99 (1970 - 1999) -> 0xD0 - 0xF9 (correctly rolls to 0x00) + * 100-199 (2000 - 2099) -> 0x00 - 0x99 (does not roll to 0xA0 :-( ) + * 200-229 (2100 - 2129) -> 0xA0 - 0xC9 (really for completeness) + * 230- -> 0xC9 + * + * Confirmed: the only transition that does not work correctly for this rtc + * clock is the transition from 2099 to 2100, it proceeds to 2000. We will + * accept this issue since the clock retains and transitions the year correctly + * in all other conditions. + */ +static unsigned char year_bin2bcd(int val) +{ + if (val < 70) + return 0xD0; + if (val < 100) + return bin2bcd(val - 20) | 0x80; /* KISS leverage of bin2bcd */ + if (val >= 230) + return 0xC9; + if (val >= 200) + return bin2bcd(val - 180) | 0x80; + return bin2bcd(val - 100); +} + +static int year_bcd2bin(unsigned char val) +{ + if (val >= 0xD0) + return bcd2bin(val & 0x7F) + 20; + if (val >= 0xA0) + return bcd2bin(val & 0x7F) + 180; + return bcd2bin(val) + 100; +} + static int palmas_rtc_read_time(struct device *dev, struct rtc_time *tm) { unsigned char rtc_data[PALMAS_NUM_TIME_REGS]; @@ -71,7 +107,7 @@ static int palmas_rtc_read_time(struct device *dev, struct rtc_time *tm) tm->tm_hour = bcd2bin(rtc_data[2]); tm->tm_mday = bcd2bin(rtc_data[3]); tm->tm_mon = bcd2bin(rtc_data[4]) - 1; - tm->tm_year = bcd2bin(rtc_data[5]) + 100; + tm->tm_year = year_bcd2bin(rtc_data[5]); return ret; } @@ -87,7 +123,7 @@ static int palmas_rtc_set_time(struct device *dev, struct rtc_time *tm) rtc_data[2] = bin2bcd(tm->tm_hour); rtc_data[3] = bin2bcd(tm->tm_mday); rtc_data[4] = bin2bcd(tm->tm_mon + 1); - rtc_data[5] = bin2bcd(tm->tm_year - 100); + rtc_data[5] = year_bin2bcd(tm->tm_year); /* Stop RTC while updating the RTC time registers */ ret = palmas_update_bits(palmas, PALMAS_RTC_BASE, PALMAS_RTC_CTRL_REG, @@ -142,7 +178,7 @@ static int palmas_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->time.tm_hour = bcd2bin(alarm_data[2]); alm->time.tm_mday = bcd2bin(alarm_data[3]); alm->time.tm_mon = bcd2bin(alarm_data[4]) - 1; - alm->time.tm_year = bcd2bin(alarm_data[5]) + 100; + alm->time.tm_year = year_bcd2bin(alarm_data[5]); ret = palmas_read(palmas, PALMAS_RTC_BASE, PALMAS_RTC_INTERRUPTS_REG, &int_val); @@ -173,7 +209,7 @@ static int palmas_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) alarm_data[2] = bin2bcd(alm->time.tm_hour); alarm_data[3] = bin2bcd(alm->time.tm_mday); alarm_data[4] = bin2bcd(alm->time.tm_mon + 1); - alarm_data[5] = bin2bcd(alm->time.tm_year - 100); + alarm_data[5] = year_bin2bcd(alm->time.tm_year); ret = palmas_bulk_write(palmas, PALMAS_RTC_BASE, PALMAS_ALARM_SECONDS_REG, alarm_data, PALMAS_NUM_TIME_REGS); -- cgit v1.2.3 From 17f814655f93a829100eff8d1b5a37f454781828 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:28:15 -0800 Subject: UPSTREAM: mm: when stealing freepages, also take pages created by splitting buddy page (cherry pick from commit 99592d598eca62bdbbf62b59941c189176dfc614) When studying page stealing, I noticed some weird looking decisions in try_to_steal_freepages(). The first I assume is a bug (Patch 1), the following two patches were driven by evaluation. Testing was done with stress-highalloc of mmtests, using the mm_page_alloc_extfrag tracepoint and postprocessing to get counts of how often page stealing occurs for individual migratetypes, and what migratetypes are used for fallbacks. Arguably, the worst case of page stealing is when UNMOVABLE allocation steals from MOVABLE pageblock. RECLAIMABLE allocation stealing from MOVABLE allocation is also not ideal, so the goal is to minimize these two cases. The evaluation of v2 wasn't always clear win and Joonsoo questioned the results. Here I used different baseline which includes RFC compaction improvements from [1]. I found that the compaction improvements reduce variability of stress-highalloc, so there's less noise in the data. First, let's look at stress-highalloc configured to do sync compaction, and how these patches reduce page stealing events during the test. First column is after fresh reboot, other two are reiterations of test without reboot. That was all accumulater over 5 re-iterations (so the benchmark was run 5x3 times with 5 fresh restarts). Baseline: 3.19-rc4 3.19-rc4 3.19-rc4 5-nothp-1 5-nothp-2 5-nothp-3 Page alloc extfrag event 10264225 8702233 10244125 Extfrag fragmenting 10263271 8701552 10243473 Extfrag fragmenting for unmovable 13595 17616 15960 Extfrag fragmenting unmovable placed with movable 7989 12193 8447 Extfrag fragmenting for reclaimable 658 1840 1817 Extfrag fragmenting reclaimable placed with movable 558 1677 1679 Extfrag fragmenting for movable 10249018 8682096 10225696 With Patch 1: 3.19-rc4 3.19-rc4 3.19-rc4 6-nothp-1 6-nothp-2 6-nothp-3 Page alloc extfrag event 11834954 9877523 9774860 Extfrag fragmenting 11833993 9876880 9774245 Extfrag fragmenting for unmovable 7342 16129 11712 Extfrag fragmenting unmovable placed with movable 4191 10547 6270 Extfrag fragmenting for reclaimable 373 1130 923 Extfrag fragmenting reclaimable placed with movable 302 906 738 Extfrag fragmenting for movable 11826278 9859621 9761610 With Patch 2: 3.19-rc4 3.19-rc4 3.19-rc4 7-nothp-1 7-nothp-2 7-nothp-3 Page alloc extfrag event 4725990 3668793 3807436 Extfrag fragmenting 4725104 3668252 3806898 Extfrag fragmenting for unmovable 6678 7974 7281 Extfrag fragmenting unmovable placed with movable 2051 3829 4017 Extfrag fragmenting for reclaimable 429 1208 1278 Extfrag fragmenting reclaimable placed with movable 369 976 1034 Extfrag fragmenting for movable 4717997 3659070 3798339 With Patch 3: 3.19-rc4 3.19-rc4 3.19-rc4 8-nothp-1 8-nothp-2 8-nothp-3 Page alloc extfrag event 5016183 4700142 3850633 Extfrag fragmenting 5015325 4699613 3850072 Extfrag fragmenting for unmovable 1312 3154 3088 Extfrag fragmenting unmovable placed with movable 1115 2777 2714 Extfrag fragmenting for reclaimable 437 1193 1097 Extfrag fragmenting reclaimable placed with movable 330 969 879 Extfrag fragmenting for movable 5013576 4695266 3845887 In v2 we've seen apparent regression with Patch 1 for unmovable events, this is now gone, suggesting it was indeed noise. Here, each patch improves the situation for unmovable events. Reclaimable is improved by patch 1 and then either the same modulo noise, or perhaps sligtly worse - a small price for unmovable improvements, IMHO. The number of movable allocations falling back to other migratetypes is most noisy, but it's reduced to half at Patch 2 nevertheless. These are least critical as compaction can move them around. If we look at success rates, the patches don't affect them, that didn't change. Baseline: 3.19-rc4 3.19-rc4 3.19-rc4 5-nothp-1 5-nothp-2 5-nothp-3 Success 1 Min 49.00 ( 0.00%) 42.00 ( 14.29%) 41.00 ( 16.33%) Success 1 Mean 51.00 ( 0.00%) 45.00 ( 11.76%) 42.60 ( 16.47%) Success 1 Max 55.00 ( 0.00%) 51.00 ( 7.27%) 46.00 ( 16.36%) Success 2 Min 53.00 ( 0.00%) 47.00 ( 11.32%) 44.00 ( 16.98%) Success 2 Mean 59.60 ( 0.00%) 50.80 ( 14.77%) 48.20 ( 19.13%) Success 2 Max 64.00 ( 0.00%) 56.00 ( 12.50%) 52.00 ( 18.75%) Success 3 Min 84.00 ( 0.00%) 82.00 ( 2.38%) 78.00 ( 7.14%) Success 3 Mean 85.60 ( 0.00%) 82.80 ( 3.27%) 79.40 ( 7.24%) Success 3 Max 86.00 ( 0.00%) 83.00 ( 3.49%) 80.00 ( 6.98%) Patch 1: 3.19-rc4 3.19-rc4 3.19-rc4 6-nothp-1 6-nothp-2 6-nothp-3 Success 1 Min 49.00 ( 0.00%) 44.00 ( 10.20%) 44.00 ( 10.20%) Success 1 Mean 51.80 ( 0.00%) 46.00 ( 11.20%) 45.80 ( 11.58%) Success 1 Max 54.00 ( 0.00%) 49.00 ( 9.26%) 49.00 ( 9.26%) Success 2 Min 58.00 ( 0.00%) 49.00 ( 15.52%) 48.00 ( 17.24%) Success 2 Mean 60.40 ( 0.00%) 51.80 ( 14.24%) 50.80 ( 15.89%) Success 2 Max 63.00 ( 0.00%) 54.00 ( 14.29%) 55.00 ( 12.70%) Success 3 Min 84.00 ( 0.00%) 81.00 ( 3.57%) 79.00 ( 5.95%) Success 3 Mean 85.00 ( 0.00%) 81.60 ( 4.00%) 79.80 ( 6.12%) Success 3 Max 86.00 ( 0.00%) 82.00 ( 4.65%) 82.00 ( 4.65%) Patch 2: 3.19-rc4 3.19-rc4 3.19-rc4 7-nothp-1 7-nothp-2 7-nothp-3 Success 1 Min 50.00 ( 0.00%) 44.00 ( 12.00%) 39.00 ( 22.00%) Success 1 Mean 52.80 ( 0.00%) 45.60 ( 13.64%) 42.40 ( 19.70%) Success 1 Max 55.00 ( 0.00%) 46.00 ( 16.36%) 47.00 ( 14.55%) Success 2 Min 52.00 ( 0.00%) 48.00 ( 7.69%) 45.00 ( 13.46%) Success 2 Mean 53.40 ( 0.00%) 49.80 ( 6.74%) 48.80 ( 8.61%) Success 2 Max 57.00 ( 0.00%) 52.00 ( 8.77%) 52.00 ( 8.77%) Success 3 Min 84.00 ( 0.00%) 81.00 ( 3.57%) 79.00 ( 5.95%) Success 3 Mean 85.00 ( 0.00%) 82.40 ( 3.06%) 79.60 ( 6.35%) Success 3 Max 86.00 ( 0.00%) 83.00 ( 3.49%) 80.00 ( 6.98%) Patch 3: 3.19-rc4 3.19-rc4 3.19-rc4 8-nothp-1 8-nothp-2 8-nothp-3 Success 1 Min 46.00 ( 0.00%) 44.00 ( 4.35%) 42.00 ( 8.70%) Success 1 Mean 50.20 ( 0.00%) 45.60 ( 9.16%) 44.00 ( 12.35%) Success 1 Max 52.00 ( 0.00%) 47.00 ( 9.62%) 47.00 ( 9.62%) Success 2 Min 53.00 ( 0.00%) 49.00 ( 7.55%) 48.00 ( 9.43%) Success 2 Mean 55.80 ( 0.00%) 50.60 ( 9.32%) 49.00 ( 12.19%) Success 2 Max 59.00 ( 0.00%) 52.00 ( 11.86%) 51.00 ( 13.56%) Success 3 Min 84.00 ( 0.00%) 80.00 ( 4.76%) 79.00 ( 5.95%) Success 3 Mean 85.40 ( 0.00%) 81.60 ( 4.45%) 80.40 ( 5.85%) Success 3 Max 87.00 ( 0.00%) 83.00 ( 4.60%) 82.00 ( 5.75%) While there's no improvement here, I consider reduced fragmentation events to be worth on its own. Patch 2 also seems to reduce scanning for free pages, and migrations in compaction, suggesting it has somewhat less work to do: Patch 1: Compaction stalls 4153 3959 3978 Compaction success 1523 1441 1446 Compaction failures 2630 2517 2531 Page migrate success 4600827 4943120 5104348 Page migrate failure 19763 16656 17806 Compaction pages isolated 9597640 10305617 10653541 Compaction migrate scanned 77828948 86533283 87137064 Compaction free scanned 517758295 521312840 521462251 Compaction cost 5503 5932 6110 Patch 2: Compaction stalls 3800 3450 3518 Compaction success 1421 1316 1317 Compaction failures 2379 2134 2201 Page migrate success 4160421 4502708 4752148 Page migrate failure 19705 14340 14911 Compaction pages isolated 8731983 9382374 9910043 Compaction migrate scanned 98362797 96349194 98609686 Compaction free scanned 496512560 469502017 480442545 Compaction cost 5173 5526 5811 As with v2, /proc/pagetypeinfo appears unaffected with respect to numbers of unmovable and reclaimable pageblocks. Configuring the benchmark to allocate like THP page fault (i.e. no sync compaction) gives much noisier results for iterations 2 and 3 after reboot. This is not so surprising given how [1] offers lower improvements in this scenario due to less restarts after deferred compaction which would change compaction pivot. Baseline: 3.19-rc4 3.19-rc4 3.19-rc4 5-thp-1 5-thp-2 5-thp-3 Page alloc extfrag event 8148965 6227815 6646741 Extfrag fragmenting 8147872 6227130 6646117 Extfrag fragmenting for unmovable 10324 12942 15975 Extfrag fragmenting unmovable placed with movable 5972 8495 10907 Extfrag fragmenting for reclaimable 601 1707 2210 Extfrag fragmenting reclaimable placed with movable 520 1570 2000 Extfrag fragmenting for movable 8136947 6212481 6627932 Patch 1: 3.19-rc4 3.19-rc4 3.19-rc4 6-thp-1 6-thp-2 6-thp-3 Page alloc extfrag event 8345457 7574471 7020419 Extfrag fragmenting 8343546 7573777 7019718 Extfrag fragmenting for unmovable 10256 18535 30716 Extfrag fragmenting unmovable placed with movable 6893 11726 22181 Extfrag fragmenting for reclaimable 465 1208 1023 Extfrag fragmenting reclaimable placed with movable 353 996 843 Extfrag fragmenting for movable 8332825 7554034 6987979 Patch 2: 3.19-rc4 3.19-rc4 3.19-rc4 7-thp-1 7-thp-2 7-thp-3 Page alloc extfrag event 3512847 3020756 2891625 Extfrag fragmenting 3511940 3020185 2891059 Extfrag fragmenting for unmovable 9017 6892 6191 Extfrag fragmenting unmovable placed with movable 1524 3053 2435 Extfrag fragmenting for reclaimable 445 1081 1160 Extfrag fragmenting reclaimable placed with movable 375 918 986 Extfrag fragmenting for movable 3502478 3012212 2883708 Patch 3: 3.19-rc4 3.19-rc4 3.19-rc4 8-thp-1 8-thp-2 8-thp-3 Page alloc extfrag event 3181699 3082881 2674164 Extfrag fragmenting 3180812 3082303 2673611 Extfrag fragmenting for unmovable 1201 4031 4040 Extfrag fragmenting unmovable placed with movable 974 3611 3645 Extfrag fragmenting for reclaimable 478 1165 1294 Extfrag fragmenting reclaimable placed with movable 387 985 1030 Extfrag fragmenting for movable 3179133 3077107 2668277 The improvements for first iteration are clear, the rest is much noisier and can appear like regression for Patch 1. Anyway, patch 2 rectifies it. Allocation success rates are again unaffected so there's no point in making this e-mail any longer. [1] http://marc.info/?l=linux-mm&m=142166196321125&w=2 This patch (of 3): When __rmqueue_fallback() is called to allocate a page of order X, it will find a page of order Y >= X of a fallback migratetype, which is different from the desired migratetype. With the help of try_to_steal_freepages(), it may change the migratetype (to the desired one) also of: 1) all currently free pages in the pageblock containing the fallback page 2) the fallback pageblock itself 3) buddy pages created by splitting the fallback page (when Y > X) These decisions take the order Y into account, as well as the desired migratetype, with the goal of preventing multiple fallback allocations that could e.g. distribute UNMOVABLE allocations among multiple pageblocks. Originally, decision for 1) has implied the decision for 3). Commit 47118af076f6 ("mm: mmzone: MIGRATE_CMA migration type added") changed that (probably unintentionally) so that the buddy pages in case 3) are always changed to the desired migratetype, except for CMA pageblocks. Commit fef903efcf0c ("mm/page_allo.c: restructure free-page stealing code and fix a bug") did some refactoring and added a comment that the case of 3) is intended. Commit 0cbef29a7821 ("mm: __rmqueue_fallback() should respect pageblock type") removed the comment and tried to restore the original behavior where 1) implies 3), but due to the previous refactoring, the result is instead that only 2) implies 3) - and the conditions for 2) are less frequently met than conditions for 1). This may increase fragmentation in situations where the code decides to steal all free pages from the pageblock (case 1)), but then gives back the buddy pages produced by splitting. This patch restores the original intended logic where 1) implies 3). During testing with stress-highalloc from mmtests, this has shown to decrease the number of events where UNMOVABLE and RECLAIMABLE allocations steal from MOVABLE pageblocks, which can lead to permanent fragmentation. In some cases it has increased the number of events when MOVABLE allocations steal from UNMOVABLE or RECLAIMABLE pageblocks, but these are fixable by sync compaction and thus less harmful. Note that evaluation has shown that the behavior introduced by 47118af076f6 for buddy pages in case 3) is actually even better than the original logic, so the following patch will introduce it properly once again. For stable backports of this patch it makes thus sense to only fix versions containing 0cbef29a7821. [iamjoonsoo.kim@lge.com: tracepoint fix] Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Zhang Yanfei Acked-by: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Cc: KOSAKI Motohiro Cc: [3.13+ containing 0cbef29a7821] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 26295735 Bug: 26412925 Bug: 26379045 Change-Id: Iafa036801ca93a25048702de6b4f3e676eebf276 --- include/trace/events/kmem.h | 7 ++++--- mm/page_alloc.c | 12 +++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aece1346ceb7..4ad10baecd4d 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -268,11 +268,11 @@ TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, int alloc_order, int fallback_order, - int alloc_migratetype, int fallback_migratetype, int new_migratetype), + int alloc_migratetype, int fallback_migratetype), TP_ARGS(page, alloc_order, fallback_order, - alloc_migratetype, fallback_migratetype, new_migratetype), + alloc_migratetype, fallback_migratetype), TP_STRUCT__entry( __field( struct page *, page ) @@ -289,7 +289,8 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; - __entry->change_ownership = (new_migratetype == alloc_migratetype); + __entry->change_ownership = (alloc_migratetype == + get_pageblock_migratetype(page)); ), TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0e695eca64c1..d45203779b51 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1086,8 +1086,8 @@ static void change_pageblock_range(struct page *pageblock_page, * nor move CMA pages to different free lists. We don't want unmovable pages * to be allocated from MIGRATE_CMA areas. * - * Returns the new migratetype of the pageblock (or the same old migratetype - * if it was unchanged). + * Returns the allocation migratetype if free pages were stolen, or the + * fallback migratetype if it was decided not to steal. */ static int try_to_steal_freepages(struct zone *zone, struct page *page, int start_type, int fallback_type) @@ -1118,12 +1118,10 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, /* Claim the whole block if over half of it is free */ if (pages >= (1 << (pageblock_order-1)) || - page_group_by_mobility_disabled) { - + page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_type); - return start_type; - } + return start_type; } return fallback_type; @@ -1175,7 +1173,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) set_freepage_migratetype(page, new_type); trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, migratetype, new_type); + start_migratetype, migratetype); return page; } -- cgit v1.2.3 From f34a097caa34edb4fe7798a3aec3d7076d47fb9e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:28:18 -0800 Subject: UPSTREAM: mm: always steal split buddies in fallback allocations (cherry pick from commit 3a1086fba92b6e2311b6a342f68bc380beb240fe) When allocation falls back to another migratetype, it will steal a page with highest available order, and (depending on this order and desired migratetype), it might also steal the rest of free pages from the same pageblock. Given the preference of highest available order, it is likely that it will be higher than the desired order, and result in the stolen buddy page being split. The remaining pages after split are currently stolen only when the rest of the free pages are stolen. This can however lead to situations where for MOVABLE allocations we split e.g. order-4 fallback UNMOVABLE page, but steal only order-0 page. Then on the next MOVABLE allocation (which may be batched to fill the pcplists) we split another order-3 or higher page, etc. By stealing all pages that we have split, we can avoid further stealing. This patch therefore adjusts the page stealing so that buddy pages created by split are always stolen. This has effect only on MOVABLE allocations, as RECLAIMABLE and UNMOVABLE allocations already always do that in addition to stealing the rest of free pages from the pageblock. The change also allows to simplify try_to_steal_freepages() and factor out CMA handling. According to Mel, it has been intended since the beginning that buddy pages after split would be stolen always, but it doesn't seem like it was ever the case until commit 47118af076f6 ("mm: mmzone: MIGRATE_CMA migration type added"). The commit has unintentionally introduced this behavior, but was reverted by commit 0cbef29a7821 ("mm: __rmqueue_fallback() should respect pageblock type"). Neither included evaluation. My evaluation with stress-highalloc from mmtests shows about 2.5x reduction of page stealing events for MOVABLE allocations, without affecting the page stealing events for other allocation migratetypes. Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Zhang Yanfei Acked-by: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 26295735 Bug: 26412925 Bug: 26379045 Change-Id: Ibef0354ee498c525c512a2bfd41565255fc2b8c0 --- mm/page_alloc.c | 62 +++++++++++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d45203779b51..78c6b9459ab9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1080,33 +1080,18 @@ static void change_pageblock_range(struct page *pageblock_page, /* * If breaking a large block of pages, move all free pages to the preferred * allocation list. If falling back for a reclaimable kernel allocation, be - * more aggressive about taking ownership of free pages. - * - * On the other hand, never change migration type of MIGRATE_CMA pageblocks - * nor move CMA pages to different free lists. We don't want unmovable pages - * to be allocated from MIGRATE_CMA areas. - * - * Returns the allocation migratetype if free pages were stolen, or the - * fallback migratetype if it was decided not to steal. + * more aggressive about taking ownership of free pages. If we claim more than + * half of the pageblock, change pageblock's migratetype as well. */ -static int try_to_steal_freepages(struct zone *zone, struct page *page, +static void try_to_steal_freepages(struct zone *zone, struct page *page, int start_type, int fallback_type) { int current_order = page_order(page); - /* - * When borrowing from MIGRATE_CMA, we need to release the excess - * buddy pages to CMA itself. We also ensure the freepage_migratetype - * is set to CMA so it is returned to the correct freelist in case - * the page ends up being not actually allocated from the pcp lists. - */ - if (is_migrate_cma(fallback_type)) - return fallback_type; - /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { change_pageblock_range(page, current_order, start_type); - return start_type; + return; } if (current_order >= pageblock_order / 2 || @@ -1120,11 +1105,7 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, if (pages >= (1 << (pageblock_order-1)) || page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_type); - - return start_type; } - - return fallback_type; } /* Remove an element from the buddy allocator from the fallback list */ @@ -1134,14 +1115,15 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) struct free_area *area; unsigned int current_order; struct page *page; - int migratetype, new_type, i; /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order && current_order <= MAX_ORDER-1; --current_order) { + int i; for (i = 0;; i++) { - migratetype = fallbacks[start_migratetype][i]; + int migratetype = fallbacks[start_migratetype][i]; + int buddy_type = start_migratetype; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) @@ -1155,22 +1137,36 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) struct page, lru); area->nr_free--; - new_type = try_to_steal_freepages(zone, page, - start_migratetype, - migratetype); + if (!is_migrate_cma(migratetype)) { + try_to_steal_freepages(zone, page, + start_migratetype, + migratetype); + } else { + /* + * When borrowing from MIGRATE_CMA, we need to + * release the excess buddy pages to CMA + * itself, and we do not try to steal extra + * free pages. + */ + buddy_type = migratetype; + } /* Remove the page from the freelists */ list_del(&page->lru); rmv_page_order(page); expand(zone, page, order, current_order, area, - new_type); - /* The freepage_migratetype may differ from pageblock's + buddy_type); + + /* + * The freepage_migratetype may differ from pageblock's * migratetype depending on the decisions in - * try_to_steal_freepages. This is OK as long as it does - * not differ for MIGRATE_CMA type. + * try_to_steal_freepages(). This is OK as long as it + * does not differ for MIGRATE_CMA pageblocks. For CMA + * we need to make sure unallocated pages flushed from + * pcp lists are returned to the correct freelist. */ - set_freepage_migratetype(page, new_type); + set_freepage_migratetype(page, buddy_type); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); -- cgit v1.2.3 From a4edf85a8b92509df7b1f82bcb5e3c0500daee4c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:28:21 -0800 Subject: UPSTREAM: mm: more aggressive page stealing for UNMOVABLE allocations (cherry pick from commit 9c0415eb8cbf0c8fd043b6c0f0354308ab099df5) When allocation falls back to stealing free pages of another migratetype, it can decide to steal extra pages, or even the whole pageblock in order to reduce fragmentation, which could happen if further allocation fallbacks pick a different pageblock. In try_to_steal_freepages(), one of the situations where extra pages are stolen happens when we are trying to allocate a MIGRATE_RECLAIMABLE page. However, MIGRATE_UNMOVABLE allocations are not treated the same way, although spreading such allocation over multiple fallback pageblocks is arguably even worse than it is for RECLAIMABLE allocations. To minimize fragmentation, we should minimize the number of such fallbacks, and thus steal as much as is possible from each fallback pageblock. Note that in theory this might put more pressure on movable pageblocks and cause movable allocations to steal back from unmovable pageblocks. However, movable allocations are not as aggressive with stealing, and do not cause permanent fragmentation, so the tradeoff is reasonable, and evaluation seems to support the change. This patch thus adds a check for MIGRATE_UNMOVABLE to the decision to steal extra free pages. When evaluating with stress-highalloc from mmtests, this has reduced the number of MIGRATE_UNMOVABLE fallbacks to roughly 1/6. The number of these fallbacks stealing from MIGRATE_MOVABLE block is reduced to 1/3. There was no observation of growing number of unmovable pageblocks over time, and also not of increased movable allocation fallbacks. Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 26295735 Bug: 26412925 Bug: 26379045 Change-Id: I35346bbfae786e5e1499d889fe57182d21439750 --- mm/page_alloc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 78c6b9459ab9..3541c12c47c9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1078,10 +1078,19 @@ static void change_pageblock_range(struct page *pageblock_page, } /* - * If breaking a large block of pages, move all free pages to the preferred - * allocation list. If falling back for a reclaimable kernel allocation, be - * more aggressive about taking ownership of free pages. If we claim more than - * half of the pageblock, change pageblock's migratetype as well. + * When we are falling back to another migratetype during allocation, try to + * steal extra free pages from the same pageblocks to satisfy further + * allocations, instead of polluting multiple pageblocks. + * + * If we are stealing a relatively large buddy page, it is likely there will + * be more free pages in the pageblock, so try to steal them all. For + * reclaimable and unmovable allocations, we steal regardless of page size, + * as fragmentation caused by those allocations polluting movable pageblocks + * is worse than movable allocations stealing from unmovable and reclaimable + * pageblocks. + * + * If we claim more than half of the pageblock, change pageblock's migratetype + * as well. */ static void try_to_steal_freepages(struct zone *zone, struct page *page, int start_type, int fallback_type) @@ -1096,6 +1105,7 @@ static void try_to_steal_freepages(struct zone *zone, struct page *page, if (current_order >= pageblock_order / 2 || start_type == MIGRATE_RECLAIMABLE || + start_type == MIGRATE_UNMOVABLE || page_group_by_mobility_disabled) { int pages; -- cgit v1.2.3 From 416ee929da9e911a3182f13a2cb5620f9ed33788 Mon Sep 17 00:00:00 2001 From: Vasily Kulikov Date: Wed, 9 Sep 2015 15:36:00 -0700 Subject: UPSTREAM: include/linux/poison.h: fix LIST_POISON{1,2} offset (cherry pick from commit 8a5e5e02fc83aaf67053ab53b359af08c6c49aaf) Poison pointer values should be small enough to find a room in non-mmap'able/hardly-mmap'able space. E.g. on x86 "poison pointer space" is located starting from 0x0. Given unprivileged users cannot mmap anything below mmap_min_addr, it should be safe to use poison pointers lower than mmap_min_addr. The current poison pointer values of LIST_POISON{1,2} might be too big for mmap_min_addr values equal or less than 1 MB (common case, e.g. Ubuntu uses only 0x10000). There is little point to use such a big value given the "poison pointer space" below 1 MB is not yet exhausted. Changing it to a smaller value solves the problem for small mmap_min_addr setups. The values are suggested by Solar Designer: http://www.openwall.com/lists/oss-security/2015/05/02/6 Signed-off-by: Vasily Kulikov Cc: Solar Designer Cc: Thomas Gleixner Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 26429468 Bug: 26186802 Change-Id: I4b92183eecf01a072132ebf454ec8a206ecdc3cd --- include/linux/poison.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/poison.h b/include/linux/poison.h index 2110a81c5e2a..253c9b4198ef 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -19,8 +19,8 @@ * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) -#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) +#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* -- cgit v1.2.3 From 8f2e399a9d90dc541e3ab633e5fb78e895d69d5b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 19 Nov 2015 13:45:41 -0800 Subject: of: Fix build warnings In commit d6cb004d80 (of: fix CONFIG_CMDLINE_EXTEND), the types of some variables in early_init_dt_scan_chosen() were modified, which results in build warnings. This patch resets the unsigned long to an int, and re-adds the const to the char*. Change-Id: Ie60ae92b4552e453cf477dd83f42838b3f95975e Signed-off-by: John Stultz --- drivers/of/fdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index af3b78382d4a..821f6b591ce2 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -911,8 +911,8 @@ static const char *config_cmdline = ""; int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - unsigned long l = 0; - char *p = NULL; + int l = 0; + const char *p = NULL; char *cmdline = data; pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); -- cgit v1.2.3 From 2d59c614a6b6a836b0714d5bb652b7dcf63a23b9 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 30 Dec 2015 03:05:44 +0530 Subject: usb: gadget: rndis: use %z format specifier for size_t Use '%z' format specifier for sizeof operator instead of '%u' to fix build warnings like: warning: format '%u' expects type 'unsigned int', but argument 3 has type 'long unsigned int' Change-Id: If016ee2e47a97773ae4f27d88d79a886b0acf0bc Signed-off-by: Amit Pundir --- drivers/usb/gadget/function/rndis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 1b468a8297b3..ef948877f8ab 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -1063,7 +1063,7 @@ int rndis_rm_hdr(struct gether *port, } if (skb->len < sizeof *hdr) { - pr_err("invalid rndis pkt: skblen:%u hdr_len:%u", + pr_err("invalid rndis pkt: skblen:%u hdr_len:%zu", skb->len, sizeof *hdr); dev_kfree_skb_any(skb); return -EINVAL; -- cgit v1.2.3 From 9bed3bf5ed21067a3fc753315847a963d860c43e Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 6 Jan 2016 14:48:11 +0530 Subject: Revert "HID: Add input_register callback." This reverts commit 274ba2dc1eb8d949da547743ae26178a3f83ff6b. This patch updated one of the callbacks (input_configured) but didn't update the respective drivers, so we end up running into a lot of warnings when building HID drivers. e.g. drivers/hid/hid-sony.c:2050:2: warning: initialization from incompatible pointer type [enabled by default] .input_configured = sony_input_configured, ^ ...and so on. Hence revert this patch and cherry-pick the upstream(4.4-rc1) commit 9154301a47b3 "HID: hid-input: allow input_configured callback return errors" instead. Change-Id: If417e1216a4063606cc9e1581ebd13b89880fd7a Signed-off-by: Amit Pundir --- drivers/hid/hid-input.c | 10 ++++------ drivers/hid/hid-multitouch.c | 14 ++++---------- include/linux/hid.h | 4 ++-- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d538ea9edb39..725f22ca47fc 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1448,9 +1448,8 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) * UGCI) cram a lot of unrelated inputs into the * same interface. */ hidinput->report = report; - if (drv->input_configured && - drv->input_configured(hid, hidinput)) - goto out_cleanup; + if (drv->input_configured) + drv->input_configured(hid, hidinput); if (input_register_device(hidinput->input)) goto out_cleanup; hidinput = NULL; @@ -1471,9 +1470,8 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } if (hidinput) { - if (drv->input_configured && - drv->input_configured(hid, hidinput)) - goto out_cleanup; + if (drv->input_configured) + drv->input_configured(hid, hidinput); if (input_register_device(hidinput->input)) goto out_cleanup; } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e6cab622fdfb..9080e3325fa5 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -711,13 +711,12 @@ static void mt_touch_report(struct hid_device *hid, struct hid_report *report) mt_sync_frame(td, report->field[0]->hidinput->input); } -static int mt_touch_input_configured(struct hid_device *hdev, +static void mt_touch_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; struct input_dev *input = hi->input; - int ret; if (!td->maxcontacts) td->maxcontacts = MT_DEFAULT_MAXCONTACT; @@ -732,12 +731,9 @@ static int mt_touch_input_configured(struct hid_device *hdev, if (cls->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) td->mt_flags |= INPUT_MT_DROP_UNUSED; - ret = input_mt_init_slots(input, td->maxcontacts, td->mt_flags); - if (ret) - return ret; + input_mt_init_slots(input, td->maxcontacts, td->mt_flags); td->mt_flags = 0; - return 0; } static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, @@ -891,16 +887,15 @@ static void mt_post_parse(struct mt_device *td) cls->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } -static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) +static void mt_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); char *name; const char *suffix = NULL; struct hid_field *field = hi->report->field[0]; - int ret = 0; if (hi->report->id == td->mt_report_id) - ret = mt_touch_input_configured(hdev, hi); + mt_touch_input_configured(hdev, hi); /* * some egalax touchscreens have "application == HID_DG_TOUCHSCREEN" @@ -951,7 +946,6 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) hi->input->name = name; } } - return ret; } static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) diff --git a/include/linux/hid.h b/include/linux/hid.h index 15680809d345..78ea9bf941cd 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -670,8 +670,8 @@ struct hid_driver { int (*input_mapped)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max); - int (*input_configured)(struct hid_device *hdev, - struct hid_input *hidinput); + void (*input_configured)(struct hid_device *hdev, + struct hid_input *hidinput); void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); -- cgit v1.2.3 From 7682bfa411f4197de3a3cf3d6194ef3dac93808d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 29 Sep 2015 15:52:59 -0700 Subject: UPSTREAM: HID: hid-input: allow input_configured callback return errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When configuring input device via input_configured callback we may encounter errors (for example input_mt_init_slots() may fail). Instead of continuing with half-initialized input device let's allow driver indicate failures. Signed-off-by: Jaikumar Ganesh Signed-off-by: Arve Hjønnevåg Reviewed-by: Benjamin Tissoires Reviewed-by: David Herrmann Acked-by: Nikolai Kondrashov Acked-by: Andrew Duggan Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina Change-Id: I3494c1937b84ff6cd654aef25b5c803aff69f61d (cherry picked from commit 9154301a47b33bdc273d8254c407792524367558) Signed-off-by: Amit Pundir --- drivers/hid/hid-appleir.c | 4 +++- drivers/hid/hid-elo.c | 4 +++- drivers/hid/hid-input.c | 10 ++++++---- drivers/hid/hid-magicmouse.c | 8 ++++++-- drivers/hid/hid-multitouch.c | 20 +++++++++++++++----- drivers/hid/hid-ntrig.c | 6 ++++-- drivers/hid/hid-rmi.c | 9 ++++++--- drivers/hid/hid-sony.c | 13 ++++++++++--- include/linux/hid.h | 4 ++-- 9 files changed, 55 insertions(+), 23 deletions(-) diff --git a/drivers/hid/hid-appleir.c b/drivers/hid/hid-appleir.c index 0e6a42d37eb6..07cbc70f00e7 100644 --- a/drivers/hid/hid-appleir.c +++ b/drivers/hid/hid-appleir.c @@ -256,7 +256,7 @@ out: return 0; } -static void appleir_input_configured(struct hid_device *hid, +static int appleir_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input_dev = hidinput->input; @@ -275,6 +275,8 @@ static void appleir_input_configured(struct hid_device *hid, for (i = 0; i < ARRAY_SIZE(appleir_key_table); i++) set_bit(appleir->keymap[i], input_dev->keybit); clear_bit(KEY_RESERVED, input_dev->keybit); + + return 0; } static int appleir_input_mapping(struct hid_device *hid, diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index 4e49462870ab..aad8c162a825 100644 --- a/drivers/hid/hid-elo.c +++ b/drivers/hid/hid-elo.c @@ -37,7 +37,7 @@ static bool use_fw_quirk = true; module_param(use_fw_quirk, bool, S_IRUGO); MODULE_PARM_DESC(use_fw_quirk, "Do periodic pokes for broken M firmwares (default = true)"); -static void elo_input_configured(struct hid_device *hdev, +static int elo_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; @@ -45,6 +45,8 @@ static void elo_input_configured(struct hid_device *hdev, set_bit(BTN_TOUCH, input->keybit); set_bit(ABS_PRESSURE, input->absbit); input_set_abs_params(input, ABS_PRESSURE, 0, 256, 0, 0); + + return 0; } static void elo_process_data(struct input_dev *input, const u8 *data, int size) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 725f22ca47fc..d538ea9edb39 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1448,8 +1448,9 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) * UGCI) cram a lot of unrelated inputs into the * same interface. */ hidinput->report = report; - if (drv->input_configured) - drv->input_configured(hid, hidinput); + if (drv->input_configured && + drv->input_configured(hid, hidinput)) + goto out_cleanup; if (input_register_device(hidinput->input)) goto out_cleanup; hidinput = NULL; @@ -1470,8 +1471,9 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } if (hidinput) { - if (drv->input_configured) - drv->input_configured(hid, hidinput); + if (drv->input_configured && + drv->input_configured(hid, hidinput)) + goto out_cleanup; if (input_register_device(hidinput->input)) goto out_cleanup; } diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 29a74c1efcb8..d6fa496d0ca2 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -471,18 +471,22 @@ static int magicmouse_input_mapping(struct hid_device *hdev, return 0; } -static void magicmouse_input_configured(struct hid_device *hdev, +static int magicmouse_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); + int ret; - int ret = magicmouse_setup_input(msc->input, hdev); + ret = magicmouse_setup_input(msc->input, hdev); if (ret) { hid_err(hdev, "magicmouse setup input failed (%d)\n", ret); /* clean msc->input to notify probe() of the failure */ msc->input = NULL; + return ret; } + + return 0; } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 9080e3325fa5..437be8ed7a48 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -711,12 +711,13 @@ static void mt_touch_report(struct hid_device *hid, struct hid_report *report) mt_sync_frame(td, report->field[0]->hidinput->input); } -static void mt_touch_input_configured(struct hid_device *hdev, +static int mt_touch_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; struct input_dev *input = hi->input; + int ret; if (!td->maxcontacts) td->maxcontacts = MT_DEFAULT_MAXCONTACT; @@ -731,9 +732,12 @@ static void mt_touch_input_configured(struct hid_device *hdev, if (cls->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) td->mt_flags |= INPUT_MT_DROP_UNUSED; - input_mt_init_slots(input, td->maxcontacts, td->mt_flags); + ret = input_mt_init_slots(input, td->maxcontacts, td->mt_flags); + if (ret) + return ret; td->mt_flags = 0; + return 0; } static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, @@ -887,15 +891,19 @@ static void mt_post_parse(struct mt_device *td) cls->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } -static void mt_input_configured(struct hid_device *hdev, struct hid_input *hi) +static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); char *name; const char *suffix = NULL; struct hid_field *field = hi->report->field[0]; + int ret; - if (hi->report->id == td->mt_report_id) - mt_touch_input_configured(hdev, hi); + if (hi->report->id == td->mt_report_id) { + ret = mt_touch_input_configured(hdev, hi); + if (ret) + return ret; + } /* * some egalax touchscreens have "application == HID_DG_TOUCHSCREEN" @@ -946,6 +954,8 @@ static void mt_input_configured(struct hid_device *hdev, struct hid_input *hi) hi->input->name = name; } } + + return 0; } static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index 600f2075512f..756d1ef9bd99 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -859,14 +859,14 @@ not_claimed_input: return 1; } -static void ntrig_input_configured(struct hid_device *hid, +static int ntrig_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; if (hidinput->report->maxfield < 1) - return; + return 0; switch (hidinput->report->field[0]->application) { case HID_DG_PEN: @@ -890,6 +890,8 @@ static void ntrig_input_configured(struct hid_device *hid, "N-Trig MultiTouch"; break; } + + return 0; } static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id) diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 3cccff73b9b9..a7effa490a3c 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -799,7 +799,7 @@ static int rmi_populate(struct hid_device *hdev) return 0; } -static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) +static int rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct rmi_data *data = hid_get_drvdata(hdev); struct input_dev *input = hi->input; @@ -811,7 +811,7 @@ static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) hid_dbg(hdev, "Opening low level driver\n"); ret = hid_hw_open(hdev); if (ret) - return; + return ret; /* Allow incoming hid reports */ hid_device_io_start(hdev); @@ -849,7 +849,9 @@ static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 0x0f, 0, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 0x0f, 0, 0); - input_mt_init_slots(input, data->max_fingers, INPUT_MT_POINTER); + ret = input_mt_init_slots(input, data->max_fingers, INPUT_MT_POINTER); + if (ret < 0) + goto exit; if (data->button_count) { __set_bit(EV_KEY, input->evbit); @@ -865,6 +867,7 @@ static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) exit: hid_device_io_stop(hdev); hid_hw_close(hdev); + return ret; } static int rmi_input_mapping(struct hid_device *hdev, diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index bc4269e559f1..1e08214e9305 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -1100,20 +1100,27 @@ static int sony_register_touchpad(struct hid_input *hi, int touch_count, return 0; } -static void sony_input_configured(struct hid_device *hdev, +static int sony_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct sony_sc *sc = hid_get_drvdata(hdev); + int ret; /* * The Dualshock 4 touchpad supports 2 touches and has a * resolution of 1920x942 (44.86 dots/mm). */ if (sc->quirks & DUALSHOCK4_CONTROLLER) { - if (sony_register_touchpad(hidinput, 2, 1920, 942) != 0) + ret = sony_register_touchpad(hidinput, 2, 1920, 942); + if (ret) { hid_err(sc->hdev, - "Unable to initialize multi-touch slots\n"); + "Unable to initialize multi-touch slots: %d\n", + ret); + return ret; + } } + + return 0; } /* diff --git a/include/linux/hid.h b/include/linux/hid.h index 78ea9bf941cd..15680809d345 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -670,8 +670,8 @@ struct hid_driver { int (*input_mapped)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max); - void (*input_configured)(struct hid_device *hdev, - struct hid_input *hidinput); + int (*input_configured)(struct hid_device *hdev, + struct hid_input *hidinput); void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); -- cgit v1.2.3 From 7fee022e67c7d6955fd258ee99dab6979e8dcab7 Mon Sep 17 00:00:00 2001 From: dcashman Date: Tue, 29 Dec 2015 13:06:42 -0800 Subject: Revert "arm: mm: support ARCH_MMAP_RND_BITS." This reverts commit bc8eb3ef20681a250f0e293044e3bb795f39e0cf. Bug: 25973686 Signed-off-by: Daniel Cashman Change-Id: I568f9cb23c9b3ae1d66d35b1deb67a7a4eeaae3b --- arch/arm/Kconfig | 24 ------------------------ arch/arm/mm/mmap.c | 7 ++----- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 424f1576b651..6fc091ca09d5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -296,30 +296,6 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. -config ARCH_MMAP_RND_BITS_MIN - int - default 8 - -config ARCH_MMAP_RND_BITS_MAX - int - default 14 if MMU && PAGE_OFFSET=0x40000000 - default 15 if MMU && PAGE_OFFSET=0x80000000 - default 16 if MMU - default 8 - -config ARCH_MMAP_RND_BITS - int "Number of bits to use for ASLR of mmap base address" if EXPERT - range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX - default ARCH_MMAP_RND_BITS_MIN - help - This value can be used to select the number of bits to use to - determine the random offset to the base address of vma regions - resulting from mmap allocations. This value will be bounded - by the architecture's minimum and maximum supported values. - - This value can be changed after boot using the - /proc/sys/kernel/mmap_rnd_bits tunable - # # The "ARM system type" choice list is ordered alphabetically by option # text. Please add new entries in the option alphabetic order. diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 290cbb310486..5e85ed371364 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -11,10 +11,6 @@ #include #include -int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN; -int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX; -int mmap_rnd_bits = CONFIG_ARCH_MMAP_RND_BITS; - #define COLOUR_ALIGN(addr,pgoff) \ ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ (((pgoff)<flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) - random_factor = (get_random_int() % (1 << mmap_rnd_bits)) << PAGE_SHIFT; + random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; -- cgit v1.2.3 From b9849e29412de0ecbe9c5340c375b9254c63ddad Mon Sep 17 00:00:00 2001 From: dcashman Date: Tue, 29 Dec 2015 13:18:10 -0800 Subject: Revert "mm: mmap: Add new /proc tunable for mmap_base ASLR." This reverts commit 565ef50330f97868b8d01b78c7809e453679ba76. Bug: 25973686 Signed-off-by: Daniel Cashman Change-Id: I39891e4cd2eaab41b606c594ef204b52e47f9adf --- Documentation/sysctl/kernel.txt | 14 -------------- include/linux/mm.h | 6 ------ kernel/sysctl.c | 11 ----------- 3 files changed, 31 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 62044945a118..57baff5bdb80 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -41,7 +41,6 @@ show up in /proc/sys/kernel: - kptr_restrict - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] -- mmap_rnd_bits - modprobe ==> Documentation/debugging-modules.txt - modules_disabled - msg_next_id [ sysv ipc ] @@ -387,19 +386,6 @@ This flag controls the L2 cache of G3 processor boards. If ============================================================== -mmap_rnd_bits: - -This value can be used to select the number of bits to use to -determine the random offset to the base address of vma regions -resulting from mmap allocations on architectures which support -tuning address space randomization. This value will be bounded -by the architecture's minimum and maximum supported values. - -This value can be changed after boot using the -/proc/sys/kernel/mmap_rnd_bits tunable - -============================================================== - modules_disabled: A toggle value indicating if modules are allowed to be loaded diff --git a/include/linux/mm.h b/include/linux/mm.h index 1fb49a650c17..18a4997dcde7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -48,12 +48,6 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif -#ifdef CONFIG_ARCH_MMAP_RND_BITS -extern int mmap_rnd_bits_min; -extern int mmap_rnd_bits_max; -extern int mmap_rnd_bits; -#endif - #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a6de89ef2191..78af9692e0a4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1105,17 +1105,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, -#endif -#ifdef CONFIG_ARCH_MMAP_RND_BITS - { - .procname = "mmap_rnd_bits", - .data = &mmap_rnd_bits, - .maxlen = sizeof(mmap_rnd_bits), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &mmap_rnd_bits_min, - .extra2 = &mmap_rnd_bits_max, - }, #endif { } }; -- cgit v1.2.3 From 4443881a4a96c3e0822ee4818300ffbc70298c02 Mon Sep 17 00:00:00 2001 From: dcashman Date: Tue, 29 Dec 2015 14:24:39 -0800 Subject: FROMLIST: mm: mmap: Add new /proc tunable for mmap_base ASLR. (cherry picked from commit https://lkml.org/lkml/2015/12/21/337) ASLR only uses as few as 8 bits to generate the random offset for the mmap base address on 32 bit architectures. This value was chosen to prevent a poorly chosen value from dividing the address space in such a way as to prevent large allocations. This may not be an issue on all platforms. Allow the specification of a minimum number of bits so that platforms desiring greater ASLR protection may determine where to place the trade-off. Bug: 24047224 Signed-off-by: Daniel Cashman Signed-off-by: Daniel Cashman Change-Id: Ia4742ca554b8e4f9a3ff9ad6818b956ebd67893f --- Documentation/sysctl/vm.txt | 29 +++++++++++++++++++ arch/Kconfig | 68 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 11 ++++++++ kernel/sysctl.c | 22 +++++++++++++++ mm/mmap.c | 12 ++++++++ 5 files changed, 142 insertions(+) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 9aecaa51f735..ccc50a53c452 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -42,6 +42,8 @@ Currently, these files are in /proc/sys/vm: - min_slab_ratio - min_unmapped_ratio - mmap_min_addr +- mmap_rnd_bits +- mmap_rnd_compat_bits - nr_hugepages - nr_overcommit_hugepages - nr_trim_pages (only if CONFIG_MMU=n) @@ -490,6 +492,33 @@ against future potential kernel bugs. ============================================================== +mmap_rnd_bits: + +This value can be used to select the number of bits to use to +determine the random offset to the base address of vma regions +resulting from mmap allocations on architectures which support +tuning address space randomization. This value will be bounded +by the architecture's minimum and maximum supported values. + +This value can be changed after boot using the +/proc/sys/vm/mmap_rnd_bits tunable + +============================================================== + +mmap_rnd_compat_bits: + +This value can be used to select the number of bits to use to +determine the random offset to the base address of vma regions +resulting from mmap allocations for applications run in +compatibility mode on architectures which support tuning address +space randomization. This value will be bounded by the +architecture's minimum and maximum supported values. + +This value can be changed after boot using the +/proc/sys/vm/mmap_rnd_compat_bits tunable + +============================================================== + nr_hugepages Change the minimum size of the hugepage pool. diff --git a/arch/Kconfig b/arch/Kconfig index 05d7a8a458d5..240bda0276b9 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -484,6 +484,74 @@ config HAVE_IRQ_EXIT_ON_IRQ_STACK This spares a stack switch and improves cache usage on softirq processing. +config HAVE_ARCH_MMAP_RND_BITS + bool + help + An arch should select this symbol if it supports setting a variable + number of bits for use in establishing the base address for mmap + allocations, has MMU enabled and provides values for both: + - ARCH_MMAP_RND_BITS_MIN + - ARCH_MMAP_RND_BITS_MAX + +config ARCH_MMAP_RND_BITS_MIN + int + +config ARCH_MMAP_RND_BITS_MAX + int + +config ARCH_MMAP_RND_BITS_DEFAULT + int + +config ARCH_MMAP_RND_BITS + int "Number of bits to use for ASLR of mmap base address" if EXPERT + range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX + default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT + default ARCH_MMAP_RND_BITS_MIN + depends on HAVE_ARCH_MMAP_RND_BITS + help + This value can be used to select the number of bits to use to + determine the random offset to the base address of vma regions + resulting from mmap allocations. This value will be bounded + by the architecture's minimum and maximum supported values. + + This value can be changed after boot using the + /proc/sys/vm/mmap_rnd_bits tunable + +config HAVE_ARCH_MMAP_RND_COMPAT_BITS + bool + help + An arch should select this symbol if it supports running applications + in compatibility mode, supports setting a variable number of bits for + use in establishing the base address for mmap allocations, has MMU + enabled and provides values for both: + - ARCH_MMAP_RND_COMPAT_BITS_MIN + - ARCH_MMAP_RND_COMPAT_BITS_MAX + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + int + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + int + +config ARCH_MMAP_RND_COMPAT_BITS_DEFAULT + int + +config ARCH_MMAP_RND_COMPAT_BITS + int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT + range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX + default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT + default ARCH_MMAP_RND_COMPAT_BITS_MIN + depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS + help + This value can be used to select the number of bits to use to + determine the random offset to the base address of vma regions + resulting from mmap allocations for compatible applications This + value will be bounded by the architecture's minimum and maximum + supported values. + + This value can be changed after boot using the + /proc/sys/vm/mmap_rnd_compat_bits tunable + # # ABI hall of shame # diff --git a/include/linux/mm.h b/include/linux/mm.h index 18a4997dcde7..be19d1f1144d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -48,6 +48,17 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS +extern const int mmap_rnd_bits_min; +extern const int mmap_rnd_bits_max; +extern int mmap_rnd_bits __read_mostly; +#endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS +extern const int mmap_rnd_compat_bits_min; +extern const int mmap_rnd_compat_bits_max; +extern int mmap_rnd_compat_bits __read_mostly; +#endif + #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 78af9692e0a4..a1854266ed85 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1503,6 +1503,28 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS + { + .procname = "mmap_rnd_bits", + .data = &mmap_rnd_bits, + .maxlen = sizeof(mmap_rnd_bits), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&mmap_rnd_bits_min, + .extra2 = (void *)&mmap_rnd_bits_max, + }, +#endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS + { + .procname = "mmap_rnd_compat_bits", + .data = &mmap_rnd_compat_bits, + .maxlen = sizeof(mmap_rnd_compat_bits), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&mmap_rnd_compat_bits_min, + .extra2 = (void *)&mmap_rnd_compat_bits_max, + }, +#endif { } }; diff --git a/mm/mmap.c b/mm/mmap.c index ce1b969c91fe..c7210f0b5ccd 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -57,6 +57,18 @@ #define arch_rebalance_pgtables(addr, len) (addr) #endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS +const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN; +const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX; +int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS; +#endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS +const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN; +const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX; +int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; +#endif + + static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long start, unsigned long end); -- cgit v1.2.3 From 7514b4f5c78b629ac78bed60d2be1c92827145f4 Mon Sep 17 00:00:00 2001 From: dcashman Date: Tue, 29 Dec 2015 15:07:17 -0800 Subject: FROMLIST: arm: mm: support ARCH_MMAP_RND_BITS. (cherry picked from commit https://lkml.org/lkml/2015/12/21/341) arm: arch_mmap_rnd() uses a hard-code value of 8 to generate the random offset for the mmap base address. This value represents a compromise between increased ASLR effectiveness and avoiding address-space fragmentation. Replace it with a Kconfig option, which is sensibly bounded, so that platform developers may choose where to place this compromise. Keep 8 as the minimum acceptable value. Bug: 24047224 Signed-off-by: Daniel Cashman Signed-off-by: Daniel Cashman Change-Id: I0d5988d378bea2d7bf7ac8db90ddd95623a52c68 --- arch/arm/Kconfig | 9 +++++++++ arch/arm/mm/mmap.c | 3 +-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6fc091ca09d5..e9464e91455b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -30,6 +30,7 @@ config ARM select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB + select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT @@ -296,6 +297,14 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. +config ARCH_MMAP_RND_BITS_MIN + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 14 if PAGE_OFFSET=0x40000000 + default 15 if PAGE_OFFSET=0x80000000 + default 16 + # # The "ARM system type" choice list is ordered alphabetically by option # text. Please add new entries in the option alphabetic order. diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 5e85ed371364..655aa70aa101 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -173,10 +173,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; - /* 8 bits of randomness in 20 address space bits */ if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) - random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + random_factor = (get_random_int() & ((1 << mmap_rnd_bits) - 1)) << PAGE_SHIFT; if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; -- cgit v1.2.3 From 527d350d5e91c2b3abc2d49789843601abb8a0eb Mon Sep 17 00:00:00 2001 From: dcashman Date: Tue, 29 Dec 2015 15:28:13 -0800 Subject: FROMLIST: arm64: mm: support ARCH_MMAP_RND_BITS. (cherry picked from commit https://lkml.org/lkml/2015/12/21/340) arm64: arch_mmap_rnd() uses STACK_RND_MASK to generate the random offset for the mmap base address. This value represents a compromise between increased ASLR effectiveness and avoiding address-space fragmentation. Replace it with a Kconfig option, which is sensibly bounded, so that platform developers may choose where to place this compromise. Keep default values as new minimums. Bug: 24047224 Signed-off-by: Daniel Cashman Signed-off-by: Daniel Cashman Change-Id: Ie94f0068927861637414348077c7f33057bbbeb7 --- arch/arm64/Kconfig | 29 +++++++++++++++++++++++++++++ arch/arm64/mm/mmap.c | 21 +++++++++------------ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ce6ecfa7703b..f608579ec258 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -37,6 +37,8 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB + select HAVE_ARCH_MMAP_RND_BITS + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT @@ -86,6 +88,33 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y +config ARCH_MMAP_RND_BITS_MIN + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 + +# max bits determined by the following formula: +# VA_BITS - PAGE_SHIFT - 3 +config ARCH_MMAP_RND_BITS_MAX + default 19 if ARM64_VA_BITS=36 + default 24 if ARM64_VA_BITS=39 + default 27 if ARM64_VA_BITS=42 + default 30 if ARM64_VA_BITS=47 + default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES + default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES + default 33 if ARM64_VA_BITS=48 + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 7 if ARM64_64K_PAGES + default 9 if ARM64_16K_PAGES + default 11 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + config NO_IOPORT_MAP def_bool y if !PCI diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 1d73662f00ff..a822f7d670e2 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -47,22 +47,19 @@ static int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -/* - * Since get_random_int() returns the same value within a 1 jiffy window, we - * will almost always get the same randomisation for the stack and mmap - * region. This will mean the relative distance between stack and mmap will be - * the same. - * - * To avoid this we can shift the randomness by 1 bit. - */ static unsigned long mmap_rnd(void) { unsigned long rnd = 0; - if (current->flags & PF_RANDOMIZE) - rnd = (long)get_random_int() & (STACK_RND_MASK >> 1); - - return rnd << (PAGE_SHIFT + 1); + if (current->flags & PF_RANDOMIZE) { +#ifdef CONFIG_COMPAT + if (test_thread_flag(TIF_32BIT)) + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); + else +#endif + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + } + return rnd << PAGE_SHIFT; } static unsigned long mmap_base(void) -- cgit v1.2.3 From 3b7fb0ed4f8246acc0208cebbb08391f8ea7133d Mon Sep 17 00:00:00 2001 From: dcashman Date: Tue, 29 Dec 2015 15:44:12 -0800 Subject: FROMLIST: x86: mm: support ARCH_MMAP_RND_BITS. (cherry picked from commit https://lkml.org/lkml/2015/12/21/339) x86: arch_mmap_rnd() uses hard-coded values, 8 for 32-bit and 28 for 64-bit, to generate the random offset for the mmap base address. This value represents a compromise between increased ASLR effectiveness and avoiding address-space fragmentation. Replace it with a Kconfig option, which is sensibly bounded, so that platform developers may choose where to place this compromise. Keep default values as new minimums. Bug: 24047224 Signed-off-by: Daniel Cashman Signed-off-by: Daniel Cashman Change-Id: I0b33964ce9d8b43c2d82d11bb9f2dbbc007e4250 --- arch/x86/Kconfig | 16 ++++++++++++++++ arch/x86/mm/mmap.c | 12 ++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 41a503c15862..d6bd602c2738 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -103,6 +103,8 @@ config X86 select DCACHE_WORD_ACCESS select GENERIC_SMP_IDLE_THREAD select ARCH_WANT_IPC_PARSE_VERSION if X86_32 + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_SECCOMP_FILTER select BUILDTIME_EXTABLE_SORT select GENERIC_CMOS_UPDATE @@ -168,6 +170,20 @@ config HAVE_LATENCYTOP_SUPPORT config MMU def_bool y +config ARCH_MMAP_RND_BITS_MIN + default 28 if 64BIT + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 32 if 64BIT + default 16 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 8 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + config SBUS bool diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 919b91205cd4..7b9e5663dd21 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -69,15 +69,15 @@ static unsigned long mmap_rnd(void) { unsigned long rnd = 0; - /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ if (current->flags & PF_RANDOMIZE) { if (mmap_is_ia32()) - rnd = get_random_int() % (1<<8); +#ifdef CONFIG_COMPAT + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); +#else + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); +#endif else - rnd = get_random_int() % (1<<28); + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); } return rnd << PAGE_SHIFT; } -- cgit v1.2.3 From 44b0d57d9139eea55d782a9b244f20ec20e558f0 Mon Sep 17 00:00:00 2001 From: "liping.zhang" Date: Mon, 11 Jan 2016 13:31:01 +0800 Subject: xt_qtaguid: fix a race condition in if_tag_stat_update Miss a lock protection in if_tag_stat_update while doing get_iface_entry. So if one CPU is doing iface_stat_create while another CPU is doing if_tag_stat_update, race will happened. Change-Id: Ib8d98e542f4e385685499f5b7bb7354f08654a75 Signed-off-by: Liping Zhang --- net/netfilter/xt_qtaguid.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 2f9784c1e692..ea2dd21e3ada 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1290,11 +1290,12 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", ifname, uid, sk, direction, proto, bytes); - + spin_lock_bh(&iface_stat_list_lock); iface_entry = get_iface_entry(ifname); if (!iface_entry) { pr_err_ratelimited("qtaguid: iface_stat: stat_update() " "%s not found\n", ifname); + spin_unlock_bh(&iface_stat_list_lock); return; } /* It is ok to process data when an iface_entry is inactive */ @@ -1330,8 +1331,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, * {0, uid_tag} will also get updated. */ tag_stat_update(tag_stat_entry, direction, proto, bytes); - spin_unlock_bh(&iface_entry->tag_stat_list_lock); - return; + goto unlock; } /* Loop over tag list under this interface for {0,uid_tag} */ @@ -1371,6 +1371,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, tag_stat_update(new_tag_stat, direction, proto, bytes); unlock: spin_unlock_bh(&iface_entry->tag_stat_list_lock); + spin_unlock_bh(&iface_stat_list_lock); } static int iface_netdev_event_handler(struct notifier_block *nb, -- cgit v1.2.3 From 7e87edc31454d7bff1dd35fe0cba2117f59bcb1c Mon Sep 17 00:00:00 2001 From: Aniroop Mathur Date: Wed, 3 Dec 2014 14:27:42 -0800 Subject: Input: initialize device counter variables with -1 Let's initialize atomic_t variables keeping track of number of various devices created so far with -1 in order to avoid extra subtraction operation. Change-Id: I58bfb094ec24e4b4cab48b3318852a27a227dc9a Signed-off-by: Aniroop Mathur Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/gameport/gameport.c | 4 ++-- drivers/input/joystick/xpad.c | 8 ++++---- drivers/input/misc/ims-pcu.c | 4 ++-- drivers/input/serio/serio.c | 4 ++-- drivers/input/serio/serio_raw.c | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index e29c04e2aff4..e853a2134680 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -527,14 +527,14 @@ EXPORT_SYMBOL(gameport_set_phys); */ static void gameport_init_port(struct gameport *gameport) { - static atomic_t gameport_no = ATOMIC_INIT(0); + static atomic_t gameport_no = ATOMIC_INIT(-1); __module_get(THIS_MODULE); mutex_init(&gameport->drv_mutex); device_initialize(&gameport->dev); dev_set_name(&gameport->dev, "gameport%lu", - (unsigned long)atomic_inc_return(&gameport_no) - 1); + (unsigned long)atomic_inc_return(&gameport_no)); gameport->dev.bus = &gameport_bus; gameport->dev.release = gameport_release_port; if (gameport->parent) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index fc55f0d15b70..3aa2f3f3da5b 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -886,8 +886,8 @@ static void xpad_led_set(struct led_classdev *led_cdev, static int xpad_led_probe(struct usb_xpad *xpad) { - static atomic_t led_seq = ATOMIC_INIT(0); - long led_no; + static atomic_t led_seq = ATOMIC_INIT(-1); + unsigned long led_no; struct xpad_led *led; struct led_classdev *led_cdev; int error; @@ -899,9 +899,9 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - led_no = (long)atomic_inc_return(&led_seq) - 1; + led_no = atomic_inc_return(&led_seq); - snprintf(led->name, sizeof(led->name), "xpad%ld", led_no); + snprintf(led->name, sizeof(led->name), "xpad%lu", led_no); led->xpad = xpad; led_cdev = &led->led_cdev; diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index afed8e2b2f94..ac1fa5f44580 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1851,7 +1851,7 @@ static int ims_pcu_identify_type(struct ims_pcu *pcu, u8 *device_id) static int ims_pcu_init_application_mode(struct ims_pcu *pcu) { - static atomic_t device_no = ATOMIC_INIT(0); + static atomic_t device_no = ATOMIC_INIT(-1); const struct ims_pcu_device_info *info; int error; @@ -1882,7 +1882,7 @@ static int ims_pcu_init_application_mode(struct ims_pcu *pcu) } /* Device appears to be operable, complete initialization */ - pcu->device_no = atomic_inc_return(&device_no) - 1; + pcu->device_no = atomic_inc_return(&device_no); /* * PCU-B devices, both GEN_1 and GEN_2 do not have OFN sensor diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index d399b8b0f000..a05a5179da32 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -514,7 +514,7 @@ static void serio_release_port(struct device *dev) */ static void serio_init_port(struct serio *serio) { - static atomic_t serio_no = ATOMIC_INIT(0); + static atomic_t serio_no = ATOMIC_INIT(-1); __module_get(THIS_MODULE); @@ -525,7 +525,7 @@ static void serio_init_port(struct serio *serio) mutex_init(&serio->drv_mutex); device_initialize(&serio->dev); dev_set_name(&serio->dev, "serio%lu", - (unsigned long)atomic_inc_return(&serio_no) - 1); + (unsigned long)atomic_inc_return(&serio_no)); serio->dev.bus = &serio_bus; serio->dev.release = serio_release_port; serio->dev.groups = serio_device_attr_groups; diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c index c9a02fe57576..71ef5d65a0c6 100644 --- a/drivers/input/serio/serio_raw.c +++ b/drivers/input/serio/serio_raw.c @@ -292,7 +292,7 @@ static irqreturn_t serio_raw_interrupt(struct serio *serio, unsigned char data, static int serio_raw_connect(struct serio *serio, struct serio_driver *drv) { - static atomic_t serio_raw_no = ATOMIC_INIT(0); + static atomic_t serio_raw_no = ATOMIC_INIT(-1); struct serio_raw *serio_raw; int err; @@ -303,7 +303,7 @@ static int serio_raw_connect(struct serio *serio, struct serio_driver *drv) } snprintf(serio_raw->name, sizeof(serio_raw->name), - "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no) - 1); + "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no)); kref_init(&serio_raw->kref); INIT_LIST_HEAD(&serio_raw->client_list); init_waitqueue_head(&serio_raw->wait); -- cgit v1.2.3 From d8c67c677782ca2cc0f901fa4262146d2a04c684 Mon Sep 17 00:00:00 2001 From: Ming-ting Yao Wei Date: Tue, 14 Apr 2015 16:59:11 -0700 Subject: Input: xpad - add rumble support for Xbox One controller This adds rumble support for Xbox One controller by sending continuous rumble command. Trigger button rumbling is not yet implemented. Signed-off-by: Ming-ting Yao Wei Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 3aa2f3f3da5b..de342859e953 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -31,12 +31,14 @@ * - the iForce driver drivers/char/joystick/iforce.c * - the skeleton-driver drivers/usb/usb-skeleton.c * - Xbox 360 information http://www.free60.org/wiki/Gamepad + * - Xbox One information https://github.com/quantus/xbox-one-controller-protocol * * Thanks to: * - ITO Takayuki for providing essential xpad information on his website * - Vojtech Pavlik - iforce driver / input subsystem * - Greg Kroah-Hartman - usb-skeleton driver * - XBOX Linux project - extra USB id's + * - Pekka Pöyry (quantus) - Xbox One controller reverse engineering * * TODO: * - fine tune axes (especially trigger axes) @@ -828,6 +830,23 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); + case XTYPE_XBOXONE: + xpad->odata[0] = 0x09; /* activate rumble */ + xpad->odata[1] = 0x08; + xpad->odata[2] = 0x00; + xpad->odata[3] = 0x08; /* continuous effect */ + xpad->odata[4] = 0x00; /* simple rumble mode */ + xpad->odata[5] = 0x03; /* L and R actuator only */ + xpad->odata[6] = 0x00; /* TODO: LT actuator */ + xpad->odata[7] = 0x00; /* TODO: RT actuator */ + xpad->odata[8] = strong / 256; /* left actuator */ + xpad->odata[9] = weak / 256; /* right actuator */ + xpad->odata[10] = 0x80; /* length of pulse */ + xpad->odata[11] = 0x00; /* stop period of pulse */ + xpad->irq_out->transfer_buffer_length = 12; + + return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); + default: dev_dbg(&xpad->dev->dev, "%s - rumble command sent to unsupported xpad type: %d\n", -- cgit v1.2.3 From 1cd6f8b0d30f6514b644b664fd7608523120b8c5 Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. Griffais" Date: Mon, 22 Jun 2015 14:10:36 -0700 Subject: Input: xpad - set the LEDs properly on XBox Wireless controllers Based on Patch by Pierre-Loup A. Griffais : Add the logic to set the LEDs on XBox Wireless controllers. Command sequence found by sniffing the Windows data stream when plugging the device in. Updated based on comments on linux-input: unify codepaths in xpad_send_led_command for wired/ wireless controller. Also document command values for clarification. All values tested on Xbox 360 Wireless Controller. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 50 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index de342859e953..7be285a47330 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -881,17 +881,57 @@ struct xpad_led { struct usb_xpad *xpad; }; +/** + * @param command + * 0: off + * 1: all blink, then previous setting + * 2: 1/top-left blink, then on + * 3: 2/top-right blink, then on + * 4: 3/bottom-left blink, then on + * 5: 4/bottom-right blink, then on + * 6: 1/top-left on + * 7: 2/top-right on + * 8: 3/bottom-left on + * 9: 4/bottom-right on + * 10: rotate + * 11: blink, based on previous setting + * 12: slow blink, based on previous setting + * 13: rotate with two lights + * 14: persistent slow all blink + * 15: blink once, then previous setting + */ static void xpad_send_led_command(struct usb_xpad *xpad, int command) { - if (command >= 0 && command < 14) { - mutex_lock(&xpad->odata_mutex); + command %= 16; + + mutex_lock(&xpad->odata_mutex); + + switch (xpad->xtype) { + case XTYPE_XBOX360: xpad->odata[0] = 0x01; xpad->odata[1] = 0x03; xpad->odata[2] = command; xpad->irq_out->transfer_buffer_length = 3; - usb_submit_urb(xpad->irq_out, GFP_KERNEL); - mutex_unlock(&xpad->odata_mutex); + break; + case XTYPE_XBOX360W: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x00; + xpad->odata[2] = 0x08; + xpad->odata[3] = 0x40 + command; + xpad->odata[4] = 0x00; + xpad->odata[5] = 0x00; + xpad->odata[6] = 0x00; + xpad->odata[7] = 0x00; + xpad->odata[8] = 0x00; + xpad->odata[9] = 0x00; + xpad->odata[10] = 0x00; + xpad->odata[11] = 0x00; + xpad->irq_out->transfer_buffer_length = 12; + break; } + + usb_submit_urb(xpad->irq_out, GFP_KERNEL); + mutex_unlock(&xpad->odata_mutex); } static void xpad_led_set(struct led_classdev *led_cdev, @@ -911,7 +951,7 @@ static int xpad_led_probe(struct usb_xpad *xpad) struct led_classdev *led_cdev; int error; - if (xpad->xtype != XTYPE_XBOX360) + if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX360W) return 0; xpad->led = led = kzalloc(sizeof(struct xpad_led), GFP_KERNEL); -- cgit v1.2.3 From 5d50ea8b4530baacd4bb63c7c2d9183f9e90de52 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Mon, 22 Jun 2015 14:11:30 -0700 Subject: Input: xpad - re-send LED command on present event The controller only receives commands when its present. So for the correct LED to be lit the LED command has to be sent on the present event. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 7be285a47330..75a2f9d46957 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -344,6 +344,7 @@ struct usb_xpad { int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ + unsigned long led_no; /* led to lit on xbox360 controllers */ }; /* @@ -488,6 +489,8 @@ static void xpad360_process_packet(struct usb_xpad *xpad, input_sync(dev); } +static void xpad_identify_controller(struct usb_xpad *xpad); + /* * xpad360w_process_packet * @@ -510,6 +513,11 @@ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned cha if (data[1] & 0x80) { xpad->pad_present = 1; usb_submit_urb(xpad->bulk_out, GFP_ATOMIC); + /* + * Light up the segment corresponding to + * controller number. + */ + xpad_identify_controller(xpad); } else xpad->pad_present = 0; } @@ -934,6 +942,12 @@ static void xpad_send_led_command(struct usb_xpad *xpad, int command) mutex_unlock(&xpad->odata_mutex); } +static void xpad_identify_controller(struct usb_xpad *xpad) +{ + /* Light up the segment corresponding to controller number */ + xpad_send_led_command(xpad, (xpad->led_no % 4) + 2); +} + static void xpad_led_set(struct led_classdev *led_cdev, enum led_brightness value) { @@ -945,8 +959,7 @@ static void xpad_led_set(struct led_classdev *led_cdev, static int xpad_led_probe(struct usb_xpad *xpad) { - static atomic_t led_seq = ATOMIC_INIT(-1); - unsigned long led_no; + static atomic_t led_seq = ATOMIC_INIT(-1); struct xpad_led *led; struct led_classdev *led_cdev; int error; @@ -958,9 +971,9 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - led_no = atomic_inc_return(&led_seq); + xpad->led_no = atomic_inc_return(&led_seq); - snprintf(led->name, sizeof(led->name), "xpad%lu", led_no); + snprintf(led->name, sizeof(led->name), "xpad%lu", xpad->led_no); led->xpad = xpad; led_cdev = &led->led_cdev; @@ -974,10 +987,8 @@ static int xpad_led_probe(struct usb_xpad *xpad) return error; } - /* - * Light up the segment corresponding to controller number - */ - xpad_send_led_command(xpad, (led_no % 4) + 2); + /* Light up the segment corresponding to controller number */ + xpad_identify_controller(xpad); return 0; } @@ -994,6 +1005,7 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) #else static int xpad_led_probe(struct usb_xpad *xpad) { return 0; } static void xpad_led_disconnect(struct usb_xpad *xpad) { } +static void xpad_identify_controller(struct usb_xpad *xpad) { } #endif -- cgit v1.2.3 From 656fffee3d825be810a4135ae2b525d5eb213cf4 Mon Sep 17 00:00:00 2001 From: Erik Lundgren Date: Tue, 6 Oct 2015 17:04:11 -0700 Subject: Input: xpad - add Covert Forces edition of the Xbox One controller It is identical to the Xbox One controller but has a different product ID. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 75a2f9d46957..3722b0da8e13 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -125,6 +125,7 @@ static const struct xpad_device { { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE }, + { 0x045e, 0x02dd, "Microsoft X-Box One pad (Covert Forces)", 0, XTYPE_XBOXONE }, { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, -- cgit v1.2.3 From 08414cfc5d6bb807bc831a187309ed39e887f810 Mon Sep 17 00:00:00 2001 From: Dario Scarpa Date: Tue, 6 Oct 2015 17:04:36 -0700 Subject: Input: xpad - fix Razer Atrox Arcade Stick button mapping The "Razer Atrox Arcade Stick" features 10 buttons, and two of them (LT/RT) don't work properly. Change its definition in xpad_device[] (mapping field) to fix. Signed-off-by: Dario Scarpa Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 3722b0da8e13..698325c1a08f 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -205,7 +205,7 @@ static const struct xpad_device { { 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 }, - { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 }, -- cgit v1.2.3 From 8889243baad4364260be6a2d5bba96c2d07f206c Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Tue, 6 Oct 2015 17:06:16 -0700 Subject: Input: xpad - clarify LED enumeration Rename led_no -> pad_nr: the number stored there is not the LED number - it gets translated later on to a LED number in xpad_identify_controller; Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 698325c1a08f..12b1d66fa710 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -345,7 +345,7 @@ struct usb_xpad { int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ - unsigned long led_no; /* led to lit on xbox360 controllers */ + unsigned long pad_nr; /* the order x360 pads were attached */ }; /* @@ -357,7 +357,6 @@ struct usb_xpad { * The used report descriptor was taken from ITO Takayukis website: * http://euc.jp/periphs/xbox-controller.ja.html */ - static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; @@ -506,7 +505,6 @@ static void xpad_identify_controller(struct usb_xpad *xpad); * 01.1 - Pad state (Bytes 4+) valid * */ - static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { /* Presence change */ @@ -891,6 +889,7 @@ struct xpad_led { }; /** + * set the LEDs on Xbox360 / Wireless Controllers * @param command * 0: off * 1: all blink, then previous setting @@ -943,10 +942,13 @@ static void xpad_send_led_command(struct usb_xpad *xpad, int command) mutex_unlock(&xpad->odata_mutex); } +/* + * Light up the segment corresponding to the pad number on + * Xbox 360 Controllers. + */ static void xpad_identify_controller(struct usb_xpad *xpad) { - /* Light up the segment corresponding to controller number */ - xpad_send_led_command(xpad, (xpad->led_no % 4) + 2); + xpad_send_led_command(xpad, (xpad->pad_nr % 4) + 2); } static void xpad_led_set(struct led_classdev *led_cdev, @@ -972,9 +974,9 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - xpad->led_no = atomic_inc_return(&led_seq); + xpad->pad_nr = atomic_inc_return(&led_seq); - snprintf(led->name, sizeof(led->name), "xpad%lu", xpad->led_no); + snprintf(led->name, sizeof(led->name), "xpad%lu", xpad->pad_nr); led->xpad = xpad; led_cdev = &led->led_cdev; -- cgit v1.2.3 From b0ad2b640ae71bf632f47332dbc3ab2552c1ec37 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Sat, 10 Oct 2015 10:00:49 -0700 Subject: Input: xpad - use ida() for finding the pad_nr The pad_nr corresponds to the lit up LED on the controller. Therefore there should be no gaps when enumerating. Currently a LED is only re-assigned after a controller is re-connected 4 times. This patch uses ida to track connected pads - this way we can re-assign freed up pad number immediately. Consider the following case: 1. pad A is connected and gets pad_nr = 0 2. pad B is connected and gets pad_nr = 1 3. pad A is disconnected 4. pad A is connected again using ida_simple_get() controller A now correctly gets pad_nr = 0 again. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 12b1d66fa710..3110a39ab0d6 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -243,7 +243,6 @@ static const signed short xpad_btn_triggers[] = { -1 }; - static const signed short xpad360_btn[] = { /* buttons for x360 controller */ BTN_TL, BTN_TR, /* Button LB/RB */ BTN_MODE, /* The big X button */ @@ -345,7 +344,7 @@ struct usb_xpad { int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ - unsigned long pad_nr; /* the order x360 pads were attached */ + int pad_nr; /* the order x360 pads were attached */ }; /* @@ -881,6 +880,9 @@ static int xpad_init_ff(struct usb_xpad *xpad) { return 0; } #if defined(CONFIG_JOYSTICK_XPAD_LEDS) #include +#include + +static DEFINE_IDA(xpad_pad_seq); struct xpad_led { char name[16]; @@ -962,7 +964,6 @@ static void xpad_led_set(struct led_classdev *led_cdev, static int xpad_led_probe(struct usb_xpad *xpad) { - static atomic_t led_seq = ATOMIC_INIT(-1); struct xpad_led *led; struct led_classdev *led_cdev; int error; @@ -974,9 +975,13 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - xpad->pad_nr = atomic_inc_return(&led_seq); + xpad->pad_nr = ida_simple_get(&xpad_pad_seq, 0, 0, GFP_KERNEL); + if (xpad->pad_nr < 0) { + error = xpad->pad_nr; + goto err_free_mem; + } - snprintf(led->name, sizeof(led->name), "xpad%lu", xpad->pad_nr); + snprintf(led->name, sizeof(led->name), "xpad%d", xpad->pad_nr); led->xpad = xpad; led_cdev = &led->led_cdev; @@ -984,16 +989,19 @@ static int xpad_led_probe(struct usb_xpad *xpad) led_cdev->brightness_set = xpad_led_set; error = led_classdev_register(&xpad->udev->dev, led_cdev); - if (error) { - kfree(led); - xpad->led = NULL; - return error; - } + if (error) + goto err_free_id; /* Light up the segment corresponding to controller number */ xpad_identify_controller(xpad); - return 0; + +err_free_id: + ida_simple_remove(&xpad_pad_seq, xpad->pad_nr); +err_free_mem: + kfree(led); + xpad->led = NULL; + return error; } static void xpad_led_disconnect(struct usb_xpad *xpad) @@ -1002,6 +1010,7 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) if (xpad_led) { led_classdev_unregister(&xpad_led->led_cdev); + ida_simple_remove(&xpad_pad_seq, xpad->pad_nr); kfree(xpad_led); } } @@ -1011,7 +1020,6 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) { } static void xpad_identify_controller(struct usb_xpad *xpad) { } #endif - static int xpad_open(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); -- cgit v1.2.3 From f28e1165f08e56d74e20bc34145d922964e5da38 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Tue, 6 Oct 2015 17:07:31 -0700 Subject: Input: xpad - remove needless bulk out URB used for LED setup This code was probably wrong ever since and is redundant with xpad_send_led_command. Both try to send a similar command to the xbox360 controller. However xpad_send_led_command correctly uses the pad_nr instead of bInterfaceNumber to select the led and re-uses the irq_out URB instead of creating a new one. Note that this change only affects the two supported wireless controllers. Tested using the xbox360 wireless controller (PC). Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 79 +------------------------------------------ 1 file changed, 1 insertion(+), 78 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 3110a39ab0d6..ce83f976854d 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -328,9 +328,6 @@ struct usb_xpad { unsigned char *idata; /* input data */ dma_addr_t idata_dma; - struct urb *bulk_out; - unsigned char *bdata; - struct urb *irq_out; /* urb for interrupt out report */ unsigned char *odata; /* output data */ dma_addr_t odata_dma; @@ -510,7 +507,6 @@ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned cha if (data[0] & 0x08) { if (data[1] & 0x80) { xpad->pad_present = 1; - usb_submit_urb(xpad->bulk_out, GFP_ATOMIC); /* * Light up the segment corresponding to * controller number. @@ -672,28 +668,6 @@ exit: __func__, retval); } -static void xpad_bulk_out(struct urb *urb) -{ - struct usb_xpad *xpad = urb->context; - struct device *dev = &xpad->intf->dev; - - switch (urb->status) { - case 0: - /* success */ - break; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - /* this urb is terminated, clean up */ - dev_dbg(dev, "%s - urb shutting down with status: %d\n", - __func__, urb->status); - break; - default: - dev_dbg(dev, "%s - nonzero urb status received: %d\n", - __func__, urb->status); - } -} - static void xpad_irq_out(struct urb *urb) { struct usb_xpad *xpad = urb->context; @@ -1229,52 +1203,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id usb_set_intfdata(intf, xpad); if (xpad->xtype == XTYPE_XBOX360W) { - /* - * Setup the message to set the LEDs on the - * controller when it shows up - */ - xpad->bulk_out = usb_alloc_urb(0, GFP_KERNEL); - if (!xpad->bulk_out) { - error = -ENOMEM; - goto fail7; - } - - xpad->bdata = kzalloc(XPAD_PKT_LEN, GFP_KERNEL); - if (!xpad->bdata) { - error = -ENOMEM; - goto fail8; - } - - xpad->bdata[2] = 0x08; - switch (intf->cur_altsetting->desc.bInterfaceNumber) { - case 0: - xpad->bdata[3] = 0x42; - break; - case 2: - xpad->bdata[3] = 0x43; - break; - case 4: - xpad->bdata[3] = 0x44; - break; - case 6: - xpad->bdata[3] = 0x45; - } - - ep_irq_in = &intf->cur_altsetting->endpoint[1].desc; - if (usb_endpoint_is_bulk_out(ep_irq_in)) { - usb_fill_bulk_urb(xpad->bulk_out, udev, - usb_sndbulkpipe(udev, - ep_irq_in->bEndpointAddress), - xpad->bdata, XPAD_PKT_LEN, - xpad_bulk_out, xpad); - } else { - usb_fill_int_urb(xpad->bulk_out, udev, - usb_sndintpipe(udev, - ep_irq_in->bEndpointAddress), - xpad->bdata, XPAD_PKT_LEN, - xpad_bulk_out, xpad, 0); - } - /* * Submit the int URB immediately rather than waiting for open * because we get status messages from the device whether @@ -1285,13 +1213,11 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id xpad->irq_in->dev = xpad->udev; error = usb_submit_urb(xpad->irq_in, GFP_KERNEL); if (error) - goto fail9; + goto fail7; } return 0; - fail9: kfree(xpad->bdata); - fail8: usb_free_urb(xpad->bulk_out); fail7: input_unregister_device(input_dev); input_dev = NULL; fail6: xpad_led_disconnect(xpad); @@ -1315,8 +1241,6 @@ static void xpad_disconnect(struct usb_interface *intf) xpad_deinit_output(xpad); if (xpad->xtype == XTYPE_XBOX360W) { - usb_kill_urb(xpad->bulk_out); - usb_free_urb(xpad->bulk_out); usb_kill_urb(xpad->irq_in); } @@ -1324,7 +1248,6 @@ static void xpad_disconnect(struct usb_interface *intf) usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); - kfree(xpad->bdata); kfree(xpad); usb_set_intfdata(intf, NULL); -- cgit v1.2.3 From 5d0f1c1bec2d503052bcdf0a2243cf18a75779da Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Sat, 10 Oct 2015 09:32:55 -0700 Subject: Input: xpad - factor out URB submission in xpad_play_effect Move submission logic to a single point at the end of the function. This makes it easy to add locking/ queuing code later on. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 140 +++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 71 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index ce83f976854d..bca8c320f3ee 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -762,80 +762,78 @@ static void xpad_deinit_output(struct usb_xpad *xpad) static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct usb_xpad *xpad = input_get_drvdata(dev); + __u16 strong; + __u16 weak; - if (effect->type == FF_RUMBLE) { - __u16 strong = effect->u.rumble.strong_magnitude; - __u16 weak = effect->u.rumble.weak_magnitude; - - switch (xpad->xtype) { - - case XTYPE_XBOX: - xpad->odata[0] = 0x00; - xpad->odata[1] = 0x06; - xpad->odata[2] = 0x00; - xpad->odata[3] = strong / 256; /* left actuator */ - xpad->odata[4] = 0x00; - xpad->odata[5] = weak / 256; /* right actuator */ - xpad->irq_out->transfer_buffer_length = 6; - - return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); - - case XTYPE_XBOX360: - xpad->odata[0] = 0x00; - xpad->odata[1] = 0x08; - xpad->odata[2] = 0x00; - xpad->odata[3] = strong / 256; /* left actuator? */ - xpad->odata[4] = weak / 256; /* right actuator? */ - xpad->odata[5] = 0x00; - xpad->odata[6] = 0x00; - xpad->odata[7] = 0x00; - xpad->irq_out->transfer_buffer_length = 8; - - return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); - - case XTYPE_XBOX360W: - xpad->odata[0] = 0x00; - xpad->odata[1] = 0x01; - xpad->odata[2] = 0x0F; - xpad->odata[3] = 0xC0; - xpad->odata[4] = 0x00; - xpad->odata[5] = strong / 256; - xpad->odata[6] = weak / 256; - xpad->odata[7] = 0x00; - xpad->odata[8] = 0x00; - xpad->odata[9] = 0x00; - xpad->odata[10] = 0x00; - xpad->odata[11] = 0x00; - xpad->irq_out->transfer_buffer_length = 12; - - return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); - - case XTYPE_XBOXONE: - xpad->odata[0] = 0x09; /* activate rumble */ - xpad->odata[1] = 0x08; - xpad->odata[2] = 0x00; - xpad->odata[3] = 0x08; /* continuous effect */ - xpad->odata[4] = 0x00; /* simple rumble mode */ - xpad->odata[5] = 0x03; /* L and R actuator only */ - xpad->odata[6] = 0x00; /* TODO: LT actuator */ - xpad->odata[7] = 0x00; /* TODO: RT actuator */ - xpad->odata[8] = strong / 256; /* left actuator */ - xpad->odata[9] = weak / 256; /* right actuator */ - xpad->odata[10] = 0x80; /* length of pulse */ - xpad->odata[11] = 0x00; /* stop period of pulse */ - xpad->irq_out->transfer_buffer_length = 12; - - return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); - - default: - dev_dbg(&xpad->dev->dev, - "%s - rumble command sent to unsupported xpad type: %d\n", - __func__, xpad->xtype); - return -1; - } + if (effect->type != FF_RUMBLE) + return 0; + + strong = effect->u.rumble.strong_magnitude; + weak = effect->u.rumble.weak_magnitude; + + switch (xpad->xtype) { + case XTYPE_XBOX: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x06; + xpad->odata[2] = 0x00; + xpad->odata[3] = strong / 256; /* left actuator */ + xpad->odata[4] = 0x00; + xpad->odata[5] = weak / 256; /* right actuator */ + xpad->irq_out->transfer_buffer_length = 6; + break; + + case XTYPE_XBOX360: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x08; + xpad->odata[2] = 0x00; + xpad->odata[3] = strong / 256; /* left actuator? */ + xpad->odata[4] = weak / 256; /* right actuator? */ + xpad->odata[5] = 0x00; + xpad->odata[6] = 0x00; + xpad->odata[7] = 0x00; + xpad->irq_out->transfer_buffer_length = 8; + break; + + case XTYPE_XBOX360W: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x01; + xpad->odata[2] = 0x0F; + xpad->odata[3] = 0xC0; + xpad->odata[4] = 0x00; + xpad->odata[5] = strong / 256; + xpad->odata[6] = weak / 256; + xpad->odata[7] = 0x00; + xpad->odata[8] = 0x00; + xpad->odata[9] = 0x00; + xpad->odata[10] = 0x00; + xpad->odata[11] = 0x00; + xpad->irq_out->transfer_buffer_length = 12; + break; + + case XTYPE_XBOXONE: + xpad->odata[0] = 0x09; /* activate rumble */ + xpad->odata[1] = 0x08; + xpad->odata[2] = 0x00; + xpad->odata[3] = 0x08; /* continuous effect */ + xpad->odata[4] = 0x00; /* simple rumble mode */ + xpad->odata[5] = 0x03; /* L and R actuator only */ + xpad->odata[6] = 0x00; /* TODO: LT actuator */ + xpad->odata[7] = 0x00; /* TODO: RT actuator */ + xpad->odata[8] = strong / 256; /* left actuator */ + xpad->odata[9] = weak / 256; /* right actuator */ + xpad->odata[10] = 0x80; /* length of pulse */ + xpad->odata[11] = 0x00; /* stop period of pulse */ + xpad->irq_out->transfer_buffer_length = 12; + break; + + default: + dev_dbg(&xpad->dev->dev, + "%s - rumble command sent to unsupported xpad type: %d\n", + __func__, xpad->xtype); + return -EINVAL; } - return 0; + return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); } static int xpad_init_ff(struct usb_xpad *xpad) -- cgit v1.2.3 From 14cd0e39c47464d01c42db8a4f42c6d3ad9b57a6 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Sat, 10 Oct 2015 09:33:52 -0700 Subject: Input: xpad - x360w: report dpad as buttons and axes as discussed here[0], x360w is the only pad that maps dpad_to_button. This is bad for downstream developers as they have to differ between x360 and x360w which is not intuitive. This patch implements the suggested solution of exposing the dpad both as axes and as buttons. This retains backward compatibility with software already dealing with the difference while makes new software work as expected across x360/ x360w pads. [0] http://www.spinics.net/lists/linux-input/msg34421.html Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index bca8c320f3ee..de8f420e8181 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -435,7 +435,16 @@ static void xpad360_process_packet(struct usb_xpad *xpad, input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & 0x08); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & 0x01); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & 0x02); - } else { + } + + /* + * This should be a simple else block. However historically + * xbox360w has mapped DPAD to buttons while xbox360 did not. This + * made no sense, but now we can not just switch back and have to + * support both behaviors. + */ + if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || + xpad->xtype == XTYPE_XBOX360W) { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, @@ -1158,7 +1167,16 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { for (i = 0; xpad_btn_pad[i] >= 0; i++) __set_bit(xpad_btn_pad[i], input_dev->keybit); - } else { + } + + /* + * This should be a simple else block. However historically + * xbox360w has mapped DPAD to buttons while xbox360 did not. This + * made no sense, but now we can not just switch back and have to + * support both behaviors. + */ + if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || + xpad->xtype == XTYPE_XBOX360W) { for (i = 0; xpad_abs_pad[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_pad[i]); } -- cgit v1.2.3 From 15255d3cf130d1cd96221d489abaf53eca60b7eb Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. Griffais" Date: Sat, 10 Oct 2015 09:34:17 -0700 Subject: Input: xpad - move the input device creation to a new function To allow us to later create / destroy the input device from the urb callback, we need to initialize/ deinitialize the input device from a separate function. So pull that logic out now to make later patches more "obvious" as to what they do. Signed-off-by: "Pierre-Loup A. Griffais" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 211 ++++++++++++++++++++++++------------------ 1 file changed, 119 insertions(+), 92 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index de8f420e8181..0524d197df13 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -342,6 +342,7 @@ struct usb_xpad { int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ int pad_nr; /* the order x360 pads were attached */ + const char *name; /* name of the device */ }; /* @@ -1060,91 +1061,36 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) } } -static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) +static void xpad_deinit_input(struct usb_xpad *xpad) +{ + xpad_led_disconnect(xpad); + input_unregister_device(xpad->dev); +} + +static int xpad_init_input(struct usb_xpad *xpad) { - struct usb_device *udev = interface_to_usbdev(intf); - struct usb_xpad *xpad; struct input_dev *input_dev; - struct usb_endpoint_descriptor *ep_irq_in; - int ep_irq_in_idx; int i, error; - for (i = 0; xpad_device[i].idVendor; i++) { - if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && - (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) - break; - } - - if (xpad_device[i].xtype == XTYPE_XBOXONE && - intf->cur_altsetting->desc.bInterfaceNumber != 0) { - /* - * The Xbox One controller lists three interfaces all with the - * same interface class, subclass and protocol. Differentiate by - * interface number. - */ - return -ENODEV; - } - - xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL); input_dev = input_allocate_device(); - if (!xpad || !input_dev) { - error = -ENOMEM; - goto fail1; - } - - xpad->idata = usb_alloc_coherent(udev, XPAD_PKT_LEN, - GFP_KERNEL, &xpad->idata_dma); - if (!xpad->idata) { - error = -ENOMEM; - goto fail1; - } - - xpad->irq_in = usb_alloc_urb(0, GFP_KERNEL); - if (!xpad->irq_in) { - error = -ENOMEM; - goto fail2; - } - - xpad->udev = udev; - xpad->intf = intf; - xpad->mapping = xpad_device[i].mapping; - xpad->xtype = xpad_device[i].xtype; - - if (xpad->xtype == XTYPE_UNKNOWN) { - if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { - if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) - xpad->xtype = XTYPE_XBOX360W; - else - xpad->xtype = XTYPE_XBOX360; - } else - xpad->xtype = XTYPE_XBOX; - - if (dpad_to_buttons) - xpad->mapping |= MAP_DPAD_TO_BUTTONS; - if (triggers_to_buttons) - xpad->mapping |= MAP_TRIGGERS_TO_BUTTONS; - if (sticks_to_null) - xpad->mapping |= MAP_STICKS_TO_NULL; - } + if (!input_dev) + return -ENOMEM; xpad->dev = input_dev; - usb_make_path(udev, xpad->phys, sizeof(xpad->phys)); - strlcat(xpad->phys, "/input0", sizeof(xpad->phys)); - - input_dev->name = xpad_device[i].name; + input_dev->name = xpad->name; input_dev->phys = xpad->phys; - usb_to_input_id(udev, &input_dev->id); - input_dev->dev.parent = &intf->dev; + usb_to_input_id(xpad->udev, &input_dev->id); + input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); input_dev->open = xpad_open; input_dev->close = xpad_close; - input_dev->evbit[0] = BIT_MASK(EV_KEY); + __set_bit(EV_KEY, input_dev->evbit); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { - input_dev->evbit[0] |= BIT_MASK(EV_ABS); + __set_bit(EV_ABS, input_dev->evbit); /* set up axes */ for (i = 0; xpad_abs[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs[i]); @@ -1189,17 +1135,100 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id xpad_set_up_abs(input_dev, xpad_abs_triggers[i]); } - error = xpad_init_output(intf, xpad); - if (error) - goto fail3; - error = xpad_init_ff(xpad); if (error) - goto fail4; + goto err_free_input; error = xpad_led_probe(xpad); if (error) - goto fail5; + goto err_destroy_ff; + + error = input_register_device(xpad->dev); + if (error) + goto err_disconnect_led; + + return 0; + +err_disconnect_led: + xpad_led_disconnect(xpad); +err_destroy_ff: + input_ff_destroy(input_dev); +err_free_input: + input_free_device(input_dev); + return error; +} + +static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) +{ + struct usb_device *udev = interface_to_usbdev(intf); + struct usb_xpad *xpad; + struct usb_endpoint_descriptor *ep_irq_in; + int ep_irq_in_idx; + int i, error; + + for (i = 0; xpad_device[i].idVendor; i++) { + if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && + (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) + break; + } + + if (xpad_device[i].xtype == XTYPE_XBOXONE && + intf->cur_altsetting->desc.bInterfaceNumber != 0) { + /* + * The Xbox One controller lists three interfaces all with the + * same interface class, subclass and protocol. Differentiate by + * interface number. + */ + return -ENODEV; + } + + xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL); + if (!xpad) + return -ENOMEM; + + usb_make_path(udev, xpad->phys, sizeof(xpad->phys)); + strlcat(xpad->phys, "/input0", sizeof(xpad->phys)); + + xpad->idata = usb_alloc_coherent(udev, XPAD_PKT_LEN, + GFP_KERNEL, &xpad->idata_dma); + if (!xpad->idata) { + error = -ENOMEM; + goto err_free_mem; + } + + xpad->irq_in = usb_alloc_urb(0, GFP_KERNEL); + if (!xpad->irq_in) { + error = -ENOMEM; + goto err_free_idata; + } + + xpad->udev = udev; + xpad->intf = intf; + xpad->mapping = xpad_device[i].mapping; + xpad->xtype = xpad_device[i].xtype; + xpad->name = xpad_device[i].name; + + if (xpad->xtype == XTYPE_UNKNOWN) { + if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { + if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) + xpad->xtype = XTYPE_XBOX360W; + else + xpad->xtype = XTYPE_XBOX360; + } else { + xpad->xtype = XTYPE_XBOX; + } + + if (dpad_to_buttons) + xpad->mapping |= MAP_DPAD_TO_BUTTONS; + if (triggers_to_buttons) + xpad->mapping |= MAP_TRIGGERS_TO_BUTTONS; + if (sticks_to_null) + xpad->mapping |= MAP_STICKS_TO_NULL; + } + + error = xpad_init_output(intf, xpad); + if (error) + goto err_free_in_urb; /* Xbox One controller has in/out endpoints swapped. */ ep_irq_in_idx = xpad->xtype == XTYPE_XBOXONE ? 1 : 0; @@ -1212,12 +1241,12 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id xpad->irq_in->transfer_dma = xpad->idata_dma; xpad->irq_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; - error = input_register_device(xpad->dev); - if (error) - goto fail6; - usb_set_intfdata(intf, xpad); + error = xpad_init_input(xpad); + if (error) + goto err_deinit_output; + if (xpad->xtype == XTYPE_XBOX360W) { /* * Submit the int URB immediately rather than waiting for open @@ -1229,20 +1258,19 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id xpad->irq_in->dev = xpad->udev; error = usb_submit_urb(xpad->irq_in, GFP_KERNEL); if (error) - goto fail7; + goto err_deinit_input; } - return 0; - fail7: input_unregister_device(input_dev); - input_dev = NULL; - fail6: xpad_led_disconnect(xpad); - fail5: if (input_dev) - input_ff_destroy(input_dev); - fail4: xpad_deinit_output(xpad); - fail3: usb_free_urb(xpad->irq_in); - fail2: usb_free_coherent(udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); - fail1: input_free_device(input_dev); +err_deinit_input: + xpad_deinit_input(xpad); +err_deinit_output: + xpad_deinit_output(xpad); +err_free_in_urb: + usb_free_urb(xpad->irq_in); +err_free_idata: + usb_free_coherent(udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); +err_free_mem: kfree(xpad); return error; @@ -1252,8 +1280,7 @@ static void xpad_disconnect(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata (intf); - xpad_led_disconnect(xpad); - input_unregister_device(xpad->dev); + xpad_deinit_input(xpad); xpad_deinit_output(xpad); if (xpad->xtype == XTYPE_XBOX360W) { -- cgit v1.2.3 From efcdff81f4add6ce0aab8a63d3b275112c817a59 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Sat, 10 Oct 2015 09:36:17 -0700 Subject: Input: xpad - query wireless controller state at init When we initialize the driver/device, we really don't know how many controllers are connected. So send a "query presence" command to the base-station. (Command discovered by Zachary Lund) Presence packet taken from: https://github.com/computerquip/xpad5 Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 0524d197df13..6bba9bf3c44b 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -768,6 +768,33 @@ static void xpad_deinit_output(struct usb_xpad *xpad) } } +static int xpad_inquiry_pad_presence(struct usb_xpad *xpad) +{ + int retval; + + mutex_lock(&xpad->odata_mutex); + + xpad->odata[0] = 0x08; + xpad->odata[1] = 0x00; + xpad->odata[2] = 0x0F; + xpad->odata[3] = 0xC0; + xpad->odata[4] = 0x00; + xpad->odata[5] = 0x00; + xpad->odata[6] = 0x00; + xpad->odata[7] = 0x00; + xpad->odata[8] = 0x00; + xpad->odata[9] = 0x00; + xpad->odata[10] = 0x00; + xpad->odata[11] = 0x00; + xpad->irq_out->transfer_buffer_length = 12; + + retval = usb_submit_urb(xpad->irq_out, GFP_KERNEL); + + mutex_unlock(&xpad->odata_mutex); + + return retval; +} + #ifdef CONFIG_JOYSTICK_XPAD_FF static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { @@ -1259,9 +1286,22 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id error = usb_submit_urb(xpad->irq_in, GFP_KERNEL); if (error) goto err_deinit_input; + + /* + * Send presence packet. + * This will force the controller to resend connection packets. + * This is useful in the case we activate the module after the + * adapter has been plugged in, as it won't automatically + * send us info about the controllers. + */ + error = xpad_inquiry_pad_presence(xpad); + if (error) + goto err_kill_in_urb; } return 0; +err_kill_in_urb: + usb_kill_urb(xpad->irq_in); err_deinit_input: xpad_deinit_input(xpad); err_deinit_output: -- cgit v1.2.3 From c3d2715b3e14e2c526e68649d5d895221391f5e4 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Mon, 19 Oct 2015 00:06:58 -0700 Subject: Input: xpad - fix clash of presence handling with LED setting Do not call xpad_identify_controller at init with wireless devices: it conflicts with the already sent presence packet and will be called by xpad360w_process_packet as needed anyway. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Tim Clark --- drivers/input/joystick/xpad.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 6bba9bf3c44b..a028913ba114 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1001,8 +1001,15 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (error) goto err_free_id; - /* Light up the segment corresponding to controller number */ - xpad_identify_controller(xpad); + if (xpad->xtype == XTYPE_XBOX360) { + /* + * Light up the segment corresponding to controller + * number on wired devices. On wireless we'll do that + * when they respond to "presence" packet. + */ + xpad_identify_controller(xpad); + } + return 0; err_free_id: -- cgit v1.2.3 From 87bc7349077b48d5b2f939ca85fc8fcffd927f09 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Feb 2015 13:48:00 -0800 Subject: UPSTREAM: seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO The value resulting from the SECCOMP_RET_DATA mask could exceed MAX_ERRNO when setting errno during a SECCOMP_RET_ERRNO filter action. This makes sure we have a reliable value being set, so that an invalid errno will not be ignored by userspace. Signed-off-by: Kees Cook Reported-by: Dmitry V. Levin Cc: Andy Lutomirski Cc: Will Drewry Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 580c57f1076872ebc2427f898b927944ce170f2d) Signed-off-by: Kees Cook Change-Id: If82aa1b5f4be6375e8ca301d317d3d2db88a66dd --- kernel/seccomp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4ef9687ac115..4f44028943e6 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -629,7 +629,9 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) switch (action) { case SECCOMP_RET_ERRNO: - /* Set the low-order 16-bits as a errno. */ + /* Set low-order bits as an errno, capped at MAX_ERRNO. */ + if (data > MAX_ERRNO) + data = MAX_ERRNO; syscall_set_return_value(current, task_pt_regs(current), -data, 0); goto skip; -- cgit v1.2.3 From 6d2862fa73e1943e3931e43bdf1f7572c69d177c Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Thu, 14 Jan 2016 14:33:52 +0100 Subject: UPSTREAM: memcg: Only free spare array when readers are done A spare array holding mem cgroup threshold events is kept around to make sure we can always safely deregister an event and have an array to store the new set of events in. In the scenario where we're going from 1 to 0 registered events, the pointer to the primary array containing 1 event is copied to the spare slot, and then the spare slot is freed because no events are left. However, it is freed before calling synchronize_rcu(), which means readers may still be accessing threshold->primary after it is freed. Fixed by only freeing after synchronize_rcu(). Change-Id: Ie56b72e47168d22eba0e4f1af7d8bf3265dc6d47 Signed-off-by: Martijn Coenen --- mm/memcontrol.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b926a93fb576..364cf276984b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4748,16 +4748,17 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, swap_buffers: /* Swap primary and spare array */ thresholds->spare = thresholds->primary; - /* If all events are unregistered, free the spare array */ - if (!new) { - kfree(thresholds->spare); - thresholds->spare = NULL; - } rcu_assign_pointer(thresholds->primary, new); /* To be sure that nobody uses thresholds */ synchronize_rcu(); + + /* If all events are unregistered, free the spare array */ + if (!new) { + kfree(thresholds->spare); + thresholds->spare = NULL; + } unlock: mutex_unlock(&memcg->thresholds_lock); } -- cgit v1.2.3 From d480fe2101acb9b872a5e174d48811a74f3e212d Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 19 Jan 2016 01:17:30 +0530 Subject: ion: fix page pool cache policy Fix redundant "buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE" checks in if(!cached ...) condition block. AOSP Change-Id: I98ee8902df0c80135dddfa998c4ca4c2bb44e40e, "ion: Handle the memory mapping correctly on x86", is broken on android-3.18+ kernels. It conflicts with upstream commit 53a91c68fa7b, "staging: ion: Add private buffer flag to skip page pooling on free", and break the ION_PRIV_FLAG_SHRINKER_FREE private flag check logic. Change-Id: I9cee4bcc3545cf92e07c21c2b42d27cf88da3316 Reported-by: chenfeng Signed-off-by: Amit Pundir --- drivers/staging/android/ion/ion_system_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c index 1f9feb7480b2..9adfd6de8f8c 100644 --- a/drivers/staging/android/ion/ion_system_heap.c +++ b/drivers/staging/android/ion/ion_system_heap.c @@ -83,7 +83,7 @@ static void free_buffer_page(struct ion_system_heap *heap, unsigned int order = compound_order(page); bool cached = ion_buffer_cached(buffer); - if (!cached && !(buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE)) { + if (!cached) { struct ion_page_pool *pool = heap->pools[order_to_index(order)]; if (buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE) ion_page_pool_free_immediate(pool, page); -- cgit v1.2.3 From ba8bb5774ca7b1acc314c98638cf678ce0beb19a Mon Sep 17 00:00:00 2001 From: Yevgeny Pats Date: Tue, 19 Jan 2016 22:09:04 +0000 Subject: UPSTREAM: KEYS: Fix keyring ref leak in join_session_keyring() (cherry pick from commit 23567fd052a9abb6d67fe8e7a9ccdd9800a540f2) This fixes CVE-2016-0728. If a thread is asked to join as a session keyring the keyring that's already set as its session, we leak a keyring reference. This can be tested with the following program: #include #include #include #include int main(int argc, const char *argv[]) { int i = 0; key_serial_t serial; serial = keyctl(KEYCTL_JOIN_SESSION_KEYRING, "leaked-keyring"); if (serial < 0) { perror("keyctl"); return -1; } if (keyctl(KEYCTL_SETPERM, serial, KEY_POS_ALL | KEY_USR_ALL) < 0) { perror("keyctl"); return -1; } for (i = 0; i < 100; i++) { serial = keyctl(KEYCTL_JOIN_SESSION_KEYRING, "leaked-keyring"); if (serial < 0) { perror("keyctl"); return -1; } } return 0; } If, after the program has run, there something like the following line in /proc/keys: 3f3d898f I--Q--- 100 perm 3f3f0000 0 0 keyring leaked-keyring: empty with a usage count of 100 * the number of times the program has been run, then the kernel is malfunctioning. If leaked-keyring has zero usages or has been garbage collected, then the problem is fixed. Reported-by: Yevgeny Pats Signed-off-by: David Howells Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Jarod Wilson Signed-off-by: James Morris Change-Id: Ic3db0461d08011e432bf3a6a784dd62b764558e3 --- security/keys/process_keys.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index bd536cb221e2..db91639c81e3 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -794,6 +794,7 @@ long join_session_keyring(const char *name) ret = PTR_ERR(keyring); goto error2; } else if (keyring == new->session_keyring) { + key_put(keyring); ret = 0; goto error2; } -- cgit v1.2.3 From bd8d3dd3ae35f283f3b76e47b9762225c9f7d46c Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 23 Nov 2015 16:07:41 -0500 Subject: UPSTREAM: selinux: fix bug in conditional rules handling (cherry picked from commit commit f3bef67992e8698897b584616535803887c4a73e) commit fa1aa143ac4a ("selinux: extended permissions for ioctls") introduced a bug into the handling of conditional rules, skipping the processing entirely when the caller does not provide an extended permissions (xperms) structure. Access checks from userspace using /sys/fs/selinux/access do not include such a structure since that interface does not presently expose extended permission information. As a result, conditional rules were being ignored entirely on userspace access requests, producing denials when access was allowed by conditional rules in the policy. Fix the bug by only skipping computation of extended permissions in this situation, not the entire conditional rules processing. Change-Id: I24f39e3907d0b00a4194e15a4472e8d859508fa9 Reported-by: Laurent Bigonville Signed-off-by: Stephen Smalley [PM: fixed long lines in patch description] Cc: stable@vger.kernel.org # 4.3 Signed-off-by: Paul Moore --- security/selinux/ss/conditional.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 18643bf9894d..456e1a9bcfde 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -638,7 +638,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, { struct avtab_node *node; - if (!ctab || !key || !avd || !xperms) + if (!ctab || !key || !avd) return; for (node = avtab_search_node(ctab, key); node; @@ -657,7 +657,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, if ((u16)(AVTAB_AUDITALLOW|AVTAB_ENABLED) == (node->key.specified & (AVTAB_AUDITALLOW|AVTAB_ENABLED))) avd->auditallow |= node->datum.u.data; - if ((node->key.specified & AVTAB_ENABLED) && + if (xperms && (node->key.specified & AVTAB_ENABLED) && (node->key.specified & AVTAB_XPERMS)) services_compute_xperms_drivers(xperms, node); } -- cgit v1.2.3 From bea19db82340c97543b9824ca7643fba1c9a0c56 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Mar 2015 21:12:12 -0700 Subject: net: add sk_fullsock() helper We have many places where we want to check if a socket is not a timewait or request socket. Use a helper to avoid hard coding this. [Backport of net-next 1d0ab253872cdd3d8e7913f59c266c7fd01771d0] [lorenzo@google.com: removed TCPF_NEW_SYN_RECV, and added a comment to add it back.] Bug: 24163529 Change-Id: Ibf09017e1ab00af5e6925273117c335d7f515d73 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Lorenzo Colitti --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 7db3db112baa..0b2bfa24c7dc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -68,6 +68,7 @@ #include #include #include +#include struct cgroup; struct cgroup_subsys; @@ -2265,6 +2266,15 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb) return NULL; } +/* This helper checks if a socket is a full socket, + * ie _not_ a timewait or request socket. + * TODO: Check for TCPF_NEW_SYN_RECV when that starts to exist. + */ +static inline bool sk_fullsock(const struct sock *sk) +{ + return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT); +} + void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); -- cgit v1.2.3 From 8246f1898f6da97f1b25fd153264c10ac4f0f0ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 4 Oct 2015 21:08:09 -0700 Subject: ipv6: inet6_sk() should use sk_fullsock() SYN_RECV & TIMEWAIT sockets are not full blown, they do not have a pinet6 pointer. [Backport of net-next e7eadb4de9e645e1b34539dc4128240b1e5f71dc] Bug: 24163529 Change-Id: I6ce67a190d67d200c6ebeb81d2daeb9c86cd7581 Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bac42ed3a579..10e14afc7cf7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,9 +256,9 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) -static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) +static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { - return inet_sk(__sk)->pinet6; + return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) -- cgit v1.2.3 From 100263d643c99138fc97071287d4701066cb075e Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:02 +0900 Subject: net: diag: split inet_diag_dump_one_icsk into two Currently, inet_diag_dump_one_icsk finds a socket and then dumps its information to userspace. Split it into a part that finds the socket and a part that dumps the information. [Backport of net-next b613f56ec9baf30edf5d9d607b822532a273dad7] Change-Id: I9218ab7f71d18f9a74726a0b9caa48fd604b0bb3 Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 5 +++++ net/ipv4/inet_diag.c | 42 ++++++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 46da02410a09..0f3eddff8eea 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -3,6 +3,7 @@ #include +struct net; struct sock; struct inet_hashinfo; struct nlattr; @@ -39,6 +40,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req); +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + struct inet_diag_req_v2 *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); extern int inet_diag_register(const struct inet_diag_handler *handler); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e34dccbc4d70..a29994760aa3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -288,15 +288,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + struct inet_diag_req_v2 *req) { - int err; struct sock *sk; - struct sk_buff *rep; - struct net *net = sock_net(in_skb->sk); - err = -EINVAL; if (req->sdiag_family == AF_INET) { sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -313,16 +310,34 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s } #endif else { - goto out_nosk; + return ERR_PTR(-EINVAL); } - err = -ENOENT; - if (sk == NULL) - goto out_nosk; + if (!sk) + return ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); rep = nlmsg_new(sizeof(struct inet_diag_msg) + sizeof(struct inet_diag_meminfo) + @@ -350,7 +365,6 @@ out: if (sk) sock_gen_put(sk); -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -- cgit v1.2.3 From 194c5f317994f57a1df0a669f2a338a667933d89 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:03 +0900 Subject: net: diag: Add the ability to destroy a socket. This patch adds a SOCK_DESTROY operation, a destroy function pointer to sock_diag_handler, and a diag_destroy function pointer. It does not include any implementation code. [Backport of net-next 64be0aed59ad519d6f2160868734f7e278290ac1] Change-Id: I24f4a157e0a2cdb267317920aa230cc6528ca072 Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 ++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 1 + net/core/sock_diag.c | 23 ++++++++++++++++++++--- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 46cca4c06848..1a3b383ba4e6 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -11,6 +11,7 @@ struct sock; struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); + int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -26,4 +27,5 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); +int sock_diag_destroy(struct sock *sk, int err); #endif diff --git a/include/net/sock.h b/include/net/sock.h index 0b2bfa24c7dc..0d7bb6b7fd64 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1049,6 +1049,7 @@ struct proto { void (*destroy_cgroup)(struct mem_cgroup *memcg); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif + int (*diag_destroy)(struct sock *sk, int err); }; /* diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index b00e29efb161..472adbb711ed 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -4,6 +4,7 @@ #include #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY_BACKPORT 21 struct sock_diag_req { __u8 sdiag_family; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index ad704c757bb4..5bf1aebd9ab0 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -132,7 +132,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); @@ -152,8 +152,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY_BACKPORT && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(&sock_diag_table_mutex); return err; @@ -179,7 +183,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY_BACKPORT: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -194,6 +199,18 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } +int sock_diag_destroy(struct sock *sk, int err) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, err); +} +EXPORT_SYMBOL_GPL(sock_diag_destroy); + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { -- cgit v1.2.3 From 8387ea26df81828361980fddef693109c79fa97b Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:04 +0900 Subject: net: diag: Support SOCK_DESTROY for inet sockets. This passes the SOCK_DESTROY operation to the underlying protocol diag handler, or returns -EOPNOTSUPP if that handler does not define a destroy operation. Most of this patch is just renaming functions. This is not strictly necessary, but it would be fairly counterintuitive to have the code to destroy inet sockets be in a function whose name starts with inet_diag_get. [Backport of net-next 6eb5d2e08f071c05ecbe135369c9ad418826cab2] Change-Id: If50e5dd38dbb9e421ad7146f34778b5a143ff58d Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 4 ++++ net/ipv4/inet_diag.c | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 0f3eddff8eea..25de5e73845c 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,10 @@ struct inet_diag_handler { void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); + + int (*destroy)(struct sk_buff *in_skb, + struct inet_diag_req_v2 *req); + __u16 idiag_type; }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a29994760aa3..0cbab925dd5b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -369,7 +369,7 @@ out: } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) { @@ -379,8 +379,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY_BACKPORT && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -1056,7 +1060,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -1090,7 +1094,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -1098,7 +1102,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; attr = nlmsg_find_attr(h, hdrlen, @@ -1116,17 +1121,19 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); } static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) -- cgit v1.2.3 From b80585a64123cc55a9aa11ff57b27315985f9b1f Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:05 +0900 Subject: net: diag: Support destroying TCP sockets. This implements SOCK_DESTROY for TCP sockets. It causes all blocking calls on the socket to fail fast with ECONNABORTED and causes a protocol close of the socket. It informs the other end of the connection by sending a RST, i.e., initiating a TCP ABORT as per RFC 793. ECONNABORTED was chosen for consistency with FreeBSD. [Backport of net-next c1e64e298b8cad309091b95d8436a0255c84f54a] Change-Id: Ie46bb701e061dc54dd38831d31b693aab4349483 Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/Kconfig | 13 +++++++++++++ net/ipv4/tcp.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 19 +++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 6 files changed, 68 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index e6ec66a5fedc..d431be36263b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1085,6 +1085,8 @@ void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); +int tcp_abort(struct sock *sk, int err); + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e682b48e0709..022e599cfff9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -441,6 +441,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a8c78ccf0f13..3e7e91159833 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3033,6 +3033,38 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0d73f9ddb55b..1d745eac7b8f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -45,11 +47,28 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 147be2024290..e0a27e9f5f62 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2423,6 +2423,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cab352848605..4b0090f2aaa9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1902,6 +1902,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { -- cgit v1.2.3 From 476c6cef83b6cff280d2b21c0daacbf6c53c2110 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 22 Dec 2015 00:03:44 +0900 Subject: net: tcp: deal with listen sockets properly in tcp_abort. When closing a listen socket, tcp_abort currently calls tcp_done without clearing the request queue. If the socket has a child socket that is established but not yet accepted, the child socket is then left without a parent, causing a leak. Fix this by setting the socket state to TCP_CLOSE and calling inet_csk_listen_stop with the socket lock held, like tcp_close does. Tested using net_test. With this patch, calling SOCK_DESTROY on a listen socket that has an established but not yet accepted child socket results in the parent and the child being closed, such that they no longer appear in sock_diag dumps. [Backport of net-next 2010b93e9317cc12acd20c4aed385af7f9d1681e] Change-Id: I2066a77a6b8358c15bb3d61e9bfb448622bfedb9 Reported-by: Eric Dumazet Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3e7e91159833..d029120851fc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3043,6 +3043,11 @@ int tcp_abort(struct sock *sk, int err) /* Don't race with userspace socket closes such as tcp_close. */ lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + inet_csk_listen_stop(sk); + } + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ local_bh_disable(); bh_lock_sock(sk); -- cgit v1.2.3 From e603010c1d5abba2b652efa9b8009512c83564b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jan 2016 16:25:01 -0800 Subject: net: diag: support v4mapped sockets in inet_diag_find_one_icsk() Lorenzo reported that we could not properly find v4mapped sockets in inet_diag_find_one_icsk(). This patch fixes the issue. [Cherry-pick of net 7c1306723ee916ea9f1fa7d9e4c7a6d029ca7aaf] Change-Id: Ie0f5b78b15f30f375bb6f8aadf2104109a10031c Reported-by: Lorenzo Colitti Signed-off-by: Eric Dumazet Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 0cbab925dd5b..710cecc4d4fc 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -301,12 +301,18 @@ struct sock *inet_diag_find_one_icsk(struct net *net, } #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { - sk = inet6_lookup(net, hashinfo, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3], + req->id.idiag_dport, req->id.idiag_src[3], + req->id.idiag_sport, req->id.idiag_if); + else + sk = inet6_lookup(net, hashinfo, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); } #endif else { -- cgit v1.2.3 From bcb748c2268c08689e1e98a1d7c6bb3f540552e1 Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Mon, 18 May 2015 19:44:41 +0900 Subject: neigh: Better handling of transition to NUD_PROBE state [1] When entering NUD_PROBE state via neigh_update(), perhaps received from userspace, correctly (re)initialize the probes count to zero. This is useful for forcing revalidation of a neighbor (for example if the host is attempting to do DNA [IPv4 4436, IPv6 6059]). [2] Notify listeners when a neighbor goes into NUD_PROBE state. By sending notifications on entry to NUD_PROBE state listeners get more timely warnings of imminent connectivity issues. The current notifications on entry to NUD_STALE have somewhat limited usefulness: NUD_STALE is a perfectly normal state, as is NUD_DELAY, whereas notifications on entry to NUD_FAILURE come after a neighbor reachability problem has been confirmed (typically after three probes). [Cherry-pick of upstream 765c9c639fbb132af0cafc6e1da22fe6cea26bb8] Signed-off-by: Erik Kline Acked-By: Lorenzo Colitti Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ef31fef25e5a..ad1c823a9e56 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -933,6 +933,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); + notify = 1; next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } } else { @@ -1164,6 +1165,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (new != old) { neigh_del_timer(neigh); + if (new & NUD_PROBE) + atomic_set(&neigh->probes, 0); if (new & NUD_IN_TIMER) neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? -- cgit v1.2.3 From 69649c8a83f8b5e61d631e8962f9169bbece6766 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 28 Jan 2016 14:10:50 +0530 Subject: Revert "net: wireless: Add CONFIG_WIFI_CONTROL_FUNC option" This reverts commit 9486c0aac41b5283add06b8b9e83f0073bcae5ec. WIFI_CONTROL_FUNC config is not used anywhere in android-3.10+ kernels. Its usage can be last tracked to BCMDHD wireless driver in android-3.4 kernel. Change-Id: I8092bf10b70dd84a9bb3aade4aca80891742d504 Signed-off-by: Amit Pundir --- drivers/net/wireless/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 710733f034fa..16604bdf5197 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -265,11 +265,6 @@ config MWL8K To compile this driver as a module, choose M here: the module will be called mwl8k. If unsure, say N. -config WIFI_CONTROL_FUNC - bool "Enable WiFi control function abstraction" - help - Enables Power/Reset/Carddetect function abstraction - source "drivers/net/wireless/ath/Kconfig" source "drivers/net/wireless/b43/Kconfig" source "drivers/net/wireless/b43legacy/Kconfig" -- cgit v1.2.3 From 47be4c1862a864662721a98b6fbc415c6bda29df Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 4 Feb 2016 00:52:15 +0900 Subject: selinux: nlmsgtab: add SOCK_DESTROY to the netlink mapping tables Without this, using SOCK_DESTROY in enforcing mode results in: SELinux: unrecognized netlink message type=21 for sclass=32 Change-Id: I7862bb0fc83573567243ffa9549a2c7405b5986c --- security/selinux/nlmsgtab.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 902b5e9cec7e..053210a559ad 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -77,9 +77,10 @@ static struct nlmsg_perm nlmsg_route_perms[] = static struct nlmsg_perm nlmsg_tcpdiag_perms[] = { - { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, - { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, - { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { SOCK_DESTROY_BACKPORT, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE }, }; static struct nlmsg_perm nlmsg_xfrm_perms[] = -- cgit v1.2.3 From deeb7b42a161bfdfff52e54e58e2b4825e33c532 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 8 Feb 2016 11:51:13 +0530 Subject: Revert "usb: gadget: composite: Fix corruption when changing configuration" This reverts commit c5c7f4d35eb6a8c1eaeabc416814cdde4c52fb14. We do not need this fix with Android ConfigFS gadgets. Change-Id: I7dffb884a49ecb2f5d619150edfa3e51808ef8cc Signed-off-by: Amit Pundir --- drivers/usb/gadget/composite.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 815126ae386a..3d353ef97918 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -845,7 +845,7 @@ done: } EXPORT_SYMBOL_GPL(usb_add_config); -static void unbind_config(struct usb_composite_dev *cdev, +static void remove_config(struct usb_composite_dev *cdev, struct usb_configuration *config) { while (!list_empty(&config->functions)) { @@ -860,6 +860,7 @@ static void unbind_config(struct usb_composite_dev *cdev, /* may free memory for "f" */ } } + list_del(&config->list); if (config->unbind) { DBG(cdev, "unbind config '%s'/%p\n", config->label, config); config->unbind(config); @@ -886,11 +887,9 @@ void usb_remove_config(struct usb_composite_dev *cdev, if (cdev->config == config) reset_config(cdev); - list_del(&config->list); - spin_unlock_irqrestore(&cdev->lock, flags); - unbind_config(cdev, config); + remove_config(cdev, config); } /*-------------------------------------------------------------------------*/ @@ -1825,8 +1824,7 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) struct usb_configuration *c; c = list_first_entry(&cdev->configs, struct usb_configuration, list); - list_del(&c->list); - unbind_config(cdev, c); + remove_config(cdev, c); } if (cdev->driver->unbind && unbind_driver) cdev->driver->unbind(cdev); -- cgit v1.2.3 From ebc5df8900e95eaa84581c3a869d7adb6cd4e0f1 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 9 Feb 2016 12:31:42 -0800 Subject: android: recommended.cfg: enable taskstats CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y Signed-off-by: Mark Salyzyn Bug: 21334988 Bug: 26966375 Change-Id: Id54be2aad6acdb51040ba613d5d987dd693cd591 --- android/configs/android-recommended.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg index 562de015f05c..e4c8aaade197 100644 --- a/android/configs/android-recommended.cfg +++ b/android/configs/android-recommended.cfg @@ -111,6 +111,10 @@ CONFIG_TABLET_USB_GTCO=y CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y CONFIG_TABLET_USB_WACOM=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y CONFIG_TIMER_STATS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -- cgit v1.2.3 From 12ef60e6494d2234ac60cc23fa4e1f35666a96f2 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 5 Jan 2016 17:36:31 +0530 Subject: kbuild: Makefile.clean: make Kbuild and Makefile optional AOSP commit b13ce9f4aa6f "ARM64: add option to build Image.gz/dtb combo" broke archclean / mrproper build targets and we run into: ---------- ./scripts/Makefile.clean:14: arch/arm64/boot/amd/Makefile: No such file or directory make[2]: *** No rule to make target `arch/arm64/boot/amd/Makefile'. Stop. make[1]: *** [arch/arm64/boot/amd] Error 2 make: *** [archclean] Error 2 ---------- This patch skip the missing Kbuild/Makefile reporting error. It does the job (i.e cleanup dts/*/*.dtb and do not spit out missing file error messages as well). Signed-off-by: Amit Pundir --- scripts/Makefile.clean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index a609552a86dc..cec78c48e1cf 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -14,7 +14,7 @@ clean := -f $(srctree)/scripts/Makefile.clean obj # The filename Kbuild has precedence over Makefile kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile) +-include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile) # Figure out what we need to build from the various variables # ========================================================================== -- cgit v1.2.3 From 9d266ff91d7856618a0f733a690ba2208a1e6689 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 8 Jan 2016 13:40:11 +0530 Subject: usb: gadget: function: fix unused func warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missed fixing this unused function warning last time. Use __maybe_unused attribute to fix it: CC drivers/usb/gadget/function/f_fs.o drivers/usb/gadget/function/f_fs.c:1622:12: warning: ‘functionfs_bind_config’ defined but not used [-Wunused-function] static int functionfs_bind_config(struct usb_composite_dev *cdev, ^ Signed-off-by: Amit Pundir --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 45207e2246c0..8b1ad9d29f86 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1619,7 +1619,7 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) kfree(epfiles); } -static int functionfs_bind_config(struct usb_composite_dev *cdev, +static int __maybe_unused functionfs_bind_config(struct usb_composite_dev *cdev, struct usb_configuration *c, struct ffs_data *ffs) { -- cgit v1.2.3 From 416271766c791b201d47c7c1ec02643e16bd7141 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 8 Jan 2016 13:57:58 +0530 Subject: usb: gadget: build audio_source function only if SND is enabled Also select SND_PCM while building f_audio_source. Updated changes from linaro-android commit 7b377487a28c "usb: gadget: audio_source function depends on SND_PCM" to align with similar set of changes from aosp/android-4.1 commit ba462c7cf20e "usb: gadget: build audio_source function only if SND is enabled". Signed-off-by: Amit Pundir --- drivers/usb/gadget/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index aa2e48aba324..51896f6332b4 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -401,7 +401,8 @@ config USB_CONFIGFS_F_ACC config USB_CONFIGFS_F_AUDIO_SRC boolean "Audio Source gadget" depends on USB_CONFIGFS && USB_CONFIGFS_F_ACC - depends on SND_PCM + depends on SND + select SND_PCM select USB_F_AUDIO_SRC help USB gadget Audio Source support -- cgit v1.2.3 From 17aa01a7059b202f893a9e7e859032640318b458 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 8 Jan 2016 15:34:15 +0530 Subject: HID: hidpp: update input_configured callback Update input_configured callback for hidpp (hidpp_input_configured) to align with the changes from AOSP commit 7682bfa411f4 "UPSTREAM: HID: hid-input: allow input_configured callback return errors". Signed-off-by: Amit Pundir --- drivers/hid/hid-logitech-hidpp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 7dd9163f7e03..07bc1e66fb6d 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -652,13 +652,15 @@ static int hidpp_input_mapping(struct hid_device *hdev, struct hid_input *hi, return 0; } -static void hidpp_input_configured(struct hid_device *hdev, +static int hidpp_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) wtp_input_configured(hdev, hidinput); + + return 0; } static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, -- cgit v1.2.3