From 022e510fcbda79183fd2cdc01abb01b4be80d03f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Jul 2017 16:14:42 +0800 Subject: md: remove 'idx' from 'struct resync_pages' bio_add_page() won't fail for resync bio, and the page index for each bio is same, so remove it. More importantly the 'idx' of 'struct resync_pages' is initialized in mempool allocator function, the current way is wrong since mempool is only responsible for allocation, we can't use that for initialization. Suggested-by: NeilBrown Reported-by: NeilBrown Reported-and-tested-by: Patrick Fixes: f0250618361d(md: raid10: don't use bio's vec table to manage resync pages) Fixes: 98d30c5812c3(md: raid1: don't use bio's vec table to manage resync pages) Cc: stable@vger.kernel.org (4.12+) Signed-off-by: Ming Lei Signed-off-by: Shaohua Li --- drivers/md/md.h | 1 - drivers/md/raid1.c | 6 +++--- drivers/md/raid10.c | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/md.h b/drivers/md/md.h index b50eb4ac1b82..991769cc3615 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -738,7 +738,6 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio /* for managing resync I/O pages */ struct resync_pages { - unsigned idx; /* for get/put page from the pool */ void *raid_bio; struct page *pages[RESYNC_PAGES]; }; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 3febfc8391fb..0896c772a560 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -170,7 +170,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) resync_get_all_pages(rp); } - rp->idx = 0; rp->raid_bio = r1_bio; bio->bi_private = rp; } @@ -2619,6 +2618,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, int good_sectors = RESYNC_SECTORS; int min_bad = 0; /* number of sectors that are bad in all devices */ int idx = sector_to_idx(sector_nr); + int page_idx = 0; if (!conf->r1buf_pool) if (init_resync(conf)) @@ -2846,7 +2846,7 @@ static sector_t 
raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r1_bio->bios[i]; rp = get_resync_pages(bio); if (bio->bi_end_io) { - page = resync_fetch_page(rp, rp->idx++); + page = resync_fetch_page(rp, page_idx); /* * won't fail because the vec table is big @@ -2858,7 +2858,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, nr_sectors += len>>9; sector_nr += len>>9; sync_blocks -= (len>>9); - } while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES); + } while (++page_idx < RESYNC_PAGES); r1_bio->sectors = nr_sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5026e7ad51d3..fa8bcf04e791 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -221,7 +221,6 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) resync_get_all_pages(rp); } - rp->idx = 0; rp->raid_bio = r10_bio; bio->bi_private = rp; if (rbio) { @@ -2853,6 +2852,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, sector_t sectors_skipped = 0; int chunks_skipped = 0; sector_t chunk_mask = conf->geo.chunk_mask; + int page_idx = 0; if (!conf->r10buf_pool) if (init_resync(conf)) @@ -3355,7 +3355,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, break; for (bio= biolist ; bio ; bio=bio->bi_next) { struct resync_pages *rp = get_resync_pages(bio); - page = resync_fetch_page(rp, rp->idx++); + page = resync_fetch_page(rp, page_idx); /* * won't fail because the vec table is big enough * to hold all these pages @@ -3364,7 +3364,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } nr_sectors += len>>9; sector_nr += len>>9; - } while (get_resync_pages(biolist)->idx < RESYNC_PAGES); + } while (++page_idx < RESYNC_PAGES); r10_bio->sectors = nr_sectors; while (biolist) { -- cgit v1.2.3 From fb0eb5df09307603b21845af1d143cc910154593 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Jul 2017 16:14:43 +0800 Subject: md: raid1/raid10: 
initialize bvec table via bio_add_page() We will support multipage bvec soon, so initialize bvec table using the standard way instead of writing the table directly. Otherwise it won't work any more once multipage bvec is enabled. Acked-by: Guoqing Jiang Signed-off-by: Ming Lei Signed-off-by: Shaohua Li --- drivers/md/raid1-10.c | 19 +++++++++++++++++++ drivers/md/raid1.c | 18 ++++-------------- drivers/md/raid10.c | 6 ++++-- 3 files changed, 27 insertions(+), 16 deletions(-) create mode 100644 drivers/md/raid1-10.c diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c new file mode 100644 index 000000000000..3adb5b9dc4b4 --- /dev/null +++ b/drivers/md/raid1-10.c @@ -0,0 +1,19 @@ +/* generally called after bio_reset() for reseting bvec */ +static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp, + int size) +{ + int idx = 0; + + /* initialize bvec table again */ + do { + struct page *page = resync_fetch_page(rp, idx); + int len = min_t(int, size, PAGE_SIZE); + + /* + * won't fail because the vec table is big + * enough to hold all these pages + */ + bio_add_page(bio, page, len, 0); + size -= len; + } while (idx++ < RESYNC_PAGES && size > 0); +} diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0896c772a560..fe86ab18961b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -81,6 +81,8 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); #define raid1_log(md, fmt, args...) \ do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) +#include "raid1-10.c" + /* * 'strct resync_pages' stores actual pages used for doing the resync * IO, and it is per-bio, so make .bi_private points to it. 
@@ -2085,10 +2087,7 @@ static void process_checks(struct r1bio *r1_bio) /* Fix variable parts of all bios */ vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); for (i = 0; i < conf->raid_disks * 2; i++) { - int j; - int size; blk_status_t status; - struct bio_vec *bi; struct bio *b = r1_bio->bios[i]; struct resync_pages *rp = get_resync_pages(b); if (b->bi_end_io != end_sync_read) @@ -2097,8 +2096,6 @@ static void process_checks(struct r1bio *r1_bio) status = b->bi_status; bio_reset(b); b->bi_status = status; - b->bi_vcnt = vcnt; - b->bi_iter.bi_size = r1_bio->sectors << 9; b->bi_iter.bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; b->bi_bdev = conf->mirrors[i].rdev->bdev; @@ -2106,15 +2103,8 @@ static void process_checks(struct r1bio *r1_bio) rp->raid_bio = r1_bio; b->bi_private = rp; - size = b->bi_iter.bi_size; - bio_for_each_segment_all(bi, b, j) { - bi->bv_offset = 0; - if (size > PAGE_SIZE) - bi->bv_len = PAGE_SIZE; - else - bi->bv_len = size; - size -= PAGE_SIZE; - } + /* initialize bvec table again */ + md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9); } for (primary = 0; primary < conf->raid_disks * 2; primary++) if (r1_bio->bios[primary]->bi_end_io == end_sync_read && diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index fa8bcf04e791..9952721e1cde 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -110,6 +110,8 @@ static void end_reshape(struct r10conf *conf); #define raid10_log(md, fmt, args...) \ do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) +#include "raid1-10.c" + /* * 'strct resync_pages' stores actual pages used for doing the resync * IO, and it is per-bio, so make .bi_private points to it. 
@@ -2086,8 +2088,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) rp = get_resync_pages(tbio); bio_reset(tbio); - tbio->bi_vcnt = vcnt; - tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; + md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size); + rp->raid_bio = r10_bio; tbio->bi_private = rp; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; -- cgit v1.2.3 From be453e7761d0e72d8a1b2fcfde6d1a7e53881190 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Jul 2017 16:14:44 +0800 Subject: md: raid1-10: move raid1/raid10 common code into raid1-10.c No function change, just move 'struct resync_pages' and related helpers into raid1-10.c Signed-off-by: Ming Lei Signed-off-by: Shaohua Li --- drivers/md/md.h | 53 ------------------------------------------- drivers/md/raid1-10.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/raid1.c | 9 -------- drivers/md/raid10.c | 9 -------- 4 files changed, 62 insertions(+), 71 deletions(-) diff --git a/drivers/md/md.h b/drivers/md/md.h index 991769cc3615..09db03455801 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -731,57 +731,4 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } - -/* Maximum size of each resync request */ -#define RESYNC_BLOCK_SIZE (64*1024) -#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) - -/* for managing resync I/O pages */ -struct resync_pages { - void *raid_bio; - struct page *pages[RESYNC_PAGES]; -}; - -static inline int resync_alloc_pages(struct resync_pages *rp, - gfp_t gfp_flags) -{ - int i; - - for (i = 0; i < RESYNC_PAGES; i++) { - rp->pages[i] = alloc_page(gfp_flags); - if (!rp->pages[i]) - goto out_free; - } - - return 0; - -out_free: - while (--i >= 0) - put_page(rp->pages[i]); - return -ENOMEM; -} - -static inline void resync_free_pages(struct resync_pages *rp) -{ - int i; - 
- for (i = 0; i < RESYNC_PAGES; i++) - put_page(rp->pages[i]); -} - -static inline void resync_get_all_pages(struct resync_pages *rp) -{ - int i; - - for (i = 0; i < RESYNC_PAGES; i++) - get_page(rp->pages[i]); -} - -static inline struct page *resync_fetch_page(struct resync_pages *rp, - unsigned idx) -{ - if (WARN_ON_ONCE(idx >= RESYNC_PAGES)) - return NULL; - return rp->pages[idx]; -} #endif /* _MD_MD_H */ diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 3adb5b9dc4b4..9f2670b45f31 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -1,3 +1,65 @@ +/* Maximum size of each resync request */ +#define RESYNC_BLOCK_SIZE (64*1024) +#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) + +/* for managing resync I/O pages */ +struct resync_pages { + void *raid_bio; + struct page *pages[RESYNC_PAGES]; +}; + +static inline int resync_alloc_pages(struct resync_pages *rp, + gfp_t gfp_flags) +{ + int i; + + for (i = 0; i < RESYNC_PAGES; i++) { + rp->pages[i] = alloc_page(gfp_flags); + if (!rp->pages[i]) + goto out_free; + } + + return 0; + +out_free: + while (--i >= 0) + put_page(rp->pages[i]); + return -ENOMEM; +} + +static inline void resync_free_pages(struct resync_pages *rp) +{ + int i; + + for (i = 0; i < RESYNC_PAGES; i++) + put_page(rp->pages[i]); +} + +static inline void resync_get_all_pages(struct resync_pages *rp) +{ + int i; + + for (i = 0; i < RESYNC_PAGES; i++) + get_page(rp->pages[i]); +} + +static inline struct page *resync_fetch_page(struct resync_pages *rp, + unsigned idx) +{ + if (WARN_ON_ONCE(idx >= RESYNC_PAGES)) + return NULL; + return rp->pages[idx]; +} + +/* + * 'strct resync_pages' stores actual pages used for doing the resync + * IO, and it is per-bio, so make .bi_private points to it. 
+ */ +static inline struct resync_pages *get_resync_pages(struct bio *bio) +{ + return bio->bi_private; +} + /* generally called after bio_reset() for reseting bvec */ static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp, int size) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fe86ab18961b..8387eb1540cd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -83,15 +83,6 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); #include "raid1-10.c" -/* - * 'strct resync_pages' stores actual pages used for doing the resync - * IO, and it is per-bio, so make .bi_private points to it. - */ -static inline struct resync_pages *get_resync_pages(struct bio *bio) -{ - return bio->bi_private; -} - /* * for resync bio, r1bio pointer can be retrieved from the per-bio * 'struct resync_pages'. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9952721e1cde..e2617d0f37dc 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -112,15 +112,6 @@ static void end_reshape(struct r10conf *conf); #include "raid1-10.c" -/* - * 'strct resync_pages' stores actual pages used for doing the resync - * IO, and it is per-bio, so make .bi_private points to it. - */ -static inline struct resync_pages *get_resync_pages(struct bio *bio) -{ - return bio->bi_private; -} - /* * for resync bio, r10bio pointer can be retrieved from the per-bio * 'struct resync_pages'. -- cgit v1.2.3 From 16d56e2fcc1fc15b981369653c3b41d7ff0b443d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 17 Jul 2017 14:33:48 -0700 Subject: md/raid1: fix writebehind bio clone After a bio is submitted, we should not clone it, as its bi_iter might have been invalidated by the driver. This is the case for behind_master_bio. In certain situations, we could dispatch behind_master_bio immediately for the first disk and then clone it for other disks. 
https://bugzilla.kernel.org/show_bug.cgi?id=196383 Reported-and-tested-by: Markus Reviewed-by: Ming Lei Fix: 841c1316c7da(md: raid1: improve write behind) Cc: stable@vger.kernel.org (4.12+) Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8387eb1540cd..1d235cc8b402 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -484,10 +484,6 @@ static void raid1_end_write_request(struct bio *bio) } if (behind) { - /* we release behind master bio when all write are done */ - if (r1_bio->behind_master_bio == bio) - to_put = NULL; - if (test_bit(WriteMostly, &rdev->flags)) atomic_dec(&r1_bio->behind_remaining); @@ -1080,7 +1076,7 @@ static void unfreeze_array(struct r1conf *conf) wake_up(&conf->wait_barrier); } -static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, +static void alloc_behind_master_bio(struct r1bio *r1_bio, struct bio *bio) { int size = bio->bi_iter.bi_size; @@ -1090,11 +1086,13 @@ static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev); if (!behind_bio) - goto fail; + return; /* discard op, we don't support writezero/writesame yet */ - if (!bio_has_data(bio)) + if (!bio_has_data(bio)) { + behind_bio->bi_iter.bi_size = size; goto skip_copy; + } while (i < vcnt && size) { struct page *page; @@ -1115,14 +1113,13 @@ skip_copy: r1_bio->behind_master_bio = behind_bio;; set_bit(R1BIO_BehindIO, &r1_bio->state); - return behind_bio; + return; free_pages: pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_iter.bi_size); bio_free_pages(behind_bio); -fail: - return behind_bio; + bio_put(behind_bio); } struct raid1_plug_cb { @@ -1475,7 +1472,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && 
!waitqueue_active(&bitmap->behind_wait)) { - mbio = alloc_behind_master_bio(r1_bio, bio); + alloc_behind_master_bio(r1_bio, bio); } bitmap_startwrite(bitmap, r1_bio->sector, @@ -1485,14 +1482,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, first_clone = 0; } - if (!mbio) { - if (r1_bio->behind_master_bio) - mbio = bio_clone_fast(r1_bio->behind_master_bio, - GFP_NOIO, - mddev->bio_set); - else - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - } + if (r1_bio->behind_master_bio) + mbio = bio_clone_fast(r1_bio->behind_master_bio, + GFP_NOIO, mddev->bio_set); + else + mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); if (r1_bio->behind_master_bio) { if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) @@ -2346,8 +2340,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) wbio = bio_clone_fast(r1_bio->behind_master_bio, GFP_NOIO, mddev->bio_set); - /* We really need a _all clone */ - wbio->bi_iter = (struct bvec_iter){ 0 }; } else { wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, mddev->bio_set); -- cgit v1.2.3