From 16342c21c94760ac4d426371546d4f9e27758e1b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 6 Jan 2014 10:35:34 +1100 Subject: md/raid10: fix bug when raid10 recovery fails to recover a block. commit e8b849158508565e0cd6bc80061124afc5879160 upstream. commit e875ecea266a543e643b19e44cf472f1412708f9 md/raid10 record bad blocks as needed during recovery. added code to the "cannot recover this block" path to record a bad block rather than fail the whole recovery. Unfortunately this new case was placed *after* r10bio was freed rather than *before*, yet it still uses r10bio. This is will crash with a null dereference. So move the freeing of r10bio down where it is safe. Fixes: e875ecea266a543e643b19e44cf472f1412708f9 Reported-by: Damian Nowak URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181 Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid10.c') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0add8682175..ff3f8d057e0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3198,10 +3198,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (j == conf->copies) { /* Cannot recover, so abort the recovery or * record a bad block */ - put_buf(r10_bio); - if (rb2) - atomic_dec(&rb2->remaining); - r10_bio = rb2; if (any_working) { /* problem is that there are bad blocks * on other device(s) @@ -3233,6 +3229,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, mirror->recovery_disabled = mddev->recovery_disabled; } + put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); + r10_bio = rb2; break; } } -- cgit v1.2.3 From ddd618aa9eee9f143d750d15497bf6f117874928 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 14 Jan 2014 10:38:09 +1100 Subject: md/raid10: fix two bugs in handling of known-bad-blocks. commit b50c259e25d9260b9108dc0c2964c26e5ecbe1c1 upstream. If we discover a bad block when reading we split the request and potentially read some of it from a different device. The code path of this has two bugs in RAID10. 1/ we get a spin_lock with _irq, but unlock without _irq!! 2/ The calculation of 'sectors_handled' is wrong, as can be clearly seen by comparison with raid1.c This leads to at least 2 warnings and a probable crash is a RAID10 ever had known bad blocks. Fixes: 856e08e23762dfb92ffc68fd0a8d228f9e152160 Reported-by: Damian Nowak URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181 Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/raid10.c') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ff3f8d057e0..d2f8cd332b4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1321,7 +1321,7 @@ read_again: /* Could not read all from this device, so we will * need another r10_bio. */ - sectors_handled = (r10_bio->sectors + max_sectors + sectors_handled = (r10_bio->sector + max_sectors - bio->bi_sector); r10_bio->sectors = max_sectors; spin_lock_irq(&conf->device_lock); @@ -1329,7 +1329,7 @@ read_again: bio->bi_phys_segments = 2; else bio->bi_phys_segments++; - spin_unlock(&conf->device_lock); + spin_unlock_irq(&conf->device_lock); /* Cannot call generic_make_request directly * as that will be queued in __generic_make_request * and subsequent mempool_alloc might block -- cgit v1.2.3