aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-flakey.c2
-rw-r--r--drivers/md/dm-io.c23
-rw-r--r--drivers/md/dm-ioctl.c2
-rw-r--r--drivers/md/dm-raid.c17
-rw-r--r--drivers/md/dm-thin-metadata.c25
-rw-r--r--drivers/md/raid1.c2
-rw-r--r--drivers/md/raid10.c38
7 files changed, 77 insertions, 32 deletions
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 9fb18c147825..b280c433e4a0 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -323,7 +323,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
* Corrupt successful READs while in down state.
* If flags were specified, only corrupt those that match.
*/
- if (!error && bio_submitted_while_down &&
+ if (fc->corrupt_bio_byte && !error && bio_submitted_while_down &&
(bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc))
corrupt_bio_data(bio, fc);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ad2eba40e319..ea5dd289fe2a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -296,6 +296,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
unsigned offset;
unsigned num_bvecs;
sector_t remaining = where->count;
+ struct request_queue *q = bdev_get_queue(where->bdev);
+ sector_t discard_sectors;
/*
* where->count may be zero if rw holds a flush and we need to
@@ -305,9 +307,12 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
/*
* Allocate a suitably sized-bio.
*/
- num_bvecs = dm_sector_div_up(remaining,
- (PAGE_SIZE >> SECTOR_SHIFT));
- num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
+ if (rw & REQ_DISCARD)
+ num_bvecs = 1;
+ else
+ num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
+ dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
@@ -315,10 +320,14 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
bio->bi_destructor = dm_bio_destructor;
store_io_and_region_in_bio(bio, io, region);
- /*
- * Try and add as many pages as possible.
- */
- while (remaining) {
+ if (rw & REQ_DISCARD) {
+ discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
+ bio->bi_size = discard_sectors << SECTOR_SHIFT;
+ remaining -= discard_sectors;
+ } else while (remaining) {
+ /*
+ * Try and add as many pages as possible.
+ */
dp->get_page(dp, &page, &len, &offset);
len = min(len, to_bytes(remaining));
if (!bio_add_page(bio, page, len, offset))
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 31c2dc25886d..1ce84ed0b765 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1437,7 +1437,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
if (!argc) {
DMWARN("Empty message received.");
- goto out;
+ goto out_argv;
}
table = dm_get_live_table(md);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 86cb7e5d83d5..787022c18187 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -668,7 +668,14 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
return ret;
sb = page_address(rdev->sb_page);
- if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) {
+
+ /*
+ * Two cases that we want to write new superblocks and rebuild:
+ * 1) New device (no matching magic number)
+ * 2) Device specified for rebuild (!In_sync w/ offset == 0)
+ */
+ if ((sb->magic != cpu_to_le32(DM_RAID_MAGIC)) ||
+ (!test_bit(In_sync, &rdev->flags) && !rdev->recovery_offset)) {
super_sync(rdev->mddev, rdev);
set_bit(FirstUse, &rdev->flags);
@@ -745,11 +752,8 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev)
*/
rdev_for_each(r, t, mddev) {
if (!test_bit(In_sync, &r->flags)) {
- if (!test_bit(FirstUse, &r->flags))
- DMERR("Superblock area of "
- "rebuild device %d should have been "
- "cleared.", r->raid_disk);
- set_bit(FirstUse, &r->flags);
+ DMINFO("Device %d specified for rebuild: "
+ "Clearing superblock", r->raid_disk);
rebuilds++;
} else if (test_bit(FirstUse, &r->flags))
new_devs++;
@@ -971,6 +975,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
+ ti->num_flush_requests = 1;
mutex_lock(&rs->md.reconfig_mutex);
ret = md_run(&rs->md);
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 59c4f0446ffa..237571af77fd 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -385,6 +385,7 @@ static int init_pmd(struct dm_pool_metadata *pmd,
data_sm = dm_sm_disk_create(tm, nr_blocks);
if (IS_ERR(data_sm)) {
DMERR("sm_disk_create failed");
+ dm_tm_unlock(tm, sblock);
r = PTR_ERR(data_sm);
goto bad;
}
@@ -789,6 +790,11 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
return 0;
}
+/*
+ * __open_device: Returns @td corresponding to device with id @dev,
+ * creating it if @create is set and incrementing @td->open_count.
+ * On failure, @td is undefined.
+ */
static int __open_device(struct dm_pool_metadata *pmd,
dm_thin_id dev, int create,
struct dm_thin_device **td)
@@ -799,10 +805,16 @@ static int __open_device(struct dm_pool_metadata *pmd,
struct disk_device_details details_le;
/*
- * Check the device isn't already open.
+ * If the device is already open, return it.
*/
list_for_each_entry(td2, &pmd->thin_devices, list)
if (td2->id == dev) {
+ /*
+ * May not create an already-open device.
+ */
+ if (create)
+ return -EEXIST;
+
td2->open_count++;
*td = td2;
return 0;
@@ -817,6 +829,9 @@ static int __open_device(struct dm_pool_metadata *pmd,
if (r != -ENODATA || !create)
return r;
+ /*
+ * Create new device.
+ */
changed = 1;
details_le.mapped_blocks = 0;
details_le.transaction_id = cpu_to_le64(pmd->trans_id);
@@ -882,12 +897,10 @@ static int __create_thin(struct dm_pool_metadata *pmd,
r = __open_device(pmd, dev, 1, &td);
if (r) {
- __close_device(td);
dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
dm_btree_del(&pmd->bl_info, dev_root);
return r;
}
- td->changed = 1;
__close_device(td);
return r;
@@ -967,14 +980,14 @@ static int __create_snap(struct dm_pool_metadata *pmd,
goto bad;
r = __set_snapshot_details(pmd, td, origin, pmd->time);
+ __close_device(td);
+
if (r)
goto bad;
- __close_device(td);
return 0;
bad:
- __close_device(td);
dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
dm_btree_remove(&pmd->details_info, pmd->details_root,
&key, &pmd->details_root);
@@ -1211,6 +1224,8 @@ static int __remove(struct dm_thin_device *td, dm_block_t block)
if (r)
return r;
+ td->mapped_blocks--;
+ td->changed = 1;
pmd->need_commit = 1;
return 0;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a368db2431a5..a0b225eb4ac4 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -624,7 +624,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)
return 1;
rcu_read_lock();
- for (i = 0; i < conf->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q = bdev_get_queue(rdev->bdev);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6e8aa213f0d5..58c44d6453a0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -67,6 +67,7 @@ static int max_queued_requests = 1024;
static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf);
+static int enough(struct r10conf *conf, int ignore);
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
@@ -347,6 +348,19 @@ static void raid10_end_read_request(struct bio *bio, int error)
* wait for the 'master' bio.
*/
set_bit(R10BIO_Uptodate, &r10_bio->state);
+ } else {
+ /* If all other devices that store this block have
+ * failed, we want to return the error upwards rather
+ * than fail the last device. Here we redefine
+ * "uptodate" to mean "Don't want to retry"
+ */
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
+ if (!enough(conf, rdev->raid_disk))
+ uptodate = 1;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ }
+ if (uptodate) {
raid_end_bio_io(r10_bio);
rdev_dec_pending(rdev, conf->mddev);
} else {
@@ -2052,6 +2066,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
"md/raid10:%s: %s: Failing raid device\n",
mdname(mddev), b);
md_error(mddev, conf->mirrors[d].rdev);
+ r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return;
}
@@ -2105,8 +2120,11 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
rdev,
r10_bio->devs[r10_bio->read_slot].addr
+ sect,
- s, 0))
+ s, 0)) {
md_error(mddev, rdev);
+ r10_bio->devs[r10_bio->read_slot].bio
+ = IO_BLOCKED;
+ }
break;
}
@@ -2299,17 +2317,20 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
* This is all done synchronously while the array is
* frozen.
*/
+ bio = r10_bio->devs[slot].bio;
+ bdevname(bio->bi_bdev, b);
+ bio_put(bio);
+ r10_bio->devs[slot].bio = NULL;
+
if (mddev->ro == 0) {
freeze_array(conf);
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
- }
+ } else
+ r10_bio->devs[slot].bio = IO_BLOCKED;
+
rdev_dec_pending(rdev, mddev);
- bio = r10_bio->devs[slot].bio;
- bdevname(bio->bi_bdev, b);
- r10_bio->devs[slot].bio =
- mddev->ro ? IO_BLOCKED : NULL;
read_more:
rdev = read_balance(conf, r10_bio, &max_sectors);
if (rdev == NULL) {
@@ -2318,13 +2339,10 @@ read_more:
mdname(mddev), b,
(unsigned long long)r10_bio->sector);
raid_end_bio_io(r10_bio);
- bio_put(bio);
return;
}
do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
- if (bio)
- bio_put(bio);
slot = r10_bio->read_slot;
printk_ratelimited(
KERN_ERR
@@ -2360,7 +2378,6 @@ read_more:
mbio->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
generic_make_request(bio);
- bio = NULL;
r10_bio = mempool_alloc(conf->r10bio_pool,
GFP_NOIO);
@@ -3243,7 +3260,6 @@ static int run(struct mddev *mddev)
disk->rdev = rdev;
}
- disk->rdev = rdev;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk