Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-mpath.c   |  11
-rw-r--r--  drivers/md/dm-table.c   |  61
-rw-r--r--  drivers/md/dm-thin.c    | 135
-rw-r--r--  drivers/md/dm-verity.c  |   8
-rw-r--r--  drivers/md/dm.c         |  71
-rw-r--r--  drivers/md/dm.h         |   1
-rw-r--r--  drivers/md/md.c         |  15
-rw-r--r--  drivers/md/raid10.c     |  38
-rw-r--r--  drivers/md/raid10.h     |   2
-rw-r--r--  drivers/md/raid5.c      |   7
10 files changed, 251 insertions(+), 98 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d8abb90a6c2..034233eefc8 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
unsigned long arg)
{
struct multipath *m = ti->private;
+ struct pgpath *pgpath;
struct block_device *bdev;
fmode_t mode;
unsigned long flags;
@@ -1570,12 +1571,14 @@ again:
if (!m->current_pgpath)
__choose_pgpath(m, 0);
- if (m->current_pgpath) {
- bdev = m->current_pgpath->path.dev->bdev;
- mode = m->current_pgpath->path.dev->mode;
+ pgpath = m->current_pgpath;
+
+ if (pgpath) {
+ bdev = pgpath->path.dev->bdev;
+ mode = pgpath->path.dev->mode;
}
- if (m->queue_io)
+ if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
r = -EAGAIN;
else if (!bdev)
r = -EIO;
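
The multipath_ioctl() hunk above reduces to a three-way decision on a locally cached pgpath pointer. A minimal userspace sketch of that decision logic, assuming made-up names (ioctl_disposition and the three bool flags) rather than kernel symbols:

/* Userspace model of the ioctl disposition chosen above; the function
 * and flag names are illustrative, not kernel symbols. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Returns 0 when the ioctl may be passed to the path's bdev,
 * -EAGAIN when it should be retried later, -EIO when no path exists. */
static int ioctl_disposition(bool have_pgpath, bool queue_io,
			     bool queue_if_no_path)
{
	if ((have_pgpath && queue_io) || (!have_pgpath && queue_if_no_path))
		return -EAGAIN;	/* a path exists or may appear: retry */
	if (!have_pgpath)
		return -EIO;	/* no path and not queueing: fail */
	return 0;		/* pass the ioctl through */
}

int main(void)
{
	printf("%d %d %d\n",
	       ioctl_disposition(true, false, false),   /* 0       */
	       ioctl_disposition(true, true, false),    /* -EAGAIN */
	       ioctl_disposition(false, false, false)); /* -EIO    */
	return 0;
}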
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index f90069029aa..100368eb799 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
return &t->targets[(KEYS_PER_NODE * n) + k];
}
+static int count_device(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ unsigned *num_devices = data;
+
+ (*num_devices)++;
+
+ return 0;
+}
+
+/*
+ * Check whether a table has no data devices attached using each
+ * target's iterate_devices method.
+ * Returns false if the result is unknown because a target doesn't
+ * support iterate_devices.
+ */
+bool dm_table_has_no_data_devices(struct dm_table *table)
+{
+ struct dm_target *uninitialized_var(ti);
+ unsigned i = 0, num_devices = 0;
+
+ while (i < dm_table_get_num_targets(table)) {
+ ti = dm_table_get_target(table, i++);
+
+ if (!ti->type->iterate_devices)
+ return false;
+
+ ti->type->iterate_devices(ti, count_device, &num_devices);
+ if (num_devices)
+ return false;
+ }
+
+ return true;
+}
+
/*
* Establish the new table's queue_limits and validate them.
*/
@@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
return q && blk_queue_nonrot(q);
}
-static bool dm_table_is_nonrot(struct dm_table *t)
+static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && !blk_queue_add_random(q);
+}
+
+static bool dm_table_all_devices_attribute(struct dm_table *t,
+ iterate_devices_callout_fn func)
{
struct dm_target *ti;
unsigned i = 0;
- /* Ensure that all underlying device are non-rotational. */
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
if (!ti->type->iterate_devices ||
- !ti->type->iterate_devices(ti, device_is_nonrot, NULL))
+ !ti->type->iterate_devices(ti, func, NULL))
return 0;
}
@@ -1396,7 +1439,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_discard_zeroes_data(t))
q->limits.discard_zeroes_data = 0;
- if (dm_table_is_nonrot(t))
+ /* Ensure that all underlying devices are non-rotational. */
+ if (dm_table_all_devices_attribute(t, device_is_nonrot))
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
else
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
@@ -1404,6 +1448,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
dm_table_set_integrity(t);
/*
+ * Determine whether or not this queue's I/O timings contribute
+ * to the entropy pool. Only request-based targets use this.
+ * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
+ * have it set.
+ */
+ if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
+ queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+
+ /*
* QUEUE_FLAG_STACKABLE must be set after all queue settings are
* visible to other CPUs because, once the flag is set, incoming bios
* are processed by request-based dm, which refers to the queue
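
The dm_table_all_devices_attribute() change generalizes a per-device predicate into a table-wide "all devices satisfy it" test. A small userspace sketch of the same callout pattern, with illustrative types (struct dev, attr_fn) that are not the kernel's:

/* Userspace sketch of the callout pattern used by
 * dm_table_all_devices_attribute(): one predicate per device, one loop
 * that requires every device to satisfy it. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct dev { bool nonrot; bool add_random; };

typedef bool (*attr_fn)(const struct dev *d);

static bool dev_is_nonrot(const struct dev *d)     { return d->nonrot; }
static bool dev_is_not_random(const struct dev *d) { return !d->add_random; }

static bool all_devices_have(const struct dev *devs, size_t n, attr_fn fn)
{
	for (size_t i = 0; i < n; i++)
		if (!fn(&devs[i]))
			return false;	/* one miss disqualifies the table */
	return true;
}

int main(void)
{
	struct dev devs[] = { { true, false }, { true, true } };

	printf("all nonrot: %d\n", all_devices_have(devs, 2, dev_is_nonrot));
	printf("none random: %d\n", all_devices_have(devs, 2, dev_is_not_random));
	return 0;
}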
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index af1fc3b2c2a..c29410af1e2 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -509,9 +509,9 @@ enum pool_mode {
struct pool_features {
enum pool_mode mode;
- unsigned zero_new_blocks:1;
- unsigned discard_enabled:1;
- unsigned discard_passdown:1;
+ bool zero_new_blocks:1;
+ bool discard_enabled:1;
+ bool discard_passdown:1;
};
struct thin_c;
@@ -580,7 +580,8 @@ struct pool_c {
struct dm_target_callbacks callbacks;
dm_block_t low_water_blocks;
- struct pool_features pf;
+ struct pool_features requested_pf; /* Features requested during table load */
+ struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */
};
/*
@@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool)
/*----------------------------------------------------------------
* Binding of control targets to a pool object
*--------------------------------------------------------------*/
+static bool data_dev_supports_discard(struct pool_c *pt)
+{
+ struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
+
+ return q && blk_queue_discard(q);
+}
+
+/*
+ * If discard_passdown was enabled verify that the data device
+ * supports discards. Disable discard_passdown if not.
+ */
+static void disable_passdown_if_not_supported(struct pool_c *pt)
+{
+ struct pool *pool = pt->pool;
+ struct block_device *data_bdev = pt->data_dev->bdev;
+ struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
+ sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
+ const char *reason = NULL;
+ char buf[BDEVNAME_SIZE];
+
+ if (!pt->adjusted_pf.discard_passdown)
+ return;
+
+ if (!data_dev_supports_discard(pt))
+ reason = "discard unsupported";
+
+ else if (data_limits->max_discard_sectors < pool->sectors_per_block)
+ reason = "max discard sectors smaller than a block";
+
+ else if (data_limits->discard_granularity > block_size)
+ reason = "discard granularity larger than a block";
+
+ else if (block_size & (data_limits->discard_granularity - 1))
+ reason = "discard granularity not a factor of block size";
+
+ if (reason) {
+ DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
+ pt->adjusted_pf.discard_passdown = false;
+ }
+}
+
static int bind_control_target(struct pool *pool, struct dm_target *ti)
{
struct pool_c *pt = ti->private;
@@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
* We want to make sure that degraded pools are never upgraded.
*/
enum pool_mode old_mode = pool->pf.mode;
- enum pool_mode new_mode = pt->pf.mode;
+ enum pool_mode new_mode = pt->adjusted_pf.mode;
if (old_mode > new_mode)
new_mode = old_mode;
pool->ti = ti;
pool->low_water_blocks = pt->low_water_blocks;
- pool->pf = pt->pf;
- set_pool_mode(pool, new_mode);
+ pool->pf = pt->adjusted_pf;
- /*
- * If discard_passdown was enabled verify that the data device
- * supports discards. Disable discard_passdown if not; otherwise
- * -EOPNOTSUPP will be returned.
- */
- /* FIXME: pull this out into a sep fn. */
- if (pt->pf.discard_passdown) {
- struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
- if (!q || !blk_queue_discard(q)) {
- char buf[BDEVNAME_SIZE];
- DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
- bdevname(pt->data_dev->bdev, buf));
- pool->pf.discard_passdown = 0;
- }
- }
+ set_pool_mode(pool, new_mode);
return 0;
}
@@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
static void pool_features_init(struct pool_features *pf)
{
pf->mode = PM_WRITE;
- pf->zero_new_blocks = 1;
- pf->discard_enabled = 1;
- pf->discard_passdown = 1;
+ pf->zero_new_blocks = true;
+ pf->discard_enabled = true;
+ pf->discard_passdown = true;
}
static void __pool_destroy(struct pool *pool)
@@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
argc--;
if (!strcasecmp(arg_name, "skip_block_zeroing"))
- pf->zero_new_blocks = 0;
+ pf->zero_new_blocks = false;
else if (!strcasecmp(arg_name, "ignore_discard"))
- pf->discard_enabled = 0;
+ pf->discard_enabled = false;
else if (!strcasecmp(arg_name, "no_discard_passdown"))
- pf->discard_passdown = 0;
+ pf->discard_passdown = false;
else if (!strcasecmp(arg_name, "read_only"))
pf->mode = PM_READ_ONLY;
@@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
pt->metadata_dev = metadata_dev;
pt->data_dev = data_dev;
pt->low_water_blocks = low_water_blocks;
- pt->pf = pf;
+ pt->adjusted_pf = pt->requested_pf = pf;
ti->num_flush_requests = 1;
+
/*
* Only need to enable discards if the pool should pass
* them down to the data device. The thin device's discard
@@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
*/
if (pf.discard_enabled && pf.discard_passdown) {
ti->num_discard_requests = 1;
+
/*
* Setting 'discards_supported' circumvents the normal
* stacking of discard limits (this keeps the pool and
* thin devices' discard limits consistent).
*/
ti->discards_supported = true;
+ ti->discard_zeroes_data_unsupported = true;
}
ti->private = pt;
@@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
(unsigned long)pool->sectors_per_block,
(unsigned long long)pt->low_water_blocks);
- emit_flags(&pt->pf, result, sz, maxlen);
+ emit_flags(&pt->requested_pf, result, sz, maxlen);
break;
}
@@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
-static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
+static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
{
- /*
- * FIXME: these limits may be incompatible with the pool's data device
- */
+ struct pool *pool = pt->pool;
+ struct queue_limits *data_limits;
+
limits->max_discard_sectors = pool->sectors_per_block;
/*
- * This is just a hint, and not enforced. We have to cope with
- * bios that cover a block partially. A discard that spans a block
- * boundary is not sent to this target.
+ * discard_granularity is just a hint, and not enforced.
*/
- limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
- limits->discard_zeroes_data = pool->pf.zero_new_blocks;
+ if (pt->adjusted_pf.discard_passdown) {
+ data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
+ limits->discard_granularity = data_limits->discard_granularity;
+ } else
+ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
}
static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, 0);
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
- if (pool->pf.discard_enabled)
- set_discard_limits(pool, limits);
+
+ /*
+ * pt->adjusted_pf is a staging area for the actual features to use.
+ * They get transferred to the live pool in bind_control_target()
+ * called from pool_preresume().
+ */
+ if (!pt->adjusted_pf.discard_enabled)
+ return;
+
+ disable_passdown_if_not_supported(pt);
+
+ set_discard_limits(pt, limits);
}
static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti,
return 0;
}
+/*
+ * A thin device always inherits its queue limits from its pool.
+ */
static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct thin_c *tc = ti->private;
- struct pool *pool = tc->pool;
- blk_limits_io_min(limits, 0);
- blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
- set_discard_limits(pool, limits);
+ *limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
}
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
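
disable_passdown_if_not_supported() above is a chain of limit comparisons. Here is a userspace sketch of that chain, using bytes throughout for simplicity (the kernel mixes sectors and bytes) and made-up names; the final power-of-two factor test mirrors the block_size & (granularity - 1) check in the patch:

/* Userspace sketch of the passdown suitability checks; helper name and
 * sample limits are made up, only the comparison logic mirrors the patch. */
#include <stdint.h>
#include <stdio.h>

static const char *passdown_reason(uint64_t block_size, uint64_t max_discard,
				   uint64_t granularity, int discard_supported)
{
	if (!discard_supported)
		return "discard unsupported";
	if (max_discard < block_size)
		return "max discard sectors smaller than a block";
	if (granularity > block_size)
		return "discard granularity larger than a block";
	if (block_size & (granularity - 1))	/* power-of-two factor test */
		return "discard granularity not a factor of block size";
	return NULL;	/* passdown can stay enabled */
}

int main(void)
{
	/* 64KiB thin-pool block, 128KiB max discard, 4KiB granularity */
	const char *r = passdown_reason(65536, 131072, 4096, 1);

	printf("%s\n", r ? r : "passdown ok");
	return 0;
}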
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 254d19268ad..892ae2766aa 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
v->hash_dev_block_bits = ffs(num) - 1;
if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
- num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) !=
- (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) {
+ (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
ti->error = "Invalid data blocks";
r = -EINVAL;
goto bad;
@@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
- num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) !=
- (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) {
+ (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
+ >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
ti->error = "Invalid hash start";
r = -EINVAL;
goto bad;
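
The two verity hunks replace a weaker comparison with a round-trip test: shift the 64-bit argument into sector units, truncate to sector_t, shift back, and require the original value. A standalone sketch, with sector_t deliberately forced to 32 bits so the truncation is visible (an assumption for the demo; the kernel's sector_t may be 64-bit):

/* Userspace sketch of the overflow test used in the verity hunks above. */
#include <stdio.h>
#include <stdint.h>

typedef uint32_t sector_t;		/* forced narrow for the demo */
#define SECTOR_SHIFT 9

static int fits_in_sectors(unsigned long long num_ll, unsigned block_bits)
{
	unsigned shift = block_bits - SECTOR_SHIFT;

	return (unsigned long long)((sector_t)(num_ll << shift) >> shift)
	       == num_ll;
}

int main(void)
{
	/* 4KiB blocks: shift = 3.  2^28 blocks fit in 32-bit sectors;
	 * 2^30 blocks would need 2^33 sectors and get truncated. */
	printf("%d %d\n",
	       fits_in_sectors(1ULL << 28, 12),
	       fits_in_sectors(1ULL << 30, 12));
	return 0;
}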
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4e09b6ff5b4..67ffa391edc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
{
int r = error;
struct dm_rq_target_io *tio = clone->end_io_data;
- dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+ dm_request_endio_fn rq_end_io = NULL;
- if (mapped && rq_end_io)
- r = rq_end_io(tio->ti, clone, error, &tio->info);
+ if (tio->ti) {
+ rq_end_io = tio->ti->type->rq_end_io;
+
+ if (mapped && rq_end_io)
+ r = rq_end_io(tio->ti, clone, error, &tio->info);
+ }
if (r <= 0)
/* The target wants to complete the I/O */
@@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
int r, requeued = 0;
struct dm_rq_target_io *tio = clone->end_io_data;
- /*
- * Hold the md reference here for the in-flight I/O.
- * We can't rely on the reference count by device opener,
- * because the device may be closed during the request completion
- * when all bios are completed.
- * See the comment in rq_completed() too.
- */
- dm_get(md);
-
tio->ti = ti;
r = ti->type->map_rq(ti, clone, &tio->info);
switch (r) {
@@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone,
return requeued;
}
+static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
+{
+ struct request *clone;
+
+ blk_start_request(orig);
+ clone = orig->special;
+ atomic_inc(&md->pending[rq_data_dir(clone)]);
+
+ /*
+ * Hold the md reference here for the in-flight I/O.
+ * We can't rely on the reference count by device opener,
+ * because the device may be closed during the request completion
+ * when all bios are completed.
+ * See the comment in rq_completed() too.
+ */
+ dm_get(md);
+
+ return clone;
+}
+
/*
* q->request_fn for request-based dm.
* Called with the queue lock held.
@@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q)
pos = blk_rq_pos(rq);
ti = dm_table_find_target(map, pos);
- BUG_ON(!dm_target_is_valid(ti));
+ if (!dm_target_is_valid(ti)) {
+ /*
+ * Must perform setup, that dm_done() requires,
+ * before calling dm_kill_unmapped_request
+ */
+ DMERR_LIMIT("request attempted access beyond the end of device");
+ clone = dm_start_request(md, rq);
+ dm_kill_unmapped_request(clone, -EIO);
+ continue;
+ }
if (ti->type->busy && ti->type->busy(ti))
goto delay_and_out;
- blk_start_request(rq);
- clone = rq->special;
- atomic_inc(&md->pending[rq_data_dir(clone)]);
+ clone = dm_start_request(md, rq);
spin_unlock(q->queue_lock);
if (map_request(ti, clone, md))
@@ -1684,8 +1706,6 @@ delay_and_out:
blk_delay_queue(q, HZ / 10);
out:
dm_table_put(map);
-
- return;
}
int dm_underlying_device_busy(struct request_queue *q)
@@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md)
*/
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
- struct dm_table *map = ERR_PTR(-EINVAL);
+ struct dm_table *live_map, *map = ERR_PTR(-EINVAL);
struct queue_limits limits;
int r;
@@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
if (!dm_suspended_md(md))
goto out;
+ /*
+ * If the new table has no data devices, retain the existing limits.
+ * This helps multipath with queue_if_no_path if all paths disappear,
+ * then new I/O is queued based on these limits, and then some paths
+ * reappear.
+ */
+ if (dm_table_has_no_data_devices(table)) {
+ live_map = dm_get_live_table(md);
+ if (live_map)
+ limits = md->queue->limits;
+ dm_table_put(live_map);
+ }
+
r = dm_calculate_queue_limits(table, &limits);
if (r) {
map = ERR_PTR(r);
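
The dm.c changes factor the start-of-request bookkeeping into dm_start_request() so that the new out-of-range error path and the normal dispatch path do identical setup before the clone can be completed or killed. A toy userspace model of that structure, with made-up types and an EIO completion standing in for dm_kill_unmapped_request():

/* Toy model: both dispatch paths share the same start helper, so the
 * accounting balances whether the request is mapped or killed. */
#include <errno.h>
#include <stdio.h>

struct dev { int pending; int refs; };
struct req { long sector; };

static void start_request(struct dev *d, struct req *r)
{
	(void)r;
	d->pending++;		/* in-flight counter */
	d->refs++;		/* hold the device for the in-flight I/O */
}

static void end_request(struct dev *d, struct req *r, int error)
{
	printf("request at %ld finished: %d\n", r->sector, error);
	d->pending--;
	d->refs--;
}

static void dispatch(struct dev *d, struct req *r, long dev_sectors)
{
	start_request(d, r);			/* shared by both paths */
	if (r->sector >= dev_sectors) {		/* beyond end of device */
		end_request(d, r, -EIO);	/* "kill" the request */
		return;
	}
	end_request(d, r, 0);			/* normal completion */
}

int main(void)
{
	struct dev d = { 0, 0 };
	struct req ok = { 10 }, bad = { 1000 };

	dispatch(&d, &ok, 100);
	dispatch(&d, &bad, 100);
	printf("pending=%d refs=%d\n", d.pending, d.refs);	/* both 0 */
	return 0;
}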
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 52eef493d26..6a99fefaa74 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+bool dm_table_has_no_data_devices(struct dm_table *table);
int dm_calculate_queue_limits(struct dm_table *table,
struct queue_limits *limits);
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fcd098794d3..308e87b417e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1108,8 +1108,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
ret = 0;
}
rdev->sectors = rdev->sb_start;
- /* Limit to 4TB as metadata cannot record more than that */
- if (rdev->sectors >= (2ULL << 32))
+ /* Limit to 4TB as metadata cannot record more than that.
+ * (not needed for Linear and RAID0 as metadata doesn't
+ * record this size)
+ */
+ if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
rdev->sectors = (2ULL << 32) - 2;
if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
@@ -1400,7 +1403,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
/* Limit to 4TB as metadata cannot record more than that.
* 4TB == 2^32 KB, or 2*2^32 sectors.
*/
- if (num_sectors >= (2ULL << 32))
+ if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
num_sectors = (2ULL << 32) - 2;
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
@@ -7616,6 +7619,8 @@ static int remove_and_add_spares(struct mddev *mddev)
}
}
}
+ if (removed)
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
return spares;
}
@@ -7629,9 +7634,11 @@ static void reap_sync_thread(struct mddev *mddev)
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
/* activate any spares */
- if (mddev->pers->spare_active(mddev))
+ if (mddev->pers->spare_active(mddev)) {
sysfs_notify(&mddev->kobj, NULL,
"degraded");
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ }
}
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
mddev->pers->finish_reshape)
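
The md.c hunks hinge on one piece of arithmetic: the 0.90 superblock records sizes in KiB in a 32-bit field, so anything it can describe tops out at 2^32 KiB = 4 TiB = 2*2^32 512-byte sectors, and the code caps rdev->sectors just below that. A quick standalone check of those numbers:

/* Sanity check of the 4TB cap used in the md 0.90 superblock hunks. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t max_kib     = 1ULL << 32;		/* 32-bit KiB field */
	uint64_t max_sectors = 2ULL << 32;		/* two sectors per KiB */
	uint64_t cap         = (2ULL << 32) - 2;	/* cap used in the patch */

	printf("%" PRIu64 " KiB = %" PRIu64 " sectors = %" PRIu64 " TiB\n",
	       max_kib, max_sectors, (max_kib << 10) >> 40);
	printf("cap = %" PRIu64 " sectors (%" PRIu64 " KiB)\n", cap, cap / 2);
	return 0;
}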
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index de5ed6fd880..0138a727c1f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -659,7 +659,11 @@ static int raid10_mergeable_bvec(struct request_queue *q,
max = biovec->bv_len;
if (mddev->merge_check_needed) {
- struct r10bio r10_bio;
+ struct {
+ struct r10bio r10_bio;
+ struct r10dev devs[conf->copies];
+ } on_stack;
+ struct r10bio *r10_bio = &on_stack.r10_bio;
int s;
if (conf->reshape_progress != MaxSector) {
/* Cannot give any guidance during reshape */
@@ -667,18 +671,18 @@ static int raid10_mergeable_bvec(struct request_queue *q,
return biovec->bv_len;
return 0;
}
- r10_bio.sector = sector;
- raid10_find_phys(conf, &r10_bio);
+ r10_bio->sector = sector;
+ raid10_find_phys(conf, r10_bio);
rcu_read_lock();
for (s = 0; s < conf->copies; s++) {
- int disk = r10_bio.devs[s].devnum;
+ int disk = r10_bio->devs[s].devnum;
struct md_rdev *rdev = rcu_dereference(
conf->mirrors[disk].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q =
bdev_get_queue(rdev->bdev);
if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio.devs[s].addr
+ bvm->bi_sector = r10_bio->devs[s].addr
+ rdev->data_offset;
bvm->bi_bdev = rdev->bdev;
max = min(max, q->merge_bvec_fn(
@@ -690,7 +694,7 @@ static int raid10_mergeable_bvec(struct request_queue *q,
struct request_queue *q =
bdev_get_queue(rdev->bdev);
if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio.devs[s].addr
+ bvm->bi_sector = r10_bio->devs[s].addr
+ rdev->data_offset;
bvm->bi_bdev = rdev->bdev;
max = min(max, q->merge_bvec_fn(
@@ -1508,14 +1512,16 @@ static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
do {
int n = conf->copies;
int cnt = 0;
+ int this = first;
while (n--) {
- if (conf->mirrors[first].rdev &&
- first != ignore)
+ if (conf->mirrors[this].rdev &&
+ this != ignore)
cnt++;
- first = (first+1) % geo->raid_disks;
+ this = (this+1) % geo->raid_disks;
}
if (cnt == 0)
return 0;
+ first = (first + geo->near_copies) % geo->raid_disks;
} while (first != 0);
return 1;
}
@@ -4414,14 +4420,18 @@ static int handle_reshape_read_error(struct mddev *mddev,
{
/* Use sync reads to get the blocks from somewhere else */
int sectors = r10_bio->sectors;
- struct r10bio r10b;
struct r10conf *conf = mddev->private;
+ struct {
+ struct r10bio r10_bio;
+ struct r10dev devs[conf->copies];
+ } on_stack;
+ struct r10bio *r10b = &on_stack.r10_bio;
int slot = 0;
int idx = 0;
struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec;
- r10b.sector = r10_bio->sector;
- __raid10_find_phys(&conf->prev, &r10b);
+ r10b->sector = r10_bio->sector;
+ __raid10_find_phys(&conf->prev, r10b);
while (sectors) {
int s = sectors;
@@ -4432,7 +4442,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
s = PAGE_SIZE >> 9;
while (!success) {
- int d = r10b.devs[slot].devnum;
+ int d = r10b->devs[slot].devnum;
struct md_rdev *rdev = conf->mirrors[d].rdev;
sector_t addr;
if (rdev == NULL ||
@@ -4440,7 +4450,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
!test_bit(In_sync, &rdev->flags))
goto failed;
- addr = r10b.devs[slot].addr + idx * PAGE_SIZE;
+ addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
success = sync_page_io(rdev,
addr,
s << 9,
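
The raid10 hunks replace fixed struct r10bio stack variables with an on_stack wrapper that appends conf->copies trailing elements; naming the formerly anonymous struct r10dev (next hunk) is what makes that declaration possible. A userspace sketch of the same pattern, relying on GNU C extensions just as the kernel code does, with struct header/struct item as made-up stand-ins for r10bio/r10dev:

/* On-stack storage for a header type that ends in a flexible array
 * (GNU C: flexible array inside a struct, VLA struct member). */
#include <stdio.h>

struct item { int devnum; long addr; };
struct header { long sector; struct item devs[]; };	/* flexible array */

static void fill(struct header *h, int copies)
{
	for (int i = 0; i < copies; i++) {
		h->devs[i].devnum = i;
		h->devs[i].addr = h->sector + 8 * i;
	}
}

int main(void)
{
	int copies = 3;				/* known only at run time */
	struct {
		struct header hdr;
		struct item devs[copies];	/* VLA member: GNU extension */
	} on_stack;
	struct header *h = &on_stack.hdr;

	h->sector = 1024;
	fill(h, copies);
	for (int i = 0; i < copies; i++)
		printf("dev %d addr %ld\n", h->devs[i].devnum, h->devs[i].addr);
	return 0;
}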
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 007c2c68dd8..1054cf60234 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -110,7 +110,7 @@ struct r10bio {
* We choose the number when they are allocated.
* We sometimes need an extra bio to write to the replacement.
*/
- struct {
+ struct r10dev {
struct bio *bio;
union {
struct bio *repl_bio; /* used for resync and
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index adda94df5eb..0689173fd9f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -393,6 +393,8 @@ static int calc_degraded(struct r5conf *conf)
degraded = 0;
for (i = 0; i < conf->previous_raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && test_bit(Faulty, &rdev->flags))
+ rdev = rcu_dereference(conf->disks[i].replacement);
if (!rdev || test_bit(Faulty, &rdev->flags))
degraded++;
else if (test_bit(In_sync, &rdev->flags))
@@ -417,6 +419,8 @@ static int calc_degraded(struct r5conf *conf)
degraded2 = 0;
for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && test_bit(Faulty, &rdev->flags))
+ rdev = rcu_dereference(conf->disks[i].replacement);
if (!rdev || test_bit(Faulty, &rdev->flags))
degraded2++;
else if (test_bit(In_sync, &rdev->flags))
@@ -1587,6 +1591,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
#ifdef CONFIG_MULTICORE_RAID456
init_waitqueue_head(&nsh->ops.wait_for_ops);
#endif
+ spin_lock_init(&nsh->stripe_lock);
list_add(&nsh->lru, &newstripes);
}
@@ -4192,7 +4197,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
finish_wait(&conf->wait_for_overlap, &w);
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
- if ((bi->bi_rw & REQ_NOIDLE) &&
+ if ((bi->bi_rw & REQ_SYNC) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
release_stripe_plug(mddev, sh);
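
The calc_degraded() hunks add one fallback: when a slot's primary rdev is faulty, consult its replacement before declaring the slot missing. A simplified userspace model of just that fallback (the real function's In_sync and reshape bookkeeping is omitted), using an illustrative struct slot:

/* Count slots with neither a usable primary nor a usable replacement. */
#include <stdbool.h>
#include <stdio.h>

struct slot {
	bool has_rdev, rdev_faulty;
	bool has_repl, repl_faulty;
};

static int count_missing(const struct slot *s, int n)
{
	int degraded = 0;

	for (int i = 0; i < n; i++) {
		bool usable = s[i].has_rdev && !s[i].rdev_faulty;

		if (!usable)	/* the patched fallback */
			usable = s[i].has_repl && !s[i].repl_faulty;
		if (!usable)
			degraded++;
	}
	return degraded;
}

int main(void)
{
	struct slot slots[2] = {
		{ true, true,  true,  false },	/* faulty primary, live replacement */
		{ true, false, false, false },	/* healthy primary, no replacement */
	};

	printf("degraded = %d\n", count_missing(slots, 2));	/* prints 0 */
	return 0;
}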