aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-24 14:42:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-24 14:42:03 -0700
commite288e931c12e77982aa582e2380cd072becfe488 (patch)
tree8d1b08d7b87bd5529da561e9602cc40698b08009 /drivers
parentdb6aaf4d55f95dcb6b162c3a59b56eb1e85ccdfe (diff)
parentc0f04d88e46d14de51f4baebb6efafb7d59e9f96 (diff)
Merge branch 'bcache' (bcache fixes from Kent Overstreet)
Merge bcache fixes from Kent Overstreet: "There's fixes for _three_ different data corruption bugs, all of which were found by users hitting them in the wild. The first one isn't bcache specific - in 3.11 bcache was switched to the bio_copy_data in fs/bio.c, and that's when the bug in that code was discovered, but it's also used by raid1 and pktcdvd. (That was my code too, so the bug's doubly embarassing given that it was or should've been just a cut and paste from bcache code. Dunno what happened there). Most of these (all the non data corruption bugs, actually) were ready before the merge window and have been sitting in Jens' tree, but I don't know what's been up with him lately..." * emailed patches from Kent Overstreet <kmo@daterainc.com>: bcache: Fix flushes in writeback mode bcache: Fix for handling overlapping extents when reading in a btree node bcache: Fix a shrinker deadlock bcache: Fix a dumb CPU spinning bug in writeback bcache: Fix a flush/fua performance bug bcache: Fix a writeback performance regression bcache: Correct printf()-style format length modifier bcache: Fix for when no journal entries are found bcache: Strip endline when writing the label through sysfs bcache: Fix a dumb journal discard bug block: Fix bio_copy_data()
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/bcache/bcache.h7
-rw-r--r--drivers/md/bcache/bset.c39
-rw-r--r--drivers/md/bcache/btree.c4
-rw-r--r--drivers/md/bcache/journal.c33
-rw-r--r--drivers/md/bcache/request.c15
-rw-r--r--drivers/md/bcache/sysfs.c9
-rw-r--r--drivers/md/bcache/util.c11
-rw-r--r--drivers/md/bcache/util.h12
-rw-r--r--drivers/md/bcache/writeback.c42
9 files changed, 108 insertions, 64 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b39f6f0b45f..0f12382aa35 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
*/
atomic_t has_dirty;
- struct ratelimit writeback_rate;
+ struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
/*
@@ -507,10 +507,9 @@ struct cached_dev {
*/
sector_t last_read;
- /* Number of writeback bios in flight */
- atomic_t in_flight;
+ /* Limit number of writeback bios in flight */
+ struct semaphore in_flight;
struct closure_with_timer writeback;
- struct closure_waitlist writeback_wait;
struct keybuf writeback_keys;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8010eed06a5..22d1ae72c28 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
/* Mergesort */
+static void sort_key_next(struct btree_iter *iter,
+ struct btree_iter_set *i)
+{
+ i->k = bkey_next(i->k);
+
+ if (i->k == i->end)
+ *i = iter->data[--iter->used];
+}
+
static void btree_sort_fixup(struct btree_iter *iter)
{
while (iter->used > 1) {
struct btree_iter_set *top = iter->data, *i = top + 1;
- struct bkey *k;
if (iter->used > 2 &&
btree_iter_cmp(i[0], i[1]))
i++;
- for (k = i->k;
- k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
- k = bkey_next(k))
- if (top->k > i->k)
- __bch_cut_front(top->k, k);
- else if (KEY_SIZE(k))
- bch_cut_back(&START_KEY(k), top->k);
-
- if (top->k < i->k || k == i->k)
+ if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
break;
- heap_sift(iter, i - top, btree_iter_cmp);
+ if (!KEY_SIZE(i->k)) {
+ sort_key_next(iter, i);
+ heap_sift(iter, i - top, btree_iter_cmp);
+ continue;
+ }
+
+ if (top->k > i->k) {
+ if (bkey_cmp(top->k, i->k) >= 0)
+ sort_key_next(iter, i);
+ else
+ bch_cut_front(top->k, i->k);
+
+ heap_sift(iter, i - top, btree_iter_cmp);
+ } else {
+ /* can't happen because of comparison func */
+ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+ bch_cut_back(&START_KEY(i->k), top->k);
+ }
}
}
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index f9764e61978..f42fc7ed9cd 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
return;
err:
- bch_cache_set_error(b->c, "io error reading bucket %lu",
+ bch_cache_set_error(b->c, "io error reading bucket %zu",
PTR_BUCKET_NR(b->c, &b->key, 0));
}
@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
return SHRINK_STOP;
/* Return -1 if we can't do anything right now */
- if (sc->gfp_mask & __GFP_WAIT)
+ if (sc->gfp_mask & __GFP_IO)
mutex_lock(&c->bucket_lock);
else if (!mutex_trylock(&c->bucket_lock))
return -1;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ba95ab84b2b..8435f81e5d8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
pr_debug("%u journal buckets", ca->sb.njournal_buckets);
- /* Read journal buckets ordered by golden ratio hash to quickly
+ /*
+ * Read journal buckets ordered by golden ratio hash to quickly
* find a sequence of buckets with valid journal entries
*/
for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
goto bsearch;
}
- /* If that fails, check all the buckets we haven't checked
+ /*
+ * If that fails, check all the buckets we haven't checked
* already
*/
pr_debug("falling back to linear search");
- for (l = 0; l < ca->sb.njournal_buckets; l++) {
- if (test_bit(l, bitmap))
- continue;
-
+ for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
+ l < ca->sb.njournal_buckets;
+ l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
if (read_bucket(l))
goto bsearch;
- }
+
+ if (list_empty(list))
+ continue;
bsearch:
/* Binary search */
m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ bsearch:
r = m;
}
- /* Read buckets in reverse order until we stop finding more
+ /*
+ * Read buckets in reverse order until we stop finding more
* journal entries
*/
- pr_debug("finishing up");
+ pr_debug("finishing up: m %u njournal_buckets %u",
+ m, ca->sb.njournal_buckets);
l = m;
while (1) {
@@ -228,9 +233,10 @@ bsearch:
}
}
- c->journal.seq = list_entry(list->prev,
- struct journal_replay,
- list)->j.seq;
+ if (!list_empty(list))
+ c->journal.seq = list_entry(list->prev,
+ struct journal_replay,
+ list)->j.seq;
return 0;
#undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
return;
}
- switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) {
+ switch (atomic_read(&ja->discard_in_flight)) {
case DISCARD_IN_FLIGHT:
return;
@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
if (cl)
BUG_ON(!closure_wait(&w->wait, cl));
+ closure_flush(&c->journal.io);
__journal_try_write(c, true);
}
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 786a1a4f74d..71eb233b9ac 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -997,14 +997,17 @@ static void request_write(struct cached_dev *dc, struct search *s)
} else {
bch_writeback_add(dc);
- if (s->op.flush_journal) {
+ if (bio->bi_rw & REQ_FLUSH) {
/* Also need to send a flush to the backing device */
- s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
- dc->disk.bio_split);
+ struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
+ dc->disk.bio_split);
- bio->bi_size = 0;
- bio->bi_vcnt = 0;
- closure_bio_submit(bio, cl, s->d);
+ flush->bi_rw = WRITE_FLUSH;
+ flush->bi_bdev = bio->bi_bdev;
+ flush->bi_end_io = request_endio;
+ flush->bi_private = cl;
+
+ closure_bio_submit(flush, cl, s->d);
} else {
s->op.cache_bio = bio;
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4fe6ab2fbe2..924dcfdae11 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -223,8 +223,13 @@ STORE(__cached_dev)
}
if (attr == &sysfs_label) {
- /* note: endlines are preserved */
- memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
+ if (size > SB_LABEL_SIZE)
+ return -EINVAL;
+ memcpy(dc->sb.label, buf, size);
+ if (size < SB_LABEL_SIZE)
+ dc->sb.label[size] = '\0';
+ if (size && dc->sb.label[size - 1] == '\n')
+ dc->sb.label[size - 1] = '\0';
bch_write_bdev_super(dc, NULL);
if (dc->disk.c) {
memcpy(dc->disk.c->uuids[dc->disk.id].label,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 98eb81159a2..420dad545c7 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
stats->last = now ?: 1;
}
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1ae2a73ad85..ea345c6896f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
(ewma) >> factor; \
})
-struct ratelimit {
+struct bch_ratelimit {
+ /* Next time we want to do some work, in nanoseconds */
uint64_t next;
+
+ /*
+ * Rate at which we want to do work, in units per nanosecond
+ * The units here correspond to the units passed to bch_next_delay()
+ */
unsigned rate;
};
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
{
d->next = local_clock();
}
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
#define __DIV_SAFE(n, d, zero) \
({ \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff55162..ba3ee48320f 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
{
+ uint64_t ret;
+
if (atomic_read(&dc->disk.detaching) ||
!dc->writeback_percent)
return 0;
- return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+ ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+
+ return min_t(uint64_t, ret, HZ);
}
/* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
up_write(&dc->writeback_lock);
- ratelimit_reset(&dc->writeback_rate);
+ bch_ratelimit_reset(&dc->writeback_rate);
/* Punt to workqueue only so we don't recurse and blow the stack */
continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
}
bch_keybuf_del(&dc->writeback_keys, w);
- atomic_dec_bug(&dc->in_flight);
-
- closure_wake_up(&dc->writeback_wait);
+ up(&dc->in_flight);
closure_return_with_destructor(cl, dirty_io_destructor);
}
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
closure_bio_submit(&io->bio, cl, &io->dc->disk);
- continue_at(cl, write_dirty_finish, dirty_wq);
+ continue_at(cl, write_dirty_finish, system_wq);
}
static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
closure_bio_submit(&io->bio, cl, &io->dc->disk);
- continue_at(cl, write_dirty, dirty_wq);
+ continue_at(cl, write_dirty, system_wq);
}
static void read_dirty(struct closure *cl)
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)
if (delay > 0 &&
(KEY_START(&w->key) != dc->last_read ||
- jiffies_to_msecs(delay) > 50)) {
- w->private = NULL;
-
- closure_delay(&dc->writeback, delay);
- continue_at(cl, read_dirty, dirty_wq);
- }
+ jiffies_to_msecs(delay) > 50))
+ delay = schedule_timeout_uninterruptible(delay);
dc->last_read = KEY_OFFSET(&w->key);
@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)
trace_bcache_writeback(&w->key);
- closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
+ down(&dc->in_flight);
+ closure_call(&io->cl, read_dirty_submit, NULL, cl);
delay = writeback_delay(dc, KEY_SIZE(&w->key));
-
- atomic_inc(&dc->in_flight);
-
- if (!closure_wait_event(&dc->writeback_wait, cl,
- atomic_read(&dc->in_flight) < 64))
- continue_at(cl, read_dirty, dirty_wq);
}
if (0) {
@@ -442,7 +435,11 @@ err:
bch_keybuf_del(&dc->writeback_keys, w);
}
- refill_dirty(cl);
+ /*
+ * Wait for outstanding writeback IOs to finish (and keybuf slots to be
+ * freed) before refilling again
+ */
+ continue_at(cl, refill_dirty, dirty_wq);
}
/* Init */
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
+ sema_init(&dc->in_flight, 64);
closure_init_unlocked(&dc->writeback);
init_rwsem(&dc->writeback_lock);
@@ -513,7 +511,7 @@ void bch_writeback_exit(void)
int __init bch_writeback_init(void)
{
- dirty_wq = create_singlethread_workqueue("bcache_writeback");
+ dirty_wq = create_workqueue("bcache_writeback");
if (!dirty_wq)
return -ENOMEM;