about summary refs log tree commit diff
diff options
context:
space:
mode:
author Stephen Rothwell <sfr@canb.auug.org.au> 2017-07-24 12:25:42 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au> 2017-07-24 12:25:42 +1000
commit 921d05470f8d88ecf296758cbb8af8bc20c5377c (patch)
tree 9a4ef6850e041c26826129ad81d51f4c464687c4
parent 918ca58f2d3de433ea49b2cb6d58f2ae23ca3430 (diff)
parent e49c0cd75de983348d332520808e0751fd6577a4 (diff)
Merge remote-tracking branch 'device-mapper/for-next'
-rw-r--r-- drivers/md/dm-bufio.c 95
-rw-r--r-- drivers/md/dm-bufio.h 9
-rw-r--r-- drivers/md/dm-integrity.c 32
3 files changed, 102 insertions, 34 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 850ff6c67994..bc5e821bb796 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -64,6 +64,12 @@
#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1))
/*
+ * Align buffer writes to this boundary.
+ * Tests show that SSDs have the highest IOPS when using 4k writes.
+ */
+#define DM_BUFIO_WRITE_ALIGN 4096
+
+/*
* dm_buffer->list_mode
*/
#define LIST_CLEAN 0
@@ -149,6 +155,10 @@ struct dm_buffer {
blk_status_t write_error;
unsigned long state;
unsigned long last_accessed;
+ unsigned dirty_start;
+ unsigned dirty_end;
+ unsigned write_start;
+ unsigned write_end;
struct dm_bufio_client *c;
struct list_head write_list;
struct bio bio;
@@ -560,7 +570,7 @@ static void dmio_complete(unsigned long error, void *context)
}
static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
- unsigned n_sectors, bio_end_io_t *end_io)
+ unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
{
int r;
struct dm_io_request io_req = {
@@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
if (b->data_mode != DATA_MODE_VMALLOC) {
io_req.mem.type = DM_IO_KMEM;
- io_req.mem.ptr.addr = b->data;
+ io_req.mem.ptr.addr = (char *)b->data + offset;
} else {
io_req.mem.type = DM_IO_VMA;
- io_req.mem.ptr.vma = b->data;
+ io_req.mem.ptr.vma = (char *)b->data + offset;
}
b->bio.bi_end_io = end_io;
@@ -609,10 +619,10 @@ static void inline_endio(struct bio *bio)
}
static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
- unsigned n_sectors, bio_end_io_t *end_io)
+ unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
{
char *ptr;
- int len;
+ unsigned len;
bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
b->bio.bi_iter.bi_sector = sector;
@@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
b->bio.bi_private = end_io;
bio_set_op_attrs(&b->bio, rw, 0);
- /*
- * We assume that if len >= PAGE_SIZE ptr is page-aligned.
- * If len < PAGE_SIZE the buffer doesn't cross page boundary.
- */
- ptr = b->data;
+ ptr = (char *)b->data + offset;
len = n_sectors << SECTOR_SHIFT;
- if (len >= PAGE_SIZE)
- BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
- else
- BUG_ON((unsigned long)ptr & (len - 1));
-
do {
- if (!bio_add_page(&b->bio, virt_to_page(ptr),
- len < PAGE_SIZE ? len : PAGE_SIZE,
+ unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
+ if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
offset_in_page(ptr))) {
BUG_ON(b->c->block_size <= PAGE_SIZE);
- use_dmio(b, rw, sector, n_sectors, end_io);
+ use_dmio(b, rw, sector, n_sectors, offset, end_io);
return;
}
- len -= PAGE_SIZE;
- ptr += PAGE_SIZE;
+ len -= this_step;
+ ptr += this_step;
} while (len > 0);
submit_bio(&b->bio);
@@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
{
unsigned n_sectors;
sector_t sector;
-
- if (rw == WRITE && b->c->write_callback)
- b->c->write_callback(b);
+ unsigned offset, end;
sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
- n_sectors = 1 << b->c->sectors_per_block_bits;
+
+ if (rw != WRITE) {
+ n_sectors = 1 << b->c->sectors_per_block_bits;
+ offset = 0;
+ } else {
+ if (b->c->write_callback)
+ b->c->write_callback(b);
+ offset = b->write_start;
+ end = b->write_end;
+ offset &= -DM_BUFIO_WRITE_ALIGN;
+ end += DM_BUFIO_WRITE_ALIGN - 1;
+ end &= -DM_BUFIO_WRITE_ALIGN;
+ if (unlikely(end > b->c->block_size))
+ end = b->c->block_size;
+
+ sector += offset >> SECTOR_SHIFT;
+ n_sectors = (end - offset) >> SECTOR_SHIFT;
+ }
if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
b->data_mode != DATA_MODE_VMALLOC)
- use_inline_bio(b, rw, sector, n_sectors, end_io);
+ use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
else
- use_dmio(b, rw, sector, n_sectors, end_io);
+ use_dmio(b, rw, sector, n_sectors, offset, end_io);
}
/*----------------------------------------------------------------
@@ -720,6 +736,9 @@ static void __write_dirty_buffer(struct dm_buffer *b,
clear_bit(B_DIRTY, &b->state);
wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
+ b->write_start = b->dirty_start;
+ b->write_end = b->dirty_end;
+
if (!write_list)
submit_io(b, WRITE, write_endio);
else
@@ -1221,19 +1240,37 @@ void dm_bufio_release(struct dm_buffer *b)
}
EXPORT_SYMBOL_GPL(dm_bufio_release);
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+ unsigned start, unsigned end)
{
struct dm_bufio_client *c = b->c;
+ BUG_ON(start >= end);
+ BUG_ON(end > b->c->block_size);
+
dm_bufio_lock(c);
BUG_ON(test_bit(B_READING, &b->state));
- if (!test_and_set_bit(B_DIRTY, &b->state))
+ if (!test_and_set_bit(B_DIRTY, &b->state)) {
+ b->dirty_start = start;
+ b->dirty_end = end;
__relink_lru(b, LIST_DIRTY);
+ } else {
+ if (start < b->dirty_start)
+ b->dirty_start = start;
+ if (end > b->dirty_end)
+ b->dirty_end = end;
+ }
dm_bufio_unlock(c);
}
+EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+{
+ dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
+}
EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
@@ -1399,6 +1436,8 @@ retry:
wait_on_bit_io(&b->state, B_WRITING,
TASK_UNINTERRUPTIBLE);
set_bit(B_DIRTY, &b->state);
+ b->dirty_start = 0;
+ b->dirty_end = c->block_size;
__unlink_buffer(b);
__link_buffer(b, new_block, LIST_DIRTY);
} else {
diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h
index b6d8f53ec15b..be732d3f8611 100644
--- a/drivers/md/dm-bufio.h
+++ b/drivers/md/dm-bufio.h
@@ -94,6 +94,15 @@ void dm_bufio_release(struct dm_buffer *b);
void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
/*
+ * Mark a part of the buffer dirty.
+ *
+ * The specified part of the buffer is scheduled to be written. dm-bufio may
+ * write the specified part of the buffer or it may write a larger superset.
+ */
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+ unsigned start, unsigned end);
+
+/*
* Initiate writing of dirty buffers, without waiting for completion.
*/
void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 1b224aa9cf15..4a4a26f7f9ae 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -225,6 +225,8 @@ struct dm_integrity_c {
struct alg_spec internal_hash_alg;
struct alg_spec journal_crypt_alg;
struct alg_spec journal_mac_alg;
+
+ atomic64_t number_of_mismatches;
};
struct dm_integrity_range {
@@ -309,6 +311,8 @@ static void dm_integrity_dtr(struct dm_target *ti);
static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
{
+ if (err == -EILSEQ)
+ atomic64_inc(&ic->number_of_mismatches);
if (!cmpxchg(&ic->failed, 0, err))
DMERR("Error on %s: %d", msg, err);
}
@@ -1040,7 +1044,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
memcpy(tag, dp, to_copy);
} else if (op == TAG_WRITE) {
memcpy(dp, tag, to_copy);
- dm_bufio_mark_buffer_dirty(b);
+ dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
} else {
/* e.g.: op == TAG_CMP */
if (unlikely(memcmp(dp, tag, to_copy))) {
@@ -1273,6 +1277,7 @@ again:
DMERR("Checksum failed at sector 0x%llx",
(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
r = -EILSEQ;
+ atomic64_inc(&ic->number_of_mismatches);
}
if (likely(checksums != checksums_onstack))
kfree(checksums);
@@ -1587,16 +1592,18 @@ retry:
if (likely(ic->mode == 'J')) {
if (dio->write) {
unsigned next_entry, i, pos;
- unsigned ws, we;
+ unsigned ws, we, range_sectors;
- dio->range.n_sectors = min(dio->range.n_sectors, ic->free_sectors);
+ dio->range.n_sectors = min(dio->range.n_sectors,
+ ic->free_sectors << ic->sb->log2_sectors_per_block);
if (unlikely(!dio->range.n_sectors))
goto sleep;
- ic->free_sectors -= dio->range.n_sectors;
+ range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
+ ic->free_sectors -= range_sectors;
journal_section = ic->free_section;
journal_entry = ic->free_section_entry;
- next_entry = ic->free_section_entry + dio->range.n_sectors;
+ next_entry = ic->free_section_entry + range_sectors;
ic->free_section_entry = next_entry % ic->journal_section_entries;
ic->free_section += next_entry / ic->journal_section_entries;
ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
@@ -1727,6 +1734,8 @@ static void pad_uncommitted(struct dm_integrity_c *ic)
wraparound_section(ic, &ic->free_section);
ic->n_uncommitted_sections++;
}
+ WARN_ON(ic->journal_sections * ic->journal_section_entries !=
+ (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors);
}
static void integrity_commit(struct work_struct *w)
@@ -1821,6 +1830,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
{
unsigned i, j, n;
struct journal_completion comp;
+ struct blk_plug plug;
+
+ blk_start_plug(&plug);
comp.ic = ic;
comp.in_flight = (atomic_t)ATOMIC_INIT(1);
@@ -1945,6 +1957,8 @@ skip_io:
dm_bufio_write_dirty_buffers_async(ic->bufio);
+ blk_finish_plug(&plug);
+
complete_journal_op(&comp);
wait_for_completion_io(&comp.comp);
@@ -2221,7 +2235,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
switch (type) {
case STATUSTYPE_INFO:
- result[0] = '\0';
+ DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches));
break;
case STATUSTYPE_TABLE: {
@@ -2794,6 +2808,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
bio_list_init(&ic->flush_bio_list);
init_waitqueue_head(&ic->copy_to_journal_wait);
init_completion(&ic->crypto_backoff);
+ atomic64_set(&ic->number_of_mismatches, 0);
r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
if (r) {
@@ -3019,6 +3034,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->error = "Block size doesn't match the information in superblock";
goto bad;
}
+ if (!le32_to_cpu(ic->sb->journal_sections)) {
+ r = -EINVAL;
+ ti->error = "Corrupted superblock, journal_sections is 0";
+ goto bad;
+ }
/* make sure that ti->max_io_len doesn't overflow */
if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {