diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-kcopyd.c | 19 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 22 |
2 files changed, 36 insertions, 5 deletions
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 56fcccc30554..e0cfde3501e0 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -55,15 +55,17 @@ struct dm_kcopyd_client { struct dm_kcopyd_throttle *throttle; /* - * We maintain three lists of jobs: + * We maintain four lists of jobs: * * i) jobs waiting for pages * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. + * iii) jobs that don't need to do any IO and just run a callback + * iv) jobs that have completed. * - * All three of these are protected by job_lock. + * All four of these are protected by job_lock. */ spinlock_t job_lock; + struct list_head callback_jobs; struct list_head complete_jobs; struct list_head io_jobs; struct list_head pages_jobs; @@ -584,6 +586,7 @@ static void do_work(struct work_struct *work) struct dm_kcopyd_client *kc = container_of(work, struct dm_kcopyd_client, kcopyd_work); struct blk_plug plug; + unsigned long flags; /* * The order that these are called is *very* important. @@ -592,6 +595,10 @@ static void do_work(struct work_struct *work) * list. io jobs call wake when they complete and it all * starts again. */ + spin_lock_irqsave(&kc->job_lock, flags); + list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); + spin_unlock_irqrestore(&kc->job_lock, flags); + blk_start_plug(&plug); process_jobs(&kc->complete_jobs, kc, run_complete_job); process_jobs(&kc->pages_jobs, kc, run_pages_job); @@ -609,7 +616,7 @@ static void dispatch_job(struct kcopyd_job *job) struct dm_kcopyd_client *kc = job->kc; atomic_inc(&kc->nr_jobs); if (unlikely(!job->source.count)) - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); else if (job->pages == &zero_page_list) push(&kc->io_jobs, job); else @@ -796,7 +803,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) job->read_err = read_err; job->write_err = write_err; - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); wake(kc); } EXPORT_SYMBOL(dm_kcopyd_do_callback); @@ -826,6 +833,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro return ERR_PTR(-ENOMEM); spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->callback_jobs); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); @@ -875,6 +883,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); + BUG_ON(!list_empty(&kc->callback_jobs)); BUG_ON(!list_empty(&kc->complete_jobs)); BUG_ON(!list_empty(&kc->io_jobs)); BUG_ON(!list_empty(&kc->pages_jobs)); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c65feeada864..2da0b9b213c7 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -19,6 +19,7 @@ #include <linux/vmalloc.h> #include <linux/log2.h> #include <linux/dm-kcopyd.h> +#include <linux/semaphore.h> #include "dm.h" @@ -105,6 +106,9 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; + /* Maximum number of in-flight COW jobs. */ + struct semaphore cow_count; + struct dm_kcopyd_client *kcopyd_client; /* Wait for events based on state_bits */ @@ -145,6 +149,19 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +/* + * Maximum number of chunks being copied on write. + * + * The value was decided experimentally as a trade-off between memory + * consumption, stalling the kernel's workqueues and maintaining a high enough + * throughput. + */ +#define DEFAULT_COW_THRESHOLD 2048 + +static int cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); + DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write"); @@ -1189,6 +1206,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } + sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); @@ -1560,6 +1579,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) } list_add(&pe->out_of_order_entry, lh); } + up(&s->cow_count); } /* @@ -1583,6 +1603,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ + down(&s->cow_count); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1602,6 +1623,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; + down(&s->cow_count); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); |