aboutsummaryrefslogtreecommitdiff
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2013-04-02 10:04:39 +0200
committerJens Axboe <axboe@kernel.dk>2013-04-02 10:04:39 +0200
commit64f8de4da7d3962632f152d3d702d68bb8accc29 (patch)
treec90a872a6d91c824635d59572e1e578980f4bc98 /fs/fs-writeback.c
parentf1fb3449efd5c49b48e35746bc7283eb9c73e3a0 (diff)
parentb5c872ddb7083c7909fb76a170c3807e04564bb3 (diff)
Merge branch 'writeback-workqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq into for-3.10/core
Tejun writes: ----- This is the pull request for the earlier patchset[1] with the same name. It's only three patches (the first one was committed to workqueue tree) but the merge strategy is a bit involved due to the dependencies. * Because the conversion needs features from wq/for-3.10, block/for-3.10/core is based on rc3, and wq/for-3.10 has conflicts with rc3, I pulled mainline (rc5) into wq/for-3.10 to prevent those workqueue conflicts from flaring up in block tree. * Resolving the issue that Jan and Dave raised about debugging requires arch-wide changes. The patchset is being worked on[2] but it'll have to go through -mm after these changes show up in -next, and not included in this pull request. The three commits are located in the following git branch. git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git writeback-workqueue Pulling it into block/for-3.10/core produces a conflict in drivers/md/raid5.c between the following two commits. e3620a3ad5 ("MD RAID5: Avoid accessing gendisk or queue structs when not available") 2f6db2a707 ("raid5: use bio_reset()") The conflict is trivial - one removes an "if ()" conditional while the other removes "rbi->bi_next = NULL" right above it. We just need to remove both. The merged branch is available at git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git block-test-merge so that you can use it for verification. The test merge commit has proper merge description. While these changes are a bit of pain to route, they make code simpler and even have, while minute, measureable performance gain[3] even on a workload which isn't particularly favorable to showing the benefits of this conversion. ---- Fixed up the conflict. Conflicts: drivers/md/raid5.c Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--fs/fs-writeback.c102
1 files changed, 32 insertions, 70 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 21f46fb3a10..8067d3719e9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -22,7 +22,6 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
-#include <linux/freezer.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
@@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head)
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>
-/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
-static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
-{
- if (bdi->wb.task) {
- wake_up_process(bdi->wb.task);
- } else {
- /*
- * The bdi thread isn't there, wake up the forker thread which
- * will create and run it.
- */
- wake_up_process(default_backing_dev_info.wb.task);
- }
-}
-
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
@@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
spin_lock_bh(&bdi->wb_lock);
list_add_tail(&work->list, &bdi->work_list);
- if (!bdi->wb.task)
- trace_writeback_nothread(bdi, work);
- bdi_wakeup_flusher(bdi);
spin_unlock_bh(&bdi->wb_lock);
+
+ mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
}
static void
@@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
*/
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
- if (bdi->wb.task) {
- trace_writeback_nowork(bdi);
- wake_up_process(bdi->wb.task);
- }
+ trace_writeback_nowork(bdi);
+ mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
return;
}
@@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
* writeback as soon as there is no other work to do.
*/
trace_writeback_wake_background(bdi);
- spin_lock_bh(&bdi->wb_lock);
- bdi_wakeup_flusher(bdi);
- spin_unlock_bh(&bdi->wb_lock);
+ mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
}
/*
@@ -1020,66 +1000,48 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
/*
* Handle writeback of dirty data for the device backed by this bdi. Also
- * wakes up periodically and does kupdated style flushing.
+ * reschedules periodically and does kupdated style flushing.
*/
-int bdi_writeback_thread(void *data)
+void bdi_writeback_workfn(struct work_struct *work)
{
- struct bdi_writeback *wb = data;
+ struct bdi_writeback *wb = container_of(to_delayed_work(work),
+ struct bdi_writeback, dwork);
struct backing_dev_info *bdi = wb->bdi;
long pages_written;
current->flags |= PF_SWAPWRITE;
- set_freezable();
- wb->last_active = jiffies;
-
- /*
- * Our parent may run at a different priority, just set us to normal
- */
- set_user_nice(current, 0);
-
- trace_writeback_thread_start(bdi);
- while (!kthread_freezable_should_stop(NULL)) {
+ if (likely(!current_is_workqueue_rescuer() ||
+ list_empty(&bdi->bdi_list))) {
/*
- * Remove own delayed wake-up timer, since we are already awake
- * and we'll take care of the periodic write-back.
+ * The normal path. Keep writing back @bdi until its
+ * work_list is empty. Note that this path is also taken
+ * if @bdi is shutting down even when we're running off the
+ * rescuer as work_list needs to be drained.
*/
- del_timer(&wb->wakeup_timer);
-
- pages_written = wb_do_writeback(wb, 0);
-
+ do {
+ pages_written = wb_do_writeback(wb, 0);
+ trace_writeback_pages_written(pages_written);
+ } while (!list_empty(&bdi->work_list));
+ } else {
+ /*
+ * bdi_wq can't get enough workers and we're running off
+ * the emergency worker. Don't hog it. Hopefully, 1024 is
+ * enough for efficient IO.
+ */
+ pages_written = writeback_inodes_wb(&bdi->wb, 1024,
+ WB_REASON_FORKER_THREAD);
trace_writeback_pages_written(pages_written);
-
- if (pages_written)
- wb->last_active = jiffies;
-
- set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
- __set_current_state(TASK_RUNNING);
- continue;
- }
-
- if (wb_has_dirty_io(wb) && dirty_writeback_interval)
- schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
- else {
- /*
- * We have nothing to do, so can go sleep without any
- * timeout and save power. When a work is queued or
- * something is made dirty - we will be woken up.
- */
- schedule();
- }
}
- /* Flush any work that raced with us exiting */
- if (!list_empty(&bdi->work_list))
- wb_do_writeback(wb, 1);
+ if (!list_empty(&bdi->work_list) ||
+ (wb_has_dirty_io(wb) && dirty_writeback_interval))
+ queue_delayed_work(bdi_wq, &wb->dwork,
+ msecs_to_jiffies(dirty_writeback_interval * 10));
- trace_writeback_thread_stop(bdi);
- return 0;
+ current->flags &= ~PF_SWAPWRITE;
}
-
/*
* Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
* the whole world.