From 8b41e6715ed555e2d8e8dac52ec1f05a9f04dcb4 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:04 +0400 Subject: fuse: make request allocations for background processing explicit There are two types of processing requests in FUSE: synchronous (via fuse_request_send()) and asynchronous (via adding to fc->bg_queue). Fortunately, the type of processing is always known in advance, at the time of request allocation. This preparatory patch utilizes this fact making fuse_get_req() aware about the type. Next patches will use it. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 2 +- fs/fuse/dev.c | 20 ++++++++++++++++++-- fs/fuse/file.c | 16 ++++++++++++++-- fs/fuse/fuse_i.h | 2 ++ fs/fuse/inode.c | 2 ++ 5 files changed, 37 insertions(+), 5 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 6f96a8def14..b7c7f306063 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -422,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc) BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); - req = fuse_get_req(fc, 1); + req = fuse_get_req_for_background(fc, 1); if (IS_ERR(req)) { rc = PTR_ERR(req); goto err; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 11dfa0c3fb4..3b8301f5c0e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -130,7 +130,8 @@ static void fuse_req_init_context(struct fuse_req *req) req->in.h.pid = current->pid; } -struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) +static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, + bool for_background) { struct fuse_req *req; sigset_t oldset; @@ -156,14 +157,27 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) fuse_req_init_context(req); req->waiting = 1; + req->background = for_background; return req; out: atomic_dec(&fc->num_waiting); return ERR_PTR(err); } + +struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) +{ + return __fuse_get_req(fc, npages, false); +} EXPORT_SYMBOL_GPL(fuse_get_req); +struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, + unsigned npages) +{ + return __fuse_get_req(fc, npages, true); +} +EXPORT_SYMBOL_GPL(fuse_get_req_for_background); + /* * Return request in fuse_file->reserved_req. However that may * currently be in use. If that is the case, wait for it to become @@ -232,6 +246,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, fuse_req_init_context(req); req->waiting = 1; + req->background = 0; return req; } @@ -442,6 +457,7 @@ __acquires(fc->lock) static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { + BUG_ON(req->background); spin_lock(&fc->lock); if (!fc->connected) req->out.h.error = -ENOTCONN; @@ -469,7 +485,7 @@ EXPORT_SYMBOL_GPL(fuse_request_send); static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) { - req->background = 1; + BUG_ON(!req->background); fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34b80ba95ba..4884790b963 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -126,11 +126,13 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) struct fuse_req *req = ff->reserved_req; if (sync) { + req->background = 0; fuse_request_send(ff->fc, req); path_put(&req->misc.release.path); fuse_put_request(ff->fc, req); } else { req->end = fuse_release_end; + req->background = 1; fuse_request_send_background(ff->fc, req); } kfree(ff); @@ -282,6 +284,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags) WARN_ON(atomic_read(&ff->count) > 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); ff->reserved_req->force = 1; + ff->reserved_req->background = 0; fuse_request_send(ff->fc, ff->reserved_req); fuse_put_request(ff->fc, ff->reserved_req); kfree(ff); @@ -661,7 +664,12 @@ static int fuse_readpages_fill(void *_data, struct page *page) int nr_alloc = min_t(unsigned, data->nr_pages, FUSE_MAX_PAGES_PER_REQ); fuse_send_readpages(req, data->file); - data->req = req = fuse_get_req(fc, nr_alloc); + if (fc->async_read) + req = fuse_get_req_for_background(fc, nr_alloc); + else + req = fuse_get_req(fc, nr_alloc); + + data->req = req; if (IS_ERR(req)) { unlock_page(page); return PTR_ERR(req); @@ -696,7 +704,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, data.file = file; data.inode = inode; - data.req = fuse_get_req(fc, nr_alloc); + if (fc->async_read) + data.req = fuse_get_req_for_background(fc, nr_alloc); + else + data.req = fuse_get_req(fc, nr_alloc); data.nr_pages = nr_pages; err = PTR_ERR(data.req); if (IS_ERR(data.req)) @@ -1375,6 +1386,7 @@ static int fuse_writepage_locked(struct page *page) if (!req) goto err; + req->background = 1; /* writeback always goes to bg_queue */ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!tmp_page) goto err_free; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6aeba864f07..47c94d28ff8 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -708,6 +708,8 @@ void fuse_request_free(struct fuse_req *req); * caller should specify # elements in req->pages[] explicitly */ struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); +struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, + unsigned npages); /** * Get a request, may fail with -ENOMEM, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 137185c3884..e26607df93d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -346,6 +346,7 @@ static void fuse_send_destroy(struct fuse_conn *fc) fc->destroy_req = NULL; req->in.h.opcode = FUSE_DESTROY; req->force = 1; + req->background = 0; fuse_request_send(fc, req); fuse_put_request(fc, req); } @@ -1043,6 +1044,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) init_req = fuse_request_alloc(0); if (!init_req) goto err_put_root; + init_req->background = 1; if (is_bdev) { fc->destroy_req = fuse_request_alloc(0); -- cgit v1.2.3 From 796523fb24028639c007f71e02ca21730f7c0af6 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:15 +0400 Subject: fuse: add flag fc->initialized Existing flag fc->blocked is used to suspend request allocation both in case of many background request submitted and period of time before init_reply arrives from userspace. Next patch will skip blocking allocations of synchronous request (disregarding fc->blocked). This is mostly OK, but we still need to suspend allocations if init_reply is not arrived yet. The patch introduces flag fc->initialized which will serve this purpose. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 1 + fs/fuse/dev.c | 1 + fs/fuse/fuse_i.h | 4 ++++ fs/fuse/inode.c | 3 +++ 4 files changed, 9 insertions(+) (limited to 'fs/fuse') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b7c7f306063..f563e78852b 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -505,6 +505,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) cc->fc.connected = 1; cc->fc.blocked = 0; + cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { fuse_conn_put(&cc->fc); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3b8301f5c0e..39c2fe3ba93 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2087,6 +2087,7 @@ void fuse_abort_conn(struct fuse_conn *fc) if (fc->connected) { fc->connected = 0; fc->blocked = 0; + fc->initialized = 1; end_io_requests(fc); end_queued_requests(fc); end_polls(fc); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 47c94d28ff8..6bf30f2af90 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -417,6 +417,10 @@ struct fuse_conn { /** Batching of FORGET requests (positive indicates FORGET batch) */ int forget_batch; + /** Flag indicating that INIT reply has been received. Allocating + * any fuse request will be suspended until the flag is set */ + int initialized; + /** Flag indicating if connection is blocked. This will be the case before the INIT reply is received, and if there are too many outstading backgrounds requests */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e26607df93d..4958f8099f1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -363,6 +363,7 @@ void fuse_conn_kill(struct fuse_conn *fc) spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; + fc->initialized = 1; spin_unlock(&fc->lock); /* Flush all readers on this fs */ kill_fasync(&fc->fasync, SIGIO, POLL_IN); @@ -583,6 +584,7 @@ void fuse_conn_init(struct fuse_conn *fc) fc->polled_files = RB_ROOT; fc->reqctr = 0; fc->blocked = 1; + fc->initialized = 0; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } @@ -882,6 +884,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->conn_init = 1; } fc->blocked = 0; + fc->initialized = 1; wake_up_all(&fc->blocked_waitq); } -- cgit v1.2.3 From 0aada88476a33690c9569b094191ce92a38e6541 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:28 +0400 Subject: fuse: skip blocking on allocations of synchronous requests A task may have at most one synchronous request allocated. So these requests need not be otherwise limited. The patch re-works fuse_get_req() to follow this idea. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 1 - fs/fuse/dev.c | 30 ++++++++++++++++++++---------- fs/fuse/inode.c | 3 +-- 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index f563e78852b..c59c097eb2e 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -504,7 +504,6 @@ static int cuse_channel_open(struct inode *inode, struct file *file) cc->fc.release = cuse_fc_release; cc->fc.connected = 1; - cc->fc.blocked = 0; cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 39c2fe3ba93..d692b85115b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -130,21 +130,30 @@ static void fuse_req_init_context(struct fuse_req *req) req->in.h.pid = current->pid; } +static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) +{ + return !fc->initialized || (for_background && fc->blocked); +} + static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, bool for_background) { struct fuse_req *req; - sigset_t oldset; - int intr; int err; - atomic_inc(&fc->num_waiting); - block_sigs(&oldset); - intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked); - restore_sigs(&oldset); - err = -EINTR; - if (intr) - goto out; + + if (fuse_block_alloc(fc, for_background)) { + sigset_t oldset; + int intr; + + block_sigs(&oldset); + intr = wait_event_interruptible(fc->blocked_waitq, + !fuse_block_alloc(fc, for_background)); + restore_sigs(&oldset); + err = -EINTR; + if (intr) + goto out; + } err = -ENOTCONN; if (!fc->connected) @@ -239,7 +248,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, struct fuse_req *req; atomic_inc(&fc->num_waiting); - wait_event(fc->blocked_waitq, !fc->blocked); + wait_event(fc->blocked_waitq, fc->initialized); req = fuse_request_alloc(0); if (!req) req = get_reserved_req(fc, file); @@ -2106,6 +2115,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; + fc->initialized = 1; end_queued_requests(fc); end_polls(fc); wake_up_all(&fc->blocked_waitq); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4958f8099f1..17ec1f70524 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -583,7 +583,7 @@ void fuse_conn_init(struct fuse_conn *fc) fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; - fc->blocked = 1; + fc->blocked = 0; fc->initialized = 0; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); @@ -883,7 +883,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } - fc->blocked = 0; fc->initialized = 1; wake_up_all(&fc->blocked_waitq); } -- cgit v1.2.3 From 722d2bea8c601d0744e4a37170533fdf6214a678 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:36 +0400 Subject: fuse: implement exclusive wakeup for blocked_waitq The patch solves thundering herd problem. So far as previous patches ensured that only allocations for background may block, it's safe to wake up one waiter. Whoever it is, it will wake up another one in request_end() afterwards. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index d692b85115b..36731058896 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -147,7 +147,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, int intr; block_sigs(&oldset); - intr = wait_event_interruptible(fc->blocked_waitq, + intr = wait_event_interruptible_exclusive(fc->blocked_waitq, !fuse_block_alloc(fc, for_background)); restore_sigs(&oldset); err = -EINTR; @@ -161,8 +161,11 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, req = fuse_request_alloc(npages); err = -ENOMEM; - if (!req) + if (!req) { + if (for_background) + wake_up(&fc->blocked_waitq); goto out; + } fuse_req_init_context(req); req->waiting = 1; @@ -262,6 +265,17 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { if (atomic_dec_and_test(&req->count)) { + if (unlikely(req->background)) { + /* + * We get here in the unlikely case that a background + * request was allocated but not sent + */ + spin_lock(&fc->lock); + if (!fc->blocked) + wake_up(&fc->blocked_waitq); + spin_unlock(&fc->lock); + } + if (req->waiting) atomic_dec(&fc->num_waiting); @@ -359,10 +373,15 @@ __releases(fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == fc->max_background) { + req->background = 0; + + if (fc->num_background == fc->max_background) fc->blocked = 0; - wake_up_all(&fc->blocked_waitq); - } + + /* Wake up next waiter, if any */ + if (!fc->blocked) + wake_up(&fc->blocked_waitq); + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); -- cgit v1.2.3 From 3c18ef8117f0515b1e455713dfc2e18b06db9bb5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 17 Apr 2013 21:50:58 +0200 Subject: fuse: optimize wake_up Normally blocked_waitq will be inactive, so optimize this case. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 36731058896..be5c7e13320 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -379,7 +379,7 @@ __releases(fc->lock) fc->blocked = 0; /* Wake up next waiter, if any */ - if (!fc->blocked) + if (!fc->blocked && waitqueue_active(&fc->blocked_waitq)) wake_up(&fc->blocked_waitq); if (fc->num_background == fc->congestion_threshold && -- cgit v1.2.3 From 187c5c36330bc8d15674d9e6d2a2412de6b1034d Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:20:25 +0400 Subject: fuse: move fuse_release_user_pages() up fuse_release_user_pages() will be indirectly used by fuse_send_read/write in future patches. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4884790b963..1ee2fc92bc3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -494,6 +494,18 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, req->out.args[0].size = count; } +static void fuse_release_user_pages(struct fuse_req *req, int write) +{ + unsigned i; + + for (i = 0; i < req->num_pages; i++) { + struct page *page = req->pages[i]; + if (write) + set_page_dirty_lock(page); + put_page(page); + } +} + static size_t fuse_send_read(struct fuse_req *req, struct file *file, loff_t pos, size_t count, fl_owner_t owner) { @@ -1046,18 +1058,6 @@ out: return written ? written : err; } -static void fuse_release_user_pages(struct fuse_req *req, int write) -{ - unsigned i; - - for (i = 0; i < req->num_pages; i++) { - struct page *page = req->pages[i]; - if (write) - set_page_dirty_lock(page); - put_page(page); - } -} - static inline void fuse_page_descs_length_init(struct fuse_req *req, unsigned index, unsigned nr_pages) { -- cgit v1.2.3 From 01e9d11a3e79035ca5cd89b035435acd4ba61ee1 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:20:41 +0400 Subject: fuse: add support of async IO The patch implements a framework to process an IO request asynchronously. The idea is to associate several fuse requests with a single kiocb by means of fuse_io_priv structure. The structure plays the same role for FUSE as 'struct dio' for direct-io.c. The framework is supposed to be used like this: - someone (who wants to process an IO asynchronously) allocates fuse_io_priv and initializes it setting 'async' field to non-zero value. - as soon as fuse request is filled, it can be submitted (in non-blocking way) by fuse_async_req_send() - when all submitted requests are ACKed by userspace, io->reqs drops to zero triggering aio_complete() In case of IO initiated by libaio, aio_complete() will finish processing the same way as in case of dio_complete() calling aio_complete(). But the framework may be also used for internal FUSE use when initial IO request was synchronous (from user perspective), but it's beneficial to process it asynchronously. Then the caller should wait on kiocb explicitly and aio_complete() will wake the caller up. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 17 +++++++++++ 2 files changed, 109 insertions(+) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1ee2fc92bc3..4002889fbcc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -506,6 +506,98 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +/** + * In case of short read, the caller sets 'pos' to the position of + * actual end of fuse request in IO request. Otherwise, if bytes_requested + * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1. + * + * An example: + * User requested DIO read of 64K. It was splitted into two 32K fuse requests, + * both submitted asynchronously. The first of them was ACKed by userspace as + * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The + * second request was ACKed as short, e.g. only 1K was read, resulting in + * pos == 33K. + * + * Thus, when all fuse requests are completed, the minimal non-negative 'pos' + * will be equal to the length of the longest contiguous fragment of + * transferred data starting from the beginning of IO request. + */ +static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) +{ + int left; + + spin_lock(&io->lock); + if (err) + io->err = io->err ? : err; + else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes)) + io->bytes = pos; + + left = --io->reqs; + spin_unlock(&io->lock); + + if (!left) { + long res; + + if (io->err) + res = io->err; + else if (io->bytes >= 0 && io->write) + res = -EIO; + else { + res = io->bytes < 0 ? io->size : io->bytes; + + if (!is_sync_kiocb(io->iocb)) { + struct path *path = &io->iocb->ki_filp->f_path; + struct inode *inode = path->dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + spin_unlock(&fc->lock); + } + } + + aio_complete(io->iocb, res, 0); + kfree(io); + } +} + +static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) +{ + struct fuse_io_priv *io = req->io; + ssize_t pos = -1; + + fuse_release_user_pages(req, !io->write); + + if (io->write) { + if (req->misc.write.in.size != req->misc.write.out.size) + pos = req->misc.write.in.offset - io->offset + + req->misc.write.out.size; + } else { + if (req->misc.read.in.size != req->out.args[0].size) + pos = req->misc.read.in.offset - io->offset + + req->out.args[0].size; + } + + fuse_aio_complete(io, req->out.h.error, pos); +} + +static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, + size_t num_bytes, struct fuse_io_priv *io) +{ + spin_lock(&io->lock); + io->size += num_bytes; + io->reqs++; + spin_unlock(&io->lock); + + req->io = io; + req->end = fuse_aio_complete_req; + + fuse_request_send_background(fc, req); + + return num_bytes; +} + static size_t fuse_send_read(struct fuse_req *req, struct file *file, loff_t pos, size_t count, fl_owner_t owner) { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6bf30f2af90..aea072413c4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -228,6 +228,20 @@ enum fuse_req_state { FUSE_REQ_FINISHED }; +/** The request IO state (for asynchronous processing) */ +struct fuse_io_priv { + int async; + spinlock_t lock; + unsigned reqs; + ssize_t bytes; + size_t size; + __u64 offset; + bool write; + int err; + struct kiocb *iocb; + struct file *file; +}; + /** * A request to the client */ @@ -332,6 +346,9 @@ struct fuse_req { /** Inode used in the request or NULL */ struct inode *inode; + /** AIO control block */ + struct fuse_io_priv *io; + /** Link on fi->writepages */ struct list_head writepages_entry; -- cgit v1.2.3 From 36cf66ed9f871fc0d0911921fba5873df3ddb2dc Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:20:51 +0400 Subject: fuse: make fuse_direct_io() aware about AIO The patch implements passing "struct fuse_io_priv *io" down the stack up to fuse_send_read/write where it is used to submit request asynchronously. io->async==0 designates synchronous processing. Non-trivial part of the patch is changes in fuse_direct_io(): resources like fuse requests and user pages cannot be released immediately in async case. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 6 ++++-- fs/fuse/dev.c | 2 +- fs/fuse/file.c | 64 +++++++++++++++++++++++++++++++++++++++++--------------- fs/fuse/fuse_i.h | 7 ++++++- 4 files changed, 58 insertions(+), 21 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index c59c097eb2e..b3aaf7b3578 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -92,8 +92,9 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, { loff_t pos = 0; struct iovec iov = { .iov_base = buf, .iov_len = count }; + struct fuse_io_priv io = { .async = 0, .file = file }; - return fuse_direct_io(file, &iov, 1, count, &pos, 0); + return fuse_direct_io(&io, &iov, 1, count, &pos, 0); } static ssize_t cuse_write(struct file *file, const char __user *buf, @@ -101,12 +102,13 @@ static ssize_t cuse_write(struct file *file, const char __user *buf, { loff_t pos = 0; struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; + struct fuse_io_priv io = { .async = 0, .file = file }; /* * No locking or generic_write_checks(), the server is * responsible for locking and sanity checks. */ - return fuse_direct_io(file, &iov, 1, count, &pos, 1); + return fuse_direct_io(&io, &iov, 1, count, &pos, 1); } static int cuse_open(struct inode *inode, struct file *file) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index be5c7e13320..07bdb14ae7a 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -111,7 +111,7 @@ static void restore_sigs(sigset_t *oldset) sigprocmask(SIG_SETMASK, oldset, NULL); } -static void __fuse_get_request(struct fuse_req *req) +void __fuse_get_request(struct fuse_req *req) { atomic_inc(&req->count); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4002889fbcc..e207dcdf32c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -593,14 +593,16 @@ static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, req->io = io; req->end = fuse_aio_complete_req; + __fuse_get_request(req); fuse_request_send_background(fc, req); return num_bytes; } -static size_t fuse_send_read(struct fuse_req *req, struct file *file, +static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io, loff_t pos, size_t count, fl_owner_t owner) { + struct file *file = io->file; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -611,6 +613,10 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file, inarg->read_flags |= FUSE_READ_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } + + if (io->async) + return fuse_async_req_send(fc, req, count, io); + fuse_request_send(fc, req); return req->out.args[0].size; } @@ -631,6 +637,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, static int fuse_readpage(struct file *file, struct page *page) { + struct fuse_io_priv io = { .async = 0, .file = file }; struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; @@ -663,7 +670,7 @@ static int fuse_readpage(struct file *file, struct page *page) req->num_pages = 1; req->pages[0] = page; req->page_descs[0].length = count; - num_read = fuse_send_read(req, file, pos, count, NULL); + num_read = fuse_send_read(req, &io, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); @@ -873,9 +880,10 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, req->out.args[0].value = outarg; } -static size_t fuse_send_write(struct fuse_req *req, struct file *file, +static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, loff_t pos, size_t count, fl_owner_t owner) { + struct file *file = io->file; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; struct fuse_write_in *inarg = &req->misc.write.in; @@ -886,6 +894,10 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } + + if (io->async) + return fuse_async_req_send(fc, req, count, io); + fuse_request_send(fc, req); return req->misc.write.out.size; } @@ -909,11 +921,12 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, size_t res; unsigned offset; unsigned i; + struct fuse_io_priv io = { .async = 0, .file = file }; for (i = 0; i < req->num_pages; i++) fuse_wait_on_page_writeback(inode, req->pages[i]->index); - res = fuse_send_write(req, file, pos, count, NULL); + res = fuse_send_write(req, &io, pos, count, NULL); offset = req->page_descs[0].offset; count = res; @@ -1251,10 +1264,11 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p) return min(npages, FUSE_MAX_PAGES_PER_REQ); } -ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, +ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, unsigned long nr_segs, size_t count, loff_t *ppos, int write) { + struct file *file = io->file; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; size_t nmax = write ? fc->max_write : fc->max_read; @@ -1280,11 +1294,12 @@ ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, } if (write) - nres = fuse_send_write(req, file, pos, nbytes, owner); + nres = fuse_send_write(req, io, pos, nbytes, owner); else - nres = fuse_send_read(req, file, pos, nbytes, owner); + nres = fuse_send_read(req, io, pos, nbytes, owner); - fuse_release_user_pages(req, !write); + if (!io->async) + fuse_release_user_pages(req, !write); if (req->out.h.error) { if (!res) res = req->out.h.error; @@ -1314,16 +1329,18 @@ ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, } EXPORT_SYMBOL_GPL(fuse_direct_io); -static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, +static ssize_t __fuse_direct_read(struct fuse_io_priv *io, + const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { ssize_t res; + struct file *file = io->file; struct inode *inode = file_inode(file); if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), + res = fuse_direct_io(io, iov, nr_segs, iov_length(iov, nr_segs), ppos, 0); fuse_invalidate_attr(inode); @@ -1334,21 +1351,24 @@ static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct fuse_io_priv io = { .async = 0, .file = file }; struct iovec iov = { .iov_base = buf, .iov_len = count }; - return __fuse_direct_read(file, &iov, 1, ppos); + return __fuse_direct_read(&io, &iov, 1, ppos); } -static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, +static ssize_t __fuse_direct_write(struct fuse_io_priv *io, + const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { + struct file *file = io->file; struct inode *inode = file_inode(file); size_t count = iov_length(iov, nr_segs); ssize_t res; res = generic_write_checks(file, ppos, &count, 0); if (!res) { - res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1); - if (res > 0) + res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); + if (!io->async && res > 0) fuse_write_update_size(inode, *ppos); } @@ -1363,13 +1383,14 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; struct inode *inode = file_inode(file); ssize_t res; + struct fuse_io_priv io = { .async = 0, .file = file }; if (is_bad_inode(inode)) return -EIO; /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = __fuse_direct_write(file, &iov, 1, ppos); + res = __fuse_direct_write(&io, &iov, 1, ppos); mutex_unlock(&inode->i_mutex); return res; @@ -2339,14 +2360,23 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ssize_t ret = 0; struct file *file = NULL; loff_t pos = 0; + struct fuse_io_priv *io; file = iocb->ki_filp; pos = offset; + io = kzalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); + if (!io) + return -ENOMEM; + + io->file = file; + if (rw == WRITE) - ret = __fuse_direct_write(file, iov, nr_segs, &pos); + ret = __fuse_direct_write(io, iov, nr_segs, &pos); else - ret = __fuse_direct_read(file, iov, nr_segs, &pos); + ret = __fuse_direct_read(io, iov, nr_segs, &pos); + + kfree(io); return ret; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aea072413c4..337169a406c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -732,6 +732,11 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, unsigned npages); +/* + * Increment reference count on request + */ +void __fuse_get_request(struct fuse_req *req); + /** * Get a request, may fail with -ENOMEM, * useful for callers who doesn't use req->pages[] @@ -846,7 +851,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); -ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, +ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, unsigned long nr_segs, size_t count, loff_t *ppos, int write); long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, -- cgit v1.2.3 From bcba24ccdc82f7415154cf87226c2577cea13a5c Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:21:08 +0400 Subject: fuse: enable asynchronous processing direct IO In case of synchronous DIO request (i.e. read(2) or write(2) for a file opened with O_DIRECT), the patch submits fuse requests asynchronously, but waits for their completions before return from fuse_direct_IO(). In case of asynchronous DIO request (i.e. libaio io_submit() or a file opened with O_DIRECT), the patch submits fuse requests asynchronously and return -EIOCBQUEUED immediately. The only special case is async DIO extending file. Here the patch falls back to old behaviour because we can't return -EIOCBQUEUED and update i_size later, without i_mutex hold. And we have no method to wait on real async I/O requests. The patch also clean __fuse_direct_write() up: it's better to update i_size in its callers. Thanks Brian for suggestion. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 51 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e207dcdf32c..ba1d50369c2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1366,11 +1366,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io, ssize_t res; res = generic_write_checks(file, ppos, &count, 0); - if (!res) { + if (!res) res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); - if (!io->async && res > 0) - fuse_write_update_size(inode, *ppos); - } fuse_invalidate_attr(inode); @@ -1391,6 +1388,8 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); res = __fuse_direct_write(&io, &iov, 1, ppos); + if (res > 0) + fuse_write_update_size(inode, *ppos); mutex_unlock(&inode->i_mutex); return res; @@ -2360,23 +2359,61 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ssize_t ret = 0; struct file *file = NULL; loff_t pos = 0; + struct inode *inode; + loff_t i_size; + size_t count = iov_length(iov, nr_segs); struct fuse_io_priv *io; file = iocb->ki_filp; pos = offset; + inode = file->f_mapping->host; + i_size = i_size_read(inode); - io = kzalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); + io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); if (!io) return -ENOMEM; - + spin_lock_init(&io->lock); + io->reqs = 1; + io->bytes = -1; + io->size = 0; + io->offset = offset; + io->write = (rw == WRITE); + io->err = 0; io->file = file; + /* + * By default, we want to optimize all I/Os with async request + * submission to the client filesystem. + */ + io->async = 1; + io->iocb = iocb; + + /* + * We cannot asynchronously extend the size of a file. We have no method + * to wait on real async I/O requests, so we must submit this request + * synchronously. + */ + if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + io->async = false; if (rw == WRITE) ret = __fuse_direct_write(io, iov, nr_segs, &pos); else ret = __fuse_direct_read(io, iov, nr_segs, &pos); - kfree(io); + if (io->async) { + fuse_aio_complete(io, ret < 0 ? ret : 0, -1); + + /* we have a non-extending, async request, so return */ + if (ret > 0 && !is_sync_kiocb(iocb)) + return -EIOCBQUEUED; + + ret = wait_on_sync_kiocb(iocb); + } else { + kfree(io); + } + + if (rw == WRITE && ret > 0) + fuse_write_update_size(inode, pos); return ret; } -- cgit v1.2.3 From 439ee5f0c5080d4fd15fda0c5bbee1fb3a57894e Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:21:26 +0400 Subject: fuse: optimize short direct reads If user requested direct read beyond EOF, we can skip sending fuse requests for positions beyond EOF because userspace would ACK them with zero bytes read anyway. We can trust to i_size in fuse_direct_IO for such cases because it's called from fuse_file_aio_read() and the latter updates fuse attributes including i_size. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ba1d50369c2..8f39f7b8cef 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1331,7 +1331,8 @@ EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t __fuse_direct_read(struct fuse_io_priv *io, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) + unsigned long nr_segs, loff_t *ppos, + size_t count) { ssize_t res; struct file *file = io->file; @@ -1340,8 +1341,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(io, iov, nr_segs, iov_length(iov, nr_segs), - ppos, 0); + res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0); fuse_invalidate_attr(inode); @@ -1353,7 +1353,7 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, { struct fuse_io_priv io = { .async = 0, .file = file }; struct iovec iov = { .iov_base = buf, .iov_len = count }; - return __fuse_direct_read(&io, &iov, 1, ppos); + return __fuse_direct_read(&io, &iov, 1, ppos, count); } static ssize_t __fuse_direct_write(struct fuse_io_priv *io, @@ -2369,6 +2369,13 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, inode = file->f_mapping->host; i_size = i_size_read(inode); + /* optimization for short read */ + if (rw != WRITE && offset + count > i_size) { + if (offset >= i_size) + return 0; + count = i_size - offset; + } + io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); if (!io) return -ENOMEM; @@ -2392,13 +2399,13 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + if (!is_sync_kiocb(iocb) && (offset + count > i_size)) io->async = false; if (rw == WRITE) ret = __fuse_direct_write(io, iov, nr_segs, &pos); else - ret = __fuse_direct_read(io, iov, nr_segs, &pos); + ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); if (io->async) { fuse_aio_complete(io, ret < 0 ? ret : 0, -1); -- cgit v1.2.3 From efb9fa9e911b23c7ea5330215bda778a7c69dba8 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Tue, 18 Dec 2012 14:05:08 +0400 Subject: fuse: truncate file if async dio failed The patch improves error handling in fuse_direct_IO(): if we successfully submitted several fuse requests on behalf of synchronous direct write extending file and some of them failed, let's try to do our best to clean-up. Changed in v2: reuse fuse_do_setattr(). Thanks to Brian for suggestion. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 17 +++++++++-------- fs/fuse/file.c | 22 ++++++++++++++++++++-- fs/fuse/fuse_i.h | 3 +++ 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ff15522481d..254df56b847 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1562,10 +1562,9 @@ void fuse_release_nowrite(struct inode *inode) * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ -static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, - struct file *file) +int fuse_do_setattr(struct inode *inode, struct iattr *attr, + struct file *file) { - struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; struct fuse_setattr_in inarg; @@ -1574,9 +1573,6 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, loff_t oldsize; int err; - if (!fuse_allow_current_process(fc)) - return -EACCES; - if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) attr->ia_valid |= ATTR_FORCE; @@ -1671,10 +1667,15 @@ error: static int fuse_setattr(struct dentry *entry, struct iattr *attr) { + struct inode *inode = entry->d_inode; + + if (!fuse_allow_current_process(get_fuse_conn(inode))) + return -EACCES; + if (attr->ia_valid & ATTR_FILE) - return fuse_do_setattr(entry, attr, attr->ia_file); + return fuse_do_setattr(inode, attr, attr->ia_file); else - return fuse_do_setattr(entry, attr, NULL); + return fuse_do_setattr(inode, attr, NULL); } static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8f39f7b8cef..1f8e3d60dc0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2352,6 +2352,20 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, return 0; } +static void fuse_do_truncate(struct file *file) +{ + struct inode *inode = file->f_mapping->host; + struct iattr attr; + + attr.ia_valid = ATTR_SIZE; + attr.ia_size = i_size_read(inode); + + attr.ia_file = file; + attr.ia_valid |= ATTR_FILE; + + fuse_do_setattr(inode, &attr, file); +} + static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -2419,8 +2433,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, kfree(io); } - if (rw == WRITE && ret > 0) - fuse_write_update_size(inode, pos); + if (rw == WRITE) { + if (ret > 0) + fuse_write_update_size(inode, pos); + else if (ret < 0 && offset + count > i_size) + fuse_do_truncate(file); + } return ret; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 337169a406c..53b830e3b38 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -863,4 +863,7 @@ int fuse_dev_release(struct inode *inode, struct file *file); void fuse_write_update_size(struct inode *inode, loff_t pos); +int fuse_do_setattr(struct inode *inode, struct iattr *attr, + struct file *file); + #endif /* _FS_FUSE_I_H */ -- cgit v1.2.3 From 60b9df7a54804a965850db00beec4d3a2c002536 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 May 2013 14:37:21 +0200 Subject: fuse: add flag to turn on async direct IO Without async DIO write requests to a single file were always serialized. With async DIO that's no longer the case. So don't turn on async DIO by default for fear of breaking backward compatibility. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 8 ++++---- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 4 +++- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1f8e3d60dc0..6ab7ca43f9e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2371,14 +2371,14 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { ssize_t ret = 0; - struct file *file = NULL; + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; loff_t pos = 0; struct inode *inode; loff_t i_size; size_t count = iov_length(iov, nr_segs); struct fuse_io_priv *io; - file = iocb->ki_filp; pos = offset; inode = file->f_mapping->host; i_size = i_size_read(inode); @@ -2403,9 +2403,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, io->file = file; /* * By default, we want to optimize all I/Os with async request - * submission to the client filesystem. + * submission to the client filesystem if supported. */ - io->async = 1; + io->async = ff->fc->async_dio; io->iocb = iocb; /* diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 53b830e3b38..fde7249a3a9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -541,6 +541,9 @@ struct fuse_conn { /** Does the filesystem want adaptive readdirplus? */ unsigned readdirplus_auto:1; + /** Does the filesystem support asynchronous direct-IO submission? */ + unsigned async_dio:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 17ec1f70524..6201f81e4d3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -871,6 +871,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->do_readdirplus = 1; if (arg->flags & FUSE_READDIRPLUS_AUTO) fc->readdirplus_auto = 1; + if (arg->flags & FUSE_ASYNC_DIO) + fc->async_dio = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -898,7 +900,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | - FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO; + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); -- cgit v1.2.3