aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/block/biodoc.txt6
-rw-r--r--arch/alpha/include/asm/io.h3
-rw-r--r--arch/s390/mm/pgtable.c4
-rw-r--r--block/Kconfig23
-rw-r--r--block/as-iosched.c10
-rw-r--r--block/blk-barrier.c120
-rw-r--r--block/blk-core.c63
-rw-r--r--block/blk-settings.c6
-rw-r--r--block/blk-softirq.c2
-rw-r--r--block/blk-sysfs.c7
-rw-r--r--block/blk-tag.c1
-rw-r--r--block/blk-timeout.c21
-rw-r--r--block/cfq-iosched.c26
-rw-r--r--block/compat_ioctl.c2
-rw-r--r--block/deadline-iosched.c6
-rw-r--r--block/elevator.c73
-rw-r--r--block/genhd.c23
-rw-r--r--block/ioctl.c2
-rw-r--r--block/noop-iosched.c2
-rw-r--r--block/scsi_ioctl.c4
-rw-r--r--drivers/block/cciss.c88
-rw-r--r--drivers/block/cciss.h4
-rw-r--r--drivers/block/cciss_cmd.h3
-rw-r--r--drivers/block/loop.c39
-rw-r--r--drivers/block/nbd.c10
-rw-r--r--drivers/block/virtio_blk.c2
-rw-r--r--drivers/block/xen-blkfront.c8
-rw-r--r--drivers/cdrom/cdrom.c703
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/md/dm-io.c2
-rw-r--r--drivers/md/dm.c2
-rw-r--r--fs/aio.c100
-rw-r--r--fs/bio-integrity.c2
-rw-r--r--fs/bio.c320
-rw-r--r--fs/buffer.c19
-rw-r--r--fs/ext4/super.c8
-rw-r--r--include/linux/aio.h5
-rw-r--r--include/linux/bio.h26
-rw-r--r--include/linux/blkdev.h52
-rw-r--r--include/linux/buffer_head.h1
-rw-r--r--include/linux/elevator.h8
-rw-r--r--include/linux/genhd.h1
-rw-r--r--include/linux/mm_types.h5
-rw-r--r--include/linux/types.h11
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c4
-rw-r--r--mm/bounce.c9
47 files changed, 1089 insertions, 751 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 4dbb8be1c99..3c5434c83da 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -914,7 +914,7 @@ I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch
queue and specific I/O schedulers. Unless stated otherwise, elevator is used
to refer to both parts and I/O scheduler to specific I/O schedulers.
-Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c.
+Block layer implements generic dispatch queue in block/*.c.
The generic dispatch queue is responsible for properly ordering barrier
requests, requeueing, handling non-fs requests and all other subtleties.
@@ -926,8 +926,8 @@ be built inside the kernel. Each queue can choose different one and can also
change to another one dynamically.
A block layer call to the i/o scheduler follows the convention elv_xxx(). This
-calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
-xxx and xxx might not match exactly, but use your imagination. If an elevator
+calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
+and xxx might not match exactly, but use your imagination. If an elevator
doesn't implement a function, the switch does nothing or some minimal house
keeping work.
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index e971ab000f9..eda9b909aa0 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -96,9 +96,6 @@ static inline dma_addr_t __deprecated isa_page_to_bus(struct page *page)
return page_to_phys(page);
}
-/* This depends on working iommu. */
-#define BIO_VMERGE_BOUNDARY (alpha_mv.mv_pci_tbi ? PAGE_SIZE : 0)
-
/* Maximum PIO space address supported? */
#define IO_SPACE_LIMIT 0xffff
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ef3635b52fc..0767827540b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -263,7 +263,7 @@ int s390_enable_sie(void)
/* lets check if we are allowed to replace the mm */
task_lock(tsk);
if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
- tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+ tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
task_unlock(tsk);
return -EINVAL;
}
@@ -279,7 +279,7 @@ int s390_enable_sie(void)
/* Now lets check again if something happened */
task_lock(tsk);
if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
- tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+ tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
mmput(mm);
task_unlock(tsk);
return -EINVAL;
diff --git a/block/Kconfig b/block/Kconfig
index 290b219fad9..ac0956f7778 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -24,21 +24,17 @@ menuconfig BLOCK
if BLOCK
config LBD
- bool "Support for Large Block Devices"
+ bool "Support for large block devices and files"
depends on !64BIT
help
- Enable block devices of size 2TB and larger.
+ Enable block devices or files of size 2TB and larger.
This option is required to support the full capacity of large
(2TB+) block devices, including RAID, disk, Network Block Device,
Logical Volume Manager (LVM) and loopback.
-
- For example, RAID devices are frequently bigger than the capacity
- of the largest individual hard drive.
-
- This option is not required if you have individual disk drives
- which total 2TB+ and you are not aggregating the capacity into
- a large block device (e.g. using RAID or LVM).
+
+ This option also enables support for single files larger than
+ 2TB.
If unsure, say N.
@@ -58,15 +54,6 @@ config BLK_DEV_IO_TRACE
If unsure, say N.
-config LSF
- bool "Support for Large Single Files"
- depends on !64BIT
- help
- Say Y here if you want to be able to handle very large files (2TB
- and larger), otherwise say N.
-
- If unsure, say Y.
-
config BLK_DEV_BSG
bool "Block layer SG support v4 (EXPERIMENTAL)"
depends on EXPERIMENTAL
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 71f0abb219e..631f6f44460 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1339,12 +1339,12 @@ static int as_may_queue(struct request_queue *q, int rw)
return ret;
}
-static void as_exit_queue(elevator_t *e)
+static void as_exit_queue(struct elevator_queue *e)
{
struct as_data *ad = e->elevator_data;
del_timer_sync(&ad->antic_timer);
- kblockd_flush_work(&ad->antic_work);
+ cancel_work_sync(&ad->antic_work);
BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
@@ -1409,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count)
return count;
}
-static ssize_t est_time_show(elevator_t *e, char *page)
+static ssize_t est_time_show(struct elevator_queue *e, char *page)
{
struct as_data *ad = e->elevator_data;
int pos = 0;
@@ -1427,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page)
}
#define SHOW_FUNCTION(__FUNC, __VAR) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct as_data *ad = e->elevator_data; \
return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
@@ -1440,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct as_data *ad = e->elevator_data; \
int ret = as_var_store(__PTR, (page), count); \
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6e72d661ae4..8eba4e43bb0 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -24,8 +24,8 @@
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
prepare_flush_fn *prepare_flush_fn)
{
- if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
- prepare_flush_fn == NULL) {
+ if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+ QUEUE_ORDERED_DO_POSTFLUSH))) {
printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
return -EINVAL;
}
@@ -88,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
return QUEUE_ORDSEQ_DONE;
}
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
struct request *rq;
@@ -99,7 +99,7 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
q->ordseq |= seq;
if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
- return;
+ return false;
/*
* Okay, sequence complete.
@@ -109,6 +109,8 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
BUG();
+
+ return true;
}
static void pre_flush_end_io(struct request *rq, int error)
@@ -134,7 +136,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
struct request *rq;
rq_end_io_fn *end_io;
- if (which == QUEUE_ORDERED_PREFLUSH) {
+ if (which == QUEUE_ORDERED_DO_PREFLUSH) {
rq = &q->pre_flush_rq;
end_io = pre_flush_end_io;
} else {
@@ -151,80 +153,110 @@ static void queue_flush(struct request_queue *q, unsigned which)
elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}
-static inline struct request *start_ordered(struct request_queue *q,
- struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
+ struct request *rq = *rqp;
+ unsigned skip = 0;
+
q->orderr = 0;
q->ordered = q->next_ordered;
q->ordseq |= QUEUE_ORDSEQ_STARTED;
/*
- * Prep proxy barrier request.
+ * For an empty barrier, there's no actual BAR request, which
+ * in turn makes POSTFLUSH unnecessary. Mask them off.
*/
+ if (!rq->hard_nr_sectors) {
+ q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+ QUEUE_ORDERED_DO_POSTFLUSH);
+ /*
+ * Empty barrier on a write-through device w/ ordered
+ * tag has no command to issue and without any command
+ * to issue, ordering by tag can't be used. Drain
+ * instead.
+ */
+ if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
+ !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
+ q->ordered &= ~QUEUE_ORDERED_BY_TAG;
+ q->ordered |= QUEUE_ORDERED_BY_DRAIN;
+ }
+ }
+
+ /* stash away the original request */
elv_dequeue_request(q, rq);
q->orig_bar_rq = rq;
- rq = &q->bar_rq;
- blk_rq_init(q, rq);
- if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
- rq->cmd_flags |= REQ_RW;
- if (q->ordered & QUEUE_ORDERED_FUA)
- rq->cmd_flags |= REQ_FUA;
- init_request_from_bio(rq, q->orig_bar_rq->bio);
- rq->end_io = bar_end_io;
+ rq = NULL;
/*
* Queue ordered sequence. As we stack them at the head, we
* need to queue in reverse order. Note that we rely on that
* no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
- * request gets inbetween ordered sequence. If this request is
- * an empty barrier, we don't need to do a postflush ever since
- * there will be no data written between the pre and post flush.
- * Hence a single flush will suffice.
+ * request gets inbetween ordered sequence.
*/
- if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
- queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
- else
- q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+ if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
+ queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
+ rq = &q->post_flush_rq;
+ } else
+ skip |= QUEUE_ORDSEQ_POSTFLUSH;
- elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+ if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+ rq = &q->bar_rq;
+
+ /* initialize proxy request and queue it */
+ blk_rq_init(q, rq);
+ if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+ rq->cmd_flags |= REQ_RW;
+ if (q->ordered & QUEUE_ORDERED_DO_FUA)
+ rq->cmd_flags |= REQ_FUA;
+ init_request_from_bio(rq, q->orig_bar_rq->bio);
+ rq->end_io = bar_end_io;
- if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
- queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+ elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+ } else
+ skip |= QUEUE_ORDSEQ_BAR;
+
+ if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+ queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
rq = &q->pre_flush_rq;
} else
- q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+ skip |= QUEUE_ORDSEQ_PREFLUSH;
- if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
- q->ordseq |= QUEUE_ORDSEQ_DRAIN;
- else
+ if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
rq = NULL;
+ else
+ skip |= QUEUE_ORDSEQ_DRAIN;
+
+ *rqp = rq;
- return rq;
+ /*
+ * Complete skipped sequences. If whole sequence is complete,
+ * return false to tell elevator that this request is gone.
+ */
+ return !blk_ordered_complete_seq(q, skip, 0);
}
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
struct request *rq = *rqp;
const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
if (!q->ordseq) {
if (!is_barrier)
- return 1;
+ return true;
- if (q->next_ordered != QUEUE_ORDERED_NONE) {
- *rqp = start_ordered(q, rq);
- return 1;
- } else {
+ if (q->next_ordered != QUEUE_ORDERED_NONE)
+ return start_ordered(q, rqp);
+ else {
/*
- * This can happen when the queue switches to
- * ORDERED_NONE while this request is on it.
+ * Queue ordering not supported. Terminate
+ * with prejudice.
*/
elv_dequeue_request(q, rq);
if (__blk_end_request(rq, -EOPNOTSUPP,
blk_rq_bytes(rq)))
BUG();
*rqp = NULL;
- return 0;
+ return false;
}
}
@@ -235,9 +267,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
/* Special requests are not subject to ordering rules. */
if (!blk_fs_request(rq) &&
rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
- return 1;
+ return true;
- if (q->ordered & QUEUE_ORDERED_TAG) {
+ if (q->ordered & QUEUE_ORDERED_BY_TAG) {
/* Ordered by tag. Blocking the next barrier is enough. */
if (is_barrier && rq != &q->bar_rq)
*rqp = NULL;
@@ -248,7 +280,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
*rqp = NULL;
}
- return 1;
+ return true;
}
static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk-core.c b/block/blk-core.c
index 561e8a1b43a..a824e49c0d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -153,6 +153,9 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
nbytes = bio->bi_size;
}
+ if (unlikely(rq->cmd_flags & REQ_QUIET))
+ set_bit(BIO_QUIET, &bio->bi_flags);
+
bio->bi_size -= nbytes;
bio->bi_sector += (nbytes >> 9);
@@ -265,8 +268,7 @@ void __generic_unplug_device(struct request_queue *q)
{
if (unlikely(blk_queue_stopped(q)))
return;
-
- if (!blk_remove_plug(q))
+ if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
return;
q->request_fn(q);
@@ -404,7 +406,8 @@ EXPORT_SYMBOL(blk_stop_queue);
void blk_sync_queue(struct request_queue *q)
{
del_timer_sync(&q->unplug_timer);
- kblockd_flush_work(&q->unplug_work);
+ del_timer_sync(&q->timeout);
+ cancel_work_sync(&q->unplug_work);
}
EXPORT_SYMBOL(blk_sync_queue);
@@ -1135,7 +1138,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
- int el_ret, nr_sectors, barrier, discard, err;
+ int el_ret, nr_sectors;
const unsigned short prio = bio_prio(bio);
const int sync = bio_sync(bio);
int rw_flags;
@@ -1149,22 +1152,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
*/
blk_queue_bounce(q, &bio);
- barrier = bio_barrier(bio);
- if (unlikely(barrier) && bio_has_data(bio) &&
- (q->next_ordered == QUEUE_ORDERED_NONE)) {
- err = -EOPNOTSUPP;
- goto end_io;
- }
-
- discard = bio_discard(bio);
- if (unlikely(discard) && !q->prepare_discard_fn) {
- err = -EOPNOTSUPP;
- goto end_io;
- }
-
spin_lock_irq(q->queue_lock);
- if (unlikely(barrier) || elv_queue_empty(q))
+ if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -1250,18 +1240,14 @@ get_rq:
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
bio_flagged(bio, BIO_CPU_AFFINE))
req->cpu = blk_cpu_to_group(smp_processor_id());
- if (elv_queue_empty(q))
+ if (!blk_queue_nonrot(q) && elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
out:
- if (sync)
+ if (sync || blk_queue_nonrot(q))
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
return 0;
-
-end_io:
- bio_endio(bio, err);
- return 0;
}
/*
@@ -1414,15 +1400,13 @@ static inline void __generic_make_request(struct bio *bio)
char b[BDEVNAME_SIZE];
q = bdev_get_queue(bio->bi_bdev);
- if (!q) {
+ if (unlikely(!q)) {
printk(KERN_ERR
"generic_make_request: Trying to access "
"nonexistent block-device %s (%Lu)\n",
bdevname(bio->bi_bdev, b),
(long long) bio->bi_sector);
-end_io:
- bio_endio(bio, err);
- break;
+ goto end_io;
}
if (unlikely(nr_sectors > q->max_hw_sectors)) {
@@ -1459,14 +1443,19 @@ end_io:
if (bio_check_eod(bio, nr_sectors))
goto end_io;
- if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
- (bio_discard(bio) && !q->prepare_discard_fn)) {
+
+ if (bio_discard(bio) && !q->prepare_discard_fn) {
err = -EOPNOTSUPP;
goto end_io;
}
ret = q->make_request_fn(q, bio);
} while (ret);
+
+ return;
+
+end_io:
+ bio_endio(bio, err);
}
/*
@@ -1716,14 +1705,6 @@ static int __end_that_request_first(struct request *req, int error,
while ((bio = req->bio) != NULL) {
int nbytes;
- /*
- * For an empty barrier request, the low level driver must
- * store a potential error location in ->sector. We pass
- * that back up in ->bi_sector.
- */
- if (blk_empty_barrier(req))
- bio->bi_sector = req->sector;
-
if (nr_bytes >= bio->bi_size) {
req->bio = bio->bi_next;
nbytes = bio->bi_size;
@@ -2143,12 +2124,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
}
EXPORT_SYMBOL(kblockd_schedule_work);
-void kblockd_flush_work(struct work_struct *work)
-{
- cancel_work_sync(work);
-}
-EXPORT_SYMBOL(kblockd_flush_work);
-
int __init blk_dev_init(void)
{
kblockd_workqueue = create_workqueue("kblockd");
diff --git a/block/blk-settings.c b/block/blk-settings.c
index afa55e14e27..59fd05d9f1d 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -319,9 +319,9 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
- t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments);
- t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
- t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
+ t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
+ t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
+ t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
if (!t->queue_lock)
WARN_ON_ONCE(1);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index e660d26ca65..ce0efc6b26d 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -161,7 +161,7 @@ void blk_complete_request(struct request *req)
}
EXPORT_SYMBOL(blk_complete_request);
-__init int blk_softirq_init(void)
+static __init int blk_softirq_init(void)
{
int i;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 21e275d7eed..a29cb788e40 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -88,9 +88,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
unsigned long ra_kb;
ssize_t ret = queue_var_store(&ra_kb, page, count);
- spin_lock_irq(q->queue_lock);
q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
- spin_unlock_irq(q->queue_lock);
return ret;
}
@@ -117,10 +115,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
return -EINVAL;
- /*
- * Take the queue lock to update the readahead and max_sectors
- * values synchronously:
- */
+
spin_lock_irq(q->queue_lock);
q->max_sectors = max_sectors_kb << 1;
spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index c0d419e84ce..3c518e3303a 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -158,7 +158,6 @@ fail:
/**
* blk_init_tags - initialize the tag info for an external tag map
* @depth: the maximum queue depth supported
- * @tags: the tag to use
**/
struct blk_queue_tag *blk_init_tags(int depth)
{
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 69185ea9fae..a09535377a9 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -73,11 +73,7 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
*/
void blk_delete_timer(struct request *req)
{
- struct request_queue *q = req->q;
-
list_del_init(&req->timeout_list);
- if (list_empty(&q->timeout_list))
- del_timer(&q->timeout);
}
static void blk_rq_timed_out(struct request *req)
@@ -111,7 +107,7 @@ static void blk_rq_timed_out(struct request *req)
void blk_rq_timed_out_timer(unsigned long data)
{
struct request_queue *q = (struct request_queue *) data;
- unsigned long flags, uninitialized_var(next), next_set = 0;
+ unsigned long flags, next = 0;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
@@ -126,15 +122,18 @@ void blk_rq_timed_out_timer(unsigned long data)
if (blk_mark_rq_complete(rq))
continue;
blk_rq_timed_out(rq);
+ } else {
+ if (!next || time_after(next, rq->deadline))
+ next = rq->deadline;
}
- if (!next_set) {
- next = rq->deadline;
- next_set = 1;
- } else if (time_after(next, rq->deadline))
- next = rq->deadline;
}
- if (next_set && !list_empty(&q->timeout_list))
+ /*
+ * next can never be 0 here with the list non-empty, since we always
+ * bump ->deadline to 1 so we can detect if the timer was ever added
+ * or not. See comment in blk_add_timer()
+ */
+ if (next)
mod_timer(&q->timeout, round_jiffies_up(next));
spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a062eebbd1..e8525fa7282 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1136,12 +1136,8 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
if (cfq_class_idle(cfqq))
max_dispatch = 1;
- if (cfqq->dispatched >= max_dispatch) {
- if (cfqd->busy_queues > 1)
- break;
- if (cfqq->dispatched >= 4 * max_dispatch)
- break;
- }
+ if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1)
+ break;
if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
break;
@@ -1318,7 +1314,15 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __cfq_exit_single_io_context(cfqd, cic);
+
+ /*
+ * Ensure we get a fresh copy of the ->key to prevent
+ * race between exiting task and queue
+ */
+ smp_read_barrier_depends();
+ if (cic->key)
+ __cfq_exit_single_io_context(cfqd, cic);
+
spin_unlock_irqrestore(q->queue_lock, flags);
}
}
@@ -2160,7 +2164,7 @@ out_cont:
static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
{
del_timer_sync(&cfqd->idle_slice_timer);
- kblockd_flush_work(&cfqd->unplug_work);
+ cancel_work_sync(&cfqd->unplug_work);
}
static void cfq_put_async_queues(struct cfq_data *cfqd)
@@ -2178,7 +2182,7 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
cfq_put_queue(cfqd->async_idle_cfqq);
}
-static void cfq_exit_queue(elevator_t *e)
+static void cfq_exit_queue(struct elevator_queue *e)
{
struct cfq_data *cfqd = e->elevator_data;
struct request_queue *q = cfqd->queue;
@@ -2288,7 +2292,7 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct cfq_data *cfqd = e->elevator_data; \
unsigned int __data = __VAR; \
@@ -2308,7 +2312,7 @@ SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct cfq_data *cfqd = e->elevator_data; \
unsigned int __data; \
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 67eb93cff69..f87615dea46 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -774,9 +774,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
return -ENOTTY;
- lock_kernel();
bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
- unlock_kernel();
return 0;
case BLKGETSIZE:
size = bdev->bd_inode->i_size;
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index fd311179f44..c4d991d4ade 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -334,7 +334,7 @@ static int deadline_queue_empty(struct request_queue *q)
&& list_empty(&dd->fifo_list[READ]);
}
-static void deadline_exit_queue(elevator_t *e)
+static void deadline_exit_queue(struct elevator_queue *e)
{
struct deadline_data *dd = e->elevator_data;
@@ -387,7 +387,7 @@ deadline_var_store(int *var, const char *page, size_t count)
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct deadline_data *dd = e->elevator_data; \
int __data = __VAR; \
@@ -403,7 +403,7 @@ SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct deadline_data *dd = e->elevator_data; \
int __data; \
diff --git a/block/elevator.c b/block/elevator.c
index 86836dd179c..98259eda0ef 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -65,7 +65,7 @@ DEFINE_TRACE(block_rq_issue);
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
struct request_queue *q = rq->q;
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_allow_merge_fn)
return e->ops->elevator_allow_merge_fn(q, rq, bio);
@@ -208,13 +208,13 @@ __setup("elevator=", elevator_setup);
static struct kobj_type elv_ktype;
-static elevator_t *elevator_alloc(struct request_queue *q,
+static struct elevator_queue *elevator_alloc(struct request_queue *q,
struct elevator_type *e)
{
- elevator_t *eq;
+ struct elevator_queue *eq;
int i;
- eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node);
+ eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
if (unlikely(!eq))
goto err;
@@ -240,8 +240,9 @@ err:
static void elevator_release(struct kobject *kobj)
{
- elevator_t *e = container_of(kobj, elevator_t, kobj);
+ struct elevator_queue *e;
+ e = container_of(kobj, struct elevator_queue, kobj);
elevator_put(e->elevator_type);
kfree(e->hash);
kfree(e);
@@ -297,7 +298,7 @@ int elevator_init(struct request_queue *q, char *name)
}
EXPORT_SYMBOL(elevator_init);
-void elevator_exit(elevator_t *e)
+void elevator_exit(struct elevator_queue *e)
{
mutex_lock(&e->sysfs_lock);
if (e->ops->elevator_exit_fn)
@@ -311,7 +312,7 @@ EXPORT_SYMBOL(elevator_exit);
static void elv_activate_rq(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_activate_req_fn)
e->ops->elevator_activate_req_fn(q, rq);
@@ -319,7 +320,7 @@ static void elv_activate_rq(struct request_queue *q, struct request *rq)
static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_deactivate_req_fn)
e->ops->elevator_deactivate_req_fn(q, rq);
@@ -338,7 +339,7 @@ static void elv_rqhash_del(struct request_queue *q, struct request *rq)
static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
BUG_ON(ELV_ON_HASH(rq));
hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
@@ -352,7 +353,7 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
struct hlist_node *entry, *next;
struct request *rq;
@@ -494,7 +495,7 @@ EXPORT_SYMBOL(elv_dispatch_add_tail);
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
struct request *__rq;
int ret;
@@ -529,7 +530,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_merged_fn)
e->ops->elevator_merged_fn(q, rq, type);
@@ -543,7 +544,7 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
void elv_merge_requests(struct request_queue *q, struct request *rq,
struct request *next)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_merge_req_fn)
e->ops->elevator_merge_req_fn(q, rq, next);
@@ -755,14 +756,6 @@ struct request *elv_next_request(struct request_queue *q)
int ret;
while ((rq = __elv_next_request(q)) != NULL) {
- /*
- * Kill the empty barrier place holder, the driver must
- * not ever see it.
- */
- if (blk_empty_barrier(rq)) {
- __blk_end_request(rq, 0, blk_rq_bytes(rq));
- continue;
- }
if (!(rq->cmd_flags & REQ_STARTED)) {
/*
* This is the first time the device driver
@@ -854,7 +847,7 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
int elv_queue_empty(struct request_queue *q)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (!list_empty(&q->queue_head))
return 0;
@@ -868,7 +861,7 @@ EXPORT_SYMBOL(elv_queue_empty);
struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_latter_req_fn)
return e->ops->elevator_latter_req_fn(q, rq);
@@ -877,7 +870,7 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_former_req_fn)
return e->ops->elevator_former_req_fn(q, rq);
@@ -886,7 +879,7 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_set_req_fn)
return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
@@ -897,7 +890,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
void elv_put_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_put_req_fn)
e->ops->elevator_put_req_fn(rq);
@@ -905,7 +898,7 @@ void elv_put_request(struct request_queue *q, struct request *rq)
int elv_may_queue(struct request_queue *q, int rw)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_may_queue_fn)
return e->ops->elevator_may_queue_fn(q, rw);
@@ -928,7 +921,7 @@ EXPORT_SYMBOL(elv_abort_queue);
void elv_completed_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
/*
* request is released from the driver, io must be done
@@ -944,10 +937,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
* drained for flush sequence.
*/
if (unlikely(q->ordseq)) {
- struct request *first_rq = list_entry_rq(q->queue_head.next);
- if (q->in_flight == 0 &&
+ struct request *next = NULL;
+
+ if (!list_empty(&q->queue_head))
+ next = list_entry_rq(q->queue_head.next);
+
+ if (!q->in_flight &&
blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
- blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
+ (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
blk_start_queueing(q);
}
@@ -959,13 +956,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
- elevator_t *e = container_of(kobj, elevator_t, kobj);
struct elv_fs_entry *entry = to_elv(attr);
+ struct elevator_queue *e;
ssize_t error;
if (!entry->show)
return -EIO;
+ e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
error = e->ops ? entry->show(e, page) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
@@ -976,13 +974,14 @@ static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
- elevator_t *e = container_of(kobj, elevator_t, kobj);
struct elv_fs_entry *entry = to_elv(attr);
+ struct elevator_queue *e;
ssize_t error;
if (!entry->store)
return -EIO;
+ e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
error = e->ops ? entry->store(e, page, length) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
@@ -1001,7 +1000,7 @@ static struct kobj_type elv_ktype = {
int elv_register_queue(struct request_queue *q)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
int error;
error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
@@ -1019,7 +1018,7 @@ int elv_register_queue(struct request_queue *q)
return error;
}
-static void __elv_unregister_queue(elevator_t *e)
+static void __elv_unregister_queue(struct elevator_queue *e)
{
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
@@ -1082,7 +1081,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
*/
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
- elevator_t *old_elevator, *e;
+ struct elevator_queue *old_elevator, *e;
void *data;
/*
@@ -1188,7 +1187,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
struct elevator_type *elv = e->elevator_type;
struct elevator_type *__e;
int len = 0;
diff --git a/block/genhd.c b/block/genhd.c
index 2f7feda61e3..d84a7df1e2a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
}
EXPORT_SYMBOL_GPL(disk_part_iter_exit);
+static inline int sector_in_part(struct hd_struct *part, sector_t sector)
+{
+ return part->start_sect <= sector &&
+ sector < part->start_sect + part->nr_sects;
+}
+
/**
* disk_map_sector_rcu - map sector to partition
* @disk: gendisk of interest
@@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
{
struct disk_part_tbl *ptbl;
+ struct hd_struct *part;
int i;
ptbl = rcu_dereference(disk->part_tbl);
+ part = rcu_dereference(ptbl->last_lookup);
+ if (part && sector_in_part(part, sector))
+ return part;
+
for (i = 1; i < ptbl->len; i++) {
- struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+ part = rcu_dereference(ptbl->part[i]);
- if (part && part->start_sect <= sector &&
- sector < part->start_sect + part->nr_sects)
+ if (part && sector_in_part(part, sector)) {
+ rcu_assign_pointer(ptbl->last_lookup, part);
return part;
+ }
}
return &disk->part0;
}
@@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk,
struct disk_part_tbl *old_ptbl = disk->part_tbl;
rcu_assign_pointer(disk->part_tbl, new_ptbl);
- if (old_ptbl)
+
+ if (old_ptbl) {
+ rcu_assign_pointer(old_ptbl->last_lookup, NULL);
call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+ }
}
/**
diff --git a/block/ioctl.c b/block/ioctl.c
index d03985b04d6..0f22e629b13 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -323,9 +323,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
return -ENOTTY;
- lock_kernel();
bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
- unlock_kernel();
return 0;
case BLKBSZSET:
/* set the logical block size */
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index c23e0296965..3a0d369d08c 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -76,7 +76,7 @@ static void *noop_init_queue(struct request_queue *q)
return nd;
}
-static void noop_exit_queue(elevator_t *e)
+static void noop_exit_queue(struct elevator_queue *e)
{
struct noop_data *nd = e->elevator_data;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index d0bb92cbefb..ee9c67d7e1b 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -60,7 +60,7 @@ static int scsi_get_bus(struct request_queue *q, int __user *p)
static int sg_get_timeout(struct request_queue *q)
{
- return q->sg_timeout / (HZ / USER_HZ);
+ return jiffies_to_clock_t(q->sg_timeout);
}
static int sg_set_timeout(struct request_queue *q, int __user *p)
@@ -68,7 +68,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
int timeout, err = get_user(timeout, p);
if (!err)
- q->sg_timeout = timeout * (HZ / USER_HZ);
+ q->sg_timeout = clock_t_to_jiffies(timeout);
return err;
}
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9f7c543cc04..01e69383d9c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -164,7 +164,7 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
static int cciss_revalidate(struct gendisk *disk);
static int rebuild_lun_table(ctlr_info_t *h, int first_time);
-static int deregister_disk(struct gendisk *disk, drive_info_struct *drv,
+static int deregister_disk(ctlr_info_t *h, int drv_index,
int clear_all);
static void cciss_read_capacity(int ctlr, int logvol, int withirq,
@@ -215,31 +215,17 @@ static struct block_device_operations cciss_fops = {
/*
* Enqueuing and dequeuing functions for cmdlists.
*/
-static inline void addQ(CommandList_struct **Qptr, CommandList_struct *c)
+static inline void addQ(struct hlist_head *list, CommandList_struct *c)
{
- if (*Qptr == NULL) {
- *Qptr = c;
- c->next = c->prev = c;
- } else {
- c->prev = (*Qptr)->prev;
- c->next = (*Qptr);
- (*Qptr)->prev->next = c;
- (*Qptr)->prev = c;
- }
+ hlist_add_head(&c->list, list);
}
-static inline CommandList_struct *removeQ(CommandList_struct **Qptr,
- CommandList_struct *c)
+static inline void removeQ(CommandList_struct *c)
{
- if (c && c->next != c) {
- if (*Qptr == c)
- *Qptr = c->next;
- c->prev->next = c->next;
- c->next->prev = c->prev;
- } else {
- *Qptr = NULL;
- }
- return c;
+ if (WARN_ON(hlist_unhashed(&c->list)))
+ return;
+
+ hlist_del_init(&c->list);
}
#include "cciss_scsi.c" /* For SCSI tape support */
@@ -506,6 +492,7 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
c->cmdindex = i;
}
+ INIT_HLIST_NODE(&c->list);
c->busaddr = (__u32) cmd_dma_handle;
temp64.val = (__u64) err_dma_handle;
c->ErrDesc.Addr.lower = temp64.val32.lower;
@@ -1492,8 +1479,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
* which keeps the interrupt handler from starting
* the queue.
*/
- ret = deregister_disk(h->gendisk[drv_index],
- &h->drv[drv_index], 0);
+ ret = deregister_disk(h, drv_index, 0);
h->drv[drv_index].busy_configuring = 0;
}
@@ -1711,8 +1697,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
h->drv[i].busy_configuring = 1;
spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
- return_code = deregister_disk(h->gendisk[i],
- &h->drv[i], 1);
+ return_code = deregister_disk(h, i, 1);
h->drv[i].busy_configuring = 0;
}
}
@@ -1782,15 +1767,19 @@ mem_msg:
* the highest_lun should be left unchanged and the LunID
* should not be cleared.
*/
-static int deregister_disk(struct gendisk *disk, drive_info_struct *drv,
+static int deregister_disk(ctlr_info_t *h, int drv_index,
int clear_all)
{
int i;
- ctlr_info_t *h = get_host(disk);
+ struct gendisk *disk;
+ drive_info_struct *drv;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+ drv = &h->drv[drv_index];
+ disk = h->gendisk[drv_index];
+
/* make sure logical volume is NOT is use */
if (clear_all || (h->gendisk[0] == disk)) {
if (drv->usage_count > 1)
@@ -2548,7 +2537,8 @@ static void start_io(ctlr_info_t *h)
{
CommandList_struct *c;
- while ((c = h->reqQ) != NULL) {
+ while (!hlist_empty(&h->reqQ)) {
+ c = hlist_entry(h->reqQ.first, CommandList_struct, list);
/* can't do anything if fifo is full */
if ((h->access.fifo_full(h))) {
printk(KERN_WARNING "cciss: fifo full\n");
@@ -2556,14 +2546,14 @@ static void start_io(ctlr_info_t *h)
}
/* Get the first entry from the Request Q */
- removeQ(&(h->reqQ), c);
+ removeQ(c);
h->Qdepth--;
/* Tell the controller execute command */
h->access.submit_command(h, c);
/* Put job onto the completed Q */
- addQ(&(h->cmpQ), c);
+ addQ(&h->cmpQ, c);
}
}
@@ -2576,7 +2566,7 @@ static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
memset(c->err_info, 0, sizeof(ErrorInfo_struct));
/* add it to software queue and then send it to the controller */
- addQ(&(h->reqQ), c);
+ addQ(&h->reqQ, c);
h->Qdepth++;
if (h->Qdepth > h->maxQsinceinit)
h->maxQsinceinit = h->Qdepth;
@@ -2897,7 +2887,7 @@ static void do_cciss_request(struct request_queue *q)
spin_lock_irq(q->queue_lock);
- addQ(&(h->reqQ), c);
+ addQ(&h->reqQ, c);
h->Qdepth++;
if (h->Qdepth > h->maxQsinceinit)
h->maxQsinceinit = h->Qdepth;
@@ -2985,16 +2975,12 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
a = c->busaddr;
} else {
+ struct hlist_node *tmp;
+
a &= ~3;
- if ((c = h->cmpQ) == NULL) {
- printk(KERN_WARNING
- "cciss: Completion of %08x ignored\n",
- a1);
- continue;
- }
- while (c->busaddr != a) {
- c = c->next;
- if (c == h->cmpQ)
+ c = NULL;
+ hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
+ if (c->busaddr == a)
break;
}
}
@@ -3002,8 +2988,8 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
* If we've found the command, take it off the
* completion Q and free it
*/
- if (c->busaddr == a) {
- removeQ(&h->cmpQ, c);
+ if (c && c->busaddr == a) {
+ removeQ(c);
if (c->cmd_type == CMD_RWREQ) {
complete_command(h, c, 0);
} else if (c->cmd_type == CMD_IOCTL_PEND) {
@@ -3423,6 +3409,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
return -1;
hba[i]->busy_initializing = 1;
+ INIT_HLIST_HEAD(&hba[i]->cmpQ);
+ INIT_HLIST_HEAD(&hba[i]->reqQ);
if (cciss_pci_init(hba[i], pdev) != 0)
goto clean1;
@@ -3730,15 +3718,17 @@ static void fail_all_cmds(unsigned long ctlr)
pci_disable_device(h->pdev); /* Make sure it is really dead. */
/* move everything off the request queue onto the completed queue */
- while ((c = h->reqQ) != NULL) {
- removeQ(&(h->reqQ), c);
+ while (!hlist_empty(&h->reqQ)) {
+ c = hlist_entry(h->reqQ.first, CommandList_struct, list);
+ removeQ(c);
h->Qdepth--;
- addQ(&(h->cmpQ), c);
+ addQ(&h->cmpQ, c);
}
/* Now, fail everything on the completed queue with a HW error */
- while ((c = h->cmpQ) != NULL) {
- removeQ(&h->cmpQ, c);
+ while (!hlist_empty(&h->cmpQ)) {
+ c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
+ removeQ(c);
c->err_info->CommandStatus = CMD_HARDWARE_ERR;
if (c->cmd_type == CMD_RWREQ) {
complete_command(h, c, 0);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 24a7efa993a..15e2b84734e 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -89,8 +89,8 @@ struct ctlr_info
struct access_method access;
/* queue and queue Info */
- CommandList_struct *reqQ;
- CommandList_struct *cmpQ;
+ struct hlist_head reqQ;
+ struct hlist_head cmpQ;
unsigned int Qdepth;
unsigned int maxQsinceinit;
unsigned int maxSG;
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 43bf5593b59..24e22dea1a9 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -265,8 +265,7 @@ typedef struct _CommandList_struct {
int ctlr;
int cmd_type;
long cmdindex;
- struct _CommandList_struct *prev;
- struct _CommandList_struct *next;
+ struct hlist_node list;
struct request * rq;
struct completion *waiting;
int retry_count;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index fb06ed65921..edbaac6c057 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -624,20 +624,38 @@ static int loop_switch(struct loop_device *lo, struct file *file)
}
/*
+ * Helper to flush the IOs in loop, but keeping loop thread running
+ */
+static int loop_flush(struct loop_device *lo)
+{
+ /* loop not yet configured, no running thread, nothing to flush */
+ if (!lo->lo_thread)
+ return 0;
+
+ return loop_switch(lo, NULL);
+}
+
+/*
* Do the actual switch; called from the BIO completion routine
*/
static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
{
struct file *file = p->file;
struct file *old_file = lo->lo_backing_file;
- struct address_space *mapping = file->f_mapping;
+ struct address_space *mapping;
+
+ /* if no new file, only flush of queued bios requested */
+ if (!file)
+ goto out;
+ mapping = file->f_mapping;
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
lo->lo_backing_file = file;
lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+out:
complete(&p->wait);
}
@@ -901,6 +919,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
kthread_stop(lo->lo_thread);
+ lo->lo_queue->unplug_fn = NULL;
lo->lo_backing_file = NULL;
loop_release_xfer(lo);
@@ -1345,11 +1364,25 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
struct loop_device *lo = disk->private_data;
mutex_lock(&lo->lo_ctl_mutex);
- --lo->lo_refcnt;
- if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) && !lo->lo_refcnt)
+ if (--lo->lo_refcnt)
+ goto out;
+
+ if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
+ /*
+ * In autoclear mode, stop the loop thread
+ * and remove configuration after last close.
+ */
loop_clr_fd(lo, NULL);
+ } else {
+ /*
+ * Otherwise keep thread (if running) and config,
+ * but flush possible ongoing bios in thread.
+ */
+ loop_flush(lo);
+ }
+out:
mutex_unlock(&lo->lo_ctl_mutex);
return 0;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d3a91cacee8..7bcc1d8bc96 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -722,7 +722,6 @@ static int __init nbd_init(void)
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = alloc_disk(1 << part_shift);
- elevator_t *old_e;
if (!disk)
goto out;
nbd_dev[i].disk = disk;
@@ -736,11 +735,10 @@ static int __init nbd_init(void)
put_disk(disk);
goto out;
}
- old_e = disk->queue->elevator;
- if (elevator_init(disk->queue, "deadline") == 0 ||
- elevator_init(disk->queue, "noop") == 0) {
- elevator_exit(old_e);
- }
+ /*
+ * Tell the block layer that we are not a rotational device
+ */
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
}
if (register_blkdev(NBD_MAJOR, "nbd")) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 85d79a02d48..f151592ecf7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -237,6 +237,8 @@ static int virtblk_probe(struct virtio_device *vdev)
goto out_put_disk;
}
+ queue_flag_set_unlocked(QUEUE_FLAG_VIRT, vblk->disk->queue);
+
if (index < 26) {
sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
} else if (index < (26 + 1) * 26) {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2d19f0cc47f..918ef725de4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -338,18 +338,12 @@ wait:
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
struct request_queue *rq;
- elevator_t *old_e;
rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
if (rq == NULL)
return -1;
- old_e = rq->elevator;
- if (IS_ERR_VALUE(elevator_init(rq, "noop")))
- printk(KERN_WARNING
- "blkfront: Switch elevator failed, use default\n");
- else
- elevator_exit(old_e);
+ queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_hardsect_size(rq, sector_size);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 7d2e91cccb1..cceace61ef2 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1712,29 +1712,30 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
return 0;
}
-static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
+static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s,
+ struct packet_command *cgc)
{
unsigned char buf[21], *base;
struct dvd_layer *layer;
- struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
int ret, layer_num = s->physical.layer_num;
if (layer_num >= DVD_LAYERS)
return -EINVAL;
- init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
- cgc.cmd[6] = layer_num;
- cgc.cmd[7] = s->type;
- cgc.cmd[9] = cgc.buflen & 0xff;
+ init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
+ cgc->cmd[6] = layer_num;
+ cgc->cmd[7] = s->type;
+ cgc->cmd[9] = cgc->buflen & 0xff;
/*
* refrain from reporting errors on non-existing layers (mainly)
*/
- cgc.quiet = 1;
+ cgc->quiet = 1;
- if ((ret = cdo->generic_packet(cdi, &cgc)))
+ ret = cdo->generic_packet(cdi, cgc);
+ if (ret)
return ret;
base = &buf[4];
@@ -1762,21 +1763,22 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
return 0;
}
-static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s)
+static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s,
+ struct packet_command *cgc)
{
int ret;
u_char buf[8];
- struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
- init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
- cgc.cmd[6] = s->copyright.layer_num;
- cgc.cmd[7] = s->type;
- cgc.cmd[8] = cgc.buflen >> 8;
- cgc.cmd[9] = cgc.buflen & 0xff;
+ init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
+ cgc->cmd[6] = s->copyright.layer_num;
+ cgc->cmd[7] = s->type;
+ cgc->cmd[8] = cgc->buflen >> 8;
+ cgc->cmd[9] = cgc->buflen & 0xff;
- if ((ret = cdo->generic_packet(cdi, &cgc)))
+ ret = cdo->generic_packet(cdi, cgc);
+ if (ret)
return ret;
s->copyright.cpst = buf[4];
@@ -1785,79 +1787,89 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s)
return 0;
}
-static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s)
+static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s,
+ struct packet_command *cgc)
{
int ret, size;
u_char *buf;
- struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
size = sizeof(s->disckey.value) + 4;
- if ((buf = kmalloc(size, GFP_KERNEL)) == NULL)
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
- init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
- cgc.cmd[7] = s->type;
- cgc.cmd[8] = size >> 8;
- cgc.cmd[9] = size & 0xff;
- cgc.cmd[10] = s->disckey.agid << 6;
+ init_cdrom_command(cgc, buf, size, CGC_DATA_READ);
+ cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
+ cgc->cmd[7] = s->type;
+ cgc->cmd[8] = size >> 8;
+ cgc->cmd[9] = size & 0xff;
+ cgc->cmd[10] = s->disckey.agid << 6;
- if (!(ret = cdo->generic_packet(cdi, &cgc)))
+ ret = cdo->generic_packet(cdi, cgc);
+ if (!ret)
memcpy(s->disckey.value, &buf[4], sizeof(s->disckey.value));
kfree(buf);
return ret;
}
-static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s)
+static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s,
+ struct packet_command *cgc)
{
- int ret;
- u_char buf[4 + 188];
- struct packet_command cgc;
+ int ret, size = 4 + 188;
+ u_char *buf;
struct cdrom_device_ops *cdo = cdi->ops;
- init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
- cgc.cmd[7] = s->type;
- cgc.cmd[9] = cgc.buflen & 0xff;
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
- if ((ret = cdo->generic_packet(cdi, &cgc)))
- return ret;
+ init_cdrom_command(cgc, buf, size, CGC_DATA_READ);
+ cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
+ cgc->cmd[7] = s->type;
+ cgc->cmd[9] = cgc->buflen & 0xff;
+
+ ret = cdo->generic_packet(cdi, cgc);
+ if (ret)
+ goto out;
s->bca.len = buf[0] << 8 | buf[1];
if (s->bca.len < 12 || s->bca.len > 188) {
cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
memcpy(s->bca.value, &buf[4], s->bca.len);
-
- return 0;
+ ret = 0;
+out:
+ kfree(buf);
+ return ret;
}
-static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s)
+static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
+ struct packet_command *cgc)
{
int ret = 0, size;
u_char *buf;
- struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
size = sizeof(s->manufact.value) + 4;
- if ((buf = kmalloc(size, GFP_KERNEL)) == NULL)
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
- init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
- cgc.cmd[7] = s->type;
- cgc.cmd[8] = size >> 8;
- cgc.cmd[9] = size & 0xff;
+ init_cdrom_command(cgc, buf, size, CGC_DATA_READ);
+ cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
+ cgc->cmd[7] = s->type;
+ cgc->cmd[8] = size >> 8;
+ cgc->cmd[9] = size & 0xff;
- if ((ret = cdo->generic_packet(cdi, &cgc))) {
- kfree(buf);
- return ret;
- }
+ ret = cdo->generic_packet(cdi, cgc);
+ if (ret)
+ goto out;
s->manufact.len = buf[0] << 8 | buf[1];
if (s->manufact.len < 0 || s->manufact.len > 2048) {
@@ -1868,27 +1880,29 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s)
memcpy(s->manufact.value, &buf[4], s->manufact.len);
}
+out:
kfree(buf);
return ret;
}
-static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s)
+static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s,
+ struct packet_command *cgc)
{
switch (s->type) {
case DVD_STRUCT_PHYSICAL:
- return dvd_read_physical(cdi, s);
+ return dvd_read_physical(cdi, s, cgc);
case DVD_STRUCT_COPYRIGHT:
- return dvd_read_copyright(cdi, s);
+ return dvd_read_copyright(cdi, s, cgc);
case DVD_STRUCT_DISCKEY:
- return dvd_read_disckey(cdi, s);
+ return dvd_read_disckey(cdi, s, cgc);
case DVD_STRUCT_BCA:
- return dvd_read_bca(cdi, s);
+ return dvd_read_bca(cdi, s, cgc);
case DVD_STRUCT_MANUFACT:
- return dvd_read_manufact(cdi, s);
+ return dvd_read_manufact(cdi, s, cgc);
default:
cdinfo(CD_WARNING, ": Invalid DVD structure read requested (%d)\n",
@@ -2787,271 +2801,360 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size)
return cdo->generic_packet(cdi, &cgc);
}
-static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
- unsigned long arg)
-{
- struct cdrom_device_ops *cdo = cdi->ops;
- struct packet_command cgc;
+static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
+ void __user *arg,
+ struct packet_command *cgc,
+ int cmd)
+{
struct request_sense sense;
- unsigned char buffer[32];
- int ret = 0;
-
- memset(&cgc, 0, sizeof(cgc));
+ struct cdrom_msf msf;
+ int blocksize = 0, format = 0, lba;
+ int ret;
- /* build a unified command and queue it through
- cdo->generic_packet() */
switch (cmd) {
case CDROMREADRAW:
+ blocksize = CD_FRAMESIZE_RAW;
+ break;
case CDROMREADMODE1:
- case CDROMREADMODE2: {
- struct cdrom_msf msf;
- int blocksize = 0, format = 0, lba;
-
- switch (cmd) {
- case CDROMREADRAW:
- blocksize = CD_FRAMESIZE_RAW;
- break;
- case CDROMREADMODE1:
- blocksize = CD_FRAMESIZE;
- format = 2;
- break;
- case CDROMREADMODE2:
- blocksize = CD_FRAMESIZE_RAW0;
- break;
- }
- IOCTL_IN(arg, struct cdrom_msf, msf);
- lba = msf_to_lba(msf.cdmsf_min0,msf.cdmsf_sec0,msf.cdmsf_frame0);
- /* FIXME: we need upper bound checking, too!! */
- if (lba < 0)
- return -EINVAL;
- cgc.buffer = kmalloc(blocksize, GFP_KERNEL);
- if (cgc.buffer == NULL)
- return -ENOMEM;
- memset(&sense, 0, sizeof(sense));
- cgc.sense = &sense;
- cgc.data_direction = CGC_DATA_READ;
- ret = cdrom_read_block(cdi, &cgc, lba, 1, format, blocksize);
- if (ret && sense.sense_key==0x05 && sense.asc==0x20 && sense.ascq==0x00) {
- /*
- * SCSI-II devices are not required to support
- * READ_CD, so let's try switching block size
- */
- /* FIXME: switch back again... */
- if ((ret = cdrom_switch_blocksize(cdi, blocksize))) {
- kfree(cgc.buffer);
- return ret;
- }
- cgc.sense = NULL;
- ret = cdrom_read_cd(cdi, &cgc, lba, blocksize, 1);
- ret |= cdrom_switch_blocksize(cdi, blocksize);
- }
- if (!ret && copy_to_user((char __user *)arg, cgc.buffer, blocksize))
- ret = -EFAULT;
- kfree(cgc.buffer);
+ blocksize = CD_FRAMESIZE;
+ format = 2;
+ break;
+ case CDROMREADMODE2:
+ blocksize = CD_FRAMESIZE_RAW0;
+ break;
+ }
+ IOCTL_IN(arg, struct cdrom_msf, msf);
+ lba = msf_to_lba(msf.cdmsf_min0, msf.cdmsf_sec0, msf.cdmsf_frame0);
+ /* FIXME: we need upper bound checking, too!! */
+ if (lba < 0)
+ return -EINVAL;
+
+ cgc->buffer = kmalloc(blocksize, GFP_KERNEL);
+ if (cgc->buffer == NULL)
+ return -ENOMEM;
+
+ memset(&sense, 0, sizeof(sense));
+ cgc->sense = &sense;
+ cgc->data_direction = CGC_DATA_READ;
+ ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize);
+ if (ret && sense.sense_key == 0x05 &&
+ sense.asc == 0x20 &&
+ sense.ascq == 0x00) {
+ /*
+ * SCSI-II devices are not required to support
+ * READ_CD, so let's try switching block size
+ */
+ /* FIXME: switch back again... */
+ ret = cdrom_switch_blocksize(cdi, blocksize);
+ if (ret)
+ goto out;
+ cgc->sense = NULL;
+ ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1);
+ ret |= cdrom_switch_blocksize(cdi, blocksize);
+ }
+ if (!ret && copy_to_user(arg, cgc->buffer, blocksize))
+ ret = -EFAULT;
+out:
+ kfree(cgc->buffer);
+ return ret;
+}
+
+static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi,
+ void __user *arg)
+{
+ struct cdrom_read_audio ra;
+ int lba;
+
+ IOCTL_IN(arg, struct cdrom_read_audio, ra);
+
+ if (ra.addr_format == CDROM_MSF)
+ lba = msf_to_lba(ra.addr.msf.minute,
+ ra.addr.msf.second,
+ ra.addr.msf.frame);
+ else if (ra.addr_format == CDROM_LBA)
+ lba = ra.addr.lba;
+ else
+ return -EINVAL;
+
+ /* FIXME: we need upper bound checking, too!! */
+ if (lba < 0 || ra.nframes <= 0 || ra.nframes > CD_FRAMES)
+ return -EINVAL;
+
+ return cdrom_read_cdda(cdi, ra.buf, lba, ra.nframes);
+}
+
+static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi,
+ void __user *arg)
+{
+ int ret;
+ struct cdrom_subchnl q;
+ u_char requested, back;
+ IOCTL_IN(arg, struct cdrom_subchnl, q);
+ requested = q.cdsc_format;
+ if (!((requested == CDROM_MSF) ||
+ (requested == CDROM_LBA)))
+ return -EINVAL;
+ q.cdsc_format = CDROM_MSF;
+ ret = cdrom_read_subchannel(cdi, &q, 0);
+ if (ret)
return ret;
- }
- case CDROMREADAUDIO: {
- struct cdrom_read_audio ra;
- int lba;
-
- IOCTL_IN(arg, struct cdrom_read_audio, ra);
-
- if (ra.addr_format == CDROM_MSF)
- lba = msf_to_lba(ra.addr.msf.minute,
- ra.addr.msf.second,
- ra.addr.msf.frame);
- else if (ra.addr_format == CDROM_LBA)
- lba = ra.addr.lba;
- else
- return -EINVAL;
+ back = q.cdsc_format; /* local copy */
+ sanitize_format(&q.cdsc_absaddr, &back, requested);
+ sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested);
+ IOCTL_OUT(arg, struct cdrom_subchnl, q);
+ /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */
+ return 0;
+}
- /* FIXME: we need upper bound checking, too!! */
- if (lba < 0 || ra.nframes <= 0 || ra.nframes > CD_FRAMES)
- return -EINVAL;
+static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi,
+ void __user *arg,
+ struct packet_command *cgc)
+{
+ struct cdrom_device_ops *cdo = cdi->ops;
+ struct cdrom_msf msf;
+ cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
+ IOCTL_IN(arg, struct cdrom_msf, msf);
+ cgc->cmd[0] = GPCMD_PLAY_AUDIO_MSF;
+ cgc->cmd[3] = msf.cdmsf_min0;
+ cgc->cmd[4] = msf.cdmsf_sec0;
+ cgc->cmd[5] = msf.cdmsf_frame0;
+ cgc->cmd[6] = msf.cdmsf_min1;
+ cgc->cmd[7] = msf.cdmsf_sec1;
+ cgc->cmd[8] = msf.cdmsf_frame1;
+ cgc->data_direction = CGC_DATA_NONE;
+ return cdo->generic_packet(cdi, cgc);
+}
- return cdrom_read_cdda(cdi, ra.buf, lba, ra.nframes);
- }
- case CDROMSUBCHNL: {
- struct cdrom_subchnl q;
- u_char requested, back;
- IOCTL_IN(arg, struct cdrom_subchnl, q);
- requested = q.cdsc_format;
- if (!((requested == CDROM_MSF) ||
- (requested == CDROM_LBA)))
- return -EINVAL;
- q.cdsc_format = CDROM_MSF;
- if ((ret = cdrom_read_subchannel(cdi, &q, 0)))
- return ret;
- back = q.cdsc_format; /* local copy */
- sanitize_format(&q.cdsc_absaddr, &back, requested);
- sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested);
- IOCTL_OUT(arg, struct cdrom_subchnl, q);
- /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */
- return 0;
- }
- case CDROMPLAYMSF: {
- struct cdrom_msf msf;
- cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
- IOCTL_IN(arg, struct cdrom_msf, msf);
- cgc.cmd[0] = GPCMD_PLAY_AUDIO_MSF;
- cgc.cmd[3] = msf.cdmsf_min0;
- cgc.cmd[4] = msf.cdmsf_sec0;
- cgc.cmd[5] = msf.cdmsf_frame0;
- cgc.cmd[6] = msf.cdmsf_min1;
- cgc.cmd[7] = msf.cdmsf_sec1;
- cgc.cmd[8] = msf.cdmsf_frame1;
- cgc.data_direction = CGC_DATA_NONE;
- return cdo->generic_packet(cdi, &cgc);
- }
- case CDROMPLAYBLK: {
- struct cdrom_blk blk;
- cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
- IOCTL_IN(arg, struct cdrom_blk, blk);
- cgc.cmd[0] = GPCMD_PLAY_AUDIO_10;
- cgc.cmd[2] = (blk.from >> 24) & 0xff;
- cgc.cmd[3] = (blk.from >> 16) & 0xff;
- cgc.cmd[4] = (blk.from >> 8) & 0xff;
- cgc.cmd[5] = blk.from & 0xff;
- cgc.cmd[7] = (blk.len >> 8) & 0xff;
- cgc.cmd[8] = blk.len & 0xff;
- cgc.data_direction = CGC_DATA_NONE;
- return cdo->generic_packet(cdi, &cgc);
- }
- case CDROMVOLCTRL:
- case CDROMVOLREAD: {
- struct cdrom_volctrl volctrl;
- char mask[sizeof(buffer)];
- unsigned short offset;
+static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi,
+ void __user *arg,
+ struct packet_command *cgc)
+{
+ struct cdrom_device_ops *cdo = cdi->ops;
+ struct cdrom_blk blk;
+ cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
+ IOCTL_IN(arg, struct cdrom_blk, blk);
+ cgc->cmd[0] = GPCMD_PLAY_AUDIO_10;
+ cgc->cmd[2] = (blk.from >> 24) & 0xff;
+ cgc->cmd[3] = (blk.from >> 16) & 0xff;
+ cgc->cmd[4] = (blk.from >> 8) & 0xff;
+ cgc->cmd[5] = blk.from & 0xff;
+ cgc->cmd[7] = (blk.len >> 8) & 0xff;
+ cgc->cmd[8] = blk.len & 0xff;
+ cgc->data_direction = CGC_DATA_NONE;
+ return cdo->generic_packet(cdi, cgc);
+}
+
+static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi,
+ void __user *arg,
+ struct packet_command *cgc,
+ unsigned int cmd)
+{
+ struct cdrom_volctrl volctrl;
+ unsigned char buffer[32];
+ char mask[sizeof(buffer)];
+ unsigned short offset;
+ int ret;
- cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n");
+ cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n");
- IOCTL_IN(arg, struct cdrom_volctrl, volctrl);
+ IOCTL_IN(arg, struct cdrom_volctrl, volctrl);
- cgc.buffer = buffer;
- cgc.buflen = 24;
- if ((ret = cdrom_mode_sense(cdi, &cgc, GPMODE_AUDIO_CTL_PAGE, 0)))
- return ret;
+ cgc->buffer = buffer;
+ cgc->buflen = 24;
+ ret = cdrom_mode_sense(cdi, cgc, GPMODE_AUDIO_CTL_PAGE, 0);
+ if (ret)
+ return ret;
- /* originally the code depended on buffer[1] to determine
- how much data is available for transfer. buffer[1] is
- unfortunately ambigious and the only reliable way seem
- to be to simply skip over the block descriptor... */
- offset = 8 + be16_to_cpu(*(__be16 *)(buffer+6));
-
- if (offset + 16 > sizeof(buffer))
- return -E2BIG;
-
- if (offset + 16 > cgc.buflen) {
- cgc.buflen = offset+16;
- ret = cdrom_mode_sense(cdi, &cgc,
- GPMODE_AUDIO_CTL_PAGE, 0);
- if (ret)
- return ret;
- }
+ /* originally the code depended on buffer[1] to determine
+ how much data is available for transfer. buffer[1] is
+ unfortunately ambigious and the only reliable way seem
+ to be to simply skip over the block descriptor... */
+ offset = 8 + be16_to_cpu(*(__be16 *)(buffer + 6));
+
+ if (offset + 16 > sizeof(buffer))
+ return -E2BIG;
+
+ if (offset + 16 > cgc->buflen) {
+ cgc->buflen = offset + 16;
+ ret = cdrom_mode_sense(cdi, cgc,
+ GPMODE_AUDIO_CTL_PAGE, 0);
+ if (ret)
+ return ret;
+ }
- /* sanity check */
- if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE ||
- buffer[offset+1] < 14)
- return -EINVAL;
+ /* sanity check */
+ if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE ||
+ buffer[offset + 1] < 14)
+ return -EINVAL;
- /* now we have the current volume settings. if it was only
- a CDROMVOLREAD, return these values */
- if (cmd == CDROMVOLREAD) {
- volctrl.channel0 = buffer[offset+9];
- volctrl.channel1 = buffer[offset+11];
- volctrl.channel2 = buffer[offset+13];
- volctrl.channel3 = buffer[offset+15];
- IOCTL_OUT(arg, struct cdrom_volctrl, volctrl);
- return 0;
- }
+ /* now we have the current volume settings. if it was only
+ a CDROMVOLREAD, return these values */
+ if (cmd == CDROMVOLREAD) {
+ volctrl.channel0 = buffer[offset+9];
+ volctrl.channel1 = buffer[offset+11];
+ volctrl.channel2 = buffer[offset+13];
+ volctrl.channel3 = buffer[offset+15];
+ IOCTL_OUT(arg, struct cdrom_volctrl, volctrl);
+ return 0;
+ }
- /* get the volume mask */
- cgc.buffer = mask;
- if ((ret = cdrom_mode_sense(cdi, &cgc,
- GPMODE_AUDIO_CTL_PAGE, 1)))
- return ret;
+ /* get the volume mask */
+ cgc->buffer = mask;
+ ret = cdrom_mode_sense(cdi, cgc, GPMODE_AUDIO_CTL_PAGE, 1);
+ if (ret)
+ return ret;
- buffer[offset+9] = volctrl.channel0 & mask[offset+9];
- buffer[offset+11] = volctrl.channel1 & mask[offset+11];
- buffer[offset+13] = volctrl.channel2 & mask[offset+13];
- buffer[offset+15] = volctrl.channel3 & mask[offset+15];
+ buffer[offset + 9] = volctrl.channel0 & mask[offset + 9];
+ buffer[offset + 11] = volctrl.channel1 & mask[offset + 11];
+ buffer[offset + 13] = volctrl.channel2 & mask[offset + 13];
+ buffer[offset + 15] = volctrl.channel3 & mask[offset + 15];
- /* set volume */
- cgc.buffer = buffer + offset - 8;
- memset(cgc.buffer, 0, 8);
- return cdrom_mode_select(cdi, &cgc);
- }
+ /* set volume */
+ cgc->buffer = buffer + offset - 8;
+ memset(cgc->buffer, 0, 8);
+ return cdrom_mode_select(cdi, cgc);
+}
- case CDROMSTART:
- case CDROMSTOP: {
- cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n");
- cgc.cmd[0] = GPCMD_START_STOP_UNIT;
- cgc.cmd[1] = 1;
- cgc.cmd[4] = (cmd == CDROMSTART) ? 1 : 0;
- cgc.data_direction = CGC_DATA_NONE;
- return cdo->generic_packet(cdi, &cgc);
- }
+static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi,
+ struct packet_command *cgc,
+ int cmd)
+{
+ struct cdrom_device_ops *cdo = cdi->ops;
+ cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n");
+ cgc->cmd[0] = GPCMD_START_STOP_UNIT;
+ cgc->cmd[1] = 1;
+ cgc->cmd[4] = (cmd == CDROMSTART) ? 1 : 0;
+ cgc->data_direction = CGC_DATA_NONE;
+ return cdo->generic_packet(cdi, cgc);
+}
- case CDROMPAUSE:
- case CDROMRESUME: {
- cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n");
- cgc.cmd[0] = GPCMD_PAUSE_RESUME;
- cgc.cmd[8] = (cmd == CDROMRESUME) ? 1 : 0;
- cgc.data_direction = CGC_DATA_NONE;
- return cdo->generic_packet(cdi, &cgc);
- }
+static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi,
+ struct packet_command *cgc,
+ int cmd)
+{
+ struct cdrom_device_ops *cdo = cdi->ops;
+ cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n");
+ cgc->cmd[0] = GPCMD_PAUSE_RESUME;
+ cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0;
+ cgc->data_direction = CGC_DATA_NONE;
+ return cdo->generic_packet(cdi, cgc);
+}
- case DVD_READ_STRUCT: {
- dvd_struct *s;
- int size = sizeof(dvd_struct);
- if (!CDROM_CAN(CDC_DVD))
- return -ENOSYS;
- if ((s = kmalloc(size, GFP_KERNEL)) == NULL)
- return -ENOMEM;
- cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n");
- if (copy_from_user(s, (dvd_struct __user *)arg, size)) {
- kfree(s);
- return -EFAULT;
- }
- if ((ret = dvd_read_struct(cdi, s))) {
- kfree(s);
- return ret;
- }
- if (copy_to_user((dvd_struct __user *)arg, s, size))
- ret = -EFAULT;
+static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi,
+ void __user *arg,
+ struct packet_command *cgc)
+{
+ int ret;
+ dvd_struct *s;
+ int size = sizeof(dvd_struct);
+
+ if (!CDROM_CAN(CDC_DVD))
+ return -ENOSYS;
+
+ s = kmalloc(size, GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n");
+ if (copy_from_user(s, arg, size)) {
kfree(s);
+ return -EFAULT;
+ }
+
+ ret = dvd_read_struct(cdi, s, cgc);
+ if (ret)
+ goto out;
+
+ if (copy_to_user(arg, s, size))
+ ret = -EFAULT;
+out:
+ kfree(s);
+ return ret;
+}
+
+static noinline int mmc_ioctl_dvd_auth(struct cdrom_device_info *cdi,
+ void __user *arg)
+{
+ int ret;
+ dvd_authinfo ai;
+ if (!CDROM_CAN(CDC_DVD))
+ return -ENOSYS;
+ cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n");
+ IOCTL_IN(arg, dvd_authinfo, ai);
+ ret = dvd_do_auth(cdi, &ai);
+ if (ret)
return ret;
- }
+ IOCTL_OUT(arg, dvd_authinfo, ai);
+ return 0;
+}
- case DVD_AUTH: {
- dvd_authinfo ai;
- if (!CDROM_CAN(CDC_DVD))
- return -ENOSYS;
- cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n");
- IOCTL_IN(arg, dvd_authinfo, ai);
- if ((ret = dvd_do_auth (cdi, &ai)))
- return ret;
- IOCTL_OUT(arg, dvd_authinfo, ai);
- return 0;
- }
+static noinline int mmc_ioctl_cdrom_next_writable(struct cdrom_device_info *cdi,
+ void __user *arg)
+{
+ int ret;
+ long next = 0;
+ cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n");
+ ret = cdrom_get_next_writable(cdi, &next);
+ if (ret)
+ return ret;
+ IOCTL_OUT(arg, long, next);
+ return 0;
+}
- case CDROM_NEXT_WRITABLE: {
- long next = 0;
- cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n");
- if ((ret = cdrom_get_next_writable(cdi, &next)))
- return ret;
- IOCTL_OUT(arg, long, next);
- return 0;
- }
- case CDROM_LAST_WRITTEN: {
- long last = 0;
- cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n");
- if ((ret = cdrom_get_last_written(cdi, &last)))
- return ret;
- IOCTL_OUT(arg, long, last);
- return 0;
- }
- } /* switch */
+static noinline int mmc_ioctl_cdrom_last_written(struct cdrom_device_info *cdi,
+ void __user *arg)
+{
+ int ret;
+ long last = 0;
+ cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n");
+ ret = cdrom_get_last_written(cdi, &last);
+ if (ret)
+ return ret;
+ IOCTL_OUT(arg, long, last);
+ return 0;
+}
+
+static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
+ unsigned long arg)
+{
+ struct packet_command cgc;
+ void __user *userptr = (void __user *)arg;
+
+ memset(&cgc, 0, sizeof(cgc));
+
+ /* build a unified command and queue it through
+ cdo->generic_packet() */
+ switch (cmd) {
+ case CDROMREADRAW:
+ case CDROMREADMODE1:
+ case CDROMREADMODE2:
+ return mmc_ioctl_cdrom_read_data(cdi, userptr, &cgc, cmd);
+ case CDROMREADAUDIO:
+ return mmc_ioctl_cdrom_read_audio(cdi, userptr);
+ case CDROMSUBCHNL:
+ return mmc_ioctl_cdrom_subchannel(cdi, userptr);
+ case CDROMPLAYMSF:
+ return mmc_ioctl_cdrom_play_msf(cdi, userptr, &cgc);
+ case CDROMPLAYBLK:
+ return mmc_ioctl_cdrom_play_blk(cdi, userptr, &cgc);
+ case CDROMVOLCTRL:
+ case CDROMVOLREAD:
+ return mmc_ioctl_cdrom_volume(cdi, userptr, &cgc, cmd);
+ case CDROMSTART:
+ case CDROMSTOP:
+ return mmc_ioctl_cdrom_start_stop(cdi, &cgc, cmd);
+ case CDROMPAUSE:
+ case CDROMRESUME:
+ return mmc_ioctl_cdrom_pause_resume(cdi, &cgc, cmd);
+ case DVD_READ_STRUCT:
+ return mmc_ioctl_dvd_read_struct(cdi, userptr, &cgc);
+ case DVD_AUTH:
+ return mmc_ioctl_dvd_auth(cdi, userptr);
+ case CDROM_NEXT_WRITABLE:
+ return mmc_ioctl_cdrom_next_writable(cdi, userptr);
+ case CDROM_LAST_WRITTEN:
+ return mmc_ioctl_cdrom_last_written(cdi, userptr);
+ }
return -ENOTTY;
}
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ce26c84af06..3326750ec02 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1060,7 +1060,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_page_pool;
}
- cc->bs = bioset_create(MIN_IOS, MIN_IOS);
+ cc->bs = bioset_create(MIN_IOS, 0);
if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset";
goto bad_bs;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2fd6d445063..a34338567a2 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -56,7 +56,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
if (!client->pool)
goto bad;
- client->bios = bioset_create(16, 16);
+ client->bios = bioset_create(16, 0);
if (!client->bios)
goto bad;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 343094c3fee..421c9f02d8c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1093,7 +1093,7 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->tio_pool)
goto bad_tio_pool;
- md->bs = bioset_create(16, 16);
+ md->bs = bioset_create(16, 0);
if (!md->bs)
goto bad_no_bioset;
diff --git a/fs/aio.c b/fs/aio.c
index f658441d566..d6f89d3c15e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx)
kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
} while(0)
+static void ctx_rcu_free(struct rcu_head *head)
+{
+ struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+ unsigned nr_events = ctx->max_reqs;
+
+ kmem_cache_free(kioctx_cachep, ctx);
+
+ if (nr_events) {
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - nr_events > aio_nr);
+ aio_nr -= nr_events;
+ spin_unlock(&aio_nr_lock);
+ }
+}
/* __put_ioctx
* Called when the last user of an aio context has gone away,
@@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx)
*/
static void __put_ioctx(struct kioctx *ctx)
{
- unsigned nr_events = ctx->max_reqs;
-
BUG_ON(ctx->reqs_active);
cancel_delayed_work(&ctx->wq);
@@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx)
mmdrop(ctx->mm);
ctx->mm = NULL;
pr_debug("__put_ioctx: freeing %p\n", ctx);
- kmem_cache_free(kioctx_cachep, ctx);
-
- if (nr_events) {
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - nr_events > aio_nr);
- aio_nr -= nr_events;
- spin_unlock(&aio_nr_lock);
- }
+ call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
#define get_ioctx(kioctx) do { \
@@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
{
struct mm_struct *mm;
struct kioctx *ctx;
+ int did_sync = 0;
/* Prevent overflows */
if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
goto out_freectx;
/* limit the number of system wide aios */
- spin_lock(&aio_nr_lock);
- if (aio_nr + ctx->max_reqs > aio_max_nr ||
- aio_nr + ctx->max_reqs < aio_nr)
- ctx->max_reqs = 0;
- else
- aio_nr += ctx->max_reqs;
- spin_unlock(&aio_nr_lock);
+ do {
+ spin_lock_bh(&aio_nr_lock);
+ if (aio_nr + nr_events > aio_max_nr ||
+ aio_nr + nr_events < aio_nr)
+ ctx->max_reqs = 0;
+ else
+ aio_nr += ctx->max_reqs;
+ spin_unlock_bh(&aio_nr_lock);
+ if (ctx->max_reqs || did_sync)
+ break;
+
+ /* wait for rcu callbacks to have completed before giving up */
+ synchronize_rcu();
+ did_sync = 1;
+ ctx->max_reqs = nr_events;
+ } while (1);
+
if (ctx->max_reqs == 0)
goto out_cleanup;
/* now link into global list. */
- write_lock(&mm->ioctx_list_lock);
- ctx->next = mm->ioctx_list;
- mm->ioctx_list = ctx;
- write_unlock(&mm->ioctx_list_lock);
+ spin_lock(&mm->ioctx_lock);
+ hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
+ spin_unlock(&mm->ioctx_lock);
dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
@@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
*/
void exit_aio(struct mm_struct *mm)
{
- struct kioctx *ctx = mm->ioctx_list;
- mm->ioctx_list = NULL;
- while (ctx) {
- struct kioctx *next = ctx->next;
- ctx->next = NULL;
+ struct kioctx *ctx;
+
+ while (!hlist_empty(&mm->ioctx_list)) {
+ ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
+ hlist_del_rcu(&ctx->list);
+
aio_cancel_all(ctx);
wait_for_all_aios(ctx);
@@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm)
atomic_read(&ctx->users), ctx->dead,
ctx->reqs_active);
put_ioctx(ctx);
- ctx = next;
}
}
@@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req)
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
- struct kioctx *ioctx;
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
+ struct kioctx *ctx = NULL;
+ struct hlist_node *n;
- mm = current->mm;
- read_lock(&mm->ioctx_list_lock);
- for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next)
- if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) {
- get_ioctx(ioctx);
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
+ if (ctx->user_id == ctx_id && !ctx->dead) {
+ get_ioctx(ctx);
break;
}
- read_unlock(&mm->ioctx_list_lock);
+ }
- return ioctx;
+ rcu_read_unlock();
+ return ctx;
}
/*
@@ -1215,19 +1232,14 @@ out:
static void io_destroy(struct kioctx *ioctx)
{
struct mm_struct *mm = current->mm;
- struct kioctx **tmp;
int was_dead;
/* delete the entry from the list is someone else hasn't already */
- write_lock(&mm->ioctx_list_lock);
+ spin_lock(&mm->ioctx_lock);
was_dead = ioctx->dead;
ioctx->dead = 1;
- for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx;
- tmp = &(*tmp)->next)
- ;
- if (*tmp)
- *tmp = ioctx->next;
- write_unlock(&mm->ioctx_list_lock);
+ hlist_del_rcu(&ioctx->list);
+ spin_unlock(&mm->ioctx_lock);
dprintk("aio_release(%p)\n", ioctx);
if (likely(!was_dead))
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 19caf7c962a..77ebc3c263d 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
&& bip->bip_buf != NULL)
kfree(bip->bip_buf);
- mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
+ bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
mempool_free(bip, bs->bio_integrity_pool);
bio->bi_integrity = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index df99c882b80..711cee10360 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,7 +31,11 @@
DEFINE_TRACE(block_split);
-static struct kmem_cache *bio_slab __read_mostly;
+/*
+ * Test patch to inline a certain number of bi_io_vec's inside the bio
+ * itself, to shrink a bio data allocation from two mempool calls to one
+ */
+#define BIO_INLINE_VECS 4
static mempool_t *bio_split_pool __read_mostly;
@@ -40,9 +44,8 @@ static mempool_t *bio_split_pool __read_mostly;
* break badly! cannot be bigger than what you can fit into an
* unsigned short
*/
-
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV
@@ -53,12 +56,121 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
*/
struct bio_set *fs_bio_set;
+/*
+ * Our slab pool management
+ */
+struct bio_slab {
+ struct kmem_cache *slab;
+ unsigned int slab_ref;
+ unsigned int slab_size;
+ char name[8];
+};
+static DEFINE_MUTEX(bio_slab_lock);
+static struct bio_slab *bio_slabs;
+static unsigned int bio_slab_nr, bio_slab_max;
+
+static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
+{
+ unsigned int sz = sizeof(struct bio) + extra_size;
+ struct kmem_cache *slab = NULL;
+ struct bio_slab *bslab;
+ unsigned int i, entry = -1;
+
+ mutex_lock(&bio_slab_lock);
+
+ i = 0;
+ while (i < bio_slab_nr) {
+ struct bio_slab *bslab = &bio_slabs[i];
+
+ if (!bslab->slab && entry == -1)
+ entry = i;
+ else if (bslab->slab_size == sz) {
+ slab = bslab->slab;
+ bslab->slab_ref++;
+ break;
+ }
+ i++;
+ }
+
+ if (slab)
+ goto out_unlock;
+
+ if (bio_slab_nr == bio_slab_max && entry == -1) {
+ bio_slab_max <<= 1;
+ bio_slabs = krealloc(bio_slabs,
+ bio_slab_max * sizeof(struct bio_slab),
+ GFP_KERNEL);
+ if (!bio_slabs)
+ goto out_unlock;
+ }
+ if (entry == -1)
+ entry = bio_slab_nr++;
+
+ bslab = &bio_slabs[entry];
+
+ snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
+ slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!slab)
+ goto out_unlock;
+
+ printk("bio: create slab <%s> at %d\n", bslab->name, entry);
+ bslab->slab = slab;
+ bslab->slab_ref = 1;
+ bslab->slab_size = sz;
+out_unlock:
+ mutex_unlock(&bio_slab_lock);
+ return slab;
+}
+
+static void bio_put_slab(struct bio_set *bs)
+{
+ struct bio_slab *bslab = NULL;
+ unsigned int i;
+
+ mutex_lock(&bio_slab_lock);
+
+ for (i = 0; i < bio_slab_nr; i++) {
+ if (bs->bio_slab == bio_slabs[i].slab) {
+ bslab = &bio_slabs[i];
+ break;
+ }
+ }
+
+ if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
+ goto out;
+
+ WARN_ON(!bslab->slab_ref);
+
+ if (--bslab->slab_ref)
+ goto out;
+
+ kmem_cache_destroy(bslab->slab);
+ bslab->slab = NULL;
+
+out:
+ mutex_unlock(&bio_slab_lock);
+}
+
unsigned int bvec_nr_vecs(unsigned short idx)
{
return bvec_slabs[idx].nr_vecs;
}
-struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
+{
+ BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
+
+ if (idx == BIOVEC_MAX_IDX)
+ mempool_free(bv, bs->bvec_pool);
+ else {
+ struct biovec_slab *bvs = bvec_slabs + idx;
+
+ kmem_cache_free(bvs->slab, bv);
+ }
+}
+
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
+ struct bio_set *bs)
{
struct bio_vec *bvl;
@@ -67,60 +179,85 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
* If not, this is a bio_kmalloc() allocation and just do a
* kzalloc() for the exact number of vecs right away.
*/
- if (bs) {
+ if (!bs)
+ bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
+
+ /*
+ * see comment near bvec_array define!
+ */
+ switch (nr) {
+ case 1:
+ *idx = 0;
+ break;
+ case 2 ... 4:
+ *idx = 1;
+ break;
+ case 5 ... 16:
+ *idx = 2;
+ break;
+ case 17 ... 64:
+ *idx = 3;
+ break;
+ case 65 ... 128:
+ *idx = 4;
+ break;
+ case 129 ... BIO_MAX_PAGES:
+ *idx = 5;
+ break;
+ default:
+ return NULL;
+ }
+
+ /*
+ * idx now points to the pool we want to allocate from. only the
+ * 1-vec entry pool is mempool backed.
+ */
+ if (*idx == BIOVEC_MAX_IDX) {
+fallback:
+ bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
+ } else {
+ struct biovec_slab *bvs = bvec_slabs + *idx;
+ gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
/*
- * see comment near bvec_array define!
+ * Make this allocation restricted and don't dump info on
+ * allocation failures, since we'll fallback to the mempool
+ * in case of failure.
*/
- switch (nr) {
- case 1:
- *idx = 0;
- break;
- case 2 ... 4:
- *idx = 1;
- break;
- case 5 ... 16:
- *idx = 2;
- break;
- case 17 ... 64:
- *idx = 3;
- break;
- case 65 ... 128:
- *idx = 4;
- break;
- case 129 ... BIO_MAX_PAGES:
- *idx = 5;
- break;
- default:
- return NULL;
- }
+ __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
/*
- * idx now points to the pool we want to allocate from
+ * Try a slab allocation. If this fails and __GFP_WAIT
+ * is set, retry with the 1-entry mempool
*/
- bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
- if (bvl)
- memset(bvl, 0,
- bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
- } else
- bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
+ bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
+ if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
+ *idx = BIOVEC_MAX_IDX;
+ goto fallback;
+ }
+ }
return bvl;
}
-void bio_free(struct bio *bio, struct bio_set *bio_set)
+void bio_free(struct bio *bio, struct bio_set *bs)
{
- if (bio->bi_io_vec) {
- const int pool_idx = BIO_POOL_IDX(bio);
+ void *p;
- BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
- mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
- }
+ if (bio_has_allocated_vec(bio))
+ bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
if (bio_integrity(bio))
- bio_integrity_free(bio, bio_set);
+ bio_integrity_free(bio, bs);
+
+ /*
+ * If we have front padding, adjust the bio pointer before freeing
+ */
+ p = bio;
+ if (bs->front_pad)
+ p -= bs->front_pad;
- mempool_free(bio, bio_set->bio_pool);
+ mempool_free(p, bs->bio_pool);
}
/*
@@ -133,7 +270,8 @@ static void bio_fs_destructor(struct bio *bio)
static void bio_kmalloc_destructor(struct bio *bio)
{
- kfree(bio->bi_io_vec);
+ if (bio_has_allocated_vec(bio))
+ kfree(bio->bi_io_vec);
kfree(bio);
}
@@ -157,16 +295,20 @@ void bio_init(struct bio *bio)
* for a &struct bio to become free. If a %NULL @bs is passed in, we will
* fall back to just using @kmalloc to allocate the required memory.
*
- * allocate bio and iovecs from the memory pools specified by the
- * bio_set structure, or @kmalloc if none given.
+ * Note that the caller must set ->bi_destructor on succesful return
+ * of a bio, to do the appropriate freeing of the bio once the reference
+ * count drops to zero.
**/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
- struct bio *bio;
+ struct bio *bio = NULL;
+
+ if (bs) {
+ void *p = mempool_alloc(bs->bio_pool, gfp_mask);
- if (bs)
- bio = mempool_alloc(bs->bio_pool, gfp_mask);
- else
+ if (p)
+ bio = p + bs->front_pad;
+ } else
bio = kmalloc(sizeof(*bio), gfp_mask);
if (likely(bio)) {
@@ -176,7 +318,15 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
if (likely(nr_iovecs)) {
unsigned long uninitialized_var(idx);
- bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+ if (nr_iovecs <= BIO_INLINE_VECS) {
+ idx = 0;
+ bvl = bio->bi_inline_vecs;
+ nr_iovecs = BIO_INLINE_VECS;
+ } else {
+ bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
+ bs);
+ nr_iovecs = bvec_nr_vecs(idx);
+ }
if (unlikely(!bvl)) {
if (bs)
mempool_free(bio, bs->bio_pool);
@@ -186,7 +336,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
goto out;
}
bio->bi_flags |= idx << BIO_POOL_OFFSET;
- bio->bi_max_vecs = bvec_nr_vecs(idx);
+ bio->bi_max_vecs = nr_iovecs;
}
bio->bi_io_vec = bvl;
}
@@ -1346,30 +1496,18 @@ EXPORT_SYMBOL(bio_sector_offset);
*/
static int biovec_create_pools(struct bio_set *bs, int pool_entries)
{
- int i;
+ struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
- for (i = 0; i < BIOVEC_NR_POOLS; i++) {
- struct biovec_slab *bp = bvec_slabs + i;
- mempool_t **bvp = bs->bvec_pools + i;
+ bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
+ if (!bs->bvec_pool)
+ return -ENOMEM;
- *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
- if (!*bvp)
- return -ENOMEM;
- }
return 0;
}
static void biovec_free_pools(struct bio_set *bs)
{
- int i;
-
- for (i = 0; i < BIOVEC_NR_POOLS; i++) {
- mempool_t *bvp = bs->bvec_pools[i];
-
- if (bvp)
- mempool_destroy(bvp);
- }
-
+ mempool_destroy(bs->bvec_pool);
}
void bioset_free(struct bio_set *bs)
@@ -1379,25 +1517,49 @@ void bioset_free(struct bio_set *bs)
bioset_integrity_free(bs);
biovec_free_pools(bs);
+ bio_put_slab(bs);
kfree(bs);
}
-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
+/**
+ * bioset_create - Create a bio_set
+ * @pool_size: Number of bio and bio_vecs to cache in the mempool
+ * @front_pad: Number of bytes to allocate in front of the returned bio
+ *
+ * Description:
+ * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ * to ask for a number of bytes to be allocated in front of the bio.
+ * Front pad allocation is useful for embedding the bio inside
+ * another structure, to avoid allocating extra data to go with the bio.
+ * Note that the bio must be embedded at the END of that structure always,
+ * or things will break badly.
+ */
+struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
{
- struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
+ unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
+ struct bio_set *bs;
+ bs = kzalloc(sizeof(*bs), GFP_KERNEL);
if (!bs)
return NULL;
- bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
+ bs->front_pad = front_pad;
+
+ bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
+ if (!bs->bio_slab) {
+ kfree(bs);
+ return NULL;
+ }
+
+ bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
if (!bs->bio_pool)
goto bad;
- if (bioset_integrity_create(bs, bio_pool_size))
+ if (bioset_integrity_create(bs, pool_size))
goto bad;
- if (!biovec_create_pools(bs, bvec_pool_size))
+ if (!biovec_create_pools(bs, pool_size))
return bs;
bad:
@@ -1421,12 +1583,16 @@ static void __init biovec_init_slabs(void)
static int __init init_bio(void)
{
- bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ bio_slab_max = 2;
+ bio_slab_nr = 0;
+ bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
+ if (!bio_slabs)
+ panic("bio: can't allocate bios\n");
bio_integrity_init_slab();
biovec_init_slabs();
- fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
+ fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
if (!fs_bio_set)
panic("bio: can't allocate bios\n");
diff --git a/fs/buffer.c b/fs/buffer.c
index 10179cfa115..776ae091d3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page)
page_cache_release(page);
}
+
+static int quiet_error(struct buffer_head *bh)
+{
+ if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
+ return 0;
+ return 1;
+}
+
+
static void buffer_io_error(struct buffer_head *bh)
{
char b[BDEVNAME_SIZE];
-
printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
bdevname(bh->b_bdev, b),
(unsigned long long)bh->b_blocknr);
@@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
- if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
+ if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
buffer_io_error(bh);
printk(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
@@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
set_buffer_uptodate(bh);
} else {
clear_buffer_uptodate(bh);
- if (printk_ratelimit())
+ if (!quiet_error(bh))
buffer_io_error(bh);
SetPageError(page);
}
@@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
- if (printk_ratelimit()) {
+ if (!quiet_error(bh)) {
buffer_io_error(bh);
printk(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
@@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
set_bit(BH_Eopnotsupp, &bh->b_state);
}
+ if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
+ set_bit(BH_Quiet, &bh->b_state);
+
bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
bio_put(bio);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65db..04158ad74db 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
/* small i_blocks in vfs inode? */
if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
/*
- * CONFIG_LSF is not enabled implies the inode
+ * CONFIG_LBD is not enabled implies the inode
* i_block represent total blocks in 512 bytes
* 32 == size of vfs inode i_blocks * 8
*/
@@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
/*
- * !has_huge_files or CONFIG_LSF is not enabled
+ * !has_huge_files or CONFIG_LBD is not enabled
* implies the inode i_block represent total blocks in
* 512 bytes 32 == size of vfs inode i_blocks * 8
*/
@@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (has_huge_files) {
/*
* Large file size enabled file system can only be
- * mount if kernel is build with CONFIG_LSF
+ * mount if kernel is build with CONFIG_LBD
*/
if (sizeof(root->i_blocks) < sizeof(u64) &&
!(sb->s_flags & MS_RDONLY)) {
printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
"files cannot be mounted read-write "
- "without CONFIG_LSF.\n", sb->s_id);
+ "without CONFIG_LBD.\n", sb->s_id);
goto failed_mount;
}
}
diff --git a/include/linux/aio.h b/include/linux/aio.h
index f6b8cf99b59..b16a957030f 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -5,6 +5,7 @@
#include <linux/workqueue.h>
#include <linux/aio_abi.h>
#include <linux/uio.h>
+#include <linux/rcupdate.h>
#include <asm/atomic.h>
@@ -183,7 +184,7 @@ struct kioctx {
/* This needs improving */
unsigned long user_id;
- struct kioctx *next;
+ struct hlist_node list;
wait_queue_head_t wait;
@@ -199,6 +200,8 @@ struct kioctx {
struct aio_ring_info ring_info;
struct delayed_work wq;
+
+ struct rcu_head rcu_head;
};
/* prototypes */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6a642098e5c..18462c5b8ff 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -90,10 +90,11 @@ struct bio {
unsigned int bi_comp_cpu; /* completion CPU */
+ atomic_t bi_cnt; /* pin count */
+
struct bio_vec *bi_io_vec; /* the actual vec list */
bio_end_io_t *bi_end_io;
- atomic_t bi_cnt; /* pin count */
void *bi_private;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -101,6 +102,13 @@ struct bio {
#endif
bio_destructor_t *bi_destructor; /* destructor */
+
+ /*
+ * We can inline a number of vecs at the end of the bio, to avoid
+ * double allocations for a small number of bio_vecs. This member
+ * MUST obviously be kept at the very end of the bio.
+ */
+ struct bio_vec bi_inline_vecs[0];
};
/*
@@ -117,6 +125,7 @@ struct bio {
#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
#define BIO_NULL_MAPPED 9 /* contains invalid user pages */
#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
+#define BIO_QUIET 11 /* Make BIO Quiet */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/*
@@ -211,6 +220,11 @@ static inline void *bio_data(struct bio *bio)
return NULL;
}
+static inline int bio_has_allocated_vec(struct bio *bio)
+{
+ return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
+}
+
/*
* will die
*/
@@ -332,7 +346,7 @@ struct bio_pair {
extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
extern void bio_pair_release(struct bio_pair *dbio);
-extern struct bio_set *bioset_create(int, int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int);
extern void bioset_free(struct bio_set *);
extern struct bio *bio_alloc(gfp_t, int);
@@ -377,6 +391,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
extern int bio_uncopy_user(struct bio *);
void zero_fill_bio(struct bio *bio);
extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
+extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
extern unsigned int bvec_nr_vecs(unsigned short idx);
/*
@@ -395,13 +410,17 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
*/
#define BIO_POOL_SIZE 2
#define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
struct bio_set {
+ struct kmem_cache *bio_slab;
+ unsigned int front_pad;
+
mempool_t *bio_pool;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
mempool_t *bio_integrity_pool;
#endif
- mempool_t *bvec_pools[BIOVEC_NR_POOLS];
+ mempool_t *bvec_pool;
};
struct biovec_slab {
@@ -411,6 +430,7 @@ struct biovec_slab {
};
extern struct bio_set *fs_bio_set;
+extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
/*
* a small number of entries is fine, not going to be performance critical.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 031a315c050..7035cec583b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -26,7 +26,6 @@ struct scsi_ioctl_command;
struct request_queue;
struct elevator_queue;
-typedef struct elevator_queue elevator_t;
struct request_pm_state;
struct blk_trace;
struct request;
@@ -313,7 +312,7 @@ struct request_queue
*/
struct list_head queue_head;
struct request *last_merge;
- elevator_t *elevator;
+ struct elevator_queue *elevator;
/*
* the queue request freelist, one for reads and one for writes
@@ -449,6 +448,7 @@ struct request_queue
#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */
#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */
#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
+#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
static inline int queue_is_locked(struct request_queue *q)
{
@@ -522,22 +522,32 @@ enum {
* TAG_FLUSH : ordering by tag w/ pre and post flushes
* TAG_FUA : ordering by tag w/ pre flush and FUA write
*/
- QUEUE_ORDERED_NONE = 0x00,
- QUEUE_ORDERED_DRAIN = 0x01,
- QUEUE_ORDERED_TAG = 0x02,
-
- QUEUE_ORDERED_PREFLUSH = 0x10,
- QUEUE_ORDERED_POSTFLUSH = 0x20,
- QUEUE_ORDERED_FUA = 0x40,
-
- QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
- QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
- QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
- QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
- QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
- QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
- QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
- QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
+ QUEUE_ORDERED_BY_DRAIN = 0x01,
+ QUEUE_ORDERED_BY_TAG = 0x02,
+ QUEUE_ORDERED_DO_PREFLUSH = 0x10,
+ QUEUE_ORDERED_DO_BAR = 0x20,
+ QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
+ QUEUE_ORDERED_DO_FUA = 0x80,
+
+ QUEUE_ORDERED_NONE = 0x00,
+
+ QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN |
+ QUEUE_ORDERED_DO_BAR,
+ QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
+ QUEUE_ORDERED_DO_PREFLUSH |
+ QUEUE_ORDERED_DO_POSTFLUSH,
+ QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
+ QUEUE_ORDERED_DO_PREFLUSH |
+ QUEUE_ORDERED_DO_FUA,
+
+ QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG |
+ QUEUE_ORDERED_DO_BAR,
+ QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
+ QUEUE_ORDERED_DO_PREFLUSH |
+ QUEUE_ORDERED_DO_POSTFLUSH,
+ QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
+ QUEUE_ORDERED_DO_PREFLUSH |
+ QUEUE_ORDERED_DO_FUA,
/*
* Ordered operation sequence
@@ -585,7 +595,6 @@ enum {
#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
-#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
/* rq->queuelist of dequeued request must be list_empty() */
#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
@@ -855,10 +864,10 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
-extern int blk_do_ordered(struct request_queue *, struct request **);
+extern bool blk_do_ordered(struct request_queue *, struct request **);
extern unsigned blk_ordered_cur_seq(struct request_queue *);
extern unsigned blk_ordered_req_seq(struct request *);
-extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
+extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
@@ -977,7 +986,6 @@ static inline void put_dev_sector(Sector p)
struct work_struct;
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
-void kblockd_flush_work(struct work_struct *work);
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 3ce64b90118..8605f8a74df 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -35,6 +35,7 @@ enum bh_state_bits {
BH_Ordered, /* ordered write */
BH_Eopnotsupp, /* operation not supported (barrier) */
BH_Unwritten, /* Buffer is allocated on disk but not written */
+ BH_Quiet, /* Buffer Error Prinks to be quiet */
BH_PrivateStart,/* not a state bit, but the first bit available
* for private allocation by other entities
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 92f6f634e3e..7a204256b15 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -28,7 +28,7 @@ typedef void (elevator_activate_req_fn) (struct request_queue *, struct request
typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
typedef void *(elevator_init_fn) (struct request_queue *);
-typedef void (elevator_exit_fn) (elevator_t *);
+typedef void (elevator_exit_fn) (struct elevator_queue *);
struct elevator_ops
{
@@ -62,8 +62,8 @@ struct elevator_ops
struct elv_fs_entry {
struct attribute attr;
- ssize_t (*show)(elevator_t *, char *);
- ssize_t (*store)(elevator_t *, const char *, size_t);
+ ssize_t (*show)(struct elevator_queue *, char *);
+ ssize_t (*store)(struct elevator_queue *, const char *, size_t);
};
/*
@@ -130,7 +130,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
extern int elevator_init(struct request_queue *, char *);
-extern void elevator_exit(elevator_t *);
+extern void elevator_exit(struct elevator_queue *);
extern int elv_rq_merge_ok(struct request *, struct bio *);
/*
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 3df7742ce24..16948eaecae 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter {
struct disk_part_tbl {
struct rcu_head rcu_head;
int len;
+ struct hd_struct *last_lookup;
struct hd_struct *part[];
};
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fe825471d5a..9cfc9b627fd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -232,8 +232,9 @@ struct mm_struct {
struct core_state *core_state; /* coredumping support */
/* aio bits */
- rwlock_t ioctx_list_lock; /* aio lock */
- struct kioctx *ioctx_list;
+ spinlock_t ioctx_lock;
+ struct hlist_head ioctx_list;
+
#ifdef CONFIG_MM_OWNER
/*
* "owner" points to a task that is regarded as the canonical
diff --git a/include/linux/types.h b/include/linux/types.h
index 1d98330b1f2..121f349cb7e 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -135,19 +135,14 @@ typedef __s64 int64_t;
*
* Linux always considers sectors to be 512 bytes long independently
* of the devices real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
*/
#ifdef CONFIG_LBD
typedef u64 sector_t;
-#else
-typedef unsigned long sector_t;
-#endif
-
-/*
- * The type of the inode's block count.
- */
-#ifdef CONFIG_LSF
typedef u64 blkcnt_t;
#else
+typedef unsigned long sector_t;
typedef unsigned long blkcnt_t;
#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index a946221879d..c9e5a1c14e0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1037,8 +1037,6 @@ NORET_TYPE void do_exit(long code)
* task into the wait for ever nirwana as well.
*/
tsk->flags |= PF_EXITPIDONE;
- if (tsk->io_context)
- exit_io_context();
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 6144b36cd89..43cbf30669e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -415,8 +415,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
spin_lock_init(&mm->page_table_lock);
- rwlock_init(&mm->ioctx_list_lock);
- mm->ioctx_list = NULL;
+ spin_lock_init(&mm->ioctx_lock);
+ INIT_HLIST_HEAD(&mm->ioctx_list);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
mm_init_owner(mm, p);
diff --git a/mm/bounce.c b/mm/bounce.c
index bf0cf7c8387..e590272fe7a 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -198,8 +198,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
/*
* irk, bounce it
*/
- if (!bio)
- bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
+ if (!bio) {
+ unsigned int cnt = (*bio_orig)->bi_vcnt;
+
+ bio = bio_alloc(GFP_NOIO, cnt);
+ memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
+ }
+
to = bio->bi_io_vec + i;