about summary refs log tree commit diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-05-07 10:00:09 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-05-07 10:00:09 -0700
commit 03e5cb7b50feb687508946a702febaba24c77f0b (patch)
tree 3003da4195f9aa51814c26e263cdee7da46fb181
parent fc4354c6e5c21257cf4a50b32f7c11c7d65c55b3 (diff)
parent d2b7fa6174bc4260e496cbf84375c73636914641 (diff)
Merge tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux
Pull more io_uring updates from Jens Axboe:
 "Nothing major in here, just two different parts:

   - A small series from Breno that enables passing the full SQE down
     for ->uring_cmd().

     This is a prerequisite for enabling full network socket
     operations. Queued up a bit late because of some stylistic
     concerns that got resolved, would be nice to have this in 6.4-rc1
     so the dependent work will be easier to handle for 6.5.

   - Fix for the huge page coalescing, which was a regression
     introduced in the 6.3 kernel release (Tobias)"

* tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux:
  io_uring: Remove unnecessary BUILD_BUG_ON
  io_uring: Pass whole sqe to commands
  io_uring: Create a helper to return the SQE size
  io_uring/rsrc: check for nonconsecutive pages
-rw-r--r-- drivers/block/ublk_drv.c 40
-rw-r--r-- drivers/nvme/host/ioctl.c 2
-rw-r--r-- include/linux/io_uring.h 7
-rw-r--r-- io_uring/io_uring.h 10
-rw-r--r-- io_uring/opdef.c 2
-rw-r--r-- io_uring/rsrc.c 7
-rw-r--r-- io_uring/uring_cmd.c 12
-rw-r--r-- io_uring/uring_cmd.h 8
8 files changed, 47 insertions, 41 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 72a5cde9a5af..c7331f519750 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1035,7 +1035,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
}
static void ublk_commit_completion(struct ublk_device *ub,
- struct ublksrv_io_cmd *ub_cmd)
+ const struct ublksrv_io_cmd *ub_cmd)
{
u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
struct ublk_queue *ubq = ublk_get_queue(ub, qid);
@@ -1292,7 +1292,7 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
- struct ublksrv_io_cmd *ub_cmd)
+ const struct ublksrv_io_cmd *ub_cmd)
{
struct ublk_device *ub = cmd->file->private_data;
struct ublk_queue *ubq;
@@ -1399,17 +1399,17 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
- struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd;
- struct ublksrv_io_cmd ub_cmd;
-
/*
* Not necessary for async retry, but let's keep it simple and always
* copy the values to avoid any potential reuse.
*/
- ub_cmd.q_id = READ_ONCE(ub_src->q_id);
- ub_cmd.tag = READ_ONCE(ub_src->tag);
- ub_cmd.result = READ_ONCE(ub_src->result);
- ub_cmd.addr = READ_ONCE(ub_src->addr);
+ const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe);
+ const struct ublksrv_io_cmd ub_cmd = {
+ .q_id = READ_ONCE(ub_src->q_id),
+ .tag = READ_ONCE(ub_src->tag),
+ .result = READ_ONCE(ub_src->result),
+ .addr = READ_ONCE(ub_src->addr)
+ };
return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
}
@@ -1619,7 +1619,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
struct gendisk *disk;
int ret = -EINVAL;
@@ -1682,7 +1682,7 @@ out_unlock:
static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
cpumask_var_t cpumask;
unsigned long queue;
@@ -1733,7 +1733,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublksrv_ctrl_dev_info info;
struct ublk_device *ub;
@@ -1910,7 +1910,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
__func__, cmd->cmd_op, header->dev_id, header->queue_id,
@@ -1929,7 +1929,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub)
static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
@@ -1960,7 +1960,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
static int ublk_ctrl_get_params(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret;
@@ -1991,7 +1991,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub,
static int ublk_ctrl_set_params(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret = -EFAULT;
@@ -2052,7 +2052,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
static int ublk_ctrl_start_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ret = -EINVAL;
int i;
@@ -2094,7 +2094,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
static int ublk_ctrl_end_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL;
@@ -2161,7 +2161,7 @@ exit:
static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)io_uring_sqe_cmd(cmd->sqe);
bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
void __user *argp = (void __user *)(unsigned long)header->addr;
char *dev_path = NULL;
@@ -2240,7 +2240,7 @@ exit:
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
struct ublk_device *ub = NULL;
u32 cmd_op = cmd->cmd_op;
int ret = -EINVAL;
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index d24ea2e05156..81c5c9e38477 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -552,7 +552,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
- const struct nvme_uring_cmd *cmd = ioucmd->cmd;
+ const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 35b9328ca335..3399d979ee1c 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -24,7 +24,7 @@ enum io_uring_cmd_flags {
struct io_uring_cmd {
struct file *file;
- const void *cmd;
+ const struct io_uring_sqe *sqe;
union {
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
@@ -66,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring)
__io_uring_free(tsk);
}
+
+static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
+{
+ return sqe->cmd;
+}
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 25515d69d205..259bf798a390 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -394,4 +394,14 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
io_req_task_work_add(req);
}
+/*
+ * IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each
+ * slot.
+ */
+static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
+{
+ if (ctx->flags & IORING_SETUP_SQE128)
+ return 2 * sizeof(struct io_uring_sqe);
+ return sizeof(struct io_uring_sqe);
+}
#endif
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index cca7c5b55208..3b9c6489b8b6 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = {
},
[IORING_OP_URING_CMD] = {
.name = "URING_CMD",
- .async_size = uring_cmd_pdu_size(1),
+ .async_size = 2 * sizeof(struct io_uring_sqe),
.prep_async = io_uring_cmd_prep_async,
},
[IORING_OP_SEND_ZC] = {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d4c91393e0d3..d46f72a5ef73 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1116,7 +1116,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
if (nr_pages > 1) {
folio = page_folio(pages[0]);
for (i = 1; i < nr_pages; i++) {
- if (page_folio(pages[i]) != folio) {
+ /*
+ * Pages must be consecutive and on the same folio for
+ * this to work
+ */
+ if (page_folio(pages[i]) != folio ||
+ pages[i] != pages[i - 1] + 1) {
folio = NULL;
break;
}
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 5113c9a48583..5e32db48696d 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -69,15 +69,9 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done);
int io_uring_cmd_prep_async(struct io_kiocb *req)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- size_t cmd_size;
- BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16);
- BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80);
-
- cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
-
- memcpy(req->async_data, ioucmd->cmd, cmd_size);
- ioucmd->cmd = req->async_data;
+ memcpy(req->async_data, ioucmd->sqe, uring_sqe_size(req->ctx));
+ ioucmd->sqe = req->async_data;
return 0;
}
@@ -103,7 +97,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->imu = ctx->user_bufs[index];
io_req_set_rsrc_node(req, ctx, 0);
}
- ioucmd->cmd = sqe->cmd;
+ ioucmd->sqe = sqe;
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
return 0;
}
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 7c6697d13cb2..8117684ec3ca 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -3,11 +3,3 @@
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_uring_cmd_prep_async(struct io_kiocb *req);
-
-/*
- * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
- * the following sqe if SQE128 is used.
- */
-#define uring_cmd_pdu_size(is_sqe128) \
- ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \
- offsetof(struct io_uring_sqe, cmd))