From 33940d09937276cd3c81f2874faf43e37c2db0e2 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Tue, 16 Sep 2014 16:23:12 -0400 Subject: target: encapsulate smp_mb__after_atomic() The target code has a rather generous helping of smp_mb__after_atomic() throughout the code base. Most atomic operations were followed by one and none were preceded by smp_mb__before_atomic(), nor accompanied by a comment explaining the need for a barrier. Instead of trying to prove for every case whether or not it is needed, this patch introduces atomic_inc_mb() and atomic_dec_mb(), which explicitly include the memory barriers before and after the atomic operation. For now they are defined in a target header, although they could be of general use. Most of the existing atomic/mb combinations were replaced by the new helpers. In a few cases the atomic was sandwiched in spin_lock/spin_unlock and I simply removed the barrier. I suspect that in most cases the correct conversion would have been to drop the barrier. I also suspect that a few cases exist where a) the barrier was necessary and b) a second barrier before the atomic would have been necessary and got added by this patch. Signed-off-by: Joern Engel Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 9ec9864ecf38..b106240d8385 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -903,4 +903,18 @@ struct se_wwn { struct config_group fabric_stat_group; }; +static inline void atomic_inc_mb(atomic_t *v) +{ + smp_mb__before_atomic(); + atomic_inc(v); + smp_mb__after_atomic(); +} + +static inline void atomic_dec_mb(atomic_t *v) +{ + smp_mb__before_atomic(); + atomic_dec(v); + smp_mb__after_atomic(); +} + #endif /* TARGET_CORE_BASE_H */ -- cgit v1.2.3 From f14bb039a4e8206439d3e9abd92bc76bd142f243 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Oct 2014 16:07:03 -0700 Subject: uio: Export definition of struct uio_device In order to prevent a O(n) search of the filesystem to link up its uio node with its target configuration, TCMU needs to know the minor number that UIO assigned. Expose the definition of this struct so TCMU can access this field. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/linux/uio_driver.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 1ad4724458de..baa81718d985 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -63,7 +63,17 @@ struct uio_port { #define MAX_UIO_PORT_REGIONS 5 -struct uio_device; +struct uio_device { + struct module *owner; + struct device *dev; + int minor; + atomic_t event; + struct fasync_struct *async_queue; + wait_queue_head_t wait; + struct uio_info *info; + struct kobject *map_dir; + struct kobject *portio_dir; +}; /** * struct uio_info - UIO device capabilities -- cgit v1.2.3 From 7c9e7a6fe11c8dc5b3b9d0e889dde73347247584 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Oct 2014 16:07:05 -0700 Subject: target: Add a user-passthrough backstore Add a LIO storage engine that presents commands to userspace for execution. This would allow more complex backstores to be implemented out-of-kernel, and also make experimentation a-la FUSE (but at the SCSI level -- "SUSE"?) possible. It uses a mmap()able UIO device per LUN to share a command ring and data area. The commands are raw SCSI CDBs and iovs for in/out data. The command ring is also reused for returning scsi command status and optional sense data. This implementation is based on Shaohua Li's earlier version but heavily modified. Differences include: * Shared memory allocated by kernel, not locked-down user pages * Single ring for command request and response * Offsets instead of embedded pointers * Generic SCSI CDB passthrough instead of per-cmd specialization in ring format. * Uses UIO device instead of anon_file passed in mailbox. * Optional in-kernel handling of some commands. The main reason for these differences is to permit greater resiliency if the user process dies or hangs. Things not yet implemented (on purpose): * Zero copy. The data area is flexible enough to allow page flipping or backend-allocated pages to be used by fabrics, but it's not clear these are performance wins. Can come later. * Out-of-order command completion by userspace. Possible to add by just allowing userspace to change cmd_id in rsp cmd entries, but currently not supported. * No locks between kernel cmd submission and completion routines. Sounds like it's possible, but this can come later. * Sparse allocation of mmaped area. Current code vmallocs the whole thing. If the mapped area was larger and not fully mapped then the driver would have more freedom to change cmd and data area sizes based on demand. Current code open issues: * The use of idrs may be overkill -- we maybe can replace them with a simple counter to generate cmd_ids, and a hash table to get a cmd_id's associated pointer. * Use of a free-running counter for cmd ring instead of explicit modulo math. This would require power-of-2 cmd ring size. (Add kconfig depends NET - Randy) Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/target_core_user.h | 142 ++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 include/uapi/linux/target_core_user.h (limited to 'include') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index be88166349a1..6ebd0d1faf2e 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -371,6 +371,7 @@ header-y += swab.h header-y += synclink.h header-y += sysctl.h header-y += sysinfo.h +header-y += target_core_user.h header-y += taskstats.h header-y += tcp.h header-y += tcp_metrics.h diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h new file mode 100644 index 000000000000..7dcfbe6771b1 --- /dev/null +++ b/include/uapi/linux/target_core_user.h @@ -0,0 +1,142 @@ +#ifndef __TARGET_CORE_USER_H +#define __TARGET_CORE_USER_H + +/* This header will be used by application too */ + +#include +#include + +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +#define TCMU_VERSION "1.0" + +/* + * Ring Design + * ----------- + * + * The mmaped area is divided into three parts: + * 1) The mailbox (struct tcmu_mailbox, below) + * 2) The command ring + * 3) Everything beyond the command ring (data) + * + * The mailbox tells userspace the offset of the command ring from the + * start of the shared memory region, and how big the command ring is. + * + * The kernel passes SCSI commands to userspace by putting a struct + * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking + * userspace via uio's interrupt mechanism. + * + * tcmu_cmd_entry contains a header. If the header type is PAD, + * userspace should skip hdr->length bytes (mod cmdr_size) to find the + * next cmd_entry. + * + * Otherwise, the entry will contain offsets into the mmaped area that + * contain the cdb and data buffers -- the latter accessible via the + * iov array. iov addresses are also offsets into the shared area. + * + * When userspace is completed handling the command, set + * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate, + * and also set mailbox->cmd_tail equal to the old cmd_tail plus + * hdr->length, mod cmdr_size. If cmd_tail doesn't equal cmd_head, it + * should process the next packet the same way, and so on. + */ + +#define TCMU_MAILBOX_VERSION 1 +#define ALIGN_SIZE 64 /* Should be enough for most CPUs */ + +struct tcmu_mailbox { + __u16 version; + __u16 flags; + __u32 cmdr_off; + __u32 cmdr_size; + + __u32 cmd_head; + + /* Updated by user. On its own cacheline */ + __u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE))); + +} __packed; + +enum tcmu_opcode { + TCMU_OP_PAD = 0, + TCMU_OP_CMD, +}; + +/* + * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode. + */ +struct tcmu_cmd_entry_hdr { + __u32 len_op; +} __packed; + +#define TCMU_OP_MASK 0x7 + +static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr) +{ + return hdr->len_op & TCMU_OP_MASK; +} + +static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op) +{ + hdr->len_op &= ~TCMU_OP_MASK; + hdr->len_op |= (op & TCMU_OP_MASK); +} + +static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr) +{ + return hdr->len_op & ~TCMU_OP_MASK; +} + +static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len) +{ + hdr->len_op &= TCMU_OP_MASK; + hdr->len_op |= len; +} + +/* Currently the same as SCSI_SENSE_BUFFERSIZE */ +#define TCMU_SENSE_BUFFERSIZE 96 + +struct tcmu_cmd_entry { + struct tcmu_cmd_entry_hdr hdr; + + uint16_t cmd_id; + uint16_t __pad1; + + union { + struct { + uint64_t cdb_off; + uint64_t iov_cnt; + struct iovec iov[0]; + } req; + struct { + uint8_t scsi_status; + uint8_t __pad1; + uint16_t __pad2; + uint32_t __pad3; + char sense_buffer[TCMU_SENSE_BUFFERSIZE]; + } rsp; + }; + +} __packed; + +#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t) + +enum tcmu_genl_cmd { + TCMU_CMD_UNSPEC, + TCMU_CMD_ADDED_DEVICE, + TCMU_CMD_REMOVED_DEVICE, + __TCMU_CMD_MAX, +}; +#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1) + +enum tcmu_genl_attr { + TCMU_ATTR_UNSPEC, + TCMU_ATTR_DEVICE, + TCMU_ATTR_MINOR, + __TCMU_ATTR_MAX, +}; +#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1) + +#endif -- cgit v1.2.3 From 92404e609a2dffc55a9a22540ed48b6f0edc9c59 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 4 Oct 2014 01:06:08 +0000 Subject: target: Add force_pr_aptpl device attribute This patch adds a force_pr_aptpl device attribute used to force SPC-3 PR Activate Persistence across Target Power Loss (APTPL) operation. This makes PR metadata write-out occur during state change regardless if new PERSISTENT_RESERVE_OUT CDBs have their APTPL feature bit set. This is useful during H/A failover in active/passive setups where all PR state is being re-created on a different node, driven by configfs backend device + export layout and pre-loaded $DEV/pr/res_aptpl_metadata. Cc: Mike Christie Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index b106240d8385..23c518a0340c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -108,6 +108,8 @@ #define DA_EMULATE_ALUA 0 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ #define DA_ENFORCE_PR_ISIDS 1 +/* Force SPC-3 PR Activate Persistence across Target Power Loss */ +#define DA_FORCE_PR_APTPL 0 #define DA_STATUS_MAX_SECTORS_MIN 16 #define DA_STATUS_MAX_SECTORS_MAX 8192 /* By default don't report non-rotating (solid state) medium */ @@ -680,6 +682,7 @@ struct se_dev_attrib { enum target_prot_type pi_prot_type; enum target_prot_type hw_pi_prot_type; int enforce_pr_isids; + int force_pr_aptpl; int is_nonrot; int emulate_rest_reord; u32 hw_block_size; -- cgit v1.2.3