From 33940d09937276cd3c81f2874faf43e37c2db0e2 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 16 Sep 2014 16:23:12 -0400
Subject: target: encapsulate smp_mb__after_atomic()

The target code has a rather generous helping of smp_mb__after_atomic()
throughout the code base.  Most atomic operations were followed by one
and none were preceded by smp_mb__before_atomic(), nor accompanied by a
comment explaining the need for a barrier.

Instead of trying to prove for every case whether or not it is needed,
this patch introduces atomic_inc_mb() and atomic_dec_mb(), which
explicitly include the memory barriers before and after the atomic
operation.  For now they are defined in a target header, although they
could be of general use.

Most of the existing atomic/mb combinations were replaced by the new
helpers.  In a few cases the atomic was sandwiched in
spin_lock/spin_unlock and I simply removed the barrier.

I suspect that in most cases the correct conversion would have been to
drop the barrier.  I also suspect that a few cases exist where a) the
barrier was necessary and b) a second barrier before the atomic would
have been necessary and got added by this patch.

Signed-off-by: Joern Engel <joern@logfs.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 9ec9864ecf38..b106240d8385 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -903,4 +903,18 @@ struct se_wwn {
 	struct config_group	fabric_stat_group;
 };
 
+static inline void atomic_inc_mb(atomic_t *v)
+{
+	smp_mb__before_atomic();
+	atomic_inc(v);
+	smp_mb__after_atomic();
+}
+
+static inline void atomic_dec_mb(atomic_t *v)
+{
+	smp_mb__before_atomic();
+	atomic_dec(v);
+	smp_mb__after_atomic();
+}
+
 #endif /* TARGET_CORE_BASE_H */
-- 
cgit v1.2.3


From f14bb039a4e8206439d3e9abd92bc76bd142f243 Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Wed, 1 Oct 2014 16:07:03 -0700
Subject: uio: Export definition of struct uio_device

In order to prevent a O(n) search of the filesystem to link up its uio
node with its target configuration, TCMU needs to know the minor number
that UIO assigned. Expose the definition of this struct so TCMU can
access this field.

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/linux/uio_driver.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 1ad4724458de..baa81718d985 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -63,7 +63,17 @@ struct uio_port {
 
 #define MAX_UIO_PORT_REGIONS	5
 
-struct uio_device;
+struct uio_device {
+        struct module           *owner;
+        struct device           *dev;
+        int                     minor;
+        atomic_t                event;
+        struct fasync_struct    *async_queue;
+        wait_queue_head_t       wait;
+        struct uio_info         *info;
+        struct kobject          *map_dir;
+        struct kobject          *portio_dir;
+};
 
 /**
  * struct uio_info - UIO device capabilities
-- 
cgit v1.2.3


From 7c9e7a6fe11c8dc5b3b9d0e889dde73347247584 Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Wed, 1 Oct 2014 16:07:05 -0700
Subject: target: Add a user-passthrough backstore

Add a LIO storage engine that presents commands to userspace for execution.
This would allow more complex backstores to be implemented out-of-kernel,
and also make experimentation a-la FUSE (but at the SCSI level -- "SUSE"?)
possible.

It uses a mmap()able UIO device per LUN to share a command ring and data
area. The commands are raw SCSI CDBs and iovs for in/out data. The command
ring is also reused for returning scsi command status and optional sense
data.

This implementation is based on Shaohua Li's earlier version but heavily
modified. Differences include:

* Shared memory allocated by kernel, not locked-down user pages
* Single ring for command request and response
* Offsets instead of embedded pointers
* Generic SCSI CDB passthrough instead of per-cmd specialization in ring
  format.
* Uses UIO device instead of anon_file passed in mailbox.
* Optional in-kernel handling of some commands.

The main reason for these differences is to permit greater resiliency
if the user process dies or hangs.

Things not yet implemented (on purpose):

* Zero copy. The data area is flexible enough to allow page flipping or
  backend-allocated pages to be used by fabrics, but it's not clear these
  are performance wins. Can come later.
* Out-of-order command completion by userspace. Possible to add by just
  allowing userspace to change cmd_id in rsp cmd entries, but currently
  not supported.
* No locks between kernel cmd submission and completion routines. Sounds
  like it's possible, but this can come later.
* Sparse allocation of mmaped area. Current code vmallocs the whole thing.
  If the mapped area was larger and not fully mapped then the driver would
  have more freedom to change cmd and data area sizes based on demand.

Current code open issues:

* The use of idrs may be overkill -- we maybe can replace them with a
  simple counter to generate cmd_ids, and a hash table to get a cmd_id's
  associated pointer.
* Use of a free-running counter for cmd ring instead of explicit modulo
  math. This would require power-of-2 cmd ring size.

(Add kconfig depends NET - Randy)

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/uapi/linux/Kbuild             |   1 +
 include/uapi/linux/target_core_user.h | 142 ++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 include/uapi/linux/target_core_user.h

(limited to 'include')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index be88166349a1..6ebd0d1faf2e 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -371,6 +371,7 @@ header-y += swab.h
 header-y += synclink.h
 header-y += sysctl.h
 header-y += sysinfo.h
+header-y += target_core_user.h
 header-y += taskstats.h
 header-y += tcp.h
 header-y += tcp_metrics.h
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
new file mode 100644
index 000000000000..7dcfbe6771b1
--- /dev/null
+++ b/include/uapi/linux/target_core_user.h
@@ -0,0 +1,142 @@
+#ifndef __TARGET_CORE_USER_H
+#define __TARGET_CORE_USER_H
+
+/* This header will be used by application too */
+
+#include <linux/types.h>
+#include <linux/uio.h>
+
+#ifndef __packed
+#define __packed                        __attribute__((packed))
+#endif
+
+#define TCMU_VERSION "1.0"
+
+/*
+ * Ring Design
+ * -----------
+ *
+ * The mmaped area is divided into three parts:
+ * 1) The mailbox (struct tcmu_mailbox, below)
+ * 2) The command ring
+ * 3) Everything beyond the command ring (data)
+ *
+ * The mailbox tells userspace the offset of the command ring from the
+ * start of the shared memory region, and how big the command ring is.
+ *
+ * The kernel passes SCSI commands to userspace by putting a struct
+ * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking
+ * userspace via uio's interrupt mechanism.
+ *
+ * tcmu_cmd_entry contains a header. If the header type is PAD,
+ * userspace should skip hdr->length bytes (mod cmdr_size) to find the
+ * next cmd_entry.
+ *
+ * Otherwise, the entry will contain offsets into the mmaped area that
+ * contain the cdb and data buffers -- the latter accessible via the
+ * iov array. iov addresses are also offsets into the shared area.
+ *
+ * When userspace is completed handling the command, set
+ * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate,
+ * and also set mailbox->cmd_tail equal to the old cmd_tail plus
+ * hdr->length, mod cmdr_size. If cmd_tail doesn't equal cmd_head, it
+ * should process the next packet the same way, and so on.
+ */
+
+#define TCMU_MAILBOX_VERSION 1
+#define ALIGN_SIZE 64 /* Should be enough for most CPUs */
+
+struct tcmu_mailbox {
+	__u16 version;
+	__u16 flags;
+	__u32 cmdr_off;
+	__u32 cmdr_size;
+
+	__u32 cmd_head;
+
+	/* Updated by user. On its own cacheline */
+	__u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE)));
+
+} __packed;
+
+enum tcmu_opcode {
+	TCMU_OP_PAD = 0,
+	TCMU_OP_CMD,
+};
+
+/*
+ * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
+ */
+struct tcmu_cmd_entry_hdr {
+		__u32 len_op;
+} __packed;
+
+#define TCMU_OP_MASK 0x7
+
+static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
+{
+	return hdr->len_op & TCMU_OP_MASK;
+}
+
+static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
+{
+	hdr->len_op &= ~TCMU_OP_MASK;
+	hdr->len_op |= (op & TCMU_OP_MASK);
+}
+
+static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
+{
+	return hdr->len_op & ~TCMU_OP_MASK;
+}
+
+static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
+{
+	hdr->len_op &= TCMU_OP_MASK;
+	hdr->len_op |= len;
+}
+
+/* Currently the same as SCSI_SENSE_BUFFERSIZE */
+#define TCMU_SENSE_BUFFERSIZE 96
+
+struct tcmu_cmd_entry {
+	struct tcmu_cmd_entry_hdr hdr;
+
+	uint16_t cmd_id;
+	uint16_t __pad1;
+
+	union {
+		struct {
+			uint64_t cdb_off;
+			uint64_t iov_cnt;
+			struct iovec iov[0];
+		} req;
+		struct {
+			uint8_t scsi_status;
+			uint8_t __pad1;
+			uint16_t __pad2;
+			uint32_t __pad3;
+			char sense_buffer[TCMU_SENSE_BUFFERSIZE];
+		} rsp;
+	};
+
+} __packed;
+
+#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t)
+
+enum tcmu_genl_cmd {
+	TCMU_CMD_UNSPEC,
+	TCMU_CMD_ADDED_DEVICE,
+	TCMU_CMD_REMOVED_DEVICE,
+	__TCMU_CMD_MAX,
+};
+#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
+
+enum tcmu_genl_attr {
+	TCMU_ATTR_UNSPEC,
+	TCMU_ATTR_DEVICE,
+	TCMU_ATTR_MINOR,
+	__TCMU_ATTR_MAX,
+};
+#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
+
+#endif
-- 
cgit v1.2.3


From 92404e609a2dffc55a9a22540ed48b6f0edc9c59 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 4 Oct 2014 01:06:08 +0000
Subject: target: Add force_pr_aptpl device attribute

This patch adds a force_pr_aptpl device attribute used to force SPC-3 PR
Activate Persistence across Target Power Loss (APTPL) operation.  This
makes PR metadata write-out occur during state change regardless if new
PERSISTENT_RESERVE_OUT CDBs have their APTPL feature bit set.

This is useful during H/A failover in active/passive setups where all PR
state is being re-created on a different node, driven by configfs backend
device + export layout and pre-loaded $DEV/pr/res_aptpl_metadata.

Cc: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index b106240d8385..23c518a0340c 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -108,6 +108,8 @@
 #define DA_EMULATE_ALUA				0
 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */
 #define DA_ENFORCE_PR_ISIDS			1
+/* Force SPC-3 PR Activate Persistence across Target Power Loss */
+#define DA_FORCE_PR_APTPL			0
 #define DA_STATUS_MAX_SECTORS_MIN		16
 #define DA_STATUS_MAX_SECTORS_MAX		8192
 /* By default don't report non-rotating (solid state) medium */
@@ -680,6 +682,7 @@ struct se_dev_attrib {
 	enum target_prot_type pi_prot_type;
 	enum target_prot_type hw_pi_prot_type;
 	int		enforce_pr_isids;
+	int		force_pr_aptpl;
 	int		is_nonrot;
 	int		emulate_rest_reord;
 	u32		hw_block_size;
-- 
cgit v1.2.3