aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2017-05-30 13:35:18 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2017-05-30 13:35:18 +1000
commit8456fabfce852ba7a9c1a80c260a68d6127c8c4a (patch)
treea0ef23d8a184e7717bec4b6206c39fed3fdfb7ee
parent2c5ac53196f1081a8cf90c0ee56fab1dc381ff5c (diff)
parent5566e5d3479194537414c469e6b91d2d38a62d44 (diff)
Merge remote-tracking branch 'slave-dma/next'
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl08x.txt9
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt29
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt5
-rw-r--r--arch/arm/mach-lpc32xx/phy3250.c3
-rw-r--r--arch/arm/mach-s3c64xx/pl080.c28
-rw-r--r--arch/arm/mach-spear/spear3xx.c14
-rw-r--r--arch/arm/mach-spear/spear6xx.c14
-rw-r--r--crypto/async_tx/async_pq.c5
-rw-r--r--drivers/dma/Kconfig21
-rw-r--r--drivers/dma/Makefile1
-rw-r--r--drivers/dma/amba-pl08x.c970
-rw-r--r--drivers/dma/bcm-sba-raid.c1785
-rw-r--r--drivers/dma/dw/platform.c6
-rw-r--r--drivers/dma/ep93xx_dma.c39
-rw-r--r--drivers/dma/mv_xor_v2.c147
-rw-r--r--drivers/dma/sh/rcar-dmac.c27
-rw-r--r--drivers/dma/sh/usb-dmac.c2
-rw-r--r--include/linux/amba/pl080.h107
-rw-r--r--include/linux/amba/pl08x.h30
-rw-r--r--include/linux/raid/pq.h1
-rw-r--r--lib/raid6/mktables.c20
21 files changed, 2914 insertions, 349 deletions
diff --git a/Documentation/devicetree/bindings/dma/arm-pl08x.txt b/Documentation/devicetree/bindings/dma/arm-pl08x.txt
index 8a0097a029d3..0ba81f79266f 100644
--- a/Documentation/devicetree/bindings/dma/arm-pl08x.txt
+++ b/Documentation/devicetree/bindings/dma/arm-pl08x.txt
@@ -3,6 +3,11 @@
Required properties:
- compatible: "arm,pl080", "arm,primecell";
"arm,pl081", "arm,primecell";
+ "faraday,ftdmac020", "arm,primecell"
+- arm,primecell-periphid: on the FTDMAC020 the primecell ID is not hard-coded
+ in the hardware and must be specified here as <0x0003b080>. This number
+ follows the PrimeCell standard numbering using the JEP106 vendor code 0x38
+ for Faraday Technology.
- reg: Address range of the PL08x registers
- interrupt: The PL08x interrupt number
- clocks: The clock running the IP core clock
@@ -20,8 +25,8 @@ Optional properties:
- dma-requests: contains the total number of DMA requests supported by the DMAC
- memcpy-burst-size: the size of the bursts for memcpy: 1, 4, 8, 16, 32
64, 128 or 256 bytes are legal values
-- memcpy-bus-width: the bus width used for memcpy: 8, 16 or 32 are legal
- values
+- memcpy-bus-width: the bus width used for memcpy in bits: 8, 16 or 32 are legal
+ values, the Faraday FTDMAC020 can also accept 64 bits
Clients
Required properties:
diff --git a/Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt b/Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt
new file mode 100644
index 000000000000..092913a28457
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt
@@ -0,0 +1,29 @@
+* Broadcom SBA RAID engine
+
+Required properties:
+- compatible: Should be one of the following
+ "brcm,iproc-sba"
+ "brcm,iproc-sba-v2"
+ The "brcm,iproc-sba" has support for only 6 PQ coefficients
+ The "brcm,iproc-sba-v2" has support for only 30 PQ coefficients
+- mboxes: List of phandle and mailbox channel specifiers
+
+Example:
+
+raid_mbox: mbox@67400000 {
+ ...
+ #mbox-cells = <3>;
+ ...
+};
+
+raid0 {
+ compatible = "brcm,iproc-sba-v2";
+ mboxes = <&raid_mbox 0 0x1 0xffff>,
+ <&raid_mbox 1 0x1 0xffff>,
+ <&raid_mbox 2 0x1 0xffff>,
+ <&raid_mbox 3 0x1 0xffff>,
+ <&raid_mbox 4 0x1 0xffff>,
+ <&raid_mbox 5 0x1 0xffff>,
+ <&raid_mbox 6 0x1 0xffff>,
+ <&raid_mbox 7 0x1 0xffff>;
+};
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
index 3316a9c2e638..79a204d50234 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -30,8 +30,9 @@ Required Properties:
- interrupts: interrupt specifiers for the DMAC, one for each entry in
interrupt-names.
-- interrupt-names: one entry per channel, named "ch%u", where %u is the
- channel number ranging from zero to the number of channels minus one.
+- interrupt-names: one entry for the error interrupt, named "error", plus one
+ entry per channel, named "ch%u", where %u is the channel number ranging from
+ zero to the number of channels minus one.
- clock-names: "fck" for the functional clock
- clocks: a list of phandle + clock-specifier pairs, one for each entry
diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 6c52bd32610e..e48cc06c2aec 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -137,6 +137,9 @@ static void pl08x_put_signal(const struct pl08x_channel_data *cd, int ch)
}
static struct pl08x_platform_data pl08x_pd = {
+ /* Some reasonable memcpy defaults */
+ .memcpy_burst_size = PL08X_BURST_SZ_256,
+ .memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
.slave_channels = &pl08x_slave_channels[0],
.num_slave_channels = ARRAY_SIZE(pl08x_slave_channels),
.get_xfer_signal = pl08x_get_signal,
diff --git a/arch/arm/mach-s3c64xx/pl080.c b/arch/arm/mach-s3c64xx/pl080.c
index 261820a855ec..66fc774b70ec 100644
--- a/arch/arm/mach-s3c64xx/pl080.c
+++ b/arch/arm/mach-s3c64xx/pl080.c
@@ -137,16 +137,10 @@ static const struct dma_slave_map s3c64xx_dma0_slave_map[] = {
};
struct pl08x_platform_data s3c64xx_dma0_plat_data = {
- .memcpy_channel = {
- .bus_id = "memcpy",
- .cctl_memcpy =
- (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT |
- PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
- PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT |
- PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE |
- PL080_CONTROL_PROT_SYS),
- },
+ .memcpy_burst_size = PL08X_BURST_SZ_4,
+ .memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+ .memcpy_prot_buff = true,
+ .memcpy_prot_cache = true,
.lli_buses = PL08X_AHB1,
.mem_buses = PL08X_AHB1,
.get_xfer_signal = pl08x_get_xfer_signal,
@@ -238,16 +232,10 @@ static const struct dma_slave_map s3c64xx_dma1_slave_map[] = {
};
struct pl08x_platform_data s3c64xx_dma1_plat_data = {
- .memcpy_channel = {
- .bus_id = "memcpy",
- .cctl_memcpy =
- (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT |
- PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
- PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT |
- PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE |
- PL080_CONTROL_PROT_SYS),
- },
+ .memcpy_burst_size = PL08X_BURST_SZ_4,
+ .memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+ .memcpy_prot_buff = true,
+ .memcpy_prot_cache = true,
.lli_buses = PL08X_AHB1,
.mem_buses = PL08X_AHB1,
.get_xfer_signal = pl08x_get_xfer_signal,
diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c
index 23394ac76cf2..8537fcffe5a8 100644
--- a/arch/arm/mach-spear/spear3xx.c
+++ b/arch/arm/mach-spear/spear3xx.c
@@ -44,16 +44,10 @@ struct pl022_ssp_controller pl022_plat_data = {
/* dmac device registration */
struct pl08x_platform_data pl080_plat_data = {
- .memcpy_channel = {
- .bus_id = "memcpy",
- .cctl_memcpy =
- (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | \
- PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT | \
- PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | \
- PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT | \
- PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE | \
- PL080_CONTROL_PROT_SYS),
- },
+ .memcpy_burst_size = PL08X_BURST_SZ_16,
+ .memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+ .memcpy_prot_buff = true,
+ .memcpy_prot_cache = true,
.lli_buses = PL08X_AHB1,
.mem_buses = PL08X_AHB1,
.get_xfer_signal = pl080_get_signal,
diff --git a/arch/arm/mach-spear/spear6xx.c b/arch/arm/mach-spear/spear6xx.c
index ccf3573b831c..c5fc110134ba 100644
--- a/arch/arm/mach-spear/spear6xx.c
+++ b/arch/arm/mach-spear/spear6xx.c
@@ -322,16 +322,10 @@ static struct pl08x_channel_data spear600_dma_info[] = {
};
static struct pl08x_platform_data spear6xx_pl080_plat_data = {
- .memcpy_channel = {
- .bus_id = "memcpy",
- .cctl_memcpy =
- (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | \
- PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT | \
- PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | \
- PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT | \
- PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE | \
- PL080_CONTROL_PROT_SYS),
- },
+ .memcpy_burst_size = PL08X_BURST_SZ_16,
+ .memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+ .memcpy_prot_buff = true,
+ .memcpy_prot_cache = true,
.lli_buses = PL08X_AHB1,
.mem_buses = PL08X_AHB1,
.get_xfer_signal = pl080_get_signal,
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index f83de99d7d71..56bd612927ab 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -62,9 +62,6 @@ do_async_gen_syndrome(struct dma_chan *chan,
dma_addr_t dma_dest[2];
int src_off = 0;
- if (submit->flags & ASYNC_TX_FENCE)
- dma_flags |= DMA_PREP_FENCE;
-
while (src_cnt > 0) {
submit->flags = flags_orig;
pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
@@ -83,6 +80,8 @@ do_async_gen_syndrome(struct dma_chan *chan,
if (cb_fn_orig)
dma_flags |= DMA_PREP_INTERRUPT;
}
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
/* Drivers force forward progress in case they can not provide
* a descriptor
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 24e8597b2c3e..fd724692bb3f 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -62,8 +62,10 @@ config AMBA_PL08X
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
- Platform has a PL08x DMAC device
- which can provide DMA engine support
+ Say yes if your platform has a PL08x DMAC device which can
+ provide DMA engine support. This includes the original ARM
+ PL080 and PL081, Samsung's PL080 derivative and Faraday
+ Technology's FTDMAC020 PL080 derivative.
config AMCC_PPC440SPE_ADMA
tristate "AMCC PPC440SPe ADMA support"
@@ -99,6 +101,21 @@ config AXI_DMAC
controller is often used in Analog Device's reference designs for FPGA
platforms.
+config BCM_SBA_RAID
+ tristate "Broadcom SBA RAID engine support"
+ depends on ARM64 || COMPILE_TEST
+ depends on MAILBOX && RAID6_PQ
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ select ASYNC_TX_DISABLE_XOR_VAL_DMA
+ select ASYNC_TX_DISABLE_PQ_VAL_DMA
+ default ARCH_BCM_IPROC
+ help
+ Enable support for Broadcom SBA RAID Engine. The SBA RAID
+ engine is available on most of the Broadcom iProc SoCs. It
+ has the capability to offload memcpy, xor and pq computation
+ for raid5/6.
+
config COH901318
bool "ST-Ericsson COH901318 DMA support"
select DMA_ENGINE
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 0b723e94d9e6..d12ab2985ed1 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
+obj-$(CONFIG_BCM_SBA_RAID) += bcm-sba-raid.o
obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 6bb8813ca275..13cc95c0474c 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1,9 +1,10 @@
/*
* Copyright (c) 2006 ARM Ltd.
* Copyright (c) 2010 ST-Ericsson SA
+ * Copyright (c) 2017 Linaro Ltd.
*
* Author: Peter Pearse <peter.pearse@arm.com>
- * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Linus Walleij <linus.walleij@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -110,11 +111,12 @@ struct pl08x_driver_data;
* @channels: the number of channels available in this variant
* @signals: the number of request signals available from the hardware
* @dualmaster: whether this version supports dual AHB masters or not.
- * @nomadik: whether the channels have Nomadik security extension bits
- * that need to be checked for permission before use and some registers are
- * missing
- * @pl080s: whether this version is a PL080S, which has separate register and
- * LLI word for transfer size.
+ * @nomadik: whether this variant is a ST Microelectronics Nomadik, where the
+ * channels have Nomadik security extension bits that need to be checked
+ * for permission before use and some registers are missing
+ * @pl080s: whether this variant is a Samsung PL080S, which has separate
+ * register and LLI word for transfer size.
+ * @ftdmac020: whether this variant is a Faraday Technology FTDMAC020
* @max_transfer_size: the maximum single element transfer size for this
* PL08x variant.
*/
@@ -125,6 +127,7 @@ struct vendor_data {
bool dualmaster;
bool nomadik;
bool pl080s;
+ bool ftdmac020;
u32 max_transfer_size;
};
@@ -148,19 +151,34 @@ struct pl08x_bus_data {
* @id: physical index to this channel
* @base: memory base address for this physical channel
* @reg_config: configuration address for this physical channel
+ * @reg_control: control address for this physical channel
+ * @reg_src: transfer source address register
+ * @reg_dst: transfer destination address register
+ * @reg_lli: transfer LLI address register
+ * @reg_busy: if the variant has a special per-channel busy register,
+ * this contains a pointer to it
* @lock: a lock to use when altering an instance of this struct
* @serving: the virtual channel currently being served by this physical
* channel
* @locked: channel unavailable for the system, e.g. dedicated to secure
* world
+ * @ftdmac020: channel is on a FTDMAC020
+ * @pl080s: channel is on a PL080S
*/
struct pl08x_phy_chan {
unsigned int id;
void __iomem *base;
void __iomem *reg_config;
+ void __iomem *reg_control;
+ void __iomem *reg_src;
+ void __iomem *reg_dst;
+ void __iomem *reg_lli;
+ void __iomem *reg_busy;
spinlock_t lock;
struct pl08x_dma_chan *serving;
bool locked;
+ bool ftdmac020;
+ bool pl080s;
};
/**
@@ -253,8 +271,9 @@ struct pl08x_dma_chan {
/**
* struct pl08x_driver_data - the local state holder for the PL08x
- * @slave: slave engine for this instance
+ * @slave: optional slave engine for this instance
* @memcpy: memcpy engine for this instance
+ * @has_slave: the PL08x has a slave engine (routed signals)
* @base: virtual memory base (remapped) for the PL08x
* @adev: the corresponding AMBA (PrimeCell) bus entry
* @vd: vendor data for this PL08x variant
@@ -269,6 +288,7 @@ struct pl08x_dma_chan {
struct pl08x_driver_data {
struct dma_device slave;
struct dma_device memcpy;
+ bool has_slave;
void __iomem *base;
struct amba_device *adev;
const struct vendor_data *vd;
@@ -360,10 +380,24 @@ static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
{
unsigned int val;
+ /* If we have a special busy register, take a shortcut */
+ if (ch->reg_busy) {
+ val = readl(ch->reg_busy);
+ return !!(val & BIT(ch->id));
+ }
val = readl(ch->reg_config);
return val & PL080_CONFIG_ACTIVE;
}
+/*
+ * pl08x_write_lli() - Write an LLI into the DMA controller.
+ *
+ * The PL08x derivatives support linked lists, but the first item of the
+ * list containing the source, destination, control word and next LLI is
+ * ignored. Instead the driver has to write those values directly into the
+ * SRC, DST, LLI and control registers. On FTDMAC020 also the SIZE
+ * register needs to be set up for the first transfer.
+ */
static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg)
{
@@ -381,11 +415,112 @@ static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg);
- writel_relaxed(lli[PL080_LLI_SRC], phychan->base + PL080_CH_SRC_ADDR);
- writel_relaxed(lli[PL080_LLI_DST], phychan->base + PL080_CH_DST_ADDR);
- writel_relaxed(lli[PL080_LLI_LLI], phychan->base + PL080_CH_LLI);
- writel_relaxed(lli[PL080_LLI_CCTL], phychan->base + PL080_CH_CONTROL);
+ writel_relaxed(lli[PL080_LLI_SRC], phychan->reg_src);
+ writel_relaxed(lli[PL080_LLI_DST], phychan->reg_dst);
+ writel_relaxed(lli[PL080_LLI_LLI], phychan->reg_lli);
+
+ /*
+ * The FTDMAC020 has a different layout in the CCTL word of the LLI
+ * and the CCTL register which is split in CSR and SIZE registers.
+ * Convert the LLI item CCTL into the proper values to write into
+ * the CSR and SIZE registers.
+ */
+ if (phychan->ftdmac020) {
+ u32 llictl = lli[PL080_LLI_CCTL];
+ u32 val = 0;
+
+ /* Write the transfer size (12 bits) to the size register */
+ writel_relaxed(llictl & FTDMAC020_LLI_TRANSFER_SIZE_MASK,
+ phychan->base + FTDMAC020_CH_SIZE);
+ /*
+ * Then write the control bits 28..16 to the control register
+ * by shuffling the bits around to where they are in the
+ * main register. The mapping is as follows:
+ * Bit 28: TC_MSK - mask on all except last LLI
+ * Bit 27..25: SRC_WIDTH
+ * Bit 24..22: DST_WIDTH
+ * Bit 21..20: SRCAD_CTRL
+ * Bit 19..18: DSTAD_CTRL
+ * Bit 17: SRC_SEL
+ * Bit 16: DST_SEL
+ */
+ if (llictl & FTDMAC020_LLI_TC_MSK)
+ val |= FTDMAC020_CH_CSR_TC_MSK;
+ val |= ((llictl & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+ (FTDMAC020_LLI_SRC_WIDTH_SHIFT -
+ FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT));
+ val |= ((llictl & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+ (FTDMAC020_LLI_DST_WIDTH_SHIFT -
+ FTDMAC020_CH_CSR_DST_WIDTH_SHIFT));
+ val |= ((llictl & FTDMAC020_LLI_SRCAD_CTL_MSK) >>
+ (FTDMAC020_LLI_SRCAD_CTL_SHIFT -
+ FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT));
+ val |= ((llictl & FTDMAC020_LLI_DSTAD_CTL_MSK) >>
+ (FTDMAC020_LLI_DSTAD_CTL_SHIFT -
+ FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT));
+ if (llictl & FTDMAC020_LLI_SRC_SEL)
+ val |= FTDMAC020_CH_CSR_SRC_SEL;
+ if (llictl & FTDMAC020_LLI_DST_SEL)
+ val |= FTDMAC020_CH_CSR_DST_SEL;
+
+ /*
+ * Set up the bits that exist in the CSR but are not
+ * part of the LLI, i.e. only gets written to the control
+ * register right here.
+ *
+ * FIXME: do not just handle memcpy, also handle slave DMA.
+ */
+ switch (pl08x->pd->memcpy_burst_size) {
+ default:
+ case PL08X_BURST_SZ_1:
+ val |= PL080_BSIZE_1 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_4:
+ val |= PL080_BSIZE_4 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_8:
+ val |= PL080_BSIZE_8 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_16:
+ val |= PL080_BSIZE_16 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_32:
+ val |= PL080_BSIZE_32 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_64:
+ val |= PL080_BSIZE_64 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_128:
+ val |= PL080_BSIZE_128 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_256:
+ val |= PL080_BSIZE_256 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ }
+
+ /* Protection flags */
+ if (pl08x->pd->memcpy_prot_buff)
+ val |= FTDMAC020_CH_CSR_PROT2;
+ if (pl08x->pd->memcpy_prot_cache)
+ val |= FTDMAC020_CH_CSR_PROT3;
+ /* We are the kernel, so we are in privileged mode */
+ val |= FTDMAC020_CH_CSR_PROT1;
+
+ writel_relaxed(val, phychan->reg_control);
+ } else {
+ /* Bits are just identical */
+ writel_relaxed(lli[PL080_LLI_CCTL], phychan->reg_control);
+ }
+ /* Second control word on the PL080s */
if (pl08x->vd->pl080s)
writel_relaxed(lli[PL080S_LLI_CCTL2],
phychan->base + PL080S_CH_CONTROL2);
@@ -423,11 +558,25 @@ static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan)
cpu_relax();
/* Do not access config register until channel shows as inactive */
- val = readl(phychan->reg_config);
- while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
+ if (phychan->ftdmac020) {
+ val = readl(phychan->reg_config);
+ while (val & FTDMAC020_CH_CFG_BUSY)
+ val = readl(phychan->reg_config);
+
+ val = readl(phychan->reg_control);
+ while (val & FTDMAC020_CH_CSR_EN)
+ val = readl(phychan->reg_control);
+
+ writel(val | FTDMAC020_CH_CSR_EN,
+ phychan->reg_control);
+ } else {
val = readl(phychan->reg_config);
+ while ((val & PL080_CONFIG_ACTIVE) ||
+ (val & PL080_CONFIG_ENABLE))
+ val = readl(phychan->reg_config);
- writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+ writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+ }
}
/*
@@ -445,6 +594,14 @@ static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
u32 val;
int timeout;
+ if (ch->ftdmac020) {
+ /* Use the enable bit on the FTDMAC020 */
+ val = readl(ch->reg_control);
+ val &= ~FTDMAC020_CH_CSR_EN;
+ writel(val, ch->reg_control);
+ return;
+ }
+
/* Set the HALT bit and wait for the FIFO to drain */
val = readl(ch->reg_config);
val |= PL080_CONFIG_HALT;
@@ -464,6 +621,14 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
{
u32 val;
+ /* Use the enable bit on the FTDMAC020 */
+ if (ch->ftdmac020) {
+ val = readl(ch->reg_control);
+ val |= FTDMAC020_CH_CSR_EN;
+ writel(val, ch->reg_control);
+ return;
+ }
+
/* Clear the HALT bit */
val = readl(ch->reg_config);
val &= ~PL080_CONFIG_HALT;
@@ -479,25 +644,68 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
struct pl08x_phy_chan *ch)
{
- u32 val = readl(ch->reg_config);
+ u32 val;
+ /* The layout for the FTDMAC020 is different */
+ if (ch->ftdmac020) {
+ /* Disable all interrupts */
+ val = readl(ch->reg_config);
+ val |= (FTDMAC020_CH_CFG_INT_ABT_MASK |
+ FTDMAC020_CH_CFG_INT_ERR_MASK |
+ FTDMAC020_CH_CFG_INT_TC_MASK);
+ writel(val, ch->reg_config);
+
+ /* Abort and disable channel */
+ val = readl(ch->reg_control);
+ val &= ~FTDMAC020_CH_CSR_EN;
+ val |= FTDMAC020_CH_CSR_ABT;
+ writel(val, ch->reg_control);
+
+ /* Clear ABT and ERR interrupt flags */
+ writel(BIT(ch->id) | BIT(ch->id + 16),
+ pl08x->base + PL080_ERR_CLEAR);
+ writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
+
+ return;
+ }
+
+ val = readl(ch->reg_config);
val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
PL080_CONFIG_TC_IRQ_MASK);
-
writel(val, ch->reg_config);
writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR);
writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
}
-static inline u32 get_bytes_in_cctl(u32 cctl)
+static u32 get_bytes_in_phy_channel(struct pl08x_phy_chan *ch)
{
- /* The source width defines the number of bytes */
- u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
+ u32 val;
+ u32 bytes;
+
+ if (ch->ftdmac020) {
+ bytes = readl(ch->base + FTDMAC020_CH_SIZE);
- cctl &= PL080_CONTROL_SWIDTH_MASK;
+ val = readl(ch->reg_control);
+ val &= FTDMAC020_CH_CSR_SRC_WIDTH_MSK;
+ val >>= FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT;
+ } else if (ch->pl080s) {
+ val = readl(ch->base + PL080S_CH_CONTROL2);
+ bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
- switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+ val = readl(ch->reg_control);
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ } else {
+ /* Plain PL08x */
+ val = readl(ch->reg_control);
+ bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ }
+
+ switch (val) {
case PL080_WIDTH_8BIT:
break;
case PL080_WIDTH_16BIT:
@@ -510,14 +718,35 @@ static inline u32 get_bytes_in_cctl(u32 cctl)
return bytes;
}
-static inline u32 get_bytes_in_cctl_pl080s(u32 cctl, u32 cctl1)
+static u32 get_bytes_in_lli(struct pl08x_phy_chan *ch, const u32 *llis_va)
{
- /* The source width defines the number of bytes */
- u32 bytes = cctl1 & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+ u32 val;
+ u32 bytes;
+
+ if (ch->ftdmac020) {
+ val = llis_va[PL080_LLI_CCTL];
+ bytes = val & FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+ val = llis_va[PL080_LLI_CCTL];
+ val &= FTDMAC020_LLI_SRC_WIDTH_MSK;
+ val >>= FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ } else if (ch->pl080s) {
+ val = llis_va[PL080S_LLI_CCTL2];
+ bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+ val = llis_va[PL080_LLI_CCTL];
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ } else {
+ /* Plain PL08x */
+ val = llis_va[PL080_LLI_CCTL];
+ bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
- cctl &= PL080_CONTROL_SWIDTH_MASK;
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ }
- switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+ switch (val) {
case PL080_WIDTH_8BIT:
break;
case PL080_WIDTH_16BIT:
@@ -552,15 +781,10 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
* Follow the LLIs to get the number of remaining
* bytes in the currently active transaction.
*/
- clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
+ clli = readl(ch->reg_lli) & ~PL080_LLI_LM_AHB2;
/* First get the remaining bytes in the active transfer */
- if (pl08x->vd->pl080s)
- bytes = get_bytes_in_cctl_pl080s(
- readl(ch->base + PL080_CH_CONTROL),
- readl(ch->base + PL080S_CH_CONTROL2));
- else
- bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
+ bytes = get_bytes_in_phy_channel(ch);
if (!clli)
return bytes;
@@ -581,12 +805,7 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
llis_va_limit = llis_va + llis_max_words;
for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) {
- if (pl08x->vd->pl080s)
- bytes += get_bytes_in_cctl_pl080s(
- llis_va[PL080_LLI_CCTL],
- llis_va[PL080S_LLI_CCTL2]);
- else
- bytes += get_bytes_in_cctl(llis_va[PL080_LLI_CCTL]);
+ bytes += get_bytes_in_lli(ch, llis_va);
/*
* A LLI pointer going backward terminates the LLI list
@@ -705,7 +924,7 @@ static void pl08x_phy_free(struct pl08x_dma_chan *plchan)
break;
}
- if (!next) {
+ if (!next && pl08x->has_slave) {
list_for_each_entry(p, &pl08x->slave.channels, vc.chan.device_node)
if (p->state == PL08X_CHAN_WAITING) {
next = p;
@@ -746,9 +965,30 @@ static void pl08x_phy_free(struct pl08x_dma_chan *plchan)
* LLI handling
*/
-static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
+static inline unsigned int
+pl08x_get_bytes_for_lli(struct pl08x_driver_data *pl08x,
+ u32 cctl,
+ bool source)
{
- switch (coded) {
+ u32 val;
+
+ if (pl08x->vd->ftdmac020) {
+ if (source)
+ val = (cctl & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ else
+ val = (cctl & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ } else {
+ if (source)
+ val = (cctl & PL080_CONTROL_SWIDTH_MASK) >>
+ PL080_CONTROL_SWIDTH_SHIFT;
+ else
+ val = (cctl & PL080_CONTROL_DWIDTH_MASK) >>
+ PL080_CONTROL_DWIDTH_SHIFT;
+ }
+
+ switch (val) {
case PL080_WIDTH_8BIT:
return 1;
case PL080_WIDTH_16BIT:
@@ -762,49 +1002,106 @@ static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
return 0;
}
-static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
- size_t tsize)
+static inline u32 pl08x_lli_control_bits(struct pl08x_driver_data *pl08x,
+ u32 cctl,
+ u8 srcwidth, u8 dstwidth,
+ size_t tsize)
{
u32 retbits = cctl;
- /* Remove all src, dst and transfer size bits */
- retbits &= ~PL080_CONTROL_DWIDTH_MASK;
- retbits &= ~PL080_CONTROL_SWIDTH_MASK;
- retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+ /*
+ * Remove all src, dst and transfer size bits, then set the
+ * width and size according to the parameters. The bit offsets
+ * are different in the FTDMAC020 so we need to account for this.
+ */
+ if (pl08x->vd->ftdmac020) {
+ retbits &= ~FTDMAC020_LLI_DST_WIDTH_MSK;
+ retbits &= ~FTDMAC020_LLI_SRC_WIDTH_MSK;
+ retbits &= ~FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+ switch (srcwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
- /* Then set the bits according to the parameters */
- switch (srcwidth) {
- case 1:
- retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT;
- break;
- case 2:
- retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT;
- break;
- case 4:
- retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT;
- break;
- default:
- BUG();
- break;
- }
+ switch (dstwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
- switch (dstwidth) {
- case 1:
- retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
- break;
- case 2:
- retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
- break;
- case 4:
- retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
- break;
- default:
- BUG();
- break;
+ tsize &= FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+ retbits |= tsize << FTDMAC020_LLI_TRANSFER_SIZE_SHIFT;
+ } else {
+ retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+ retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+ retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+ switch (srcwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ PL080_CONTROL_SWIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ PL080_CONTROL_SWIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ PL080_CONTROL_SWIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ switch (dstwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
+ retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
}
- tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
- retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
return retbits;
}
@@ -825,13 +1122,35 @@ struct pl08x_lli_build_data {
* - prefers the destination bus if both available
* - prefers bus with fixed address (i.e. peripheral)
*/
-static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd,
- struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl)
+static void pl08x_choose_master_bus(struct pl08x_driver_data *pl08x,
+ struct pl08x_lli_build_data *bd,
+ struct pl08x_bus_data **mbus,
+ struct pl08x_bus_data **sbus,
+ u32 cctl)
{
- if (!(cctl & PL080_CONTROL_DST_INCR)) {
+ bool dst_incr;
+ bool src_incr;
+
+ /*
+ * The FTDMAC020 only supports memory-to-memory transfer, so
+ * source and destination always increase.
+ */
+ if (pl08x->vd->ftdmac020) {
+ dst_incr = true;
+ src_incr = true;
+ } else {
+ dst_incr = !!(cctl & PL080_CONTROL_DST_INCR);
+ src_incr = !!(cctl & PL080_CONTROL_SRC_INCR);
+ }
+
+ /*
+ * If either bus is not advancing, i.e. it is a peripheral, that
+ * one becomes master
+ */
+ if (!dst_incr) {
*mbus = &bd->dstbus;
*sbus = &bd->srcbus;
- } else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
+ } else if (!src_incr) {
*mbus = &bd->srcbus;
*sbus = &bd->dstbus;
} else {
@@ -869,10 +1188,16 @@ static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
if (pl08x->vd->pl080s)
llis_va[PL080S_LLI_CCTL2] = cctl2;
- if (cctl & PL080_CONTROL_SRC_INCR)
+ if (pl08x->vd->ftdmac020) {
+ /* FIXME: only memcpy so far so both increase */
bd->srcbus.addr += len;
- if (cctl & PL080_CONTROL_DST_INCR)
bd->dstbus.addr += len;
+ } else {
+ if (cctl & PL080_CONTROL_SRC_INCR)
+ bd->srcbus.addr += len;
+ if (cctl & PL080_CONTROL_DST_INCR)
+ bd->dstbus.addr += len;
+ }
BUG_ON(bd->remainder < len);
@@ -883,12 +1208,12 @@ static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x,
struct pl08x_lli_build_data *bd, u32 *cctl, u32 len,
int num_llis, size_t *total_bytes)
{
- *cctl = pl08x_cctl_bits(*cctl, 1, 1, len);
+ *cctl = pl08x_lli_control_bits(pl08x, *cctl, 1, 1, len);
pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len);
(*total_bytes) += len;
}
-#ifdef VERBOSE_DEBUG
+#if 1
static void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
const u32 *llis_va, int num_llis)
{
@@ -953,14 +1278,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
cctl = txd->cctl;
/* Find maximum width of the source bus */
- bd.srcbus.maxwidth =
- pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >>
- PL080_CONTROL_SWIDTH_SHIFT);
+ bd.srcbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, true);
/* Find maximum width of the destination bus */
- bd.dstbus.maxwidth =
- pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >>
- PL080_CONTROL_DWIDTH_SHIFT);
+ bd.dstbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, false);
list_for_each_entry(dsg, &txd->dsg_list, node) {
total_bytes = 0;
@@ -972,7 +1293,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
bd.srcbus.buswidth = bd.srcbus.maxwidth;
bd.dstbus.buswidth = bd.dstbus.maxwidth;
- pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
+ pl08x_choose_master_bus(pl08x, &bd, &mbus, &sbus, cctl);
dev_vdbg(&pl08x->adev->dev,
"src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n",
@@ -1009,8 +1330,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
* supported. Thus, we can't have scattered addresses.
*/
if (!bd.remainder) {
- u32 fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
- PL080_CONFIG_FLOW_CONTROL_SHIFT;
+ u32 fc;
+
+ /* FTDMAC020 only does memory-to-memory */
+ if (pl08x->vd->ftdmac020)
+ fc = PL080_FLOW_MEM2MEM;
+ else
+ fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
+ PL080_CONFIG_FLOW_CONTROL_SHIFT;
if (!((fc >= PL080_FLOW_SRC2DST_DST) &&
(fc <= PL080_FLOW_SRC2DST_SRC))) {
dev_err(&pl08x->adev->dev, "%s sg len can't be zero",
@@ -1027,8 +1354,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
return 0;
}
- cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
- bd.dstbus.buswidth, 0);
+ cctl = pl08x_lli_control_bits(pl08x, cctl,
+ bd.srcbus.buswidth, bd.dstbus.buswidth,
+ 0);
pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
0, cctl, 0);
break;
@@ -1107,8 +1435,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
"size 0x%08zx (remainder 0x%08zx)\n",
__func__, lli_len, bd.remainder);
- cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
- bd.dstbus.buswidth, tsize);
+ cctl = pl08x_lli_control_bits(pl08x, cctl,
+ bd.srcbus.buswidth, bd.dstbus.buswidth,
+ tsize);
pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
lli_len, cctl, tsize);
total_bytes += lli_len;
@@ -1151,7 +1480,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
/* The final LLI terminates the LLI. */
last_lli[PL080_LLI_LLI] = 0;
/* The final LLI element shall also fire an interrupt. */
- last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
+ if (pl08x->vd->ftdmac020)
+ last_lli[PL080_LLI_CCTL] &= ~FTDMAC020_LLI_TC_MSK;
+ else
+ last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
}
pl08x_dump_lli(pl08x, llis_va, num_llis);
@@ -1317,14 +1649,25 @@ static const struct burst_table burst_sizes[] = {
* will be routed to each port. We try to have source and destination
* on separate ports, but always respect the allowable settings.
*/
-static u32 pl08x_select_bus(u8 src, u8 dst)
+static u32 pl08x_select_bus(bool ftdmac020, u8 src, u8 dst)
{
u32 cctl = 0;
+ u32 dst_ahb2;
+ u32 src_ahb2;
+
+ /* The FTDMAC020 uses different bits to indicate src/dst bus */
+ if (ftdmac020) {
+ dst_ahb2 = FTDMAC020_LLI_DST_SEL;
+ src_ahb2 = FTDMAC020_LLI_SRC_SEL;
+ } else {
+ dst_ahb2 = PL080_CONTROL_DST_AHB2;
+ src_ahb2 = PL080_CONTROL_SRC_AHB2;
+ }
if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
- cctl |= PL080_CONTROL_DST_AHB2;
+ cctl |= dst_ahb2;
if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
- cctl |= PL080_CONTROL_SRC_AHB2;
+ cctl |= src_ahb2;
return cctl;
}
@@ -1412,14 +1755,134 @@ static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan)
{
struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
- if (txd) {
+ if (txd)
INIT_LIST_HEAD(&txd->dsg_list);
+ return txd;
+}
- /* Always enable error and terminal interrupts */
- txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
- PL080_CONFIG_TC_IRQ_MASK;
+static u32 pl08x_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+ u32 cctl = 0;
+
+ /* Conjure cctl */
+ switch (pl08x->pd->memcpy_burst_size) {
+ default:
+ dev_err(&pl08x->adev->dev,
+ "illegal burst size for memcpy, set to 1\n");
+ /* Fall through */
+ case PL08X_BURST_SZ_1:
+ cctl |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_4:
+ cctl |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_8:
+ cctl |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_16:
+ cctl |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_32:
+ cctl |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_64:
+ cctl |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_128:
+ cctl |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_256:
+ cctl |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
}
- return txd;
+
+ switch (pl08x->pd->memcpy_bus_width) {
+ default:
+ dev_err(&pl08x->adev->dev,
+ "illegal bus width for memcpy, set to 8 bits\n");
+ /* Fall through */
+ case PL08X_BUS_WIDTH_8_BITS:
+ cctl |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
+ PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_16_BITS:
+ cctl |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
+ PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_32_BITS:
+ cctl |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
+ PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ }
+
+ /* Protection flags */
+ if (pl08x->pd->memcpy_prot_buff)
+ cctl |= PL080_CONTROL_PROT_BUFF;
+ if (pl08x->pd->memcpy_prot_cache)
+ cctl |= PL080_CONTROL_PROT_CACHE;
+
+ /* We are the kernel, so we are in privileged mode */
+ cctl |= PL080_CONTROL_PROT_SYS;
+
+ /* Both to be incremented or the code will break */
+ cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+
+ if (pl08x->vd->dualmaster)
+ cctl |= pl08x_select_bus(false,
+ pl08x->mem_buses,
+ pl08x->mem_buses);
+
+ return cctl;
+}
+
+static u32 pl08x_ftdmac020_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+ u32 cctl = 0;
+
+ /* Conjure cctl */
+ switch (pl08x->pd->memcpy_bus_width) {
+ default:
+ dev_err(&pl08x->adev->dev,
+ "illegal bus width for memcpy, set to 8 bits\n");
+ /* Fall through */
+ case PL08X_BUS_WIDTH_8_BITS:
+ cctl |= PL080_WIDTH_8BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+ PL080_WIDTH_8BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_16_BITS:
+ cctl |= PL080_WIDTH_16BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+ PL080_WIDTH_16BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_32_BITS:
+ cctl |= PL080_WIDTH_32BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+ PL080_WIDTH_32BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ }
+
+ /*
+ * By default mask the TC IRQ on all LLIs, it will be unmasked on
+ * the last LLI item by other code.
+ */
+ cctl |= FTDMAC020_LLI_TC_MSK;
+
+ /*
+ * Both to be incremented so leave bits FTDMAC020_LLI_SRCAD_CTL
+ * and FTDMAC020_LLI_DSTAD_CTL as zero
+ */
+ if (pl08x->vd->dualmaster)
+ cctl |= pl08x_select_bus(true,
+ pl08x->mem_buses,
+ pl08x->mem_buses);
+
+ return cctl;
}
/*
@@ -1452,18 +1915,16 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
dsg->src_addr = src;
dsg->dst_addr = dest;
dsg->len = len;
-
- /* Set platform data for m2m */
- txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
- txd->cctl = pl08x->pd->memcpy_channel.cctl_memcpy &
- ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
-
- /* Both to be incremented or the code will break */
- txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
-
- if (pl08x->vd->dualmaster)
- txd->cctl |= pl08x_select_bus(pl08x->mem_buses,
- pl08x->mem_buses);
+ if (pl08x->vd->ftdmac020) {
+ /* Writing CCFG zero ENABLES all interrupts */
+ txd->ccfg = 0;
+ txd->cctl = pl08x_ftdmac020_memcpy_cctl(pl08x);
+ } else {
+ txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+ PL080_CONFIG_TC_IRQ_MASK |
+ PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+ txd->cctl = pl08x_memcpy_cctl(pl08x);
+ }
ret = pl08x_fill_llis_for_desc(plchan->host, txd);
if (!ret) {
@@ -1527,7 +1988,7 @@ static struct pl08x_txd *pl08x_init_txd(
return NULL;
}
- txd->cctl = cctl | pl08x_select_bus(src_buses, dst_buses);
+ txd->cctl = cctl | pl08x_select_bus(false, src_buses, dst_buses);
if (plchan->cfg.device_fc)
tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
@@ -1536,7 +1997,9 @@ static struct pl08x_txd *pl08x_init_txd(
tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
PL080_FLOW_PER2MEM;
- txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+ txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+ PL080_CONFIG_TC_IRQ_MASK |
+ tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
ret = pl08x_request_mux(plchan);
if (ret < 0) {
@@ -1813,6 +2276,11 @@ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
/* The Nomadik variant does not have the config register */
if (pl08x->vd->nomadik)
return;
+ /* The FTDMAC020 variant does this in another register */
+ if (pl08x->vd->ftdmac020) {
+ writel(PL080_CONFIG_ENABLE, pl08x->base + FTDMAC020_CSR);
+ return;
+ }
writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
}
@@ -1925,9 +2393,16 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
chan->signal = i;
pl08x_dma_slave_init(chan);
} else {
- chan->cd = &pl08x->pd->memcpy_channel;
+ chan->cd = kzalloc(sizeof(*chan->cd), GFP_KERNEL);
+ if (!chan->cd) {
+ kfree(chan);
+ return -ENOMEM;
+ }
+ chan->cd->bus_id = "memcpy";
+ chan->cd->periph_buses = pl08x->pd->mem_buses;
chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
if (!chan->name) {
+ kfree(chan->cd);
kfree(chan);
return -ENOMEM;
}
@@ -2009,12 +2484,15 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data)
pl08x_state_str(chan->state));
}
- seq_printf(s, "\nPL08x virtual slave channels:\n");
- seq_printf(s, "CHANNEL:\tSTATE:\n");
- seq_printf(s, "--------\t------\n");
- list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) {
- seq_printf(s, "%s\t\t%s\n", chan->name,
- pl08x_state_str(chan->state));
+ if (pl08x->has_slave) {
+ seq_printf(s, "\nPL08x virtual slave channels:\n");
+ seq_printf(s, "CHANNEL:\tSTATE:\n");
+ seq_printf(s, "--------\t------\n");
+ list_for_each_entry(chan, &pl08x->slave.channels,
+ vc.chan.device_node) {
+ seq_printf(s, "%s\t\t%s\n", chan->name,
+ pl08x_state_str(chan->state));
+ }
}
return 0;
@@ -2052,6 +2530,10 @@ static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x,
{
struct pl08x_dma_chan *chan;
+ /* Trying to get a slave channel from something with no slave support */
+ if (!pl08x->has_slave)
+ return NULL;
+
list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) {
if (chan->signal == id)
return &chan->vc.chan;
@@ -2099,7 +2581,6 @@ static int pl08x_of_probe(struct amba_device *adev,
{
struct pl08x_platform_data *pd;
struct pl08x_channel_data *chanp = NULL;
- u32 cctl_memcpy = 0;
u32 val;
int ret;
int i;
@@ -2139,36 +2620,28 @@ static int pl08x_of_probe(struct amba_device *adev,
dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n");
/* Fall through */
case 1:
- cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_1;
break;
case 4:
- cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_4;
break;
case 8:
- cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_8;
break;
case 16:
- cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_16;
break;
case 32:
- cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_32;
break;
case 64:
- cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_64;
break;
case 128:
- cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_128;
break;
case 256:
- cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
- PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+ pd->memcpy_burst_size = PL08X_BURST_SZ_256;
break;
}
@@ -2182,48 +2655,40 @@ static int pl08x_of_probe(struct amba_device *adev,
dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n");
/* Fall through */
case 8:
- cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
- PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ pd->memcpy_bus_width = PL08X_BUS_WIDTH_8_BITS;
break;
case 16:
- cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
- PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ pd->memcpy_bus_width = PL08X_BUS_WIDTH_16_BITS;
break;
case 32:
- cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
- PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ pd->memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS;
break;
}
- /* This is currently the only thing making sense */
- cctl_memcpy |= PL080_CONTROL_PROT_SYS;
-
- /* Set up memcpy channel */
- pd->memcpy_channel.bus_id = "memcpy";
- pd->memcpy_channel.cctl_memcpy = cctl_memcpy;
- /* Use the buses that can access memory, obviously */
- pd->memcpy_channel.periph_buses = pd->mem_buses;
-
/*
* Allocate channel data for all possible slave channels (one
* for each possible signal), channels will then be allocated
* for a device and have it's AHB interfaces set up at
* translation time.
*/
- chanp = devm_kcalloc(&adev->dev,
- pl08x->vd->signals,
- sizeof(struct pl08x_channel_data),
- GFP_KERNEL);
- if (!chanp)
- return -ENOMEM;
+ if (pl08x->vd->signals) {
+ chanp = devm_kcalloc(&adev->dev,
+ pl08x->vd->signals,
+ sizeof(struct pl08x_channel_data),
+ GFP_KERNEL);
+ if (!chanp)
+ return -ENOMEM;
- pd->slave_channels = chanp;
- for (i = 0; i < pl08x->vd->signals; i++) {
- /* chanp->periph_buses will be assigned at translation */
- chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i);
- chanp++;
+ pd->slave_channels = chanp;
+ for (i = 0; i < pl08x->vd->signals; i++) {
+ /*
+ * chanp->periph_buses will be assigned at translation
+ */
+ chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i);
+ chanp++;
+ }
+ pd->num_slave_channels = pl08x->vd->signals;
}
- pd->num_slave_channels = pl08x->vd->signals;
pl08x->pd = pd;
@@ -2242,7 +2707,7 @@ static inline int pl08x_of_probe(struct amba_device *adev,
static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
{
struct pl08x_driver_data *pl08x;
- const struct vendor_data *vd = id->data;
+ struct vendor_data *vd = id->data;
struct device_node *np = adev->dev.of_node;
u32 tsfr_size;
int ret = 0;
@@ -2268,6 +2733,34 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
pl08x->adev = adev;
pl08x->vd = vd;
+ pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+ if (!pl08x->base) {
+ ret = -ENOMEM;
+ goto out_no_ioremap;
+ }
+
+ if (vd->ftdmac020) {
+ u32 val;
+
+ val = readl(pl08x->base + FTDMAC020_REVISION);
+ dev_info(&pl08x->adev->dev, "FTDMAC020 %d.%d rel %d\n",
+ (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+ val = readl(pl08x->base + FTDMAC020_FEATURE);
+ dev_info(&pl08x->adev->dev, "FTDMAC020 %d channels, "
+ "%s built-in bridge, %s, %s linked lists\n",
+ (val >> 12) & 0x0f,
+ (val & BIT(10)) ? "no" : "has",
+ (val & BIT(9)) ? "AHB0 and AHB1" : "AHB0",
+ (val & BIT(8)) ? "supports" : "does not support");
+
+ /* Vendor data from feature register */
+ if (!(val & BIT(8)))
+ dev_warn(&pl08x->adev->dev,
+ "linked lists not supported, required\n");
+ vd->channels = (val >> 12) & 0x0f;
+ vd->dualmaster = !!(val & BIT(9));
+ }
+
/* Initialize memcpy engine */
dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
pl08x->memcpy.dev = &adev->dev;
@@ -2284,25 +2777,38 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM);
pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ if (vd->ftdmac020)
+ pl08x->memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES;
- /* Initialize slave engine */
- dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
- dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
- pl08x->slave.dev = &adev->dev;
- pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
- pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
- pl08x->slave.device_tx_status = pl08x_dma_tx_status;
- pl08x->slave.device_issue_pending = pl08x_issue_pending;
- pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
- pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
- pl08x->slave.device_config = pl08x_config;
- pl08x->slave.device_pause = pl08x_pause;
- pl08x->slave.device_resume = pl08x_resume;
- pl08x->slave.device_terminate_all = pl08x_terminate_all;
- pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS;
- pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
- pl08x->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
- pl08x->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+
+ /*
+ * Initialize slave engine, if the block has no signals, that means
+ * we have no slave support.
+ */
+ if (vd->signals) {
+ pl08x->has_slave = true;
+ dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
+ pl08x->slave.dev = &adev->dev;
+ pl08x->slave.device_free_chan_resources =
+ pl08x_free_chan_resources;
+ pl08x->slave.device_prep_dma_interrupt =
+ pl08x_prep_dma_interrupt;
+ pl08x->slave.device_tx_status = pl08x_dma_tx_status;
+ pl08x->slave.device_issue_pending = pl08x_issue_pending;
+ pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+ pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
+ pl08x->slave.device_config = pl08x_config;
+ pl08x->slave.device_pause = pl08x_pause;
+ pl08x->slave.device_resume = pl08x_resume;
+ pl08x->slave.device_terminate_all = pl08x_terminate_all;
+ pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+ pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+ pl08x->slave.directions =
+ BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ pl08x->slave.residue_granularity =
+ DMA_RESIDUE_GRANULARITY_SEGMENT;
+ }
/* Get the platform data */
pl08x->pd = dev_get_platdata(&adev->dev);
@@ -2344,19 +2850,18 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
goto out_no_lli_pool;
}
- pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
- if (!pl08x->base) {
- ret = -ENOMEM;
- goto out_no_ioremap;
- }
-
/* Turn on the PL08x */
pl08x_ensure_on(pl08x);
- /* Attach the interrupt handler */
- writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+ /* Clear any pending interrupts */
+ if (vd->ftdmac020)
+ /* This variant has error IRQs in bits 16-19 */
+ writel(0x0000FFFF, pl08x->base + PL080_ERR_CLEAR);
+ else
+ writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
+ /* Attach the interrupt handler */
ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
if (ret) {
dev_err(&adev->dev, "%s failed to request interrupt %d\n",
@@ -2377,7 +2882,25 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
ch->id = i;
ch->base = pl08x->base + PL080_Cx_BASE(i);
- ch->reg_config = ch->base + vd->config_offset;
+ if (vd->ftdmac020) {
+ /* FTDMAC020 has a special channel busy register */
+ ch->reg_busy = ch->base + FTDMAC020_CH_BUSY;
+ ch->reg_config = ch->base + FTDMAC020_CH_CFG;
+ ch->reg_control = ch->base + FTDMAC020_CH_CSR;
+ ch->reg_src = ch->base + FTDMAC020_CH_SRC_ADDR;
+ ch->reg_dst = ch->base + FTDMAC020_CH_DST_ADDR;
+ ch->reg_lli = ch->base + FTDMAC020_CH_LLP;
+ ch->ftdmac020 = true;
+ } else {
+ ch->reg_config = ch->base + vd->config_offset;
+ ch->reg_control = ch->base + PL080_CH_CONTROL;
+ ch->reg_src = ch->base + PL080_CH_SRC_ADDR;
+ ch->reg_dst = ch->base + PL080_CH_DST_ADDR;
+ ch->reg_lli = ch->base + PL080_CH_LLI;
+ }
+ if (vd->pl080s)
+ ch->pl080s = true;
+
spin_lock_init(&ch->lock);
/*
@@ -2410,13 +2933,15 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
}
/* Register slave channels */
- ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
- pl08x->pd->num_slave_channels, true);
- if (ret < 0) {
- dev_warn(&pl08x->adev->dev,
- "%s failed to enumerate slave channels - %d\n",
- __func__, ret);
- goto out_no_slave;
+ if (pl08x->has_slave) {
+ ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
+ pl08x->pd->num_slave_channels, true);
+ if (ret < 0) {
+ dev_warn(&pl08x->adev->dev,
+ "%s failed to enumerate slave channels - %d\n",
+ __func__, ret);
+ goto out_no_slave;
+ }
}
ret = dma_async_device_register(&pl08x->memcpy);
@@ -2427,12 +2952,14 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
goto out_no_memcpy_reg;
}
- ret = dma_async_device_register(&pl08x->slave);
- if (ret) {
- dev_warn(&pl08x->adev->dev,
+ if (pl08x->has_slave) {
+ ret = dma_async_device_register(&pl08x->slave);
+ if (ret) {
+ dev_warn(&pl08x->adev->dev,
"%s failed to register slave as an async device - %d\n",
__func__, ret);
- goto out_no_slave_reg;
+ goto out_no_slave_reg;
+ }
}
amba_set_drvdata(adev, pl08x);
@@ -2446,7 +2973,8 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
out_no_slave_reg:
dma_async_device_unregister(&pl08x->memcpy);
out_no_memcpy_reg:
- pl08x_free_virtual_channels(&pl08x->slave);
+ if (pl08x->has_slave)
+ pl08x_free_virtual_channels(&pl08x->slave);
out_no_slave:
pl08x_free_virtual_channels(&pl08x->memcpy);
out_no_memcpy:
@@ -2454,11 +2982,11 @@ out_no_memcpy:
out_no_phychans:
free_irq(adev->irq[0], pl08x);
out_no_irq:
- iounmap(pl08x->base);
-out_no_ioremap:
dma_pool_destroy(pl08x->pool);
out_no_lli_pool:
out_no_platdata:
+ iounmap(pl08x->base);
+out_no_ioremap:
kfree(pl08x);
out_no_pl08x:
amba_release_regions(adev);
@@ -2499,6 +3027,12 @@ static struct vendor_data vendor_pl081 = {
.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
};
+static struct vendor_data vendor_ftdmac020 = {
+ .config_offset = PL080_CH_CONFIG,
+ .ftdmac020 = true,
+ .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
static struct amba_id pl08x_ids[] = {
/* Samsung PL080S variant */
{
@@ -2524,6 +3058,12 @@ static struct amba_id pl08x_ids[] = {
.mask = 0x00ffffff,
.data = &vendor_nomadik,
},
+ /* Faraday Technology FTDMAC020 */
+ {
+ .id = 0x0003b080,
+ .mask = 0x000fffff,
+ .data = &vendor_ftdmac020,
+ },
{ 0, 0 },
};
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
new file mode 100644
index 000000000000..e41bbc7cb094
--- /dev/null
+++ b/drivers/dma/bcm-sba-raid.c
@@ -0,0 +1,1785 @@
+/*
+ * Copyright (C) 2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Broadcom SBA RAID Driver
+ *
+ * The Broadcom stream buffer accelerator (SBA) provides offloading
+ * capabilities for RAID operations. The SBA offload engine is accessible
+ * via Broadcom SoC specific ring manager. Two or more offload engines
+ * can share the same Broadcom SoC specific ring manager; for this reason
+ * the ring manager driver is implemented as a mailbox controller driver
+ * and the offload engine drivers are implemented as mailbox clients.
+ *
+ * Typically, Broadcom SoC specific ring manager will implement larger
+ * number of hardware rings over one or more SBA hardware devices. By
+ * design, the internal buffer size of SBA hardware device is limited
+ * but all offload operations supported by SBA can be broken down into
+ * multiple small size requests and executed in parallel on multiple SBA
+ * hardware devices for achieving high throughput.
+ *
+ * The Broadcom SBA RAID driver does not require any register programming
+ * except submitting requests to the SBA hardware device via mailbox channels.
+ * This driver implements a DMA device with one DMA channel using a set
+ * of mailbox channels provided by Broadcom SoC specific ring manager
+ * driver. To exploit parallelism (as described above), all DMA requests
+ * coming to the SBA RAID DMA channel are broken down to smaller requests
+ * and submitted to multiple mailbox channels in round-robin fashion.
+ * For having more SBA DMA channels, we can create more SBA device nodes
+ * in Broadcom SoC specific DTS based on number of hardware rings supported
+ * by Broadcom SoC ring manager.
+ */
+
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/list.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox/brcm-message.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/raid/pq.h>
+
+#include "dmaengine.h"
+
+/* SBA command related defines */
+#define SBA_TYPE_SHIFT 48
+#define SBA_TYPE_MASK GENMASK(1, 0)
+#define SBA_TYPE_A 0x0
+#define SBA_TYPE_B 0x2
+#define SBA_TYPE_C 0x3
+#define SBA_USER_DEF_SHIFT 32
+#define SBA_USER_DEF_MASK GENMASK(15, 0)
+#define SBA_R_MDATA_SHIFT 24
+#define SBA_R_MDATA_MASK GENMASK(7, 0)
+#define SBA_C_MDATA_MS_SHIFT 18
+#define SBA_C_MDATA_MS_MASK GENMASK(1, 0)
+#define SBA_INT_SHIFT 17
+#define SBA_INT_MASK BIT(0)
+#define SBA_RESP_SHIFT 16
+#define SBA_RESP_MASK BIT(0)
+#define SBA_C_MDATA_SHIFT 8
+#define SBA_C_MDATA_MASK GENMASK(7, 0)
+#define SBA_C_MDATA_BNUMx_SHIFT(__bnum) (2 * (__bnum))
+#define SBA_C_MDATA_BNUMx_MASK GENMASK(1, 0)
+#define SBA_C_MDATA_DNUM_SHIFT 5
+#define SBA_C_MDATA_DNUM_MASK GENMASK(4, 0)
+#define SBA_C_MDATA_LS(__v) ((__v) & 0xff)
+#define SBA_C_MDATA_MS(__v) (((__v) >> 8) & 0x3)
+#define SBA_CMD_SHIFT 0
+#define SBA_CMD_MASK GENMASK(3, 0)
+#define SBA_CMD_ZERO_BUFFER 0x4
+#define SBA_CMD_ZERO_ALL_BUFFERS 0x8
+#define SBA_CMD_LOAD_BUFFER 0x9
+#define SBA_CMD_XOR 0xa
+#define SBA_CMD_GALOIS_XOR 0xb
+#define SBA_CMD_WRITE_BUFFER 0xc
+#define SBA_CMD_GALOIS 0xe
+
+/* Driver helper macros */
+#define to_sba_request(tx) \
+ container_of(tx, struct sba_request, tx)
+#define to_sba_device(dchan) \
+ container_of(dchan, struct sba_device, dma_chan)
+
+enum sba_request_state {
+ SBA_REQUEST_STATE_FREE = 1,
+ SBA_REQUEST_STATE_ALLOCED = 2,
+ SBA_REQUEST_STATE_PENDING = 3,
+ SBA_REQUEST_STATE_ACTIVE = 4,
+ SBA_REQUEST_STATE_RECEIVED = 5,
+ SBA_REQUEST_STATE_COMPLETED = 6,
+ SBA_REQUEST_STATE_ABORTED = 7,
+};
+
+struct sba_request {
+ /* Global state */
+ struct list_head node;
+ struct sba_device *sba;
+ enum sba_request_state state;
+ bool fence;
+ /* Chained requests management */
+ struct sba_request *first;
+ struct list_head next;
+ unsigned int next_count;
+ atomic_t next_pending_count;
+ /* BRCM message data */
+ void *resp;
+ dma_addr_t resp_dma;
+ struct brcm_sba_command *cmds;
+ struct brcm_message msg;
+ struct dma_async_tx_descriptor tx;
+};
+
+enum sba_version {
+ SBA_VER_1 = 0,
+ SBA_VER_2
+};
+
+struct sba_device {
+ /* Underlying device */
+ struct device *dev;
+ /* DT configuration parameters */
+ enum sba_version ver;
+ /* Derived configuration parameters */
+ u32 max_req;
+ u32 hw_buf_size;
+ u32 hw_resp_size;
+ u32 max_pq_coefs;
+ u32 max_pq_srcs;
+ u32 max_cmd_per_req;
+ u32 max_xor_srcs;
+ u32 max_resp_pool_size;
+ u32 max_cmds_pool_size;
+ /* Mailbox client and Mailbox channels */
+ struct mbox_client client;
+ int mchans_count;
+ atomic_t mchans_current;
+ struct mbox_chan **mchans;
+ struct device *mbox_dev;
+ /* DMA device and DMA channel */
+ struct dma_device dma_dev;
+ struct dma_chan dma_chan;
+ /* DMA channel resources */
+ void *resp_base;
+ dma_addr_t resp_dma_base;
+ void *cmds_base;
+ dma_addr_t cmds_dma_base;
+ spinlock_t reqs_lock;
+ struct sba_request *reqs;
+ bool reqs_fence;
+ struct list_head reqs_alloc_list;
+ struct list_head reqs_pending_list;
+ struct list_head reqs_active_list;
+ struct list_head reqs_received_list;
+ struct list_head reqs_completed_list;
+ struct list_head reqs_aborted_list;
+ struct list_head reqs_free_list;
+ int reqs_free_count;
+};
+
+/* ====== SBA command helper routines ===== */
+
+static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask)
+{
+ cmd &= ~((u64)mask << shift);
+ cmd |= ((u64)(val & mask) << shift);
+ return cmd;
+}
+
+static inline u32 __pure sba_cmd_load_c_mdata(u32 b0)
+{
+ return b0 & SBA_C_MDATA_BNUMx_MASK;
+}
+
+static inline u32 __pure sba_cmd_write_c_mdata(u32 b0)
+{
+ return b0 & SBA_C_MDATA_BNUMx_MASK;
+}
+
+static inline u32 __pure sba_cmd_xor_c_mdata(u32 b1, u32 b0)
+{
+ return (b0 & SBA_C_MDATA_BNUMx_MASK) |
+ ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1));
+}
+
+static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
+{
+ return (b0 & SBA_C_MDATA_BNUMx_MASK) |
+ ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1)) |
+ ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT);
+}
+
+/* ====== Channel resource management routines ===== */
+
+static struct sba_request *sba_alloc_request(struct sba_device *sba)
+{
+ unsigned long flags;
+ struct sba_request *req = NULL;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ req = list_first_entry_or_null(&sba->reqs_free_list,
+ struct sba_request, node);
+ if (req) {
+ list_move_tail(&req->node, &sba->reqs_alloc_list);
+ req->state = SBA_REQUEST_STATE_ALLOCED;
+ req->fence = false;
+ req->first = req;
+ INIT_LIST_HEAD(&req->next);
+ req->next_count = 1;
+ atomic_set(&req->next_pending_count, 1);
+
+ sba->reqs_free_count--;
+
+ dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+ }
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+ return req;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_pending_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ req->state = SBA_REQUEST_STATE_PENDING;
+ list_move_tail(&req->node, &sba->reqs_pending_list);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static bool _sba_active_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+ if (sba->reqs_fence)
+ return false;
+ req->state = SBA_REQUEST_STATE_ACTIVE;
+ list_move_tail(&req->node, &sba->reqs_active_list);
+ if (req->fence)
+ sba->reqs_fence = true;
+ return true;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_abort_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ req->state = SBA_REQUEST_STATE_ABORTED;
+ list_move_tail(&req->node, &sba->reqs_aborted_list);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_free_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ req->state = SBA_REQUEST_STATE_FREE;
+ list_move_tail(&req->node, &sba->reqs_free_list);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+ sba->reqs_free_count++;
+}
+
+/* Mark @req RECEIVED and queue it on its device's received list. */
+static void sba_received_request(struct sba_request *req)
+{
+	struct sba_device *sba = req->sba;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&sba->reqs_lock, irq_flags);
+
+	list_move_tail(&req->node, &sba->reqs_received_list);
+	req->state = SBA_REQUEST_STATE_RECEIVED;
+
+	spin_unlock_irqrestore(&sba->reqs_lock, irq_flags);
+}
+
+/*
+ * Move @req and every request chained off it to the completed list,
+ * marking each one COMPLETED, then drop the fence if nothing is active.
+ */
+static void sba_complete_chained_requests(struct sba_request *req)
+{
+	struct sba_device *sba = req->sba;
+	struct sba_request *chained;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&sba->reqs_lock, irq_flags);
+
+	/* Head of the chain first ... */
+	list_move_tail(&req->node, &sba->reqs_completed_list);
+	req->state = SBA_REQUEST_STATE_COMPLETED;
+
+	/* ... then everything linked through its 'next' list */
+	list_for_each_entry(chained, &req->next, next) {
+		list_move_tail(&chained->node, &sba->reqs_completed_list);
+		chained->state = SBA_REQUEST_STATE_COMPLETED;
+	}
+
+	if (list_empty(&sba->reqs_active_list))
+		sba->reqs_fence = false;
+
+	spin_unlock_irqrestore(&sba->reqs_lock, irq_flags);
+}
+
+/* Give @req and every request chained off it back to the free list. */
+static void sba_free_chained_requests(struct sba_request *req)
+{
+	struct sba_device *sba = req->sba;
+	struct sba_request *chained;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&sba->reqs_lock, irq_flags);
+
+	_sba_free_request(sba, req);
+	list_for_each_entry(chained, &req->next, next) {
+		_sba_free_request(sba, chained);
+	}
+
+	spin_unlock_irqrestore(&sba->reqs_lock, irq_flags);
+}
+
+/*
+ * Append @req to the chain headed by @first and refresh the head's
+ * pending counter so that it matches the new chain length.
+ */
+static void sba_chain_request(struct sba_request *first,
+			      struct sba_request *req)
+{
+	struct sba_device *sba = req->sba;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&sba->reqs_lock, irq_flags);
+
+	req->first = first;
+	list_add_tail(&req->next, &first->next);
+
+	first->next_count++;
+	atomic_set(&first->next_pending_count, first->next_count);
+
+	spin_unlock_irqrestore(&sba->reqs_lock, irq_flags);
+}
+
+/*
+ * Release every request that is not on the pending list: allocated,
+ * received and completed requests are freed, active ones are aborted.
+ */
+static void sba_cleanup_nonpending_requests(struct sba_device *sba)
+{
+	struct sba_request *cur, *tmp;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&sba->reqs_lock, irq_flags);
+
+	/* Allocated but never submitted */
+	list_for_each_entry_safe(cur, tmp, &sba->reqs_alloc_list, node) {
+		_sba_free_request(sba, cur);
+	}
+
+	/* Response already received */
+	list_for_each_entry_safe(cur, tmp, &sba->reqs_received_list, node) {
+		_sba_free_request(sba, cur);
+	}
+
+	/* Completed, waiting for an ack */
+	list_for_each_entry_safe(cur, tmp, &sba->reqs_completed_list, node) {
+		_sba_free_request(sba, cur);
+	}
+
+	/*
+	 * Active requests can only be flagged as aborted here; they are
+	 * expected to be freed eventually by sba_receive_message().
+	 */
+	list_for_each_entry_safe(cur, tmp, &sba->reqs_active_list, node) {
+		_sba_abort_request(sba, cur);
+	}
+
+	spin_unlock_irqrestore(&sba->reqs_lock, irq_flags);
+}
+
+/* Free every request that is still waiting on the pending list. */
+static void sba_cleanup_pending_requests(struct sba_device *sba)
+{
+	struct sba_request *cur, *tmp;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&sba->reqs_lock, irq_flags);
+
+	list_for_each_entry_safe(cur, tmp, &sba->reqs_pending_list, node) {
+		_sba_free_request(sba, cur);
+	}
+
+	spin_unlock_irqrestore(&sba->reqs_lock, irq_flags);
+}
+
+/* ====== DMAENGINE callbacks ===== */
+
+/*
+ * dmaengine callback: nothing is really released because the channel
+ * resources are pre-allocated; requests that are not pending are simply
+ * recycled so the pre-allocated pool can be re-used next time.
+ */
+static void sba_free_chan_resources(struct dma_chan *dchan)
+{
+	struct sba_device *sba = to_sba_device(dchan);
+
+	sba_cleanup_nonpending_requests(sba);
+}
+
+/* dmaengine callback: drop all pending requests; always returns 0. */
+static int sba_device_terminate_all(struct dma_chan *dchan)
+{
+	struct sba_device *sba = to_sba_device(dchan);
+
+	sba_cleanup_pending_requests(sba);
+
+	return 0;
+}
+
+/*
+ * Send one request to a mailbox channel chosen in round-robin order.
+ *
+ * Returns 0 on success, otherwise the negative error reported either by
+ * mbox_send_message() or through req->msg.error.
+ */
+static int sba_send_mbox_request(struct sba_device *sba,
+				 struct sba_request *req)
+{
+	unsigned int mchans_idx;
+	int ret = 0;
+
+	/*
+	 * Select mailbox channel in round-robin fashion. The counter is
+	 * reduced as an unsigned value so the resulting index can never be
+	 * negative once the atomic counter has wrapped around.
+	 */
+	mchans_idx = (unsigned int)atomic_inc_return(&sba->mchans_current);
+	mchans_idx %= sba->mchans_count;
+
+	/* Send message for the request */
+	req->msg.error = 0;
+	ret = mbox_send_message(sba->mchans[mchans_idx], &req->msg);
+	if (ret < 0) {
+		dev_err(sba->dev, "send message failed with error %d", ret);
+		return ret;
+	}
+
+	/* The message itself may also carry an error */
+	ret = req->msg.error;
+	if (ret < 0) {
+		dev_err(sba->dev, "message error %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * dmaengine callback: push pending requests to the mailbox, oldest first.
+ *
+ * Processing stops as soon as a request cannot be made active (a fence is
+ * in force) or a send fails; a request whose send failed is put back on
+ * the pending list.
+ */
+static void sba_issue_pending(struct dma_chan *dchan)
+{
+	int ret;
+	unsigned long flags;
+	struct sba_request *req;
+	struct sba_device *sba = to_sba_device(dchan);
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+
+	/*
+	 * reqs_lock is dropped around the mailbox send, so the pending list
+	 * may change underneath us. Re-read the list head on every iteration
+	 * instead of relying on a next pointer cached before unlocking.
+	 */
+	while (!list_empty(&sba->reqs_pending_list)) {
+		req = list_first_entry(&sba->reqs_pending_list,
+				       struct sba_request, node);
+
+		/* Try to make request active */
+		if (!_sba_active_request(sba, req))
+			break;
+
+		/* Send request to mailbox channel */
+		spin_unlock_irqrestore(&sba->reqs_lock, flags);
+		ret = sba_send_mbox_request(sba, req);
+		spin_lock_irqsave(&sba->reqs_lock, flags);
+
+		/* If something went wrong then keep request pending */
+		if (ret < 0) {
+			_sba_pending_request(sba, req);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+/*
+ * tx_submit hook: assign a cookie to the descriptor and move its request,
+ * plus every request chained off it, to the pending list.
+ *
+ * Returns the assigned cookie, or -EINVAL when @tx is NULL.
+ */
+static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct sba_request *head, *chained;
+	struct sba_device *sba;
+	dma_cookie_t cookie;
+	unsigned long irq_flags;
+
+	if (unlikely(!tx))
+		return -EINVAL;
+
+	head = to_sba_request(tx);
+	sba = to_sba_device(tx->chan);
+
+	spin_lock_irqsave(&sba->reqs_lock, irq_flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	_sba_pending_request(sba, head);
+	list_for_each_entry(chained, &head->next, next) {
+		_sba_pending_request(sba, chained);
+	}
+
+	spin_unlock_irqrestore(&sba->reqs_lock, irq_flags);
+
+	return cookie;
+}
+
+/*
+ * dmaengine callback: report the status of @cookie.
+ *
+ * If the cookie is not complete yet, every mailbox channel is peeked once
+ * and the status is then looked up a second time.
+ */
+static enum dma_status sba_tx_status(struct dma_chan *dchan,
+				     dma_cookie_t cookie,
+				     struct dma_tx_state *txstate)
+{
+	int mchan_idx;
+	enum dma_status ret;
+	struct sba_device *sba = to_sba_device(dchan);
+
+	/* Fast path: no need to poll the mailbox for a finished cookie */
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	/* Peek every mailbox channel before looking at the cookie again */
+	for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
+		mbox_client_peek_data(sba->mchans[mchan_idx]);
+
+	return dma_cookie_status(dchan, cookie, txstate);
+}
+
+/*
+ * Build the two-command SBA message used for an interrupt-only descriptor.
+ *
+ * The first command (Type-B) loads hw_resp_size bytes from req->resp_dma
+ * into buf0; the second (Type-A) writes buf0 back to req->resp_dma. Only the
+ * request's own response buffer is referenced.
+ *
+ * For every command the encoded 64-bit word is kept in ->cmd and also stored
+ * in little-endian form through ->cmd_dma.
+ *
+ * @req: request owning the commands; also stored as msg->ctx
+ * @cmds: command array to fill (two entries are written)
+ * @msg: mailbox message initialised to describe the command array
+ */
+static void sba_fillup_interrupt_msg(struct sba_request *req,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg)
+{
+ u64 cmd;
+ u32 c_mdata;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ /* Type-B command to load dummy data into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = req->resp_dma;
+ cmdsp->data_len = req->sba->hw_resp_size;
+ cmdsp++;
+
+ /* Type-A command to write buf0 to dummy location */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ /* A response buffer is only attached when the hardware has one */
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = req->resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = req->resp_dma;
+ cmdsp->data_len = req->sba->hw_resp_size;
+ cmdsp++;
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ /* Number of commands actually emitted above */
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+/*
+ * dmaengine callback: prepare an interrupt-only descriptor.
+ *
+ * Returns the descriptor embedded in a freshly allocated request, or NULL
+ * when no request could be allocated.
+ */
+static struct dma_async_tx_descriptor *
+sba_prep_dma_interrupt(struct dma_chan *dchan, unsigned long flags)
+{
+	struct sba_request *req = sba_alloc_request(to_sba_device(dchan));
+
+	if (!req)
+		return NULL;
+
+	/*
+	 * The request is always fenced: nothing else gets submitted
+	 * until the DMA callback for this request has been invoked.
+	 */
+	req->fence = true;
+
+	/* Build the mailbox message */
+	sba_fillup_interrupt_msg(req, req->cmds, &req->msg);
+
+	/* Prime the async_tx descriptor */
+	req->tx.cookie = -EBUSY;
+	req->tx.flags = flags;
+
+	return &req->tx;
+}
+
+/*
+ * Build the two-command SBA message that copies one chunk of memory.
+ *
+ * The first command (Type-B) loads msg_len bytes from src + msg_offset into
+ * buf0; the second (Type-A) writes buf0 to dst + msg_offset.
+ *
+ * @req: request owning the commands; also stored as msg->ctx
+ * @cmds: command array to fill (two entries are written)
+ * @msg: mailbox message initialised to describe the command array
+ * @msg_offset: offset of this chunk, added to both @dst and @src
+ * @msg_len: number of bytes handled by this message
+ * @dst: DMA address of the destination
+ * @src: DMA address of the source
+ */
+static void sba_fillup_memcpy_msg(struct sba_request *req,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t dst, dma_addr_t src)
+{
+ u64 cmd;
+ u32 c_mdata;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ /* Type-B command to load data into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ /* A response buffer is only attached when the hardware has one */
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = req->resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = dst + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ /* Number of commands actually emitted above */
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+/*
+ * Allocate and fill one request copying @len bytes at offset @off from
+ * @src to @dst. Returns NULL when no request could be allocated.
+ */
+static struct sba_request *
+sba_prep_dma_memcpy_req(struct sba_device *sba,
+			dma_addr_t off, dma_addr_t dst, dma_addr_t src,
+			size_t len, unsigned long flags)
+{
+	struct sba_request *req = sba_alloc_request(sba);
+
+	if (!req)
+		return NULL;
+
+	/* Fence only when the caller asked for it */
+	req->fence = !!(flags & DMA_PREP_FENCE);
+
+	/* Build the mailbox message */
+	sba_fillup_memcpy_msg(req, req->cmds, &req->msg,
+			      off, len, dst, src);
+
+	/* Prime the async_tx descriptor */
+	req->tx.cookie = -EBUSY;
+	req->tx.flags = flags;
+
+	return req;
+}
+
+/*
+ * dmaengine callback: prepare a memcpy as a chain of requests, each one
+ * covering at most hw_buf_size bytes.
+ *
+ * Returns the descriptor of the first request, or NULL when @len is zero
+ * or a request could not be allocated (the partial chain is freed).
+ */
+static struct dma_async_tx_descriptor *
+sba_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+		    size_t len, unsigned long flags)
+{
+	struct sba_device *sba = to_sba_device(dchan);
+	struct sba_request *head = NULL, *chunk;
+	dma_addr_t off;
+	size_t chunk_len;
+
+	for (off = 0; len; off += chunk_len, len -= chunk_len) {
+		/* Clamp this chunk to what the hardware buffer can hold */
+		chunk_len = len;
+		if (!(len < sba->hw_buf_size))
+			chunk_len = sba->hw_buf_size;
+
+		chunk = sba_prep_dma_memcpy_req(sba, off, dst, src,
+						chunk_len, flags);
+		if (!chunk) {
+			if (head)
+				sba_free_chained_requests(head);
+			return NULL;
+		}
+
+		if (!head)
+			head = chunk;
+		else
+			sba_chain_request(head, chunk);
+	}
+
+	if (!head)
+		return NULL;
+
+	return &head->tx;
+}
+
+/*
+ * Build the SBA message that XORs one chunk of several sources together.
+ *
+ * src[0] is loaded into buf0, each further source is XORed into buf0 and the
+ * result is written to dst + msg_offset. src[0] is read unconditionally, and
+ * src_cnt + 1 commands are emitted when src_cnt is at least one.
+ *
+ * @req: request owning the commands; also stored as msg->ctx
+ * @cmds: command array to fill
+ * @msg: mailbox message initialised to describe the command array
+ * @msg_offset: offset of this chunk, added to @dst and to every source
+ * @msg_len: number of bytes handled by this message
+ * @dst: DMA address of the destination
+ * @src: array of source DMA addresses
+ * @src_cnt: number of entries in @src
+ */
+static void sba_fillup_xor_msg(struct sba_request *req,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t dst, dma_addr_t *src, u32 src_cnt)
+{
+ u64 cmd;
+ u32 c_mdata;
+ unsigned int i;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ /* Type-B command to load data into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src[0] + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Type-B commands to xor data with buf0 and put it back in buf0 */
+ for (i = 1; i < src_cnt; i++) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_xor_c_mdata(0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src[i] + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ /* A response buffer is only attached when the hardware has one */
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = req->resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = dst + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ /* Number of commands actually emitted above */
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+/*
+ * Allocate and fill one request XORing @len bytes at offset @off of the
+ * @src_cnt sources in @src into @dst.
+ *
+ * File-local helper, hence static. Returns NULL when no request could be
+ * allocated.
+ */
+static struct sba_request *
+sba_prep_dma_xor_req(struct sba_device *sba,
+		     dma_addr_t off, dma_addr_t dst, dma_addr_t *src,
+		     u32 src_cnt, size_t len, unsigned long flags)
+{
+	struct sba_request *req = NULL;
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+	req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+
+	/* Fillup request message */
+	sba_fillup_xor_msg(req, req->cmds, &req->msg,
+			   off, len, dst, src, src_cnt);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return req;
+}
+
+/*
+ * dmaengine callback: prepare an XOR as a chain of requests, each one
+ * covering at most hw_buf_size bytes.
+ *
+ * Returns the descriptor of the first request, or NULL when @src_cnt
+ * exceeds max_xor_srcs, @len is zero, or a request could not be allocated
+ * (the partial chain is freed).
+ */
+static struct dma_async_tx_descriptor *
+sba_prep_dma_xor(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src,
+		 u32 src_cnt, size_t len, unsigned long flags)
+{
+	struct sba_device *sba = to_sba_device(dchan);
+	struct sba_request *head = NULL, *chunk;
+	dma_addr_t off;
+	size_t chunk_len;
+
+	/* Refuse more sources than the hardware can handle */
+	if (unlikely(src_cnt > sba->max_xor_srcs))
+		return NULL;
+
+	for (off = 0; len; off += chunk_len, len -= chunk_len) {
+		/* Clamp this chunk to what the hardware buffer can hold */
+		chunk_len = len;
+		if (!(len < sba->hw_buf_size))
+			chunk_len = sba->hw_buf_size;
+
+		chunk = sba_prep_dma_xor_req(sba, off, dst, src, src_cnt,
+					     chunk_len, flags);
+		if (!chunk) {
+			if (head)
+				sba_free_chained_requests(head);
+			return NULL;
+		}
+
+		if (!head)
+			head = chunk;
+		else
+			sba_chain_request(head, chunk);
+	}
+
+	if (!head)
+		return NULL;
+
+	return &head->tx;
+}
+
+/*
+ * Build the SBA message that generates P and/or Q for one chunk.
+ *
+ * When @pq_continue is set the old P and Q (where requested) are first loaded
+ * into buf0 and buf1; otherwise all buffers are zeroed. One GALOIS_XOR
+ * command is then emitted per source, and finally buf0 is written to P and
+ * buf1 to Q for whichever of @dst_p / @dst_q is non-NULL. At most
+ * src_cnt + 4 commands are emitted.
+ *
+ * @req: request owning the commands; also stored as msg->ctx
+ * @pq_continue: continue from the existing P/Q instead of starting from zero
+ * @cmds: command array to fill
+ * @msg: mailbox message initialised to describe the command array
+ * @msg_offset: offset of this chunk, added to every address
+ * @msg_len: number of bytes handled by this message
+ * @dst_p: pointer to the P destination address, or NULL to skip P
+ * @dst_q: pointer to the Q destination address, or NULL to skip Q
+ * @scf: per-source coefficients, looked up through raid6_gflog[]
+ * @src: array of source DMA addresses
+ * @src_cnt: number of entries in @src and @scf
+ */
+static void sba_fillup_pq_msg(struct sba_request *req,
+ bool pq_continue,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t *dst_p, dma_addr_t *dst_q,
+ const u8 *scf, dma_addr_t *src, u32 src_cnt)
+{
+ u64 cmd;
+ u32 c_mdata;
+ unsigned int i;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ if (pq_continue) {
+ /* Type-B command to load old P into buf0 */
+ if (dst_p) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-B command to load old Q into buf1 */
+ if (dst_q) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(1);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+ } else {
+ /* Type-A command to zero all buffers */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ cmdsp++;
+ }
+
+ /* Type-B commands for generate P onto buf0 and Q onto buf1 */
+ for (i = 0; i < src_cnt; i++) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_pq_c_mdata(raid6_gflog[scf[i]], 1, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ /* This command also carries the upper part of c_mdata */
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+ SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src[i] + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ if (dst_p) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = req->resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf1 */
+ if (dst_q) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(1);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = req->resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ /* Number of commands actually emitted above */
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+/*
+ * Allocate and fill one request generating P and/or Q for @len bytes at
+ * offset @off from the @src_cnt sources in @src, using the coefficients
+ * in @scf. Continuation is taken from dmaf_continue(@flags).
+ *
+ * File-local helper, hence static. Returns NULL when no request could be
+ * allocated.
+ */
+static struct sba_request *
+sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off,
+		    dma_addr_t *dst_p, dma_addr_t *dst_q, dma_addr_t *src,
+		    u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+	struct sba_request *req = NULL;
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+	req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+
+	/* Fillup request messages */
+	sba_fillup_pq_msg(req, dmaf_continue(flags),
+			  req->cmds, &req->msg,
+			  off, len, dst_p, dst_q, scf, src, src_cnt);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return req;
+}
+
+/*
+ * Build the SBA message that folds a single source into P and/or Q.
+ *
+ * P part (skipped when @dst_p is NULL): the source is loaded into buf0,
+ * XORed with the old P first when @pq_continue is set, and buf0 is written
+ * to P.
+ *
+ * Q part (skipped when @dst_q is NULL): all buffers are zeroed, the source
+ * is run through one or more GALOIS commands whose positions add up to
+ * raid6_gflog[scf], the old Q is XORed in when @pq_continue is set, and buf0
+ * is written to Q.
+ *
+ * @req: request owning the commands; also stored as msg->ctx
+ * @pq_continue: combine with the existing P/Q instead of overwriting them
+ * @cmds: command array to fill
+ * @msg: mailbox message initialised to describe the command array
+ * @msg_offset: offset of this chunk, added to every address
+ * @msg_len: number of bytes handled by this message
+ * @dst_p: pointer to the P destination address, or NULL to skip P
+ * @dst_q: pointer to the Q destination address, or NULL to skip Q
+ * @src: DMA address of the single source
+ * @scf: coefficient for the source, looked up through raid6_gflog[]
+ */
+static void sba_fillup_pq_single_msg(struct sba_request *req,
+ bool pq_continue,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t *dst_p, dma_addr_t *dst_q,
+ dma_addr_t src, u8 scf)
+{
+ u64 cmd;
+ u32 c_mdata;
+ u8 pos, dpos = raid6_gflog[scf];
+ struct brcm_sba_command *cmdsp = cmds;
+
+ if (!dst_p)
+ goto skip_p;
+
+ if (pq_continue) {
+ /* Type-B command to load old P into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /*
+ * Type-B commands to xor data with buf0 and put it
+ * back in buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_xor_c_mdata(0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ } else {
+ /* Type-B command to load source data into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = req->resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+skip_p:
+ if (!dst_q)
+ goto skip_q;
+
+ /* Type-A command to zero all buffers */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ cmdsp++;
+
+ /*
+ * A table value of 255 skips the Galois steps entirely, leaving the
+ * zeroed buffers as this source's Q contribution (presumably the
+ * log-table entry for a zero coefficient - TODO confirm).
+ */
+ if (dpos == 255)
+ goto skip_q_computation;
+ /* Each GALOIS command covers at most max_pq_coefs - 1 positions */
+ pos = (dpos < req->sba->max_pq_coefs) ?
+ dpos : (req->sba->max_pq_coefs - 1);
+
+ /*
+ * Type-B command to generate initial Q from data
+ * and store output into buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_pq_c_mdata(pos, 0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+ SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Positions still to be applied after the initial command */
+ dpos -= pos;
+
+ /* Multiple Type-A command to generate final Q */
+ while (dpos) {
+ pos = (dpos < req->sba->max_pq_coefs) ?
+ dpos : (req->sba->max_pq_coefs - 1);
+
+ /*
+ * Type-A command to generate Q with buf0 and
+ * buf1 store result in buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_pq_c_mdata(pos, 0, 1);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+ SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ cmdsp++;
+
+ dpos -= pos;
+ }
+
+skip_q_computation:
+ if (pq_continue) {
+ /*
+ * Type-B command to XOR previous output with
+ * buf0 and write it into buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_xor_c_mdata(0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = req->resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+skip_q:
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ /* Number of commands actually emitted above */
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+/*
+ * Allocate and fill one request folding the single source @src (with
+ * coefficient @scf) into P and/or Q for @len bytes at offset @off.
+ * Continuation is taken from dmaf_continue(@flags).
+ *
+ * File-local helper, hence static. Returns NULL when no request could be
+ * allocated.
+ */
+static struct sba_request *
+sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off,
+			   dma_addr_t *dst_p, dma_addr_t *dst_q,
+			   dma_addr_t src, u8 scf, size_t len,
+			   unsigned long flags)
+{
+	struct sba_request *req = NULL;
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+	req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+
+	/* Fillup request messages */
+	sba_fillup_pq_single_msg(req, dmaf_continue(flags),
+				 req->cmds, &req->msg, off, len,
+				 dst_p, dst_q, src, scf);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return req;
+}
+
+/*
+ * dmaengine callback: prepare a P/Q generation as a chain of requests, each
+ * one covering at most hw_buf_size bytes.
+ *
+ * If every coefficient's raid6_gflog[] value is below max_pq_coefs, one
+ * request per chunk handles all sources. Otherwise the "slow" path emits one
+ * fenced single-source request per source and chunk, starting with the source
+ * that aliases the Q destination (if any).
+ *
+ * Returns the descriptor of the first request, or NULL when @src_cnt exceeds
+ * max_pq_srcs, @len is zero, or a request could not be allocated (the partial
+ * chain is freed).
+ */
+static struct dma_async_tx_descriptor *
+sba_prep_dma_pq(struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
+ u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+ u32 i, dst_q_index;
+ size_t req_len;
+ bool slow = false;
+ dma_addr_t off = 0;
+ dma_addr_t *dst_p = NULL, *dst_q = NULL;
+ struct sba_device *sba = to_sba_device(dchan);
+ struct sba_request *first = NULL, *req;
+
+ /* Sanity checks */
+ if (unlikely(src_cnt > sba->max_pq_srcs))
+ return NULL;
+ /* Any coefficient out of the supported range forces the slow path */
+ for (i = 0; i < src_cnt; i++)
+ if (sba->max_pq_coefs <= raid6_gflog[scf[i]])
+ slow = true;
+
+ /* Figure-out P and Q destination addresses */
+ if (!(flags & DMA_PREP_PQ_DISABLE_P))
+ dst_p = &dst[0];
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ dst_q = &dst[1];
+
+ /* Create chained requests where each request is upto hw_buf_size */
+ while (len) {
+ req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size;
+
+ if (slow) {
+ /* src_cnt means "no source aliases the Q destination" */
+ dst_q_index = src_cnt;
+
+ if (dst_q) {
+ for (i = 0; i < src_cnt; i++) {
+ if (*dst_q == src[i]) {
+ dst_q_index = i;
+ break;
+ }
+ }
+ }
+
+ /* Handle the source that aliases Q before any other */
+ if (dst_q_index < src_cnt) {
+ i = dst_q_index;
+ req = sba_prep_dma_pq_single_req(sba,
+ off, dst_p, dst_q, src[i], scf[i],
+ req_len, flags | DMA_PREP_FENCE);
+ if (!req)
+ goto fail;
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+
+ /*
+ * NOTE(review): flags is modified in place, so the
+ * DMA_PREP_CONTINUE set here is still set when the
+ * next chunk (next 'off') starts - confirm intended.
+ */
+ flags |= DMA_PREP_CONTINUE;
+ }
+
+ for (i = 0; i < src_cnt; i++) {
+ if (dst_q_index == i)
+ continue;
+
+ req = sba_prep_dma_pq_single_req(sba,
+ off, dst_p, dst_q, src[i], scf[i],
+ req_len, flags | DMA_PREP_FENCE);
+ if (!req)
+ goto fail;
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+
+ /* Later sources accumulate onto the P/Q written so far */
+ flags |= DMA_PREP_CONTINUE;
+ }
+ } else {
+ req = sba_prep_dma_pq_req(sba, off,
+ dst_p, dst_q, src, src_cnt,
+ scf, req_len, flags);
+ if (!req)
+ goto fail;
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+ }
+
+ off += req_len;
+ len -= req_len;
+ }
+
+ return (first) ? &first->tx : NULL;
+
+fail:
+ if (first)
+ sba_free_chained_requests(first);
+ return NULL;
+}
+
+/* ====== Mailbox callbacks ===== */
+
+/*
+ * Run the completion actions for the descriptor embedded in @req, then
+ * either free the chain (descriptor acked) or park it on the completed
+ * list until it is acked.
+ */
+static void sba_dma_tx_actions(struct sba_request *req)
+{
+	struct dma_async_tx_descriptor *desc = &req->tx;
+
+	WARN_ON(desc->cookie < 0);
+
+	if (desc->cookie > 0) {
+		dma_cookie_complete(desc);
+
+		/*
+		 * The client callback must neither sleep nor submit new
+		 * operations to this channel.
+		 */
+		if (desc->callback)
+			desc->callback(desc->callback_param);
+
+		dma_descriptor_unmap(desc);
+	}
+
+	/* Kick off whatever depends on this descriptor */
+	dma_run_dependencies(desc);
+
+	/* Acked descriptors are recycled now; others wait for the ack */
+	if (async_tx_test_ack(desc))
+		sba_free_chained_requests(req);
+	else
+		sba_complete_chained_requests(req);
+}
+
+/*
+ * Mailbox receive callback for a finished request.
+ *
+ * The request found in the message context is marked received. Once the last
+ * request of its chain has arrived, the chain head is either run through the
+ * completion actions (state still RECEIVED) or freed, and requests parked on
+ * the completed list are re-checked. Pending requests are then issued in
+ * every case.
+ *
+ * @cl: mailbox client (unused here)
+ * @msg: the struct brcm_message that was sent for the request
+ */
+static void sba_receive_message(struct mbox_client *cl, void *msg)
+{
+ unsigned long flags;
+ struct brcm_message *m = msg;
+ struct sba_request *req = m->ctx, *req1;
+ struct sba_device *sba = req->sba;
+
+ /* Report the error if the message carries one */
+ if (m->error < 0)
+ dev_err(sba->dev, "%s got message with error %d",
+ dma_chan_name(&sba->dma_chan), m->error);
+
+ /* Mark request as received */
+ sba_received_request(req);
+
+ /* Wait for all chained requests to be completed */
+ if (atomic_dec_return(&req->first->next_pending_count))
+ goto done;
+
+ /* Point to first request */
+ req = req->first;
+
+ /*
+ * Update request: a head that is no longer in RECEIVED state is only
+ * freed (presumably it was aborted meanwhile - TODO confirm)
+ */
+ if (req->state == SBA_REQUEST_STATE_RECEIVED)
+ sba_dma_tx_actions(req);
+ else
+ sba_free_chained_requests(req);
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ /*
+ * Re-check all completed request waiting for 'ack'
+ *
+ * NOTE(review): reqs_lock is dropped inside the _safe iteration, so
+ * the saved next entry (req1) is unprotected while unlocked - confirm
+ * that nothing else can move it off the completed list meanwhile.
+ */
+ list_for_each_entry_safe(req, req1, &sba->reqs_completed_list, node) {
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ sba_dma_tx_actions(req);
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+done:
+ /* Try to submit pending request */
+ sba_issue_pending(&sba->dma_chan);
+}
+
+/* ====== Platform driver routines ===== */
+
+/*
+ * Allocate and initialise everything the channel needs up front: the
+ * coherent response and command pools, the request array and the
+ * per-request command descriptors. Every request ends up on
+ * reqs_free_list.
+ *
+ * Both pools are allocated against sba->dma_dev.dev, so that pointer
+ * must already be set when this runs (sba_async_register() sets it).
+ *
+ * Returns 0 on success or -ENOMEM. On failure the coherent pools
+ * allocated so far are freed again; the devm_kcalloc() allocations are
+ * tied to sba->dev and are not released here.
+ */
+static int sba_prealloc_channel_resources(struct sba_device *sba)
+{
+ int i, j, p, ret = 0;
+ struct sba_request *req = NULL;
+
+ /* Pool of response slots, one per request */
+ sba->resp_base = dma_alloc_coherent(sba->dma_dev.dev,
+ sba->max_resp_pool_size,
+ &sba->resp_dma_base, GFP_KERNEL);
+ if (!sba->resp_base)
+ return -ENOMEM;
+
+ /* Pool of u64 command slots, max_cmd_per_req per request */
+ sba->cmds_base = dma_alloc_coherent(sba->dma_dev.dev,
+ sba->max_cmds_pool_size,
+ &sba->cmds_dma_base, GFP_KERNEL);
+ if (!sba->cmds_base) {
+ ret = -ENOMEM;
+ goto fail_free_resp_pool;
+ }
+
+ /* Request bookkeeping: lock, fence flag and one list per state */
+ spin_lock_init(&sba->reqs_lock);
+ sba->reqs_fence = false;
+ INIT_LIST_HEAD(&sba->reqs_alloc_list);
+ INIT_LIST_HEAD(&sba->reqs_pending_list);
+ INIT_LIST_HEAD(&sba->reqs_active_list);
+ INIT_LIST_HEAD(&sba->reqs_received_list);
+ INIT_LIST_HEAD(&sba->reqs_completed_list);
+ INIT_LIST_HEAD(&sba->reqs_aborted_list);
+ INIT_LIST_HEAD(&sba->reqs_free_list);
+
+ sba->reqs = devm_kcalloc(sba->dev, sba->max_req,
+ sizeof(*req), GFP_KERNEL);
+ if (!sba->reqs) {
+ ret = -ENOMEM;
+ goto fail_free_cmds_pool;
+ }
+
+ /* p is the byte offset of request i's slot inside the response pool */
+ for (i = 0, p = 0; i < sba->max_req; i++) {
+ req = &sba->reqs[i];
+ INIT_LIST_HEAD(&req->node);
+ req->sba = sba;
+ req->state = SBA_REQUEST_STATE_FREE;
+ INIT_LIST_HEAD(&req->next);
+ req->next_count = 1;
+ atomic_set(&req->next_pending_count, 0);
+ req->fence = false;
+ req->resp = sba->resp_base + p;
+ req->resp_dma = sba->resp_dma_base + p;
+ p += sba->hw_resp_size;
+ req->cmds = devm_kcalloc(sba->dev, sba->max_cmd_per_req,
+ sizeof(*req->cmds), GFP_KERNEL);
+ if (!req->cmds) {
+ ret = -ENOMEM;
+ goto fail_free_cmds_pool;
+ }
+ /*
+ * Command j of request i uses u64 slot number
+ * (i * max_cmd_per_req + j) of the command pool, for both
+ * the CPU pointer and the DMA address.
+ */
+ for (j = 0; j < sba->max_cmd_per_req; j++) {
+ req->cmds[j].cmd = 0;
+ req->cmds[j].cmd_dma = sba->cmds_base +
+ (i * sba->max_cmd_per_req + j) * sizeof(u64);
+ req->cmds[j].cmd_dma_addr = sba->cmds_dma_base +
+ (i * sba->max_cmd_per_req + j) * sizeof(u64);
+ req->cmds[j].flags = 0;
+ }
+ memset(&req->msg, 0, sizeof(req->msg));
+ dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+ req->tx.tx_submit = sba_tx_submit;
+ req->tx.phys = req->resp_dma;
+ list_add_tail(&req->node, &sba->reqs_free_list);
+ }
+
+ sba->reqs_free_count = sba->max_req;
+
+ return 0;
+
+ /* Unwind in reverse order of allocation */
+fail_free_cmds_pool:
+ dma_free_coherent(sba->dma_dev.dev,
+ sba->max_cmds_pool_size,
+ sba->cmds_base, sba->cmds_dma_base);
+fail_free_resp_pool:
+ dma_free_coherent(sba->dma_dev.dev,
+ sba->max_resp_pool_size,
+ sba->resp_base, sba->resp_dma_base);
+ return ret;
+}
+
+/*
+ * Terminate all activity on the channel and give back the coherent
+ * command and response pools obtained by
+ * sba_prealloc_channel_resources().
+ */
+static void sba_freeup_channel_resources(struct sba_device *sba)
+{
+	dmaengine_terminate_all(&sba->dma_chan);
+	dma_free_coherent(sba->dma_dev.dev, sba->max_cmds_pool_size,
+			  sba->cmds_base, sba->cmds_dma_base);
+	dma_free_coherent(sba->dma_dev.dev, sba->max_resp_pool_size,
+			  sba->resp_base, sba->resp_dma_base);
+
+	/* Do not leave pointers to either freed pool behind */
+	sba->cmds_base = NULL;
+	sba->cmds_dma_base = 0;
+	sba->resp_base = NULL;
+	sba->resp_dma_base = 0;
+}
+
+/*
+ * Fill in the dma_device embedded in @sba and register it with the
+ * dmaengine core.
+ *
+ * Sets the capability mask (interrupt, memcpy, xor, pq), the matching
+ * prep routines and limits, the single channel of the device, and uses
+ * the mailbox device as the dma_device's base device.
+ *
+ * Returns 0 on success or the error from dma_async_device_register().
+ */
+static int sba_async_register(struct sba_device *sba)
+{
+	int ret;
+	struct dma_device *dma_dev = &sba->dma_dev;
+
+	/* Initialize DMA channel cookie */
+	sba->dma_chan.device = dma_dev;
+	dma_cookie_init(&sba->dma_chan);
+
+	/* Initialize DMA device capability mask */
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+	dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+
+	/*
+	 * Set mailbox channel device as the base device of
+	 * our dma_device because the actual memory accesses
+	 * will be done by mailbox controller
+	 */
+	dma_dev->dev = sba->mbox_dev;
+
+	/* Set base channel management routines */
+	dma_dev->device_free_chan_resources = sba_free_chan_resources;
+	dma_dev->device_terminate_all = sba_device_terminate_all;
+	dma_dev->device_issue_pending = sba_issue_pending;
+	dma_dev->device_tx_status = sba_tx_status;
+
+	/* Set interrupt routine */
+	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_interrupt = sba_prep_dma_interrupt;
+
+	/* Set memcpy routine */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = sba_prep_dma_memcpy;
+
+	/* Set xor routine and capability */
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->device_prep_dma_xor = sba_prep_dma_xor;
+		dma_dev->max_xor = sba->max_xor_srcs;
+	}
+
+	/* Set pq routine and capability */
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		dma_dev->device_prep_dma_pq = sba_prep_dma_pq;
+		dma_set_maxpq(dma_dev, sba->max_pq_srcs, 0);
+	}
+
+	/* Initialize DMA device channel list */
+	INIT_LIST_HEAD(&dma_dev->channels);
+	list_add_tail(&sba->dma_chan.device_node, &dma_dev->channels);
+
+	/* Register with Linux async DMA framework */
+	ret = dma_async_device_register(dma_dev);
+	if (ret) {
+		dev_err(sba->dev, "async device register error %d\n", ret);
+		return ret;
+	}
+
+	dev_info(sba->dev, "%s capabilities: %s%s%s%s\n",
+		 dma_chan_name(&sba->dma_chan),
+		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "interrupt " : "",
+		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "memcpy " : "",
+		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+		 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "");
+
+	return 0;
+}
+
+/*
+ * Probe one SBA RAID device.
+ *
+ * Picks the SBA version from the DT compatible string, derives the
+ * per-version limits and pool sizes, requests every mailbox channel
+ * listed in the "mboxes" property, checks that all of them belong to
+ * the same mailbox device, then registers the DMA device and
+ * preallocates the channel resources.
+ *
+ * Returns 0 on success or a negative errno. On failure the mailbox
+ * channels acquired so far are released, as is the reference taken on
+ * the mailbox device.
+ */
+static int sba_probe(struct platform_device *pdev)
+{
+	int i, ret = 0, mchans_count;
+	bool same_mbox_dev;
+	struct sba_device *sba;
+	struct platform_device *mbox_pdev;
+	struct of_phandle_args args;
+
+	/* Allocate main SBA struct */
+	sba = devm_kzalloc(&pdev->dev, sizeof(*sba), GFP_KERNEL);
+	if (!sba)
+		return -ENOMEM;
+
+	sba->dev = &pdev->dev;
+	platform_set_drvdata(pdev, sba);
+
+	/* Determine SBA version from DT compatible string */
+	if (of_device_is_compatible(sba->dev->of_node, "brcm,iproc-sba"))
+		sba->ver = SBA_VER_1;
+	else if (of_device_is_compatible(sba->dev->of_node,
+					 "brcm,iproc-sba-v2"))
+		sba->ver = SBA_VER_2;
+	else
+		return -ENODEV;
+
+	/* Derived Configuration parameters */
+	switch (sba->ver) {
+	case SBA_VER_1:
+		sba->max_req = 1024;
+		sba->hw_buf_size = 4096;
+		sba->hw_resp_size = 8;
+		sba->max_pq_coefs = 6;
+		sba->max_pq_srcs = 6;
+		break;
+	case SBA_VER_2:
+		sba->max_req = 1024;
+		sba->hw_buf_size = 4096;
+		sba->hw_resp_size = 8;
+		sba->max_pq_coefs = 30;
+		/*
+		 * We cannot support max_pq_srcs == max_pq_coefs because
+		 * we are limited by number of SBA commands that we can
+		 * fit in one message for underlying ring manager HW.
+		 */
+		sba->max_pq_srcs = 12;
+		break;
+	default:
+		return -EINVAL;
+	}
+	sba->max_cmd_per_req = sba->max_pq_srcs + 3;
+	sba->max_xor_srcs = sba->max_cmd_per_req - 1;
+	sba->max_resp_pool_size = sba->max_req * sba->hw_resp_size;
+	sba->max_cmds_pool_size = sba->max_req *
+				  sba->max_cmd_per_req * sizeof(u64);
+
+	/* Setup mailbox client */
+	sba->client.dev = &pdev->dev;
+	sba->client.rx_callback = sba_receive_message;
+	sba->client.tx_block = false;
+	sba->client.knows_txdone = false;
+	sba->client.tx_tout = 0;
+
+	/* Number of channels equals number of mailbox channels */
+	ret = of_count_phandle_with_args(pdev->dev.of_node,
+					 "mboxes", "#mbox-cells");
+	if (ret <= 0)
+		return -ENODEV;
+	mchans_count = ret;
+	/* Counts only the channels actually acquired, for the unwind path */
+	sba->mchans_count = 0;
+	atomic_set(&sba->mchans_current, 0);
+
+	/*
+	 * Allocate mailbox channel array. It must be sized from the DT
+	 * channel count: sba->mchans_count is still zero at this point
+	 * and would yield a zero-length array.
+	 */
+	sba->mchans = devm_kcalloc(&pdev->dev, mchans_count,
+				   sizeof(*sba->mchans), GFP_KERNEL);
+	if (!sba->mchans)
+		return -ENOMEM;
+
+	/* Request mailbox channels */
+	for (i = 0; i < mchans_count; i++) {
+		sba->mchans[i] = mbox_request_channel(&sba->client, i);
+		if (IS_ERR(sba->mchans[i])) {
+			ret = PTR_ERR(sba->mchans[i]);
+			goto fail_free_mchans;
+		}
+		sba->mchans_count++;
+	}
+
+	/*
+	 * Find-out underlying mailbox device. The device reference taken
+	 * by of_find_device_by_node() is kept for as long as
+	 * sba->mbox_dev is in use.
+	 */
+	ret = of_parse_phandle_with_args(pdev->dev.of_node,
+					 "mboxes", "#mbox-cells", 0, &args);
+	if (ret)
+		goto fail_free_mchans;
+	mbox_pdev = of_find_device_by_node(args.np);
+	of_node_put(args.np);
+	if (!mbox_pdev) {
+		ret = -ENODEV;
+		goto fail_free_mchans;
+	}
+	sba->mbox_dev = &mbox_pdev->dev;
+
+	/* All mailbox channels should be of same ring manager device */
+	for (i = 1; i < mchans_count; i++) {
+		ret = of_parse_phandle_with_args(pdev->dev.of_node,
+					 "mboxes", "#mbox-cells", i, &args);
+		if (ret)
+			goto fail_put_mbox_dev;
+		mbox_pdev = of_find_device_by_node(args.np);
+		of_node_put(args.np);
+		if (!mbox_pdev) {
+			ret = -ENODEV;
+			goto fail_put_mbox_dev;
+		}
+		/* Only the identity is needed; drop this lookup's reference */
+		same_mbox_dev = (sba->mbox_dev == &mbox_pdev->dev);
+		put_device(&mbox_pdev->dev);
+		if (!same_mbox_dev) {
+			ret = -EINVAL;
+			goto fail_put_mbox_dev;
+		}
+	}
+
+	/* Register DMA device with linux async framework */
+	ret = sba_async_register(sba);
+	if (ret)
+		goto fail_put_mbox_dev;
+
+	/* Prealloc channel resource */
+	ret = sba_prealloc_channel_resources(sba);
+	if (ret)
+		goto fail_async_dev_unreg;
+
+	/* Print device info */
+	dev_info(sba->dev, "%s using SBAv%d and %d mailbox channels\n",
+		 dma_chan_name(&sba->dma_chan), sba->ver+1,
+		 sba->mchans_count);
+
+	return 0;
+
+fail_async_dev_unreg:
+	dma_async_device_unregister(&sba->dma_dev);
+fail_put_mbox_dev:
+	put_device(sba->mbox_dev);
+fail_free_mchans:
+	for (i = 0; i < sba->mchans_count; i++)
+		mbox_free_channel(sba->mchans[i]);
+	return ret;
+}
+
+/*
+ * Tear down one SBA RAID device: unregister the DMA device, release
+ * the channel resources and free every mailbox channel acquired in
+ * sba_probe(). Always returns 0.
+ */
+static int sba_remove(struct platform_device *pdev)
+{
+	int i;
+	struct sba_device *sba = platform_get_drvdata(pdev);
+
+	/*
+	 * Unregister first so that the channel is no longer offered to
+	 * clients while the pools backing its requests are being freed.
+	 */
+	dma_async_device_unregister(&sba->dma_dev);
+
+	sba_freeup_channel_resources(sba);
+
+	for (i = 0; i < sba->mchans_count; i++)
+		mbox_free_channel(sba->mchans[i]);
+
+	return 0;
+}
+
+/*
+ * Device-tree compatible strings handled by this driver. sba_probe()
+ * maps "brcm,iproc-sba" to SBA_VER_1 and "brcm,iproc-sba-v2" to
+ * SBA_VER_2. The empty entry terminates the table.
+ */
+static const struct of_device_id sba_of_match[] = {
+ { .compatible = "brcm,iproc-sba", },
+ { .compatible = "brcm,iproc-sba-v2", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, sba_of_match);
+
+/*
+ * Platform driver glue: binds sba_probe()/sba_remove() to devices
+ * matched through sba_of_match under the name "bcm-sba-raid".
+ */
+static struct platform_driver sba_driver = {
+ .probe = sba_probe,
+ .remove = sba_remove,
+ .driver = {
+ .name = "bcm-sba-raid",
+ .of_match_table = sba_of_match,
+ },
+};
+module_platform_driver(sba_driver);
+
+MODULE_DESCRIPTION("Broadcom SBA RAID driver");
+MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index c639c60b825a..bc31fe802061 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -306,8 +306,12 @@ static int dw_resume_early(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct dw_dma_chip *chip = platform_get_drvdata(pdev);
+ int ret;
+
+ ret = clk_prepare_enable(chip->clk);
+ if (ret)
+ return ret;
- clk_prepare_enable(chip->clk);
return dw_dma_enable(chip);
}
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index d37e8dda8079..ec240592f5c8 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -201,6 +201,7 @@ struct ep93xx_dma_engine {
struct dma_device dma_dev;
bool m2m;
int (*hw_setup)(struct ep93xx_dma_chan *);
+ void (*hw_synchronize)(struct ep93xx_dma_chan *);
void (*hw_shutdown)(struct ep93xx_dma_chan *);
void (*hw_submit)(struct ep93xx_dma_chan *);
int (*hw_interrupt)(struct ep93xx_dma_chan *);
@@ -323,6 +324,8 @@ static int m2p_hw_setup(struct ep93xx_dma_chan *edmac)
| M2P_CONTROL_ENABLE;
m2p_set_control(edmac, control);
+ edmac->buffer = 0;
+
return 0;
}
@@ -331,21 +334,27 @@ static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac)
return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3;
}
-static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac)
{
+ unsigned long flags;
u32 control;
+ spin_lock_irqsave(&edmac->lock, flags);
control = readl(edmac->regs + M2P_CONTROL);
control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
m2p_set_control(edmac, control);
+ spin_unlock_irqrestore(&edmac->lock, flags);
while (m2p_channel_state(edmac) >= M2P_STATE_ON)
- cpu_relax();
+ schedule();
+}
+static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
m2p_set_control(edmac, 0);
- while (m2p_channel_state(edmac) == M2P_STATE_STALL)
- cpu_relax();
+ while (m2p_channel_state(edmac) != M2P_STATE_IDLE)
+ dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n");
}
static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
@@ -1161,6 +1170,26 @@ fail:
}
/**
+ * ep93xx_dma_synchronize - Synchronizes the termination of transfers to the
+ * current context.
+ * @chan: channel
+ *
+ * Synchronizes the DMA channel termination to the current context. When this
+ * function returns it is guaranteed that all transfers for previously issued
+ * descriptors have stopped and it is safe to free the memory associated
+ * with them. Furthermore it is guaranteed that all complete callback functions
+ * for a previously submitted descriptor have finished running and it is safe to
+ * free resources accessed from within the complete callbacks.
+ */
+static void ep93xx_dma_synchronize(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	void (*hw_sync)(struct ep93xx_dma_chan *);
+
+	/* The hook is optional; engines without one have nothing to wait for */
+	hw_sync = edmac->edma->hw_synchronize;
+	if (!hw_sync)
+		return;
+
+	hw_sync(edmac);
+}
+
+/**
* ep93xx_dma_terminate_all - terminate all transactions
* @chan: channel
*
@@ -1323,6 +1352,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev)
dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg;
dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic;
dma_dev->device_config = ep93xx_dma_slave_config;
+ dma_dev->device_synchronize = ep93xx_dma_synchronize;
dma_dev->device_terminate_all = ep93xx_dma_terminate_all;
dma_dev->device_issue_pending = ep93xx_dma_issue_pending;
dma_dev->device_tx_status = ep93xx_dma_tx_status;
@@ -1340,6 +1370,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev)
} else {
dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+ edma->hw_synchronize = m2p_hw_synchronize;
edma->hw_setup = m2p_hw_setup;
edma->hw_shutdown = m2p_hw_shutdown;
edma->hw_submit = m2p_hw_submit;
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index a28a01fcba67..f652a0e0f5a2 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -42,6 +42,7 @@
#define MV_XOR_V2_DMA_IMSG_THRD_OFF 0x018
#define MV_XOR_V2_DMA_IMSG_THRD_MASK 0x7FFF
#define MV_XOR_V2_DMA_IMSG_THRD_SHIFT 0x0
+#define MV_XOR_V2_DMA_IMSG_TIMER_EN BIT(18)
#define MV_XOR_V2_DMA_DESQ_AWATTR_OFF 0x01C
/* Same flags as MV_XOR_V2_DMA_DESQ_ARATTR_OFF */
#define MV_XOR_V2_DMA_DESQ_ALLOC_OFF 0x04C
@@ -55,6 +56,9 @@
#define MV_XOR_V2_DMA_DESQ_STOP_OFF 0x800
#define MV_XOR_V2_DMA_DESQ_DEALLOC_OFF 0x804
#define MV_XOR_V2_DMA_DESQ_ADD_OFF 0x808
+#define MV_XOR_V2_DMA_IMSG_TMOT 0x810
+#define MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK 0x1FFF
+#define MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT 0
/* XOR Global registers */
#define MV_XOR_V2_GLOB_BW_CTRL 0x4
@@ -90,6 +94,13 @@
*/
#define MV_XOR_V2_DESC_NUM 1024
+/*
+ * Threshold values for descriptors and timeout, determined by
+ * experimentation as giving a good level of performance.
+ */
+#define MV_XOR_V2_DONE_IMSG_THRD 0x14
+#define MV_XOR_V2_TIMER_THRD 0xB0
+
/**
* struct mv_xor_v2_descriptor - DMA HW descriptor
* @desc_id: used by S/W and is not affected by H/W.
@@ -161,6 +172,7 @@ struct mv_xor_v2_device {
struct mv_xor_v2_sw_desc *sw_desq;
int desc_size;
unsigned int npendings;
+ unsigned int hw_queue_idx;
};
/**
@@ -214,18 +226,6 @@ static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
}
/*
- * Return the next available index in the DESQ.
- */
-static int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev)
-{
- /* read the index for the next available descriptor in the DESQ */
- u32 reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ALLOC_OFF);
-
- return ((reg >> MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT)
- & MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK);
-}
-
-/*
* notify the engine of new descriptors, and update the available index.
*/
static void mv_xor_v2_add_desc_to_desq(struct mv_xor_v2_device *xor_dev,
@@ -261,16 +261,23 @@ static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev)
* Set the IMSG threshold
*/
static inline
-void mv_xor_v2_set_imsg_thrd(struct mv_xor_v2_device *xor_dev, int thrd_val)
+void mv_xor_v2_enable_imsg_thrd(struct mv_xor_v2_device *xor_dev)
{
u32 reg;
+ /* Configure threshold of number of descriptors, and enable timer */
reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
-
reg &= (~MV_XOR_V2_DMA_IMSG_THRD_MASK << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
- reg |= (thrd_val << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
-
+ reg |= (MV_XOR_V2_DONE_IMSG_THRD << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
+ reg |= MV_XOR_V2_DMA_IMSG_TIMER_EN;
writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
+
+ /* Configure Timer Threshold */
+ reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT);
+ reg &= (~MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK <<
+ MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT);
+ reg |= (MV_XOR_V2_TIMER_THRD << MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT);
+ writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT);
}
static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
@@ -288,12 +295,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
if (!ndescs)
return IRQ_NONE;
- /*
- * Update IMSG threshold, to disable new IMSG interrupts until
- * end of the tasklet
- */
- mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_DESC_NUM);
-
/* schedule a tasklet to handle descriptors callbacks */
tasklet_schedule(&xor_dev->irq_tasklet);
@@ -306,7 +307,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
static dma_cookie_t
mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
{
- int desq_ptr;
void *dest_hw_desc;
dma_cookie_t cookie;
struct mv_xor_v2_sw_desc *sw_desc =
@@ -322,15 +322,15 @@ mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
spin_lock_bh(&xor_dev->lock);
cookie = dma_cookie_assign(tx);
- /* get the next available slot in the DESQ */
- desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev);
-
/* copy the HW descriptor from the SW descriptor to the DESQ */
- dest_hw_desc = xor_dev->hw_desq_virt + desq_ptr;
+ dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx;
memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
xor_dev->npendings++;
+ xor_dev->hw_queue_idx++;
+ if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM)
+ xor_dev->hw_queue_idx = 0;
spin_unlock_bh(&xor_dev->lock);
@@ -344,6 +344,7 @@ static struct mv_xor_v2_sw_desc *
mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
{
struct mv_xor_v2_sw_desc *sw_desc;
+ bool found = false;
/* Lock the channel */
spin_lock_bh(&xor_dev->lock);
@@ -355,19 +356,23 @@ mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
return NULL;
}
- /* get a free SW descriptor from the SW DESQ */
- sw_desc = list_first_entry(&xor_dev->free_sw_desc,
- struct mv_xor_v2_sw_desc, free_list);
+ list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) {
+ if (async_tx_test_ack(&sw_desc->async_tx)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ spin_unlock_bh(&xor_dev->lock);
+ return NULL;
+ }
+
list_del(&sw_desc->free_list);
/* Release the channel */
spin_unlock_bh(&xor_dev->lock);
- /* set the async tx descriptor */
- dma_async_tx_descriptor_init(&sw_desc->async_tx, &xor_dev->dmachan);
- sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
- async_tx_ack(&sw_desc->async_tx);
-
return sw_desc;
}
@@ -389,6 +394,8 @@ mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
__func__, len, &src, &dest, flags);
sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+ if (!sw_desc)
+ return NULL;
sw_desc->async_tx.flags = flags;
@@ -443,6 +450,8 @@ mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
__func__, src_cnt, len, &dest, flags);
sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+ if (!sw_desc)
+ return NULL;
sw_desc->async_tx.flags = flags;
@@ -491,6 +500,8 @@ mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
container_of(chan, struct mv_xor_v2_device, dmachan);
sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+ if (!sw_desc)
+ return NULL;
/* set the HW descriptor */
hw_descriptor = &sw_desc->hw_desc;
@@ -524,9 +535,6 @@ static void mv_xor_v2_issue_pending(struct dma_chan *chan)
mv_xor_v2_add_desc_to_desq(xor_dev, xor_dev->npendings);
xor_dev->npendings = 0;
- /* Activate the channel */
- writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
-
spin_unlock_bh(&xor_dev->lock);
}
@@ -554,7 +562,6 @@ static void mv_xor_v2_tasklet(unsigned long data)
{
struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
int pending_ptr, num_of_pending, i;
- struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL;
struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
@@ -562,17 +569,10 @@ static void mv_xor_v2_tasklet(unsigned long data)
/* get the pending descriptors parameters */
num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr);
- /* next HW descriptor */
- next_pending_hw_desc = xor_dev->hw_desq_virt + pending_ptr;
-
/* loop over free descriptors */
for (i = 0; i < num_of_pending; i++) {
-
- if (pending_ptr > MV_XOR_V2_DESC_NUM)
- pending_ptr = 0;
-
- if (next_pending_sw_desc != NULL)
- next_pending_hw_desc++;
+ struct mv_xor_v2_descriptor *next_pending_hw_desc =
+ xor_dev->hw_desq_virt + pending_ptr;
/* get the SW descriptor related to the HW descriptor */
next_pending_sw_desc =
@@ -608,15 +608,14 @@ static void mv_xor_v2_tasklet(unsigned long data)
/* increment the next descriptor */
pending_ptr++;
+ if (pending_ptr >= MV_XOR_V2_DESC_NUM)
+ pending_ptr = 0;
}
if (num_of_pending != 0) {
/* free the descriptores */
mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending);
}
-
- /* Update IMSG threshold, to enable new IMSG interrupts */
- mv_xor_v2_set_imsg_thrd(xor_dev, 0);
}
/*
@@ -648,9 +647,6 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
writel((xor_dev->hw_desq & 0xFFFF00000000) >> 32,
xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF);
- /* enable the DMA engine */
- writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
-
/*
* This is a temporary solution, until we activate the
* SMMU. Set the attributes for reading & writing data buffers
@@ -694,6 +690,30 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL;
writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE);
+ /* enable the DMA engine */
+ writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
+ return 0;
+}
+
+/*
+ * Platform suspend hook: stop the XOR unit by writing 1 to the
+ * descriptor-queue stop register. Always returns 0.
+ *
+ * @dev:   platform device whose drvdata is the mv_xor_v2_device
+ * @state: PM message (not used here)
+ */
+static int mv_xor_v2_suspend(struct platform_device *dev, pm_message_t state)
+{
+ struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev);
+
+ /* Writing 1 to the DESQ stop register stops the XOR unit. */
+ writel(0x1, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
+ return 0;
+}
+
+/*
+ * Platform resume hook: reprogram the engine by setting the descriptor
+ * size, enabling the IMSG threshold/timer and re-running the descriptor
+ * queue initialisation. Always returns 0.
+ *
+ * NOTE(review): mv_xor_v2_set_desc_size() and mv_xor_v2_descq_init()
+ * are declared as returning int, and their results are not checked
+ * here - confirm they cannot fail.
+ */
+static int mv_xor_v2_resume(struct platform_device *dev)
+{
+ struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev);
+
+ mv_xor_v2_set_desc_size(xor_dev);
+ mv_xor_v2_enable_imsg_thrd(xor_dev);
+ mv_xor_v2_descq_init(xor_dev);
+
+ return 0;
+}
@@ -725,6 +745,10 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, xor_dev);
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+ if (ret)
+ return ret;
+
xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER)
return -EPROBE_DEFER;
@@ -785,8 +809,15 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
/* add all SW descriptors to the free list */
for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) {
- xor_dev->sw_desq[i].idx = i;
- list_add(&xor_dev->sw_desq[i].free_list,
+ struct mv_xor_v2_sw_desc *sw_desc =
+ xor_dev->sw_desq + i;
+ sw_desc->idx = i;
+ dma_async_tx_descriptor_init(&sw_desc->async_tx,
+ &xor_dev->dmachan);
+ sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
+ async_tx_ack(&sw_desc->async_tx);
+
+ list_add(&sw_desc->free_list,
&xor_dev->free_sw_desc);
}
@@ -816,6 +847,8 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
list_add_tail(&xor_dev->dmachan.device_node,
&dma_dev->channels);
+ mv_xor_v2_enable_imsg_thrd(xor_dev);
+
mv_xor_v2_descq_init(xor_dev);
ret = dma_async_device_register(dma_dev);
@@ -865,6 +898,8 @@ MODULE_DEVICE_TABLE(of, mv_xor_v2_dt_ids);
static struct platform_driver mv_xor_v2_driver = {
.probe = mv_xor_v2_probe,
+ .suspend = mv_xor_v2_suspend,
+ .resume = mv_xor_v2_resume,
.remove = mv_xor_v2_remove,
.driver = {
.name = "mv_xor_v2",
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index db41795fe42a..d2cb4a0916e6 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -144,6 +144,7 @@ struct rcar_dmac_chan_map {
* @chan: base DMA channel object
* @iomem: channel I/O memory base
* @index: index of this channel in the controller
+ * @irq: channel IRQ
* @src: slave memory address and size on the source side
* @dst: slave memory address and size on the destination side
* @mid_rid: hardware MID/RID for the DMA client using this channel
@@ -161,6 +162,7 @@ struct rcar_dmac_chan {
struct dma_chan chan;
void __iomem *iomem;
unsigned int index;
+ int irq;
struct rcar_dmac_chan_slave src;
struct rcar_dmac_chan_slave dst;
@@ -1008,7 +1010,11 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
rcar_dmac_chan_halt(rchan);
spin_unlock_irq(&rchan->lock);
- /* Now no new interrupts will occur */
+ /*
+ * Now no new interrupts will occur, but one might already be
+ * running. Wait for it to finish before freeing resources.
+ */
+ synchronize_irq(rchan->irq);
if (rchan->mid_rid >= 0) {
/* The caller is holding dma_list_mutex */
@@ -1363,6 +1369,13 @@ done:
spin_unlock_irqrestore(&rchan->lock, flags);
}
+/* Wait for a handler of this channel's IRQ that may already be running. */
+static void rcar_dmac_device_synchronize(struct dma_chan *chan)
+{
+	synchronize_irq(to_rcar_dmac_chan(chan)->irq);
+}
+
/* -----------------------------------------------------------------------------
* IRQ handling
*/
@@ -1647,7 +1660,6 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
struct dma_chan *chan = &rchan->chan;
char pdev_irqname[5];
char *irqname;
- int irq;
int ret;
rchan->index = index;
@@ -1664,8 +1676,8 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
/* Request the channel interrupt. */
sprintf(pdev_irqname, "ch%u", index);
- irq = platform_get_irq_byname(pdev, pdev_irqname);
- if (irq < 0) {
+ rchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
+ if (rchan->irq < 0) {
dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
return -ENODEV;
}
@@ -1675,11 +1687,13 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
if (!irqname)
return -ENOMEM;
- ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel,
+ ret = devm_request_threaded_irq(dmac->dev, rchan->irq,
+ rcar_dmac_isr_channel,
rcar_dmac_isr_channel_thread, 0,
irqname, rchan);
if (ret) {
- dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", irq, ret);
+ dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+ rchan->irq, ret);
return ret;
}
@@ -1843,6 +1857,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
engine->device_terminate_all = rcar_dmac_chan_terminate_all;
engine->device_tx_status = rcar_dmac_tx_status;
engine->device_issue_pending = rcar_dmac_issue_pending;
+ engine->device_synchronize = rcar_dmac_device_synchronize;
ret = dma_async_device_register(engine);
if (ret < 0)
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 72c649713ace..31a145154e9f 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -117,7 +117,7 @@ struct usb_dmac {
#define USB_DMASWR 0x0008
#define USB_DMASWR_SWR (1 << 0)
#define USB_DMAOR 0x0060
-#define USB_DMAOR_AE (1 << 2)
+#define USB_DMAOR_AE (1 << 1)
#define USB_DMAOR_DME (1 << 0)
#define USB_DMASAR 0x0000
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 580b5323a717..ab036b6b1804 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -44,7 +44,14 @@
#define PL080_SYNC (0x34)
-/* Per channel configuration registers */
+/* The Faraday Technology FTDMAC020 variant registers */
+#define FTDMAC020_CH_BUSY (0x20)
+/* Identical to PL080_CONFIG */
+#define FTDMAC020_CSR (0x24)
+/* Identical to PL080_SYNC */
+#define FTDMAC020_SYNC (0x2C)
+#define FTDMAC020_REVISION (0x30)
+#define FTDMAC020_FEATURE (0x34)
/* Per channel configuration registers */
#define PL080_Cx_BASE(x) ((0x100 + (x * 0x20)))
@@ -55,13 +62,20 @@
#define PL080_CH_CONFIG (0x10)
#define PL080S_CH_CONTROL2 (0x10)
#define PL080S_CH_CONFIG (0x14)
-
-#define PL080_LLI_ADDR_MASK (0x3fffffff << 2)
+/* The Faraday FTDMAC020 derivative shuffles the registers around */
+#define FTDMAC020_CH_CSR (0x00)
+#define FTDMAC020_CH_CFG (0x04)
+#define FTDMAC020_CH_SRC_ADDR (0x08)
+#define FTDMAC020_CH_DST_ADDR (0x0C)
+#define FTDMAC020_CH_LLP (0x10)
+#define FTDMAC020_CH_SIZE (0x14)
+
+#define PL080_LLI_ADDR_MASK GENMASK(31, 2)
#define PL080_LLI_ADDR_SHIFT (2)
#define PL080_LLI_LM_AHB2 BIT(0)
#define PL080_CONTROL_TC_IRQ_EN BIT(31)
-#define PL080_CONTROL_PROT_MASK (0x7 << 28)
+#define PL080_CONTROL_PROT_MASK GENMASK(30, 28)
#define PL080_CONTROL_PROT_SHIFT (28)
#define PL080_CONTROL_PROT_CACHE BIT(30)
#define PL080_CONTROL_PROT_BUFF BIT(29)
@@ -70,16 +84,16 @@
#define PL080_CONTROL_SRC_INCR BIT(26)
#define PL080_CONTROL_DST_AHB2 BIT(25)
#define PL080_CONTROL_SRC_AHB2 BIT(24)
-#define PL080_CONTROL_DWIDTH_MASK (0x7 << 21)
+#define PL080_CONTROL_DWIDTH_MASK GENMASK(23, 21)
#define PL080_CONTROL_DWIDTH_SHIFT (21)
-#define PL080_CONTROL_SWIDTH_MASK (0x7 << 18)
+#define PL080_CONTROL_SWIDTH_MASK GENMASK(20, 18)
#define PL080_CONTROL_SWIDTH_SHIFT (18)
-#define PL080_CONTROL_DB_SIZE_MASK (0x7 << 15)
+#define PL080_CONTROL_DB_SIZE_MASK GENMASK(17, 15)
#define PL080_CONTROL_DB_SIZE_SHIFT (15)
-#define PL080_CONTROL_SB_SIZE_MASK (0x7 << 12)
+#define PL080_CONTROL_SB_SIZE_MASK GENMASK(14, 12)
#define PL080_CONTROL_SB_SIZE_SHIFT (12)
-#define PL080_CONTROL_TRANSFER_SIZE_MASK (0xfff << 0)
-#define PL080S_CONTROL_TRANSFER_SIZE_MASK (0x1ffffff << 0)
+#define PL080_CONTROL_TRANSFER_SIZE_MASK GENMASK(11, 0)
+#define PL080S_CONTROL_TRANSFER_SIZE_MASK GENMASK(24, 0)
#define PL080_CONTROL_TRANSFER_SIZE_SHIFT (0)
#define PL080_BSIZE_1 (0x0)
@@ -102,11 +116,11 @@
#define PL080_CONFIG_LOCK BIT(16)
#define PL080_CONFIG_TC_IRQ_MASK BIT(15)
#define PL080_CONFIG_ERR_IRQ_MASK BIT(14)
-#define PL080_CONFIG_FLOW_CONTROL_MASK (0x7 << 11)
+#define PL080_CONFIG_FLOW_CONTROL_MASK GENMASK(13, 11)
#define PL080_CONFIG_FLOW_CONTROL_SHIFT (11)
-#define PL080_CONFIG_DST_SEL_MASK (0xf << 6)
+#define PL080_CONFIG_DST_SEL_MASK GENMASK(9, 6)
#define PL080_CONFIG_DST_SEL_SHIFT (6)
-#define PL080_CONFIG_SRC_SEL_MASK (0xf << 1)
+#define PL080_CONFIG_SRC_SEL_MASK GENMASK(4, 1)
#define PL080_CONFIG_SRC_SEL_SHIFT (1)
#define PL080_CONFIG_ENABLE BIT(0)
@@ -119,6 +133,73 @@
#define PL080_FLOW_PER2MEM_PER (0x6)
#define PL080_FLOW_SRC2DST_SRC (0x7)
+#define FTDMAC020_CH_CSR_TC_MSK BIT(31)
+/* Later versions have a threshold in bits 24..26, */
+#define FTDMAC020_CH_CSR_FIFOTH_MSK GENMASK(26, 24)
+#define FTDMAC020_CH_CSR_FIFOTH_SHIFT (24)
+#define FTDMAC020_CH_CSR_CHPR1_MSK GENMASK(23, 22)
+#define FTDMAC020_CH_CSR_PROT3 BIT(21)
+#define FTDMAC020_CH_CSR_PROT2 BIT(20)
+#define FTDMAC020_CH_CSR_PROT1 BIT(19)
+#define FTDMAC020_CH_CSR_SRC_SIZE_MSK GENMASK(18, 16)
+#define FTDMAC020_CH_CSR_SRC_SIZE_SHIFT (16)
+#define FTDMAC020_CH_CSR_ABT BIT(15)
+#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK GENMASK(13, 11)
+#define FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT (11)
+#define FTDMAC020_CH_CSR_DST_WIDTH_MSK GENMASK(10, 8)
+#define FTDMAC020_CH_CSR_DST_WIDTH_SHIFT (8)
+#define FTDMAC020_CH_CSR_MODE BIT(7)
+/* 00 = increase, 01 = decrease, 10 = fix */
+#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK GENMASK(6, 5)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT (5)
+#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK GENMASK(4, 3)
+#define FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT (3)
+#define FTDMAC020_CH_CSR_SRC_SEL BIT(2)
+#define FTDMAC020_CH_CSR_DST_SEL BIT(1)
+#define FTDMAC020_CH_CSR_EN BIT(0)
+
+/* FIFO threshold setting */
+#define FTDMAC020_CH_CSR_FIFOTH_1 (0x0)
+#define FTDMAC020_CH_CSR_FIFOTH_2 (0x1)
+#define FTDMAC020_CH_CSR_FIFOTH_4 (0x2)
+#define FTDMAC020_CH_CSR_FIFOTH_8 (0x3)
+#define FTDMAC020_CH_CSR_FIFOTH_16 (0x4)
+/* The FTDMAC020 supports 64bit wide transfers */
+#define FTDMAC020_WIDTH_64BIT (0x3)
+/* Address can be increased, decreased or fixed */
+#define FTDMAC020_CH_CSR_SRCAD_CTL_INC (0x0)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_DEC (0x1)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_FIXED (0x2)
+
+#define FTDMAC020_CH_CFG_LLP_CNT_MASK GENMASK(19, 16)
+#define FTDMAC020_CH_CFG_LLP_CNT_SHIFT (16)
+#define FTDMAC020_CH_CFG_BUSY BIT(8)
+#define FTDMAC020_CH_CFG_INT_ABT_MASK BIT(2)
+#define FTDMAC020_CH_CFG_INT_ERR_MASK BIT(1)
+#define FTDMAC020_CH_CFG_INT_TC_MASK BIT(0)
+
+/* Inside the LLIs, the applicable CSR fields are mapped differently */
+#define FTDMAC020_LLI_TC_MSK BIT(28)
+#define FTDMAC020_LLI_SRC_WIDTH_MSK GENMASK(27, 25)
+#define FTDMAC020_LLI_SRC_WIDTH_SHIFT (25)
+#define FTDMAC020_LLI_DST_WIDTH_MSK GENMASK(24, 22)
+#define FTDMAC020_LLI_DST_WIDTH_SHIFT (22)
+#define FTDMAC020_LLI_SRCAD_CTL_MSK GENMASK(21, 20)
+#define FTDMAC020_LLI_SRCAD_CTL_SHIFT (20)
+#define FTDMAC020_LLI_DSTAD_CTL_MSK GENMASK(19, 18)
+#define FTDMAC020_LLI_DSTAD_CTL_SHIFT (18)
+#define FTDMAC020_LLI_SRC_SEL BIT(17)
+#define FTDMAC020_LLI_DST_SEL BIT(16)
+#define FTDMAC020_LLI_TRANSFER_SIZE_MASK GENMASK(11, 0)
+#define FTDMAC020_LLI_TRANSFER_SIZE_SHIFT (0)
+
+#define FTDMAC020_CFG_LLP_CNT_MASK GENMASK(19, 16)
+#define FTDMAC020_CFG_LLP_CNT_SHIFT (16)
+#define FTDMAC020_CFG_BUSY BIT(8)
+#define FTDMAC020_CFG_INT_ABT_MSK BIT(2)
+#define FTDMAC020_CFG_INT_ERR_MSK BIT(1)
+#define FTDMAC020_CFG_INT_TC_MSK BIT(0)
+
/* DMA linked list chain structure */
struct pl080_lli {
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 5308eae9ce35..79d1bcee738d 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -47,8 +47,6 @@ enum {
* devices with static assignments
* @muxval: a number usually used to poke into some mux register to
* mux in the signal to this channel
- * @cctl_memcpy: options for the channel control register for memcpy
- * *** not used for slave channels ***
* @addr: source/target address in physical memory for this DMA channel,
* can be the address of a FIFO register for burst requests for example.
* This can be left undefined if the PrimeCell API is used for configuring
@@ -63,12 +61,28 @@ struct pl08x_channel_data {
int min_signal;
int max_signal;
u32 muxval;
- u32 cctl_memcpy;
dma_addr_t addr;
bool single;
u8 periph_buses;
};
+/*
+ * Burst size selectors; struct pl08x_platform_data uses this type for its
+ * memcpy_burst_size member. The enumerators take implicit consecutive
+ * values starting at 0, and there is no entry for a burst size of 2.
+ * NOTE(review): presumably the numeric suffix is the number of transfers
+ * per burst and the driver translates these values into register
+ * encodings -- confirm against drivers/dma/amba-pl08x.c.
+ */
+enum pl08x_burst_size {
+ PL08X_BURST_SZ_1,
+ PL08X_BURST_SZ_4,
+ PL08X_BURST_SZ_8,
+ PL08X_BURST_SZ_16,
+ PL08X_BURST_SZ_32,
+ PL08X_BURST_SZ_64,
+ PL08X_BURST_SZ_128,
+ PL08X_BURST_SZ_256,
+};
+
+/*
+ * Bus width selectors (8, 16 or 32 bits); struct pl08x_platform_data uses
+ * this type for its memcpy_bus_width member. The enumerators take the
+ * implicit values 0, 1 and 2.
+ */
+enum pl08x_bus_width {
+ PL08X_BUS_WIDTH_8_BITS,
+ PL08X_BUS_WIDTH_16_BITS,
+ PL08X_BUS_WIDTH_32_BITS,
+};
+
/**
* struct pl08x_platform_data - the platform configuration for the PL08x
* PrimeCells.
@@ -76,6 +90,11 @@ struct pl08x_channel_data {
* platform, all inclusive, including multiplexed channels. The available
* physical channels will be multiplexed around these signals as they are
* requested, just enumerate all possible channels.
+ * @num_slave_channels: number of elements in the slave channel array
+ * @memcpy_burst_size: the appropriate burst size for memcpy operations
+ * @memcpy_bus_width: memory bus width
+ * @memcpy_prot_buff: whether memcpy DMA is bufferable
+ * @memcpy_prot_cache: whether memcpy DMA is cacheable
* @get_xfer_signal: request a physical signal to be used for a DMA transfer
* immediately: if there is some multiplexing or similar blocking the use
* of the channel the transfer can be denied by returning less than zero,
@@ -90,7 +109,10 @@ struct pl08x_channel_data {
struct pl08x_platform_data {
struct pl08x_channel_data *slave_channels;
unsigned int num_slave_channels;
- struct pl08x_channel_data memcpy_channel;
+ enum pl08x_burst_size memcpy_burst_size;
+ enum pl08x_bus_width memcpy_bus_width;
+ bool memcpy_prot_buff;
+ bool memcpy_prot_cache;
int (*get_xfer_signal)(const struct pl08x_channel_data *);
void (*put_xfer_signal)(const struct pl08x_channel_data *, int);
u8 lli_buses;
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bbaaa1bf..30f945329818 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -142,6 +142,7 @@ int raid6_select_algo(void);
extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256)));
extern const u8 raid6_gfexp[256] __attribute__((aligned(256)));
+extern const u8 raid6_gflog[256] __attribute__((aligned(256)));
extern const u8 raid6_gfinv[256] __attribute__((aligned(256)));
extern const u8 raid6_gfexi[256] __attribute__((aligned(256)));
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
index 39787db588b0..e824d088f72c 100644
--- a/lib/raid6/mktables.c
+++ b/lib/raid6/mktables.c
@@ -125,6 +125,26 @@ int main(int argc, char *argv[])
printf("EXPORT_SYMBOL(raid6_gfexp);\n");
printf("#endif\n");
+ /* Compute log-of-2 table */
+ printf("\nconst u8 __attribute__((aligned(256)))\n"
+ "raid6_gflog[256] =\n" "{\n");
+ for (i = 0; i < 256; i += 8) {
+ printf("\t");
+ for (j = 0; j < 8; j++) {
+ v = 255;
+ for (k = 0; k < 256; k++)
+ if (exptbl[k] == (i + j)) {
+ v = k;
+ break;
+ }
+ printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+ }
+ }
+ printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gflog);\n");
+ printf("#endif\n");
+
/* Compute inverse table x^-1 == x^254 */
printf("\nconst u8 __attribute__((aligned(256)))\n"
"raid6_gfinv[256] =\n" "{\n");