From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 4 ++-- drivers/dma/iop-adma.c | 38 +++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 92438e9dacc..6781e8f3c06 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -644,8 +644,8 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_memcpy); BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor); - BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && - !device->device_prep_dma_zero_sum); + BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && + !device->device_prep_dma_xor_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 2f052265122..6ff79a67269 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -660,9 +660,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, } static struct dma_async_tx_descriptor * -iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, - unsigned int src_cnt, size_t len, u32 *result, - unsigned long flags) +iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, + unsigned int src_cnt, size_t len, u32 *result, + unsigned long flags) { struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); struct iop_adma_desc_slot *sw_desc, *grp_start; @@ -906,7 +906,7 @@ out: #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ static int __devinit -iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) +iop_adma_xor_val_self_test(struct iop_adma_device *device) { int i, src_idx; struct page *dest; @@ -1002,7 +1002,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) PAGE_SIZE, DMA_TO_DEVICE); /* skip zero sum if the capability is not present */ - if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) goto free_resources; /* zero sum the sources with the destintation page */ @@ -1016,10 +1016,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) dma_srcs[i] = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); cookie = iop_adma_tx_submit(tx); iop_adma_issue_pending(dma_chan); @@ -1072,10 +1072,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) dma_srcs[i] = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); cookie = iop_adma_tx_submit(tx); iop_adma_issue_pending(dma_chan); @@ -1192,9 +1192,9 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dma_dev->max_xor = iop_adma_get_max_xor(); dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; } - if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) - dma_dev->device_prep_dma_zero_sum = - iop_adma_prep_dma_zero_sum; + if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_xor_val = + iop_adma_prep_dma_xor_val; if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) dma_dev->device_prep_dma_interrupt = iop_adma_prep_dma_interrupt; @@ -1249,7 +1249,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { - ret = iop_adma_xor_zero_sum_self_test(adev); + ret = iop_adma_xor_val_self_test(adev); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_iop_chan; @@ -1259,10 +1259,10 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) "( %s%s%s%s%s%s%s%s%s%s)\n", dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", - dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", - dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", + dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", -- cgit v1.2.3 From 04ce9ab385dc97eb55299d533cd3af79b8fc7529 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 14:22:28 -0700 Subject: async_xor: permit callers to pass in a 'dma/page scribble' region async_xor() needs space to perform dma and page address conversions. In most cases the code can simply reuse the struct page * array because the size of the native pointer matches the size of a dma/page address. In order to support archs where sizeof(dma_addr_t) is larger than sizeof(struct page *), or to preserve the input parameters, we utilize a memory region passed in by the caller. Since the code is now prepared to handle the case where it cannot perform address conversions on the stack, we no longer need the !HIGHMEM64G dependency in drivers/dma/Kconfig. [ Impact: don't clobber input buffers for address conversions ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3b3c01b6f1e..912a51b5cbd 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -4,7 +4,7 @@ menuconfig DMADEVICES bool "DMA Engine support" - depends on !HIGHMEM64G && HAS_DMA + depends on HAS_DMA help DMA engines can do asynchronous data transfers without involving the host CPU. Currently, this framework can be -- cgit v1.2.3 From 584ec22759c06cdfc189c03a727f20038526245b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:32:12 -0700 Subject: ioat: move to drivers/dma/ioat/ When first created the ioat driver was the only inhabitant of drivers/dma/. Now, it is the only multi-file (more than a .c and a .h) driver in the directory. Moving it to an ioat/ subdirectory allows the naming convention to be cleaned up, and allows for future splitting of the source files by hardware version (v1, v2, and v3). Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/Makefile | 3 +- drivers/dma/ioat.c | 202 ----- drivers/dma/ioat/Makefile | 2 + drivers/dma/ioat/dca.c | 681 +++++++++++++++ drivers/dma/ioat/dma.c | 1741 +++++++++++++++++++++++++++++++++++++++ drivers/dma/ioat/dma.h | 165 ++++ drivers/dma/ioat/hw.h | 70 ++ drivers/dma/ioat/pci.c | 202 +++++ drivers/dma/ioat/registers.h | 226 +++++ drivers/dma/ioat_dca.c | 681 --------------- drivers/dma/ioat_dma.c | 1741 --------------------------------------- drivers/dma/ioatdma.h | 165 ---- drivers/dma/ioatdma_hw.h | 70 -- drivers/dma/ioatdma_registers.h | 226 ----- 14 files changed, 3088 insertions(+), 3087 deletions(-) delete mode 100644 drivers/dma/ioat.c create mode 100644 drivers/dma/ioat/Makefile create mode 100644 drivers/dma/ioat/dca.c create mode 100644 drivers/dma/ioat/dma.c create mode 100644 drivers/dma/ioat/dma.h create mode 100644 drivers/dma/ioat/hw.h create mode 100644 drivers/dma/ioat/pci.c create mode 100644 drivers/dma/ioat/registers.h delete mode 100644 drivers/dma/ioat_dca.c delete mode 100644 drivers/dma/ioat_dma.c delete mode 100644 drivers/dma/ioatdma.h delete mode 100644 drivers/dma/ioatdma_hw.h delete mode 100644 drivers/dma/ioatdma_registers.h (limited to 'drivers/dma') diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 2e5dc96700d..a1cb2857bba 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,8 +1,7 @@ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_NET_DMA) += iovlock.o obj-$(CONFIG_DMATEST) += dmatest.o -obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_FSL_DMA) += fsldma.o obj-$(CONFIG_MV_XOR) += mv_xor.o diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c deleted file mode 100644 index 2225bb6ba3d..00000000000 --- a/drivers/dma/ioat.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include -#include -#include -#include -#include -#include "ioatdma.h" -#include "ioatdma_registers.h" -#include "ioatdma_hw.h" - -MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Intel Corporation"); - -static struct pci_device_id ioat_pci_tbl[] = { - /* I/OAT v1 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, - - /* I/OAT v2 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, - - /* I/OAT v3 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, - { 0, } -}; - -struct ioat_device { - struct pci_dev *pdev; - void __iomem *iobase; - struct ioatdma_device *dma; - struct dca_provider *dca; -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id); -static void __devexit ioat_remove(struct pci_dev *pdev); - -static int ioat_dca_enabled = 1; -module_param(ioat_dca_enabled, int, 0644); -MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); - -static struct pci_driver ioat_pci_driver = { - .name = "ioatdma", - .id_table = ioat_pci_tbl, - .probe = ioat_probe, - .remove = __devexit_p(ioat_remove), -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - void __iomem *iobase; - struct ioat_device *device; - unsigned long mmio_start, mmio_len; - int err; - - err = pci_enable_device(pdev); - if (err) - goto err_enable_device; - - err = pci_request_regions(pdev, ioat_pci_driver.name); - if (err) - goto err_request_regions; - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - goto err_set_dma_mask; - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - goto err_set_dma_mask; - - mmio_start = pci_resource_start(pdev, 0); - mmio_len = pci_resource_len(pdev, 0); - iobase = ioremap(mmio_start, mmio_len); - if (!iobase) { - err = -ENOMEM; - goto err_ioremap; - } - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } - device->pdev = pdev; - pci_set_drvdata(pdev, device); - device->iobase = iobase; - - pci_set_master(pdev); - - switch (readb(iobase + IOAT_VER_OFFSET)) { - case IOAT_VER_1_2: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat_dca_init(pdev, iobase); - break; - case IOAT_VER_2_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat2_dca_init(pdev, iobase); - break; - case IOAT_VER_3_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat3_dca_init(pdev, iobase); - break; - default: - err = -ENODEV; - break; - } - if (!device->dma) - err = -ENODEV; - - if (err) - goto err_version; - - return 0; - -err_version: - kfree(device); -err_kzalloc: - iounmap(iobase); -err_ioremap: -err_set_dma_mask: - pci_release_regions(pdev); - pci_disable_device(pdev); -err_request_regions: -err_enable_device: - return err; -} - -static void __devexit ioat_remove(struct pci_dev *pdev) -{ - struct ioat_device *device = pci_get_drvdata(pdev); - - dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca); - free_dca_provider(device->dca); - device->dca = NULL; - } - - if (device->dma) { - ioat_dma_remove(device->dma); - device->dma = NULL; - } - - kfree(device); -} - -static int __init ioat_init_module(void) -{ - return pci_register_driver(&ioat_pci_driver); -} -module_init(ioat_init_module); - -static void __exit ioat_exit_module(void) -{ - pci_unregister_driver(&ioat_pci_driver); -} -module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile new file mode 100644 index 00000000000..2ce3d3a4270 --- /dev/null +++ b/drivers/dma/ioat/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o +ioatdma-objs := pci.o dma.o dca.o diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c new file mode 100644 index 00000000000..af1c762dd9d --- /dev/null +++ b/drivers/dma/ioat/dca.c @@ -0,0 +1,681 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include +#include +#include +#include +#include + +/* either a kernel change is needed, or we need something like this in kernel */ +#ifndef CONFIG_SMP +#include +#undef cpu_physical_id +#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) +#endif + +#include "dma.h" +#include "registers.h" + +/* + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 + * contain the bit number of the APIC ID to map into the DCA tag. If the valid + * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. + */ +#define DCA_TAG_MAP_VALID 0x80 + +#define DCA3_TAG_MAP_BIT_TO_INV 0x80 +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 +#define DCA3_TAG_MAP_LITERAL_VAL 0x1 + +#define DCA_TAG_MAP_MASK 0xDF + +/* expected tag map bytes for I/OAT ver.2 */ +#define DCA2_TAG_MAP_BYTE0 0x80 +#define DCA2_TAG_MAP_BYTE1 0x0 +#define DCA2_TAG_MAP_BYTE2 0x81 +#define DCA2_TAG_MAP_BYTE3 0x82 +#define DCA2_TAG_MAP_BYTE4 0x82 + +/* verify if tag map matches expected values */ +static inline int dca2_tag_map_valid(u8 *tag_map) +{ + return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) && + (tag_map[1] == DCA2_TAG_MAP_BYTE1) && + (tag_map[2] == DCA2_TAG_MAP_BYTE2) && + (tag_map[3] == DCA2_TAG_MAP_BYTE3) && + (tag_map[4] == DCA2_TAG_MAP_BYTE4)); +} + +/* + * "Legacy" DCA systems do not implement the DCA register set in the + * I/OAT device. Software needs direct support for their tag mappings. + */ + +#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) +#define IOAT_TAG_MAP_LEN 8 + +static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; +static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; +static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; +static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; + +/* pack PCI B/D/F into a u16 */ +static inline u16 dcaid_from_pcidev(struct pci_dev *pci) +{ + return (pci->bus->number << 8) | pci->devfn; +} + +static int dca_enabled_in_bios(struct pci_dev *pdev) +{ + /* CPUID level 9 returns DCA configuration */ + /* Bit 0 indicates DCA enabled by the BIOS */ + unsigned long cpuid_level_9; + int res; + + cpuid_level_9 = cpuid_eax(9); + res = test_bit(0, &cpuid_level_9); + if (!res) + dev_err(&pdev->dev, "DCA is disabled in BIOS\n"); + + return res; +} + +static int system_has_dca_enabled(struct pci_dev *pdev) +{ + if (boot_cpu_has(X86_FEATURE_DCA)) + return dca_enabled_in_bios(pdev); + + dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n"); + return 0; +} + +struct ioat_dca_slot { + struct pci_dev *pdev; /* requester device */ + u16 rid; /* requester id, as used by IOAT */ +}; + +#define IOAT_DCA_MAX_REQ 6 +#define IOAT3_DCA_MAX_REQ 2 + +struct ioat_dca_priv { + void __iomem *iobase; + void __iomem *dca_base; + int max_requesters; + int requester_count; + u8 tag_map[IOAT_TAG_MAP_LEN]; + struct ioat_dca_slot req_slots[0]; +}; + +/* 5000 series chipset DCA Port Requester ID Table Entry Format + * [15:8] PCI-Express Bus Number + * [7:3] PCI-Express Device Number + * [2:0] PCI-Express Function Number + * + * 5000 series chipset DCA control register format + * [7:1] Reserved (0) + * [0] Ignore Function Number + */ + +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + writew(id, ioatdca->dca_base + (i * 4)); + /* make sure the ignore function bit is off */ + writeb(0, ioatdca->dca_base + (i * 4) + 2); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + writew(0, ioatdca->dca_base + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry, tag; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA_TAG_MAP_VALID) { + bit = entry & ~DCA_TAG_MAP_VALID; + value = (apic_id & (1 << bit)) ? 1 : 0; + } else { + value = entry ? 1 : 0; + } + tag |= (value << i); + } + return tag; +} + +static int ioat_dca_dev_managed(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + pdev = to_pci_dev(dev); + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) + return 1; + } + return 0; +} + +static struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + + +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + u8 *tag_map = NULL; + int i; + int err; + u8 version; + u8 max_requesters; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + /* I/OAT v1 systems must have a known tag_map to support DCA */ + switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT: + tag_map = ioat_tag_map_BNB; + break; + case PCI_DEVICE_ID_INTEL_IOAT_CNB: + tag_map = ioat_tag_map_CNB; + break; + case PCI_DEVICE_ID_INTEL_IOAT_SCNB: + tag_map = ioat_tag_map_SCNB; + break; + } + break; + case PCI_VENDOR_ID_UNISYS: + switch (pdev->device) { + case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: + tag_map = ioat_tag_map_UNISYS; + break; + } + break; + } + if (tag_map == NULL) + return NULL; + + version = readb(iobase + IOAT_VER_OFFSET); + if (version == IOAT_VER_3_0) + max_requesters = IOAT3_DCA_MAX_REQ; + else + max_requesters = IOAT_DCA_MAX_REQ; + + dca = alloc_dca_provider(&ioat_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * max_requesters)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->max_requesters = max_requesters; + ioatdca->dca_base = iobase + 0x54; + + /* copy over the APIC ID to DCA tag mapping */ + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) + ioatdca->tag_map[i] = tag_map[i]; + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} + + +static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat2_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat2_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + tag = ioat_dca_get_tag(dca, dev, cpu); + tag = (~tag) & 0x1F; + return tag; +} + +static struct dca_ops ioat2_dca_ops = { + .add_requester = ioat2_dca_add_requester, + .remove_requester = ioat2_dca_remove_requester, + .get_tag = ioat2_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u32 tag_map; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat2_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat2_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { + csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { + pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); + for (i = 0; i < 5; i++) { + bit = (tag_map >> (4 * i)) & 0x0f; + if (bit < 8) + ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; + else + ioatdca->tag_map[i] = 0; + } + + if (!dca2_tag_map_valid(ioatdca->tag_map)) { + dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, " + "disabling DCA\n"); + free_dca_provider(dca); + return NULL; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} + +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat3_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat3_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { + bit = entry & + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); + value = (apic_id & (1 << bit)) ? 1 : 0; + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; + value = (apic_id & (1 << bit)) ? 0 : 1; + } else { + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0; + } + tag |= (value << i); + } + + return tag; +} + +static struct dca_ops ioat3_dca_ops = { + .add_requester = ioat3_dca_add_requester, + .remove_requester = ioat3_dca_remove_requester, + .get_tag = ioat3_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } tag_map; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat3_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { + pcie_control |= IOAT3_PCI_CONTROL_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map.low = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); + tag_map.high = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); + for (i = 0; i < 8; i++) { + bit = tag_map.full >> (8 * i); + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c new file mode 100644 index 00000000000..648797e8329 --- /dev/null +++ b/drivers/dma/ioat/dma.c @@ -0,0 +1,1741 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) +#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) +static int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); + +#define RESET_DELAY msecs_to_jiffies(100) +#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) +static void ioat_dma_chan_reset_part2(struct work_struct *work); +static void ioat_dma_chan_watchdog(struct work_struct *work); + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +/* internal functions */ +static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); +static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); + +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); +static struct ioat_desc_sw * +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); + +static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( + struct ioatdma_device *device, + int index) +{ + return device->idx[index]; +} + +/** + * ioat_dma_do_interrupt - handler used for single vector interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +{ + struct ioatdma_device *instance = data; + struct ioat_dma_chan *ioat_chan; + unsigned long attnstatus; + int bit; + u8 intrctrl; + + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); + + if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) + return IRQ_NONE; + + if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_NONE; + } + + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); + for_each_bit(bit, &attnstatus, BITS_PER_LONG) { + ioat_chan = ioat_lookup_chan_by_index(instance, bit); + tasklet_schedule(&ioat_chan->cleanup_task); + } + + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_HANDLED; +} + +/** + * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +{ + struct ioat_dma_chan *ioat_chan = data; + + tasklet_schedule(&ioat_chan->cleanup_task); + + return IRQ_HANDLED; +} + +static void ioat_dma_cleanup_tasklet(unsigned long data); + +/** + * ioat_dma_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +static int ioat_dma_enumerate_channels(struct ioatdma_device *device) +{ + u8 xfercap_scale; + u32 xfercap; + int i; + struct ioat_dma_chan *ioat_chan; + + /* + * IOAT ver.3 workarounds + */ + if (device->version == IOAT_VER_3_0) { + u32 chan_err_mask; + u16 dev_id; + u32 dmauncerrsts; + + /* + * Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + chan_err_mask = 0x3E07; + pci_write_config_dword(device->pdev, + IOAT_PCI_CHANERRMASK_INT_OFFSET, + chan_err_mask); + + /* + * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(device->pdev, + IOAT_PCI_DEVICE_ID_OFFSET, + &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + dmauncerrsts = 0x10; + pci_write_config_dword(device->pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + dmauncerrsts); + } + } + + device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); + +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { + device->common.chancnt--; + } +#endif + for (i = 0; i < device->common.chancnt; i++) { + ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); + if (!ioat_chan) { + device->common.chancnt = i; + break; + } + + ioat_chan->device = device; + ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); + ioat_chan->xfercap = xfercap; + ioat_chan->desccount = 0; + INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); + if (ioat_chan->device->version == IOAT_VER_2_0) + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | + IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + else if (ioat_chan->device->version == IOAT_VER_3_0) + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + spin_lock_init(&ioat_chan->cleanup_lock); + spin_lock_init(&ioat_chan->desc_lock); + INIT_LIST_HEAD(&ioat_chan->free_desc); + INIT_LIST_HEAD(&ioat_chan->used_desc); + /* This should be made common somewhere in dmaengine.c */ + ioat_chan->common.device = &device->common; + list_add_tail(&ioat_chan->common.device_node, + &device->common.channels); + device->idx[i] = ioat_chan; + tasklet_init(&ioat_chan->cleanup_task, + ioat_dma_cleanup_tasklet, + (unsigned long) ioat_chan); + tasklet_disable(&ioat_chan->cleanup_task); + } + return device->common.chancnt; +} + +/** + * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended + * descriptors to hw + * @chan: DMA channel handle + */ +static inline void __ioat1_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); +} + +static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + + if (ioat_chan->pending > 0) { + spin_lock_bh(&ioat_chan->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + } +} + +static inline void __ioat2_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); +} + +static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + + if (ioat_chan->pending > 0) { + spin_lock_bh(&ioat_chan->desc_lock); + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + } +} + + +/** + * ioat_dma_chan_reset_part2 - reinit the channel after a reset + */ +static void ioat_dma_chan_reset_part2(struct work_struct *work) +{ + struct ioat_dma_chan *ioat_chan = + container_of(work, struct ioat_dma_chan, work.work); + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->desc_lock); + + ioat_chan->completion_virt->low = 0; + ioat_chan->completion_virt->high = 0; + ioat_chan->pending = 0; + + /* + * count the descriptors waiting, and be sure to do it + * right for both the CB1 line and the CB2 ring + */ + ioat_chan->dmacount = 0; + if (ioat_chan->used_desc.prev) { + desc = to_ioat_desc(ioat_chan->used_desc.prev); + do { + ioat_chan->dmacount++; + desc = to_ioat_desc(desc->node.next); + } while (&desc->node != ioat_chan->used_desc.next); + } + + /* + * write the new starting descriptor address + * this puts channel engine into ARMED state + */ + desc = to_ioat_desc(ioat_chan->used_desc.prev); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + break; + case IOAT_VER_2_0: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + + /* tell the engine to go with what's left to be done */ + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + + break; + } + dev_err(&ioat_chan->device->pdev->dev, + "chan%d reset - %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + + spin_unlock_bh(&ioat_chan->desc_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); +} + +/** + * ioat_dma_reset_channel - restart a channel + * @ioat_chan: IOAT DMA channel handle + */ +static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) +{ + u32 chansts, chanerr; + + if (!ioat_chan->used_desc.prev) + return; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + chansts = (ioat_chan->completion_virt->low + & IOAT_CHANSTS_DMA_TRANSFER_STATUS); + if (chanerr) { + dev_err(&ioat_chan->device->pdev->dev, + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(ioat_chan), chansts, chanerr); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* + * whack it upside the head with a reset + * and wait for things to settle out. + * force the pending count to a really big negative + * to make sure no one forces an issue_pending + * while we're waiting. + */ + + spin_lock_bh(&ioat_chan->desc_lock); + ioat_chan->pending = INT_MIN; + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + spin_unlock_bh(&ioat_chan->desc_lock); + + /* schedule the 2nd half instead of sleeping a long time */ + schedule_delayed_work(&ioat_chan->work, RESET_DELAY); +} + +/** + * ioat_dma_chan_watchdog - watch for stuck channels + */ +static void ioat_dma_chan_watchdog(struct work_struct *work) +{ + struct ioatdma_device *device = + container_of(work, struct ioatdma_device, work.work); + struct ioat_dma_chan *ioat_chan; + int i; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } completion_hw; + unsigned long compl_desc_addr_hw; + + for (i = 0; i < device->common.chancnt; i++) { + ioat_chan = ioat_lookup_chan_by_index(device, i); + + if (ioat_chan->device->version == IOAT_VER_1_2 + /* have we started processing anything yet */ + && ioat_chan->last_completion + /* have we completed any since last watchdog cycle? */ + && (ioat_chan->last_completion == + ioat_chan->watchdog_completion) + /* has TCP stuck on one cookie since last watchdog? */ + && (ioat_chan->watchdog_tcp_cookie == + ioat_chan->watchdog_last_tcp_cookie) + && (ioat_chan->watchdog_tcp_cookie != + ioat_chan->completed_cookie) + /* is there something in the chain to be processed? */ + /* CB1 chain always has at least the last one processed */ + && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) + && ioat_chan->pending == 0) { + + /* + * check CHANSTS register for completed + * descriptor address. + * if it is different than completion writeback, + * it is not zero + * and it has changed since the last watchdog + * we can assume that channel + * is still working correctly + * and the problem is in completion writeback. + * update completion writeback + * with actual CHANSTS value + * else + * try resetting the channel + */ + + completion_hw.low = readl(ioat_chan->reg_base + + IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); + completion_hw.high = readl(ioat_chan->reg_base + + IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); +#if (BITS_PER_LONG == 64) + compl_desc_addr_hw = + completion_hw.full + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +#else + compl_desc_addr_hw = + completion_hw.low & IOAT_LOW_COMPLETION_MASK; +#endif + + if ((compl_desc_addr_hw != 0) + && (compl_desc_addr_hw != ioat_chan->watchdog_completion) + && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { + ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; + ioat_chan->completion_virt->low = completion_hw.low; + ioat_chan->completion_virt->high = completion_hw.high; + } else { + ioat_dma_reset_channel(ioat_chan); + ioat_chan->watchdog_completion = 0; + ioat_chan->last_compl_desc_addr_hw = 0; + } + + /* + * for version 2.0 if there are descriptors yet to be processed + * and the last completed hasn't changed since the last watchdog + * if they haven't hit the pending level + * issue the pending to push them through + * else + * try resetting the channel + */ + } else if (ioat_chan->device->version == IOAT_VER_2_0 + && ioat_chan->used_desc.prev + && ioat_chan->last_completion + && ioat_chan->last_completion == ioat_chan->watchdog_completion) { + + if (ioat_chan->pending < ioat_pending_level) + ioat2_dma_memcpy_issue_pending(&ioat_chan->common); + else { + ioat_dma_reset_channel(ioat_chan); + ioat_chan->watchdog_completion = 0; + } + } else { + ioat_chan->last_compl_desc_addr_hw = 0; + ioat_chan->watchdog_completion + = ioat_chan->last_completion; + } + + ioat_chan->watchdog_last_tcp_cookie = + ioat_chan->watchdog_tcp_cookie; + } + + schedule_delayed_work(&device->work, WATCHDOG_DELAY); +} + +static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_desc_sw *first = tx_to_ioat_desc(tx); + struct ioat_desc_sw *prev, *new; + struct ioat_dma_descriptor *hw; + dma_cookie_t cookie; + LIST_HEAD(new_chain); + u32 copy; + size_t len; + dma_addr_t src, dst; + unsigned long orig_flags; + unsigned int desc_count = 0; + + /* src and dest and len are stored in the initial descriptor */ + len = first->len; + src = first->src; + dst = first->dst; + orig_flags = first->async_tx.flags; + new = first; + + spin_lock_bh(&ioat_chan->desc_lock); + prev = to_ioat_desc(ioat_chan->used_desc.prev); + prefetch(prev->hw); + do { + copy = min_t(size_t, len, ioat_chan->xfercap); + + async_tx_ack(&new->async_tx); + + hw = new->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + hw->next = 0; + + /* chain together the physical address list for the HW */ + wmb(); + prev->hw->next = (u64) new->async_tx.phys; + + len -= copy; + dst += copy; + src += copy; + + list_add_tail(&new->node, &new_chain); + desc_count++; + prev = new; + } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); + + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + + hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (first->async_tx.callback) { + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + if (first != new) { + /* move callback into to last desc */ + new->async_tx.callback = first->async_tx.callback; + new->async_tx.callback_param + = first->async_tx.callback_param; + first->async_tx.callback = NULL; + first->async_tx.callback_param = NULL; + } + } + + new->tx_cnt = desc_count; + new->async_tx.flags = orig_flags; /* client is in control of this ack */ + + /* store the original values for use in later cleanup */ + if (new != first) { + new->src = first->src; + new->dst = first->dst; + new->len = first->len; + } + + /* cookie incr and addition to used_list must be atomic */ + cookie = ioat_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + ioat_chan->common.cookie = new->async_tx.cookie = cookie; + + /* write address into NextDescriptor field of last desc in chain */ + to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = + first->async_tx.phys; + list_splice_tail(&new_chain, &ioat_chan->used_desc); + + ioat_chan->dmacount += desc_count; + ioat_chan->pending += desc_count; + if (ioat_chan->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + return cookie; +} + +static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_desc_sw *first = tx_to_ioat_desc(tx); + struct ioat_desc_sw *new; + struct ioat_dma_descriptor *hw; + dma_cookie_t cookie; + u32 copy; + size_t len; + dma_addr_t src, dst; + unsigned long orig_flags; + unsigned int desc_count = 0; + + /* src and dest and len are stored in the initial descriptor */ + len = first->len; + src = first->src; + dst = first->dst; + orig_flags = first->async_tx.flags; + new = first; + + /* + * ioat_chan->desc_lock is still in force in version 2 path + * it gets unlocked at end of this function + */ + do { + copy = min_t(size_t, len, ioat_chan->xfercap); + + async_tx_ack(&new->async_tx); + + hw = new->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + desc_count++; + } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); + + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (first->async_tx.callback) { + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + if (first != new) { + /* move callback into to last desc */ + new->async_tx.callback = first->async_tx.callback; + new->async_tx.callback_param + = first->async_tx.callback_param; + first->async_tx.callback = NULL; + first->async_tx.callback_param = NULL; + } + } + + new->tx_cnt = desc_count; + new->async_tx.flags = orig_flags; /* client is in control of this ack */ + + /* store the original values for use in later cleanup */ + if (new != first) { + new->src = first->src; + new->dst = first->dst; + new->len = first->len; + } + + /* cookie incr and addition to used_list must be atomic */ + cookie = ioat_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + ioat_chan->common.cookie = new->async_tx.cookie = cookie; + + ioat_chan->dmacount += desc_count; + ioat_chan->pending += desc_count; + if (ioat_chan->pending >= ioat_pending_level) + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + return cookie; +} + +/** + * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair + * @ioat_chan: the channel supplying the memory pool for the descriptors + * @flags: allocation flags + */ +static struct ioat_desc_sw *ioat_dma_alloc_descriptor( + struct ioat_dma_chan *ioat_chan, + gfp_t flags) +{ + struct ioat_dma_descriptor *desc; + struct ioat_desc_sw *desc_sw; + struct ioatdma_device *ioatdma_device; + dma_addr_t phys; + + ioatdma_device = to_ioatdma_device(ioat_chan->common.device); + desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); + if (unlikely(!desc)) + return NULL; + + desc_sw = kzalloc(sizeof(*desc_sw), flags); + if (unlikely(!desc_sw)) { + pci_pool_free(ioatdma_device->dma_pool, desc, phys); + return NULL; + } + + memset(desc, 0, sizeof(*desc)); + dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc_sw->async_tx.tx_submit = ioat1_tx_submit; + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + desc_sw->async_tx.tx_submit = ioat2_tx_submit; + break; + } + + desc_sw->hw = desc; + desc_sw->async_tx.phys = phys; + + return desc_sw; +} + +static int ioat_initial_desc_count = 256; +module_param(ioat_initial_desc_count, int, 0644); +MODULE_PARM_DESC(ioat_initial_desc_count, + "initial descriptors per channel (default: 256)"); + +/** + * ioat2_dma_massage_chan_desc - link the descriptors into a circle + * @ioat_chan: the channel to be massaged + */ +static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *desc, *_desc; + + /* setup used_desc */ + ioat_chan->used_desc.next = ioat_chan->free_desc.next; + ioat_chan->used_desc.prev = NULL; + + /* pull free_desc out of the circle so that every node is a hw + * descriptor, but leave it pointing to the list + */ + ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; + ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; + + /* circle link the hw descriptors */ + desc = to_ioat_desc(ioat_chan->free_desc.next); + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + } +} + +/** + * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors + * @chan: the channel to be filled out + */ +static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *desc; + u16 chanctrl; + u32 chanerr; + int i; + LIST_HEAD(tmp_list); + + /* have we already been set up? */ + if (!list_empty(&ioat_chan->free_desc)) + return ioat_chan->desccount; + + /* Setup register to interrupt and write completion status on error */ + chanctrl = IOAT_CHANCTRL_ERR_INT_EN | + IOAT_CHANCTRL_ANY_ERR_ABORT_EN | + IOAT_CHANCTRL_ERR_COMPLETION_EN; + writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(&ioat_chan->device->pdev->dev, + "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* Allocate descriptors */ + for (i = 0; i < ioat_initial_desc_count; i++) { + desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "Only %d initial descriptors\n", i); + break; + } + list_add_tail(&desc->node, &tmp_list); + } + spin_lock_bh(&ioat_chan->desc_lock); + ioat_chan->desccount = i; + list_splice(&tmp_list, &ioat_chan->free_desc); + if (ioat_chan->device->version != IOAT_VER_1_2) + ioat2_dma_massage_chan_desc(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + ioat_chan->completion_virt = + pci_pool_alloc(ioat_chan->device->completion_pool, + GFP_KERNEL, + &ioat_chan->completion_addr); + memset(ioat_chan->completion_virt, 0, + sizeof(*ioat_chan->completion_virt)); + writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) ioat_chan->completion_addr) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + tasklet_enable(&ioat_chan->cleanup_task); + ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ + return ioat_chan->desccount; +} + +/** + * ioat_dma_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device); + struct ioat_desc_sw *desc, *_desc; + int in_use_descs = 0; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (ioat_chan->desccount == 0) + return; + + tasklet_disable(&ioat_chan->cleanup_task); + ioat_dma_memcpy_cleanup(ioat_chan); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat_chan->desc_lock); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat_chan->free_desc, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + list_for_each_entry_safe(desc, _desc, + ioat_chan->free_desc.next, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + desc = to_ioat_desc(ioat_chan->free_desc.next); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + INIT_LIST_HEAD(&ioat_chan->free_desc); + INIT_LIST_HEAD(&ioat_chan->used_desc); + break; + } + spin_unlock_bh(&ioat_chan->desc_lock); + + pci_pool_free(ioatdma_device->completion_pool, + ioat_chan->completion_virt, + ioat_chan->completion_addr); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + dev_err(&ioat_chan->device->pdev->dev, + "Freeing %d in use descriptors!\n", + in_use_descs - 1); + + ioat_chan->last_completion = ioat_chan->completion_addr = 0; + ioat_chan->pending = 0; + ioat_chan->dmacount = 0; + ioat_chan->desccount = 0; + ioat_chan->watchdog_completion = 0; + ioat_chan->last_compl_desc_addr_hw = 0; + ioat_chan->watchdog_tcp_cookie = + ioat_chan->watchdog_last_tcp_cookie = 0; +} + +/** + * ioat_dma_get_next_descriptor - return the next available descriptor + * @ioat_chan: IOAT DMA channel handle + * + * Gets the next descriptor from the chain, and must be called with the + * channel's desc_lock held. Allocates more descriptors if the channel + * has run out. + */ +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *new; + + if (!list_empty(&ioat_chan->free_desc)) { + new = to_ioat_desc(ioat_chan->free_desc.next); + list_del(&new->node); + } else { + /* try to get another desc */ + new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "alloc failed\n"); + return NULL; + } + } + + prefetch(new->hw); + return new; +} + +static struct ioat_desc_sw * +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *new; + + /* + * used.prev points to where to start processing + * used.next points to next free descriptor + * if used.prev == NULL, there are none waiting to be processed + * if used.next == used.prev.prev, there is only one free descriptor, + * and we need to use it to as a noop descriptor before + * linking in a new set of descriptors, since the device + * has probably already read the pointer to it + */ + if (ioat_chan->used_desc.prev && + ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { + + struct ioat_desc_sw *desc; + struct ioat_desc_sw *noop_desc; + int i; + + /* set up the noop descriptor */ + noop_desc = to_ioat_desc(ioat_chan->used_desc.next); + /* set size to non-zero value (channel returns error when size is 0) */ + noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; + noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; + noop_desc->hw->src_addr = 0; + noop_desc->hw->dst_addr = 0; + + ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; + ioat_chan->pending++; + ioat_chan->dmacount++; + + /* try to get a few more descriptors */ + for (i = 16; i; i--) { + desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "alloc failed\n"); + break; + } + list_add_tail(&desc->node, ioat_chan->used_desc.next); + + desc->hw->next + = to_ioat_desc(desc->node.next)->async_tx.phys; + to_ioat_desc(desc->node.prev)->hw->next + = desc->async_tx.phys; + ioat_chan->desccount++; + } + + ioat_chan->used_desc.next = noop_desc->node.next; + } + new = to_ioat_desc(ioat_chan->used_desc.next); + prefetch(new); + ioat_chan->used_desc.next = new->node.next; + + if (ioat_chan->used_desc.prev == NULL) + ioat_chan->used_desc.prev = &new->node; + + prefetch(new->hw); + return new; +} + +static struct ioat_desc_sw *ioat_dma_get_next_descriptor( + struct ioat_dma_chan *ioat_chan) +{ + if (!ioat_chan) + return NULL; + + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + return ioat1_dma_get_next_descriptor(ioat_chan); + case IOAT_VER_2_0: + case IOAT_VER_3_0: + return ioat2_dma_get_next_descriptor(ioat_chan); + } + return NULL; +} + +static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( + struct dma_chan *chan, + dma_addr_t dma_dest, + dma_addr_t dma_src, + size_t len, + unsigned long flags) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *new; + + spin_lock_bh(&ioat_chan->desc_lock); + new = ioat_dma_get_next_descriptor(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + if (new) { + new->len = len; + new->dst = dma_dest; + new->src = dma_src; + new->async_tx.flags = flags; + return &new->async_tx; + } else { + dev_err(&ioat_chan->device->pdev->dev, + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + return NULL; + } +} + +static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( + struct dma_chan *chan, + dma_addr_t dma_dest, + dma_addr_t dma_src, + size_t len, + unsigned long flags) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *new; + + spin_lock_bh(&ioat_chan->desc_lock); + new = ioat2_dma_get_next_descriptor(ioat_chan); + + /* + * leave ioat_chan->desc_lock set in ioat 2 path + * it will get unlocked at end of tx_submit + */ + + if (new) { + new->len = len; + new->dst = dma_dest; + new->src = dma_src; + new->async_tx.flags = flags; + return &new->async_tx; + } else { + spin_unlock_bh(&ioat_chan->desc_lock); + dev_err(&ioat_chan->device->pdev->dev, + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + return NULL; + } +} + +static void ioat_dma_cleanup_tasklet(unsigned long data) +{ + struct ioat_dma_chan *chan = (void *)data; + ioat_dma_memcpy_cleanup(chan); + writew(IOAT_CHANCTRL_INT_DISABLE, + chan->reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void +ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) +{ + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + } + + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + } +} + +/** + * ioat_dma_memcpy_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + */ +static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) +{ + unsigned long phys_complete; + struct ioat_desc_sw *desc, *_desc; + dma_cookie_t cookie = 0; + unsigned long desc_phys; + struct ioat_desc_sw *latest_desc; + + prefetch(ioat_chan->completion_virt); + + if (!spin_trylock_bh(&ioat_chan->cleanup_lock)) + return; + + /* The completion writeback can happen at any time, + so reads by the driver need to be atomic operations + The descriptor physical addresses are limited to 32-bits + when the CPU can only do a 32-bit mov */ + +#if (BITS_PER_LONG == 64) + phys_complete = + ioat_chan->completion_virt->full + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +#else + phys_complete = + ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; +#endif + + if ((ioat_chan->completion_virt->full + & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == + IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { + dev_err(&ioat_chan->device->pdev->dev, + "Channel halted, chanerr = %x\n", + readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); + + /* TODO do something to salvage the situation */ + } + + if (phys_complete == ioat_chan->last_completion) { + spin_unlock_bh(&ioat_chan->cleanup_lock); + /* + * perhaps we're stuck so hard that the watchdog can't go off? + * try to catch it after 2 seconds + */ + if (ioat_chan->device->version != IOAT_VER_3_0) { + if (time_after(jiffies, + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); + ioat_chan->last_completion_time = jiffies; + } + } + return; + } + ioat_chan->last_completion_time = jiffies; + + cookie = 0; + if (!spin_trylock_bh(&ioat_chan->desc_lock)) { + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } + + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { + + /* + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. + */ + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + ioat_dma_unmap(ioat_chan, desc); + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } + } + + if (desc->async_tx.phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (async_tx_test_ack(&desc->async_tx)) { + list_move_tail(&desc->node, + &ioat_chan->free_desc); + } else + desc->async_tx.cookie = 0; + } else { + /* + * last used desc. Do not remove, so we can + * append from it, but don't look at it next + * time, either + */ + desc->async_tx.cookie = 0; + + /* TODO check status bits? */ + break; + } + } + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + /* has some other thread has already cleaned up? */ + if (ioat_chan->used_desc.prev == NULL) + break; + + /* work backwards to find latest finished desc */ + desc = to_ioat_desc(ioat_chan->used_desc.next); + latest_desc = NULL; + do { + desc = to_ioat_desc(desc->node.prev); + desc_phys = (unsigned long)desc->async_tx.phys + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; + if (desc_phys == phys_complete) { + latest_desc = desc; + break; + } + } while (&desc->node != ioat_chan->used_desc.prev); + + if (latest_desc != NULL) { + + /* work forwards to clear finished descriptors */ + for (desc = to_ioat_desc(ioat_chan->used_desc.prev); + &desc->node != latest_desc->node.next && + &desc->node != ioat_chan->used_desc.next; + desc = to_ioat_desc(desc->node.next)) { + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + ioat_dma_unmap(ioat_chan, desc); + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } + } + } + + /* move used.prev up beyond those that are finished */ + if (&desc->node == ioat_chan->used_desc.next) + ioat_chan->used_desc.prev = NULL; + else + ioat_chan->used_desc.prev = &desc->node; + } + break; + } + + spin_unlock_bh(&ioat_chan->desc_lock); + + ioat_chan->last_completion = phys_complete; + if (cookie != 0) + ioat_chan->completed_cookie = cookie; + + spin_unlock_bh(&ioat_chan->cleanup_lock); +} + +/** + * ioat_dma_is_complete - poll the status of a IOAT DMA transaction + * @chan: IOAT DMA channel handle + * @cookie: DMA transaction identifier + * @done: if not %NULL, updated with last completed transaction + * @used: if not %NULL, updated with last used transaction + */ +static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + last_used = chan->cookie; + last_complete = ioat_chan->completed_cookie; + ioat_chan->watchdog_tcp_cookie = cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) + return ret; + + ioat_dma_memcpy_cleanup(ioat_chan); + + last_used = chan->cookie; + last_complete = ioat_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat_chan->desc_lock); + + desc = ioat_dma_get_next_descriptor(ioat_chan); + + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "Unable to start null desc - get next desc failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return; + } + + desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL + | IOAT_DMA_DESCRIPTOR_CTL_INT_GN + | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + /* set size to non-zero value (channel returns error when size is 0) */ + desc->hw->size = NULL_DESC_BUFFER_SIZE; + desc->hw->src_addr = 0; + desc->hw->dst_addr = 0; + async_tx_ack(&desc->async_tx); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc->hw->next = 0; + list_add_tail(&desc->node, &ioat_chan->used_desc); + + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + + ioat_chan->dmacount++; + __ioat2_dma_memcpy_issue_pending(ioat_chan); + break; + } + spin_unlock_bh(&ioat_chan->desc_lock); +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @device: device to be tested + */ +static int ioat_dma_self_test(struct ioatdma_device *device) +{ + int i; + u8 *src; + u8 *dest; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (device->common.device_alloc_chan_resources(dma_chan) < 1) { + dev_err(&device->pdev->dev, + "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, + DMA_TO_DEVICE); + dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, + DMA_FROM_DEVICE); + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; + tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, + IOAT_TEST_SIZE, flags); + if (!tx) { + dev_err(&device->pdev->dev, + "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(&device->pdev->dev, + "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + device->common.device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) + != DMA_SUCCESS) { + dev_err(&device->pdev->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(&device->pdev->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + device->common.device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), " + "msix-single-vector, msi, intx)"); + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @device: ioat device + */ +static int ioat_dma_setup_interrupts(struct ioatdma_device *device) +{ + struct ioat_dma_chan *ioat_chan; + int err, i, j, msixcnt; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msix-single-vector")) + goto msix_single_vector; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", + ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = device->common.chancnt; + for (i = 0; i < msixcnt; i++) + device->msix_entries[i].entry = i; + + err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); + if (err < 0) + goto msi; + if (err > 0) + goto msix_single_vector; + + for (i = 0; i < msixcnt; i++) { + ioat_chan = ioat_lookup_chan_by_index(device, i); + err = request_irq(device->msix_entries[i].vector, + ioat_dma_do_interrupt_msix, + 0, "ioat-msix", ioat_chan); + if (err) { + for (j = 0; j < i; j++) { + ioat_chan = + ioat_lookup_chan_by_index(device, j); + free_irq(device->msix_entries[j].vector, + ioat_chan); + } + goto msix_single_vector; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + device->irq_mode = msix_multi_vector; + goto done; + +msix_single_vector: + device->msix_entries[0].entry = 0; + err = pci_enable_msix(device->pdev, device->msix_entries, 1); + if (err) + goto msi; + + err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, + 0, "ioat-msix", device); + if (err) { + pci_disable_msix(device->pdev); + goto msi; + } + device->irq_mode = msix_single_vector; + goto done; + +msi: + err = pci_enable_msi(device->pdev); + if (err) + goto intx; + + err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, + 0, "ioat-msi", device); + if (err) { + pci_disable_msi(device->pdev); + goto intx; + } + /* + * CB 1.2 devices need a bit set in configuration space to enable MSI + */ + if (device->version == IOAT_VER_1_2) { + u32 dmactrl; + pci_read_config_dword(device->pdev, + IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; + pci_write_config_dword(device->pdev, + IOAT_PCI_DMACTRL_OFFSET, dmactrl); + } + device->irq_mode = msi; + goto done; + +intx: + err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", device); + if (err) + goto err_no_irq; + device->irq_mode = intx; + +done: + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + dev_err(&device->pdev->dev, "no usable interrupts\n"); + device->irq_mode = none; + return -1; +} + +/** + * ioat_dma_remove_interrupts - remove whatever interrupts were set + * @device: ioat device + */ +static void ioat_dma_remove_interrupts(struct ioatdma_device *device) +{ + struct ioat_dma_chan *ioat_chan; + int i; + + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + + switch (device->irq_mode) { + case msix_multi_vector: + for (i = 0; i < device->common.chancnt; i++) { + ioat_chan = ioat_lookup_chan_by_index(device, i); + free_irq(device->msix_entries[i].vector, ioat_chan); + } + pci_disable_msix(device->pdev); + break; + case msix_single_vector: + free_irq(device->msix_entries[0].vector, device); + pci_disable_msix(device->pdev); + break; + case msi: + free_irq(device->pdev->irq, device); + pci_disable_msi(device->pdev); + break; + case intx: + free_irq(device->pdev->irq, device); + break; + case none: + dev_warn(&device->pdev->dev, + "call to %s without interrupts setup\n", __func__); + } + device->irq_mode = none; +} + +struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, + void __iomem *iobase) +{ + int err; + struct ioatdma_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + err = -ENOMEM; + goto err_kzalloc; + } + device->pdev = pdev; + device->reg_base = iobase; + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + + /* DMA coherent memory pool for DMA descriptor allocations */ + device->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!device->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + device->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + if (!device->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + INIT_LIST_HEAD(&device->common.channels); + ioat_dma_enumerate_channels(device); + + device->common.device_alloc_chan_resources = + ioat_dma_alloc_chan_resources; + device->common.device_free_chan_resources = + ioat_dma_free_chan_resources; + device->common.dev = &pdev->dev; + + dma_cap_set(DMA_MEMCPY, device->common.cap_mask); + device->common.device_is_tx_complete = ioat_dma_is_complete; + switch (device->version) { + case IOAT_VER_1_2: + device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + device->common.device_issue_pending = + ioat1_dma_memcpy_issue_pending; + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + device->common.device_issue_pending = + ioat2_dma_memcpy_issue_pending; + break; + } + + dev_err(&device->pdev->dev, + "Intel(R) I/OAT DMA Engine found," + " %d channels, device version 0x%02x, driver version %s\n", + device->common.chancnt, device->version, IOAT_DMA_VERSION); + + if (!device->common.chancnt) { + dev_err(&device->pdev->dev, + "Intel(R) I/OAT DMA Engine problem found: " + "zero channels detected\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(device); + if (err) + goto err_setup_interrupts; + + err = ioat_dma_self_test(device); + if (err) + goto err_self_test; + + ioat_set_tcp_copy_break(device); + + dma_async_device_register(&device->common); + + if (device->version != IOAT_VER_3_0) { + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, + WATCHDOG_DELAY); + } + + return device; + +err_self_test: + ioat_dma_remove_interrupts(device); +err_setup_interrupts: + pci_pool_destroy(device->completion_pool); +err_completion_pool: + pci_pool_destroy(device->dma_pool); +err_dma_pool: + kfree(device); +err_kzalloc: + dev_err(&pdev->dev, + "Intel(R) I/OAT DMA Engine initialization failed\n"); + return NULL; +} + +void ioat_dma_remove(struct ioatdma_device *device) +{ + struct dma_chan *chan, *_chan; + struct ioat_dma_chan *ioat_chan; + + if (device->version != IOAT_VER_3_0) + cancel_delayed_work(&device->work); + + ioat_dma_remove_interrupts(device); + + dma_async_device_unregister(&device->common); + + pci_pool_destroy(device->dma_pool); + pci_pool_destroy(device->completion_pool); + + iounmap(device->reg_base); + pci_release_regions(device->pdev); + pci_disable_device(device->pdev); + + list_for_each_entry_safe(chan, _chan, + &device->common.channels, device_node) { + ioat_chan = to_ioat_chan(chan); + list_del(&chan->device_node); + kfree(ioat_chan); + } + kfree(device); +} + diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h new file mode 100644 index 00000000000..e80e787fe64 --- /dev/null +++ b/drivers/dma/ioat/dma.h @@ -0,0 +1,165 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_H +#define IOATDMA_H + +#include +#include "hw.h" +#include +#include +#include +#include +#include + +#define IOAT_DMA_VERSION "3.64" + +enum ioat_interrupt { + none = 0, + msix_multi_vector = 1, + msix_single_vector = 2, + msi = 3, + intx = 4, +}; + +#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 +#define IOAT_DMA_DCA_ANY_CPU ~0 +#define IOAT_WATCHDOG_PERIOD (2 * HZ) + + +/** + * struct ioatdma_device - internal representation of a IOAT device + * @pdev: PCI-Express device + * @reg_base: MMIO register space base address + * @dma_pool: for allocating DMA descriptors + * @common: embedded struct dma_device + * @version: version of ioatdma device + * @irq_mode: which style irq to use + * @msix_entries: irq handlers + * @idx: per channel data + */ + +struct ioatdma_device { + struct pci_dev *pdev; + void __iomem *reg_base; + struct pci_pool *dma_pool; + struct pci_pool *completion_pool; + struct dma_device common; + u8 version; + enum ioat_interrupt irq_mode; + struct delayed_work work; + struct msix_entry msix_entries[4]; + struct ioat_dma_chan *idx[4]; +}; + +/** + * struct ioat_dma_chan - internal representation of a DMA channel + */ +struct ioat_dma_chan { + + void __iomem *reg_base; + + dma_cookie_t completed_cookie; + unsigned long last_completion; + unsigned long last_completion_time; + + size_t xfercap; /* XFERCAP register value expanded out */ + + spinlock_t cleanup_lock; + spinlock_t desc_lock; + struct list_head free_desc; + struct list_head used_desc; + unsigned long watchdog_completion; + int watchdog_tcp_cookie; + u32 watchdog_last_tcp_cookie; + struct delayed_work work; + + int pending; + int dmacount; + int desccount; + + struct ioatdma_device *device; + struct dma_chan common; + + dma_addr_t completion_addr; + union { + u64 full; /* HW completion writeback */ + struct { + u32 low; + u32 high; + }; + } *completion_virt; + unsigned long last_compl_desc_addr_hw; + struct tasklet_struct cleanup_task; +}; + +/* wrapper around hardware descriptor format + additional software fields */ + +/** + * struct ioat_desc_sw - wrapper around hardware descriptor + * @hw: hardware DMA descriptor + * @node: this descriptor will either be on the free list, + * or attached to a transaction list (async_tx.tx_list) + * @tx_cnt: number of descriptors required to complete the transaction + * @async_tx: the generic software descriptor for all engines + */ +struct ioat_desc_sw { + struct ioat_dma_descriptor *hw; + struct list_head node; + int tx_cnt; + size_t len; + dma_addr_t src; + dma_addr_t dst; + struct dma_async_tx_descriptor async_tx; +}; + +static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) +{ + #ifdef CONFIG_NET_DMA + switch (dev->version) { + case IOAT_VER_1_2: + sysctl_tcp_dma_copybreak = 4096; + break; + case IOAT_VER_2_0: + sysctl_tcp_dma_copybreak = 2048; + break; + case IOAT_VER_3_0: + sysctl_tcp_dma_copybreak = 262144; + break; + } + #endif +} + +#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) +struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, + void __iomem *iobase); +void ioat_dma_remove(struct ioatdma_device *device); +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +#else +#define ioat_dma_probe(pdev, iobase) NULL +#define ioat_dma_remove(device) do { } while (0) +#define ioat_dca_init(pdev, iobase) NULL +#define ioat2_dca_init(pdev, iobase) NULL +#define ioat3_dca_init(pdev, iobase) NULL +#endif + +#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h new file mode 100644 index 00000000000..afa57eef86c --- /dev/null +++ b/drivers/dma/ioat/hw.h @@ -0,0 +1,70 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_HW_H_ +#define _IOAT_HW_H_ + +/* PCI Configuration Space Values */ +#define IOAT_PCI_VID 0x8086 + +/* CB device ID's */ +#define IOAT_PCI_DID_5000 0x1A38 +#define IOAT_PCI_DID_CNB 0x360B +#define IOAT_PCI_DID_SCNB 0x65FF +#define IOAT_PCI_DID_SNB 0x402F + +#define IOAT_PCI_RID 0x00 +#define IOAT_PCI_SVID 0x8086 +#define IOAT_PCI_SID 0x8086 +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ + +struct ioat_dma_descriptor { + uint32_t size; + uint32_t ctl; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t rsv2; + uint64_t user1; + uint64_t user2; +}; + +#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001 +#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002 +#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004 +#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 +#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 +#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 +#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 +#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 +#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 +#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 +#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 + +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 + +#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 + +#endif diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c new file mode 100644 index 00000000000..d7948bfd8fb --- /dev/null +++ b/drivers/dma/ioat/pci.c @@ -0,0 +1,202 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include +#include +#include +#include +#include +#include "dma.h" +#include "registers.h" +#include "hw.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + { 0, } +}; + +struct ioat_device { + struct pci_dev *pdev; + void __iomem *iobase; + struct ioatdma_device *dma; + struct dca_provider *dca; +}; + +static int __devinit ioat_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void __devexit ioat_remove(struct pci_dev *pdev); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); + +static struct pci_driver ioat_pci_driver = { + .name = "ioatdma", + .id_table = ioat_pci_tbl, + .probe = ioat_probe, + .remove = __devexit_p(ioat_remove), +}; + +static int __devinit ioat_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + void __iomem *iobase; + struct ioat_device *device; + unsigned long mmio_start, mmio_len; + int err; + + err = pci_enable_device(pdev); + if (err) + goto err_enable_device; + + err = pci_request_regions(pdev, ioat_pci_driver.name); + if (err) + goto err_request_regions; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + mmio_start = pci_resource_start(pdev, 0); + mmio_len = pci_resource_len(pdev, 0); + iobase = ioremap(mmio_start, mmio_len); + if (!iobase) { + err = -ENOMEM; + goto err_ioremap; + } + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + err = -ENOMEM; + goto err_kzalloc; + } + device->pdev = pdev; + pci_set_drvdata(pdev, device); + device->iobase = iobase; + + pci_set_master(pdev); + + switch (readb(iobase + IOAT_VER_OFFSET)) { + case IOAT_VER_1_2: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat_dca_init(pdev, iobase); + break; + case IOAT_VER_2_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat2_dca_init(pdev, iobase); + break; + case IOAT_VER_3_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat3_dca_init(pdev, iobase); + break; + default: + err = -ENODEV; + break; + } + if (!device->dma) + err = -ENODEV; + + if (err) + goto err_version; + + return 0; + +err_version: + kfree(device); +err_kzalloc: + iounmap(iobase); +err_ioremap: +err_set_dma_mask: + pci_release_regions(pdev); + pci_disable_device(pdev); +err_request_regions: +err_enable_device: + return err; +} + +static void __devexit ioat_remove(struct pci_dev *pdev) +{ + struct ioat_device *device = pci_get_drvdata(pdev); + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca); + free_dca_provider(device->dca); + device->dca = NULL; + } + + if (device->dma) { + ioat_dma_remove(device->dma); + device->dma = NULL; + } + + kfree(device); +} + +static int __init ioat_init_module(void) +{ + return pci_register_driver(&ioat_pci_driver); +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h new file mode 100644 index 00000000000..49bc277424f --- /dev/null +++ b/drivers/dma/ioat/registers.h @@ -0,0 +1,226 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_REGISTERS_H_ +#define _IOAT_REGISTERS_H_ + +#define IOAT_PCI_DMACTRL_OFFSET 0x48 +#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 +#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 + +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02 +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 + +/* MMIO Device Registers */ +#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ + +#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */ +#define IOAT_XFERCAP_4KB 12 +#define IOAT_XFERCAP_8KB 13 +#define IOAT_XFERCAP_16KB 14 +#define IOAT_XFERCAP_32KB 15 +#define IOAT_XFERCAP_32GB 0 + +#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */ +#define IOAT_GENCTRL_DEBUG_EN 0x01 + +#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */ +#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */ +#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */ +#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */ +#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ + +#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */ + +#define IOAT_VER_OFFSET 0x08 /* 8-bit */ +#define IOAT_VER_MAJOR_MASK 0xF0 +#define IOAT_VER_MINOR_MASK 0x0F +#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4) +#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK) + +#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ + +#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ +#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */ +#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ + +#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ +#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 + +#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ + +/* DMA Channel Registers */ +#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ +#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 +#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 +#define IOAT_CHANCTRL_ERR_INT_EN 0x0010 +#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 +#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 +#define IOAT_CHANCTRL_INT_DISABLE 0x0001 + +#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ +#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ +#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ + + +#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ +#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ +#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET) +#define IOAT1_CHANSTS_OFFSET_LOW 0x04 +#define IOAT2_CHANSTS_OFFSET_LOW 0x08 +#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW) +#define IOAT1_CHANSTS_OFFSET_HIGH 0x08 +#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C +#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F +#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 + + + +#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */ + +#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */ +#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000 +#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */ + +/* CB DCA Memory Space Registers */ +#define IOAT_DCAOFFSET_OFFSET 0x14 +/* CB_BAR + IOAT_DCAOFFSET value */ +#define IOAT_DCA_VER_OFFSET 0x00 +#define IOAT_DCA_VER_MAJOR_MASK 0xF0 +#define IOAT_DCA_VER_MINOR_MASK 0x0F + +#define IOAT_DCA_COMP_OFFSET 0x02 +#define IOAT_DCA_COMP_V1 0x1 + +#define IOAT_FSB_CAPABILITY_OFFSET 0x04 +#define IOAT_FSB_CAPABILITY_PREFETCH 0x1 + +#define IOAT_PCI_CAPABILITY_OFFSET 0x06 +#define IOAT_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08 +#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1 + +#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A +#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1 + +#define IOAT_APICID_TAG_MAP_OFFSET 0x0C +#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F +#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0 +#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0 +#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4 +#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00 +#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8 +#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000 +#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12 +#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000 +#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16 +#define IOAT_APICID_TAG_CB2_VALID 0x8080808080 + +#define IOAT_DCA_GREQID_OFFSET 0x10 +#define IOAT_DCA_GREQID_SIZE 0x04 +#define IOAT_DCA_GREQID_MASK 0xFFFF +#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000 +#define IOAT_DCA_GREQID_VALID 0x20000000 +#define IOAT_DCA_GREQID_LASTID 0x80000000 + +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 + +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT3_CSI_CONTROL_OFFSET 0x0C +#define IOAT3_CSI_CONTROL_PREFETCH 0x1 + +#define IOAT3_PCI_CONTROL_OFFSET 0x0E +#define IOAT3_PCI_CONTROL_MEMWR 0x1 + +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 + +#define IOAT3_DCA_GREQID_OFFSET 0x02 + +#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ +#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ +#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET) +#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C +#define IOAT2_CHAINADDR_OFFSET_LOW 0x10 +#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW) +#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10 +#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14 +#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH) + +#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ +#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */ +#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET) +#define IOAT_CHANCMD_RESET 0x20 +#define IOAT_CHANCMD_RESUME 0x10 +#define IOAT_CHANCMD_ABORT 0x08 +#define IOAT_CHANCMD_SUSPEND 0x04 +#define IOAT_CHANCMD_APPEND 0x02 +#define IOAT_CHANCMD_START 0x01 + +#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */ +#define IOAT_CHANCMP_OFFSET_LOW 0x18 +#define IOAT_CHANCMP_OFFSET_HIGH 0x1C + +#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */ +#define IOAT_CDAR_OFFSET_LOW 0x20 +#define IOAT_CDAR_OFFSET_HIGH 0x24 + +#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ +#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 +#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 +#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 +#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 +#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 +#define IOAT_CHANERR_CHANCMD_ERR 0x0020 +#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 +#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 +#define IOAT_CHANERR_READ_DATA_ERR 0x0100 +#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 +#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 +#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 +#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 +#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 +#define IOAT_CHANERR_SOFT_ERR 0x4000 +#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 + +#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ + +#endif /* _IOAT_REGISTERS_H_ */ diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c deleted file mode 100644 index c012a1e1504..00000000000 --- a/drivers/dma/ioat_dca.c +++ /dev/null @@ -1,681 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -#include -#include -#include -#include -#include - -/* either a kernel change is needed, or we need something like this in kernel */ -#ifndef CONFIG_SMP -#include -#undef cpu_physical_id -#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) -#endif - -#include "ioatdma.h" -#include "ioatdma_registers.h" - -/* - * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 - * contain the bit number of the APIC ID to map into the DCA tag. If the valid - * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. - */ -#define DCA_TAG_MAP_VALID 0x80 - -#define DCA3_TAG_MAP_BIT_TO_INV 0x80 -#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 -#define DCA3_TAG_MAP_LITERAL_VAL 0x1 - -#define DCA_TAG_MAP_MASK 0xDF - -/* expected tag map bytes for I/OAT ver.2 */ -#define DCA2_TAG_MAP_BYTE0 0x80 -#define DCA2_TAG_MAP_BYTE1 0x0 -#define DCA2_TAG_MAP_BYTE2 0x81 -#define DCA2_TAG_MAP_BYTE3 0x82 -#define DCA2_TAG_MAP_BYTE4 0x82 - -/* verify if tag map matches expected values */ -static inline int dca2_tag_map_valid(u8 *tag_map) -{ - return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) && - (tag_map[1] == DCA2_TAG_MAP_BYTE1) && - (tag_map[2] == DCA2_TAG_MAP_BYTE2) && - (tag_map[3] == DCA2_TAG_MAP_BYTE3) && - (tag_map[4] == DCA2_TAG_MAP_BYTE4)); -} - -/* - * "Legacy" DCA systems do not implement the DCA register set in the - * I/OAT device. Software needs direct support for their tag mappings. - */ - -#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) -#define IOAT_TAG_MAP_LEN 8 - -static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; -static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; - -/* pack PCI B/D/F into a u16 */ -static inline u16 dcaid_from_pcidev(struct pci_dev *pci) -{ - return (pci->bus->number << 8) | pci->devfn; -} - -static int dca_enabled_in_bios(struct pci_dev *pdev) -{ - /* CPUID level 9 returns DCA configuration */ - /* Bit 0 indicates DCA enabled by the BIOS */ - unsigned long cpuid_level_9; - int res; - - cpuid_level_9 = cpuid_eax(9); - res = test_bit(0, &cpuid_level_9); - if (!res) - dev_err(&pdev->dev, "DCA is disabled in BIOS\n"); - - return res; -} - -static int system_has_dca_enabled(struct pci_dev *pdev) -{ - if (boot_cpu_has(X86_FEATURE_DCA)) - return dca_enabled_in_bios(pdev); - - dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n"); - return 0; -} - -struct ioat_dca_slot { - struct pci_dev *pdev; /* requester device */ - u16 rid; /* requester id, as used by IOAT */ -}; - -#define IOAT_DCA_MAX_REQ 6 -#define IOAT3_DCA_MAX_REQ 2 - -struct ioat_dca_priv { - void __iomem *iobase; - void __iomem *dca_base; - int max_requesters; - int requester_count; - u8 tag_map[IOAT_TAG_MAP_LEN]; - struct ioat_dca_slot req_slots[0]; -}; - -/* 5000 series chipset DCA Port Requester ID Table Entry Format - * [15:8] PCI-Express Bus Number - * [7:3] PCI-Express Device Number - * [2:0] PCI-Express Function Number - * - * 5000 series chipset DCA control register format - * [7:1] Reserved (0) - * [0] Ignore Function Number - */ - -static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - - /* This implementation only supports PCI-Express */ - if (dev->bus != &pci_bus_type) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - writew(id, ioatdca->dca_base + (i * 4)); - /* make sure the ignore function bit is off */ - writeb(0, ioatdca->dca_base + (i * 4) + 2); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - - /* This implementation only supports PCI-Express */ - if (dev->bus != &pci_bus_type) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - writew(0, ioatdca->dca_base + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - int i, apic_id, bit, value; - u8 entry, tag; - - tag = 0; - apic_id = cpu_physical_id(cpu); - - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { - entry = ioatdca->tag_map[i]; - if (entry & DCA_TAG_MAP_VALID) { - bit = entry & ~DCA_TAG_MAP_VALID; - value = (apic_id & (1 << bit)) ? 1 : 0; - } else { - value = entry ? 1 : 0; - } - tag |= (value << i); - } - return tag; -} - -static int ioat_dca_dev_managed(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - - pdev = to_pci_dev(dev); - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) - return 1; - } - return 0; -} - -static struct dca_ops ioat_dca_ops = { - .add_requester = ioat_dca_add_requester, - .remove_requester = ioat_dca_remove_requester, - .get_tag = ioat_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - - -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - u8 *tag_map = NULL; - int i; - int err; - u8 version; - u8 max_requesters; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - /* I/OAT v1 systems must have a known tag_map to support DCA */ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT: - tag_map = ioat_tag_map_BNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_CNB: - tag_map = ioat_tag_map_CNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_SCNB: - tag_map = ioat_tag_map_SCNB; - break; - } - break; - case PCI_VENDOR_ID_UNISYS: - switch (pdev->device) { - case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: - tag_map = ioat_tag_map_UNISYS; - break; - } - break; - } - if (tag_map == NULL) - return NULL; - - version = readb(iobase + IOAT_VER_OFFSET); - if (version == IOAT_VER_3_0) - max_requesters = IOAT3_DCA_MAX_REQ; - else - max_requesters = IOAT_DCA_MAX_REQ; - - dca = alloc_dca_provider(&ioat_dca_ops, - sizeof(*ioatdca) + - (sizeof(struct ioat_dca_slot) * max_requesters)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->max_requesters = max_requesters; - ioatdca->dca_base = iobase + 0x54; - - /* copy over the APIC ID to DCA tag mapping */ - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) - ioatdca->tag_map[i] = tag_map[i]; - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - - -static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (dev->bus != &pci_bus_type) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(id | IOAT_DCA_GREQID_VALID, - ioatdca->iobase + global_req_table + (i * 4)); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat2_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (dev->bus != &pci_bus_type) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(0, ioatdca->iobase + global_req_table + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat2_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - u8 tag; - - tag = ioat_dca_get_tag(dca, dev, cpu); - tag = (~tag) & 0x1F; - return tag; -} - -static struct dca_ops ioat2_dca_ops = { - .add_requester = ioat2_dca_add_requester, - .remove_requester = ioat2_dca_remove_requester, - .get_tag = ioat2_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - -static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) -{ - int slots = 0; - u32 req; - u16 global_req_table; - - global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); - if (global_req_table == 0) - return 0; - do { - req = readl(iobase + global_req_table + (slots * sizeof(u32))); - slots++; - } while ((req & IOAT_DCA_GREQID_LASTID) == 0); - - return slots; -} - -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - int slots; - int i; - int err; - u32 tag_map; - u16 dca_offset; - u16 csi_fsb_control; - u16 pcie_control; - u8 bit; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); - if (dca_offset == 0) - return NULL; - - slots = ioat2_dca_count_dca_slots(iobase, dca_offset); - if (slots == 0) - return NULL; - - dca = alloc_dca_provider(&ioat2_dca_ops, - sizeof(*ioatdca) - + (sizeof(struct ioat_dca_slot) * slots)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->iobase = iobase; - ioatdca->dca_base = iobase + dca_offset; - ioatdca->max_requesters = slots; - - /* some bios might not know to turn these on */ - csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { - csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; - writew(csi_fsb_control, - ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - } - pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { - pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; - writew(pcie_control, - ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - } - - - /* TODO version, compatibility and configuration checks */ - - /* copy out the APIC to DCA tag map */ - tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); - for (i = 0; i < 5; i++) { - bit = (tag_map >> (4 * i)) & 0x0f; - if (bit < 8) - ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; - else - ioatdca->tag_map[i] = 0; - } - - if (!dca2_tag_map_valid(ioatdca->tag_map)) { - dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, " - "disabling DCA\n"); - free_dca_provider(dca); - return NULL; - } - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - -static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (dev->bus != &pci_bus_type) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - global_req_table = - readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); - writel(id | IOAT_DCA_GREQID_VALID, - ioatdca->iobase + global_req_table + (i * 4)); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat3_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (dev->bus != &pci_bus_type) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - global_req_table = - readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); - writel(0, ioatdca->iobase + global_req_table + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat3_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - u8 tag; - - struct ioat_dca_priv *ioatdca = dca_priv(dca); - int i, apic_id, bit, value; - u8 entry; - - tag = 0; - apic_id = cpu_physical_id(cpu); - - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { - entry = ioatdca->tag_map[i]; - if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { - bit = entry & - ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); - value = (apic_id & (1 << bit)) ? 1 : 0; - } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { - bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; - value = (apic_id & (1 << bit)) ? 0 : 1; - } else { - value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0; - } - tag |= (value << i); - } - - return tag; -} - -static struct dca_ops ioat3_dca_ops = { - .add_requester = ioat3_dca_add_requester, - .remove_requester = ioat3_dca_remove_requester, - .get_tag = ioat3_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - -static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) -{ - int slots = 0; - u32 req; - u16 global_req_table; - - global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); - if (global_req_table == 0) - return 0; - - do { - req = readl(iobase + global_req_table + (slots * sizeof(u32))); - slots++; - } while ((req & IOAT_DCA_GREQID_LASTID) == 0); - - return slots; -} - -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - int slots; - int i; - int err; - u16 dca_offset; - u16 csi_fsb_control; - u16 pcie_control; - u8 bit; - - union { - u64 full; - struct { - u32 low; - u32 high; - }; - } tag_map; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); - if (dca_offset == 0) - return NULL; - - slots = ioat3_dca_count_dca_slots(iobase, dca_offset); - if (slots == 0) - return NULL; - - dca = alloc_dca_provider(&ioat3_dca_ops, - sizeof(*ioatdca) - + (sizeof(struct ioat_dca_slot) * slots)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->iobase = iobase; - ioatdca->dca_base = iobase + dca_offset; - ioatdca->max_requesters = slots; - - /* some bios might not know to turn these on */ - csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); - if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { - csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; - writew(csi_fsb_control, - ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); - } - pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); - if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { - pcie_control |= IOAT3_PCI_CONTROL_MEMWR; - writew(pcie_control, - ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); - } - - - /* TODO version, compatibility and configuration checks */ - - /* copy out the APIC to DCA tag map */ - tag_map.low = - readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); - tag_map.high = - readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); - for (i = 0; i < 8; i++) { - bit = tag_map.full >> (8 * i); - ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; - } - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c deleted file mode 100644 index a600fc0f796..00000000000 --- a/drivers/dma/ioat_dma.c +++ /dev/null @@ -1,1741 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ioatdma.h" -#include "ioatdma_registers.h" -#include "ioatdma_hw.h" - -#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) - -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) -static int ioat_pending_level = 4; -module_param(ioat_pending_level, int, 0644); -MODULE_PARM_DESC(ioat_pending_level, - "high-water mark for pushing ioat descriptors (default: 4)"); - -#define RESET_DELAY msecs_to_jiffies(100) -#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) -static void ioat_dma_chan_reset_part2(struct work_struct *work); -static void ioat_dma_chan_watchdog(struct work_struct *work); - -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - -/* internal functions */ -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); - -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); - -static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( - struct ioatdma_device *device, - int index) -{ - return device->idx[index]; -} - -/** - * ioat_dma_do_interrupt - handler used for single vector interrupt mode - * @irq: interrupt id - * @data: interrupt data - */ -static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) -{ - struct ioatdma_device *instance = data; - struct ioat_dma_chan *ioat_chan; - unsigned long attnstatus; - int bit; - u8 intrctrl; - - intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); - - if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) - return IRQ_NONE; - - if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { - writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); - return IRQ_NONE; - } - - attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); - for_each_bit(bit, &attnstatus, BITS_PER_LONG) { - ioat_chan = ioat_lookup_chan_by_index(instance, bit); - tasklet_schedule(&ioat_chan->cleanup_task); - } - - writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); - return IRQ_HANDLED; -} - -/** - * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode - * @irq: interrupt id - * @data: interrupt data - */ -static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) -{ - struct ioat_dma_chan *ioat_chan = data; - - tasklet_schedule(&ioat_chan->cleanup_task); - - return IRQ_HANDLED; -} - -static void ioat_dma_cleanup_tasklet(unsigned long data); - -/** - * ioat_dma_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -static int ioat_dma_enumerate_channels(struct ioatdma_device *device) -{ - u8 xfercap_scale; - u32 xfercap; - int i; - struct ioat_dma_chan *ioat_chan; - - /* - * IOAT ver.3 workarounds - */ - if (device->version == IOAT_VER_3_0) { - u32 chan_err_mask; - u16 dev_id; - u32 dmauncerrsts; - - /* - * Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - chan_err_mask = 0x3E07; - pci_write_config_dword(device->pdev, - IOAT_PCI_CHANERRMASK_INT_OFFSET, - chan_err_mask); - - /* - * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(device->pdev, - IOAT_PCI_DEVICE_ID_OFFSET, - &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - dmauncerrsts = 0x10; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - dmauncerrsts); - } - } - - device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); - -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { - device->common.chancnt--; - } -#endif - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); - if (!ioat_chan) { - device->common.chancnt = i; - break; - } - - ioat_chan->device = device; - ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); - ioat_chan->xfercap = xfercap; - ioat_chan->desccount = 0; - INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); - if (ioat_chan->device->version == IOAT_VER_2_0) - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | - IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - else if (ioat_chan->device->version == IOAT_VER_3_0) - writel(IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - spin_lock_init(&ioat_chan->cleanup_lock); - spin_lock_init(&ioat_chan->desc_lock); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); - /* This should be made common somewhere in dmaengine.c */ - ioat_chan->common.device = &device->common; - list_add_tail(&ioat_chan->common.device_node, - &device->common.channels); - device->idx[i] = ioat_chan; - tasklet_init(&ioat_chan->cleanup_task, - ioat_dma_cleanup_tasklet, - (unsigned long) ioat_chan); - tasklet_disable(&ioat_chan->cleanup_task); - } - return device->common.chancnt; -} - -/** - * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended - * descriptors to hw - * @chan: DMA channel handle - */ -static inline void __ioat1_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) -{ - ioat_chan->pending = 0; - writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); -} - -static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - } -} - -static inline void __ioat2_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) -{ - ioat_chan->pending = 0; - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); -} - -static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - } -} - - -/** - * ioat_dma_chan_reset_part2 - reinit the channel after a reset - */ -static void ioat_dma_chan_reset_part2(struct work_struct *work) -{ - struct ioat_dma_chan *ioat_chan = - container_of(work, struct ioat_dma_chan, work.work); - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat_chan->cleanup_lock); - spin_lock_bh(&ioat_chan->desc_lock); - - ioat_chan->completion_virt->low = 0; - ioat_chan->completion_virt->high = 0; - ioat_chan->pending = 0; - - /* - * count the descriptors waiting, and be sure to do it - * right for both the CB1 line and the CB2 ring - */ - ioat_chan->dmacount = 0; - if (ioat_chan->used_desc.prev) { - desc = to_ioat_desc(ioat_chan->used_desc.prev); - do { - ioat_chan->dmacount++; - desc = to_ioat_desc(desc->node.next); - } while (&desc->node != ioat_chan->used_desc.next); - } - - /* - * write the new starting descriptor address - * this puts channel engine into ARMED state - */ - desc = to_ioat_desc(ioat_chan->used_desc.prev); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - break; - case IOAT_VER_2_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - /* tell the engine to go with what's left to be done */ - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - - break; - } - dev_err(&ioat_chan->device->pdev->dev, - "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - - spin_unlock_bh(&ioat_chan->desc_lock); - spin_unlock_bh(&ioat_chan->cleanup_lock); -} - -/** - * ioat_dma_reset_channel - restart a channel - * @ioat_chan: IOAT DMA channel handle - */ -static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) -{ - u32 chansts, chanerr; - - if (!ioat_chan->used_desc.prev) - return; - - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - chansts = (ioat_chan->completion_virt->low - & IOAT_CHANSTS_DMA_TRANSFER_STATUS); - if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(ioat_chan), chansts, chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - } - - /* - * whack it upside the head with a reset - * and wait for things to settle out. - * force the pending count to a really big negative - * to make sure no one forces an issue_pending - * while we're waiting. - */ - - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->pending = INT_MIN; - writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - spin_unlock_bh(&ioat_chan->desc_lock); - - /* schedule the 2nd half instead of sleeping a long time */ - schedule_delayed_work(&ioat_chan->work, RESET_DELAY); -} - -/** - * ioat_dma_chan_watchdog - watch for stuck channels - */ -static void ioat_dma_chan_watchdog(struct work_struct *work) -{ - struct ioatdma_device *device = - container_of(work, struct ioatdma_device, work.work); - struct ioat_dma_chan *ioat_chan; - int i; - - union { - u64 full; - struct { - u32 low; - u32 high; - }; - } completion_hw; - unsigned long compl_desc_addr_hw; - - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - - if (ioat_chan->device->version == IOAT_VER_1_2 - /* have we started processing anything yet */ - && ioat_chan->last_completion - /* have we completed any since last watchdog cycle? */ - && (ioat_chan->last_completion == - ioat_chan->watchdog_completion) - /* has TCP stuck on one cookie since last watchdog? */ - && (ioat_chan->watchdog_tcp_cookie == - ioat_chan->watchdog_last_tcp_cookie) - && (ioat_chan->watchdog_tcp_cookie != - ioat_chan->completed_cookie) - /* is there something in the chain to be processed? */ - /* CB1 chain always has at least the last one processed */ - && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) - && ioat_chan->pending == 0) { - - /* - * check CHANSTS register for completed - * descriptor address. - * if it is different than completion writeback, - * it is not zero - * and it has changed since the last watchdog - * we can assume that channel - * is still working correctly - * and the problem is in completion writeback. - * update completion writeback - * with actual CHANSTS value - * else - * try resetting the channel - */ - - completion_hw.low = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); - completion_hw.high = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); -#if (BITS_PER_LONG == 64) - compl_desc_addr_hw = - completion_hw.full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - compl_desc_addr_hw = - completion_hw.low & IOAT_LOW_COMPLETION_MASK; -#endif - - if ((compl_desc_addr_hw != 0) - && (compl_desc_addr_hw != ioat_chan->watchdog_completion) - && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { - ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; - ioat_chan->completion_virt->low = completion_hw.low; - ioat_chan->completion_virt->high = completion_hw.high; - } else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; - } - - /* - * for version 2.0 if there are descriptors yet to be processed - * and the last completed hasn't changed since the last watchdog - * if they haven't hit the pending level - * issue the pending to push them through - * else - * try resetting the channel - */ - } else if (ioat_chan->device->version == IOAT_VER_2_0 - && ioat_chan->used_desc.prev - && ioat_chan->last_completion - && ioat_chan->last_completion == ioat_chan->watchdog_completion) { - - if (ioat_chan->pending < ioat_pending_level) - ioat2_dma_memcpy_issue_pending(&ioat_chan->common); - else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; - } - } else { - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_completion - = ioat_chan->last_completion; - } - - ioat_chan->watchdog_last_tcp_cookie = - ioat_chan->watchdog_tcp_cookie; - } - - schedule_delayed_work(&device->work, WATCHDOG_DELAY); -} - -static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *prev, *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - LIST_HEAD(new_chain); - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->async_tx.flags; - new = first; - - spin_lock_bh(&ioat_chan->desc_lock); - prev = to_ioat_desc(ioat_chan->used_desc.prev); - prefetch(prev->hw); - do { - copy = min_t(size_t, len, ioat_chan->xfercap); - - async_tx_ack(&new->async_tx); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - hw->next = 0; - - /* chain together the physical address list for the HW */ - wmb(); - prev->hw->next = (u64) new->async_tx.phys; - - len -= copy; - dst += copy; - src += copy; - - list_add_tail(&new->node, &new_chain); - desc_count++; - prev = new; - } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); - - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return -ENOMEM; - } - - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; - if (first != new) { - /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; - - /* write address into NextDescriptor field of last desc in chain */ - to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = - first->async_tx.phys; - list_splice_tail(&new_chain, &ioat_chan->used_desc); - - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - return cookie; -} - -static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->async_tx.flags; - new = first; - - /* - * ioat_chan->desc_lock is still in force in version 2 path - * it gets unlocked at end of this function - */ - do { - copy = min_t(size_t, len, ioat_chan->xfercap); - - async_tx_ack(&new->async_tx); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - desc_count++; - } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); - - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return -ENOMEM; - } - - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; - if (first != new) { - /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; - - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= ioat_pending_level) - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - return cookie; -} - -/** - * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat_chan: the channel supplying the memory pool for the descriptors - * @flags: allocation flags - */ -static struct ioat_desc_sw *ioat_dma_alloc_descriptor( - struct ioat_dma_chan *ioat_chan, - gfp_t flags) -{ - struct ioat_dma_descriptor *desc; - struct ioat_desc_sw *desc_sw; - struct ioatdma_device *ioatdma_device; - dma_addr_t phys; - - ioatdma_device = to_ioatdma_device(ioat_chan->common.device); - desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); - if (unlikely(!desc)) - return NULL; - - desc_sw = kzalloc(sizeof(*desc_sw), flags); - if (unlikely(!desc_sw)) { - pci_pool_free(ioatdma_device->dma_pool, desc, phys); - return NULL; - } - - memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - desc_sw->async_tx.tx_submit = ioat1_tx_submit; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - desc_sw->async_tx.tx_submit = ioat2_tx_submit; - break; - } - - desc_sw->hw = desc; - desc_sw->async_tx.phys = phys; - - return desc_sw; -} - -static int ioat_initial_desc_count = 256; -module_param(ioat_initial_desc_count, int, 0644); -MODULE_PARM_DESC(ioat_initial_desc_count, - "initial descriptors per channel (default: 256)"); - -/** - * ioat2_dma_massage_chan_desc - link the descriptors into a circle - * @ioat_chan: the channel to be massaged - */ -static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *desc, *_desc; - - /* setup used_desc */ - ioat_chan->used_desc.next = ioat_chan->free_desc.next; - ioat_chan->used_desc.prev = NULL; - - /* pull free_desc out of the circle so that every node is a hw - * descriptor, but leave it pointing to the list - */ - ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; - ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; - - /* circle link the hw descriptors */ - desc = to_ioat_desc(ioat_chan->free_desc.next); - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; - list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; - } -} - -/** - * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: the channel to be filled out - */ -static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *desc; - u16 chanctrl; - u32 chanerr; - int i; - LIST_HEAD(tmp_list); - - /* have we already been set up? */ - if (!list_empty(&ioat_chan->free_desc)) - return ioat_chan->desccount; - - /* Setup register to interrupt and write completion status on error */ - chanctrl = IOAT_CHANCTRL_ERR_INT_EN | - IOAT_CHANCTRL_ANY_ERR_ABORT_EN | - IOAT_CHANCTRL_ERR_COMPLETION_EN; - writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); - - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, - "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - } - - /* Allocate descriptors */ - for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "Only %d initial descriptors\n", i); - break; - } - list_add_tail(&desc->node, &tmp_list); - } - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->desccount = i; - list_splice(&tmp_list, &ioat_chan->free_desc); - if (ioat_chan->device->version != IOAT_VER_1_2) - ioat2_dma_massage_chan_desc(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - ioat_chan->completion_virt = - pci_pool_alloc(ioat_chan->device->completion_pool, - GFP_KERNEL, - &ioat_chan->completion_addr); - memset(ioat_chan->completion_virt, 0, - sizeof(*ioat_chan->completion_virt)); - writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) ioat_chan->completion_addr) >> 32, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - tasklet_enable(&ioat_chan->cleanup_task); - ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ - return ioat_chan->desccount; -} - -/** - * ioat_dma_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -static void ioat_dma_free_chan_resources(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device); - struct ioat_desc_sw *desc, *_desc; - int in_use_descs = 0; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (ioat_chan->desccount == 0) - return; - - tasklet_disable(&ioat_chan->cleanup_task); - ioat_dma_memcpy_cleanup(ioat_chan); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - mdelay(100); - - spin_lock_bh(&ioat_chan->desc_lock); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat_chan->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - list_for_each_entry_safe(desc, _desc, - ioat_chan->free_desc.next, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - desc = to_ioat_desc(ioat_chan->free_desc.next); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); - break; - } - spin_unlock_bh(&ioat_chan->desc_lock); - - pci_pool_free(ioatdma_device->completion_pool, - ioat_chan->completion_virt, - ioat_chan->completion_addr); - - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - dev_err(&ioat_chan->device->pdev->dev, - "Freeing %d in use descriptors!\n", - in_use_descs - 1); - - ioat_chan->last_completion = ioat_chan->completion_addr = 0; - ioat_chan->pending = 0; - ioat_chan->dmacount = 0; - ioat_chan->desccount = 0; - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_tcp_cookie = - ioat_chan->watchdog_last_tcp_cookie = 0; -} - -/** - * ioat_dma_get_next_descriptor - return the next available descriptor - * @ioat_chan: IOAT DMA channel handle - * - * Gets the next descriptor from the chain, and must be called with the - * channel's desc_lock held. Allocates more descriptors if the channel - * has run out. - */ -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *new; - - if (!list_empty(&ioat_chan->free_desc)) { - new = to_ioat_desc(ioat_chan->free_desc.next); - list_del(&new->node); - } else { - /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); - return NULL; - } - } - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *new; - - /* - * used.prev points to where to start processing - * used.next points to next free descriptor - * if used.prev == NULL, there are none waiting to be processed - * if used.next == used.prev.prev, there is only one free descriptor, - * and we need to use it to as a noop descriptor before - * linking in a new set of descriptors, since the device - * has probably already read the pointer to it - */ - if (ioat_chan->used_desc.prev && - ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { - - struct ioat_desc_sw *desc; - struct ioat_desc_sw *noop_desc; - int i; - - /* set up the noop descriptor */ - noop_desc = to_ioat_desc(ioat_chan->used_desc.next); - /* set size to non-zero value (channel returns error when size is 0) */ - noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; - noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; - noop_desc->hw->src_addr = 0; - noop_desc->hw->dst_addr = 0; - - ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; - ioat_chan->pending++; - ioat_chan->dmacount++; - - /* try to get a few more descriptors */ - for (i = 16; i; i--) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); - break; - } - list_add_tail(&desc->node, ioat_chan->used_desc.next); - - desc->hw->next - = to_ioat_desc(desc->node.next)->async_tx.phys; - to_ioat_desc(desc->node.prev)->hw->next - = desc->async_tx.phys; - ioat_chan->desccount++; - } - - ioat_chan->used_desc.next = noop_desc->node.next; - } - new = to_ioat_desc(ioat_chan->used_desc.next); - prefetch(new); - ioat_chan->used_desc.next = new->node.next; - - if (ioat_chan->used_desc.prev == NULL) - ioat_chan->used_desc.prev = &new->node; - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw *ioat_dma_get_next_descriptor( - struct ioat_dma_chan *ioat_chan) -{ - if (!ioat_chan) - return NULL; - - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - return ioat1_dma_get_next_descriptor(ioat_chan); - case IOAT_VER_2_0: - case IOAT_VER_3_0: - return ioat2_dma_get_next_descriptor(ioat_chan); - } - return NULL; -} - -static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat_chan->desc_lock); - new = ioat_dma_get_next_descriptor(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; - } else { - dev_err(&ioat_chan->device->pdev->dev, - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - return NULL; - } -} - -static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat_chan->desc_lock); - new = ioat2_dma_get_next_descriptor(ioat_chan); - - /* - * leave ioat_chan->desc_lock set in ioat 2 path - * it will get unlocked at end of tx_submit - */ - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; - } else { - spin_unlock_bh(&ioat_chan->desc_lock); - dev_err(&ioat_chan->device->pdev->dev, - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - return NULL; - } -} - -static void ioat_dma_cleanup_tasklet(unsigned long data) -{ - struct ioat_dma_chan *chan = (void *)data; - ioat_dma_memcpy_cleanup(chan); - writew(IOAT_CHANCTRL_INT_DISABLE, - chan->reg_base + IOAT_CHANCTRL_OFFSET); -} - -static void -ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) -{ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - else - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - } - - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - else - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - } -} - -/** - * ioat_dma_memcpy_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - */ -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) -{ - unsigned long phys_complete; - struct ioat_desc_sw *desc, *_desc; - dma_cookie_t cookie = 0; - unsigned long desc_phys; - struct ioat_desc_sw *latest_desc; - - prefetch(ioat_chan->completion_virt); - - if (!spin_trylock_bh(&ioat_chan->cleanup_lock)) - return; - - /* The completion writeback can happen at any time, - so reads by the driver need to be atomic operations - The descriptor physical addresses are limited to 32-bits - when the CPU can only do a 32-bit mov */ - -#if (BITS_PER_LONG == 64) - phys_complete = - ioat_chan->completion_virt->full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - phys_complete = - ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; -#endif - - if ((ioat_chan->completion_virt->full - & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == - IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { - dev_err(&ioat_chan->device->pdev->dev, - "Channel halted, chanerr = %x\n", - readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); - - /* TODO do something to salvage the situation */ - } - - if (phys_complete == ioat_chan->last_completion) { - spin_unlock_bh(&ioat_chan->cleanup_lock); - /* - * perhaps we're stuck so hard that the watchdog can't go off? - * try to catch it after 2 seconds - */ - if (ioat_chan->device->version != IOAT_VER_3_0) { - if (time_after(jiffies, - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); - ioat_chan->last_completion_time = jiffies; - } - } - return; - } - ioat_chan->last_completion_time = jiffies; - - cookie = 0; - if (!spin_trylock_bh(&ioat_chan->desc_lock)) { - spin_unlock_bh(&ioat_chan->cleanup_lock); - return; - } - - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { - - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. - */ - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; - ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; - } - } - - if (desc->async_tx.phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(&desc->async_tx)) { - list_move_tail(&desc->node, - &ioat_chan->free_desc); - } else - desc->async_tx.cookie = 0; - } else { - /* - * last used desc. Do not remove, so we can - * append from it, but don't look at it next - * time, either - */ - desc->async_tx.cookie = 0; - - /* TODO check status bits? */ - break; - } - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - /* has some other thread has already cleaned up? */ - if (ioat_chan->used_desc.prev == NULL) - break; - - /* work backwards to find latest finished desc */ - desc = to_ioat_desc(ioat_chan->used_desc.next); - latest_desc = NULL; - do { - desc = to_ioat_desc(desc->node.prev); - desc_phys = (unsigned long)desc->async_tx.phys - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; - if (desc_phys == phys_complete) { - latest_desc = desc; - break; - } - } while (&desc->node != ioat_chan->used_desc.prev); - - if (latest_desc != NULL) { - - /* work forwards to clear finished descriptors */ - for (desc = to_ioat_desc(ioat_chan->used_desc.prev); - &desc->node != latest_desc->node.next && - &desc->node != ioat_chan->used_desc.next; - desc = to_ioat_desc(desc->node.next)) { - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; - desc->async_tx.cookie = 0; - ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; - } - } - } - - /* move used.prev up beyond those that are finished */ - if (&desc->node == ioat_chan->used_desc.next) - ioat_chan->used_desc.prev = NULL; - else - ioat_chan->used_desc.prev = &desc->node; - } - break; - } - - spin_unlock_bh(&ioat_chan->desc_lock); - - ioat_chan->last_completion = phys_complete; - if (cookie != 0) - ioat_chan->completed_cookie = cookie; - - spin_unlock_bh(&ioat_chan->cleanup_lock); -} - -/** - * ioat_dma_is_complete - poll the status of a IOAT DMA transaction - * @chan: IOAT DMA channel handle - * @cookie: DMA transaction identifier - * @done: if not %NULL, updated with last completed transaction - * @used: if not %NULL, updated with last used transaction - */ -static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, - dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - dma_cookie_t last_used; - dma_cookie_t last_complete; - enum dma_status ret; - - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; - ioat_chan->watchdog_tcp_cookie = cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - ret = dma_async_is_complete(cookie, last_complete, last_used); - if (ret == DMA_SUCCESS) - return ret; - - ioat_dma_memcpy_cleanup(ioat_chan); - - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - return dma_async_is_complete(cookie, last_complete, last_used); -} - -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat_chan->desc_lock); - - desc = ioat_dma_get_next_descriptor(ioat_chan); - - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return; - } - - desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL - | IOAT_DMA_DESCRIPTOR_CTL_INT_GN - | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - /* set size to non-zero value (channel returns error when size is 0) */ - desc->hw->size = NULL_DESC_BUFFER_SIZE; - desc->hw->src_addr = 0; - desc->hw->dst_addr = 0; - async_tx_ack(&desc->async_tx); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - desc->hw->next = 0; - list_add_tail(&desc->node, &ioat_chan->used_desc); - - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - ioat_chan->dmacount++; - __ioat2_dma_memcpy_issue_pending(ioat_chan); - break; - } - spin_unlock_bh(&ioat_chan->desc_lock); -} - -/* - * Perform a IOAT transaction to verify the HW works. - */ -#define IOAT_TEST_SIZE 2000 - -static void ioat_dma_test_callback(void *dma_async_param) -{ - struct completion *cmp = dma_async_param; - - complete(cmp); -} - -/** - * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. - * @device: device to be tested - */ -static int ioat_dma_self_test(struct ioatdma_device *device) -{ - int i; - u8 *src; - u8 *dest; - struct dma_chan *dma_chan; - struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - int err = 0; - struct completion cmp; - unsigned long tmo; - unsigned long flags; - - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } - - /* Fill in src buffer */ - for (i = 0; i < IOAT_TEST_SIZE; i++) - src[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(device->common.channels.next, - struct dma_chan, - device_node); - if (device->common.device_alloc_chan_resources(dma_chan) < 1) { - dev_err(&device->pdev->dev, - "selftest cannot allocate chan resource\n"); - err = -ENODEV; - goto out; - } - - dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, - DMA_TO_DEVICE); - dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, - DMA_FROM_DEVICE); - flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; - tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, flags); - if (!tx) { - dev_err(&device->pdev->dev, - "Self-test prep failed, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(&device->pdev->dev, - "Self-test setup failed, disabling\n"); - err = -ENODEV; - goto free_resources; - } - device->common.device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) - != DMA_SUCCESS) { - dev_err(&device->pdev->dev, - "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(&device->pdev->dev, - "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - -free_resources: - device->common.device_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -static char ioat_interrupt_style[32] = "msix"; -module_param_string(ioat_interrupt_style, ioat_interrupt_style, - sizeof(ioat_interrupt_style), 0644); -MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), " - "msix-single-vector, msi, intx)"); - -/** - * ioat_dma_setup_interrupts - setup interrupt handler - * @device: ioat device - */ -static int ioat_dma_setup_interrupts(struct ioatdma_device *device) -{ - struct ioat_dma_chan *ioat_chan; - int err, i, j, msixcnt; - u8 intrctrl = 0; - - if (!strcmp(ioat_interrupt_style, "msix")) - goto msix; - if (!strcmp(ioat_interrupt_style, "msix-single-vector")) - goto msix_single_vector; - if (!strcmp(ioat_interrupt_style, "msi")) - goto msi; - if (!strcmp(ioat_interrupt_style, "intx")) - goto intx; - dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", - ioat_interrupt_style); - goto err_no_irq; - -msix: - /* The number of MSI-X vectors should equal the number of channels */ - msixcnt = device->common.chancnt; - for (i = 0; i < msixcnt; i++) - device->msix_entries[i].entry = i; - - err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); - if (err < 0) - goto msi; - if (err > 0) - goto msix_single_vector; - - for (i = 0; i < msixcnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - err = request_irq(device->msix_entries[i].vector, - ioat_dma_do_interrupt_msix, - 0, "ioat-msix", ioat_chan); - if (err) { - for (j = 0; j < i; j++) { - ioat_chan = - ioat_lookup_chan_by_index(device, j); - free_irq(device->msix_entries[j].vector, - ioat_chan); - } - goto msix_single_vector; - } - } - intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = msix_multi_vector; - goto done; - -msix_single_vector: - device->msix_entries[0].entry = 0; - err = pci_enable_msix(device->pdev, device->msix_entries, 1); - if (err) - goto msi; - - err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, - 0, "ioat-msix", device); - if (err) { - pci_disable_msix(device->pdev); - goto msi; - } - device->irq_mode = msix_single_vector; - goto done; - -msi: - err = pci_enable_msi(device->pdev); - if (err) - goto intx; - - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - 0, "ioat-msi", device); - if (err) { - pci_disable_msi(device->pdev); - goto intx; - } - /* - * CB 1.2 devices need a bit set in configuration space to enable MSI - */ - if (device->version == IOAT_VER_1_2) { - u32 dmactrl; - pci_read_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, dmactrl); - } - device->irq_mode = msi; - goto done; - -intx: - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); - if (err) - goto err_no_irq; - device->irq_mode = intx; - -done: - intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; - writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); - return 0; - -err_no_irq: - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - dev_err(&device->pdev->dev, "no usable interrupts\n"); - device->irq_mode = none; - return -1; -} - -/** - * ioat_dma_remove_interrupts - remove whatever interrupts were set - * @device: ioat device - */ -static void ioat_dma_remove_interrupts(struct ioatdma_device *device) -{ - struct ioat_dma_chan *ioat_chan; - int i; - - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - - switch (device->irq_mode) { - case msix_multi_vector: - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - free_irq(device->msix_entries[i].vector, ioat_chan); - } - pci_disable_msix(device->pdev); - break; - case msix_single_vector: - free_irq(device->msix_entries[0].vector, device); - pci_disable_msix(device->pdev); - break; - case msi: - free_irq(device->pdev->irq, device); - pci_disable_msi(device->pdev); - break; - case intx: - free_irq(device->pdev->irq, device); - break; - case none: - dev_warn(&device->pdev->dev, - "call to %s without interrupts setup\n", __func__); - } - device->irq_mode = none; -} - -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase) -{ - int err; - struct ioatdma_device *device; - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } - device->pdev = pdev; - device->reg_base = iobase; - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - - /* DMA coherent memory pool for DMA descriptor allocations */ - device->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!device->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - device->completion_pool = pci_pool_create("completion_pool", pdev, - sizeof(u64), SMP_CACHE_BYTES, - SMP_CACHE_BYTES); - if (!device->completion_pool) { - err = -ENOMEM; - goto err_completion_pool; - } - - INIT_LIST_HEAD(&device->common.channels); - ioat_dma_enumerate_channels(device); - - device->common.device_alloc_chan_resources = - ioat_dma_alloc_chan_resources; - device->common.device_free_chan_resources = - ioat_dma_free_chan_resources; - device->common.dev = &pdev->dev; - - dma_cap_set(DMA_MEMCPY, device->common.cap_mask); - device->common.device_is_tx_complete = ioat_dma_is_complete; - switch (device->version) { - case IOAT_VER_1_2: - device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - device->common.device_issue_pending = - ioat1_dma_memcpy_issue_pending; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - device->common.device_issue_pending = - ioat2_dma_memcpy_issue_pending; - break; - } - - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine found," - " %d channels, device version 0x%02x, driver version %s\n", - device->common.chancnt, device->version, IOAT_DMA_VERSION); - - if (!device->common.chancnt) { - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine problem found: " - "zero channels detected\n"); - goto err_setup_interrupts; - } - - err = ioat_dma_setup_interrupts(device); - if (err) - goto err_setup_interrupts; - - err = ioat_dma_self_test(device); - if (err) - goto err_self_test; - - ioat_set_tcp_copy_break(device); - - dma_async_device_register(&device->common); - - if (device->version != IOAT_VER_3_0) { - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, - WATCHDOG_DELAY); - } - - return device; - -err_self_test: - ioat_dma_remove_interrupts(device); -err_setup_interrupts: - pci_pool_destroy(device->completion_pool); -err_completion_pool: - pci_pool_destroy(device->dma_pool); -err_dma_pool: - kfree(device); -err_kzalloc: - dev_err(&pdev->dev, - "Intel(R) I/OAT DMA Engine initialization failed\n"); - return NULL; -} - -void ioat_dma_remove(struct ioatdma_device *device) -{ - struct dma_chan *chan, *_chan; - struct ioat_dma_chan *ioat_chan; - - if (device->version != IOAT_VER_3_0) - cancel_delayed_work(&device->work); - - ioat_dma_remove_interrupts(device); - - dma_async_device_unregister(&device->common); - - pci_pool_destroy(device->dma_pool); - pci_pool_destroy(device->completion_pool); - - iounmap(device->reg_base); - pci_release_regions(device->pdev); - pci_disable_device(device->pdev); - - list_for_each_entry_safe(chan, _chan, - &device->common.channels, device_node) { - ioat_chan = to_ioat_chan(chan); - list_del(&chan->device_node); - kfree(ioat_chan); - } - kfree(device); -} - diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h deleted file mode 100644 index a52ff4bd460..00000000000 --- a/drivers/dma/ioatdma.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_H -#define IOATDMA_H - -#include -#include "ioatdma_hw.h" -#include -#include -#include -#include -#include - -#define IOAT_DMA_VERSION "3.64" - -enum ioat_interrupt { - none = 0, - msix_multi_vector = 1, - msix_single_vector = 2, - msi = 3, - intx = 4, -}; - -#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 -#define IOAT_DMA_DCA_ANY_CPU ~0 -#define IOAT_WATCHDOG_PERIOD (2 * HZ) - - -/** - * struct ioatdma_device - internal representation of a IOAT device - * @pdev: PCI-Express device - * @reg_base: MMIO register space base address - * @dma_pool: for allocating DMA descriptors - * @common: embedded struct dma_device - * @version: version of ioatdma device - * @irq_mode: which style irq to use - * @msix_entries: irq handlers - * @idx: per channel data - */ - -struct ioatdma_device { - struct pci_dev *pdev; - void __iomem *reg_base; - struct pci_pool *dma_pool; - struct pci_pool *completion_pool; - struct dma_device common; - u8 version; - enum ioat_interrupt irq_mode; - struct delayed_work work; - struct msix_entry msix_entries[4]; - struct ioat_dma_chan *idx[4]; -}; - -/** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - - void __iomem *reg_base; - - dma_cookie_t completed_cookie; - unsigned long last_completion; - unsigned long last_completion_time; - - size_t xfercap; /* XFERCAP register value expanded out */ - - spinlock_t cleanup_lock; - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; - unsigned long watchdog_completion; - int watchdog_tcp_cookie; - u32 watchdog_last_tcp_cookie; - struct delayed_work work; - - int pending; - int dmacount; - int desccount; - - struct ioatdma_device *device; - struct dma_chan common; - - dma_addr_t completion_addr; - union { - u64 full; /* HW completion writeback */ - struct { - u32 low; - u32 high; - }; - } *completion_virt; - unsigned long last_compl_desc_addr_hw; - struct tasklet_struct cleanup_task; -}; - -/* wrapper around hardware descriptor format + additional software fields */ - -/** - * struct ioat_desc_sw - wrapper around hardware descriptor - * @hw: hardware DMA descriptor - * @node: this descriptor will either be on the free list, - * or attached to a transaction list (async_tx.tx_list) - * @tx_cnt: number of descriptors required to complete the transaction - * @async_tx: the generic software descriptor for all engines - */ -struct ioat_desc_sw { - struct ioat_dma_descriptor *hw; - struct list_head node; - int tx_cnt; - size_t len; - dma_addr_t src; - dma_addr_t dst; - struct dma_async_tx_descriptor async_tx; -}; - -static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) -{ - #ifdef CONFIG_NET_DMA - switch (dev->version) { - case IOAT_VER_1_2: - sysctl_tcp_dma_copybreak = 4096; - break; - case IOAT_VER_2_0: - sysctl_tcp_dma_copybreak = 2048; - break; - case IOAT_VER_3_0: - sysctl_tcp_dma_copybreak = 262144; - break; - } - #endif -} - -#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase); -void ioat_dma_remove(struct ioatdma_device *device); -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -#else -#define ioat_dma_probe(pdev, iobase) NULL -#define ioat_dma_remove(device) do { } while (0) -#define ioat_dca_init(pdev, iobase) NULL -#define ioat2_dca_init(pdev, iobase) NULL -#define ioat3_dca_init(pdev, iobase) NULL -#endif - -#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h deleted file mode 100644 index afa57eef86c..00000000000 --- a/drivers/dma/ioatdma_hw.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef _IOAT_HW_H_ -#define _IOAT_HW_H_ - -/* PCI Configuration Space Values */ -#define IOAT_PCI_VID 0x8086 - -/* CB device ID's */ -#define IOAT_PCI_DID_5000 0x1A38 -#define IOAT_PCI_DID_CNB 0x360B -#define IOAT_PCI_DID_SCNB 0x65FF -#define IOAT_PCI_DID_SNB 0x402F - -#define IOAT_PCI_RID 0x00 -#define IOAT_PCI_SVID 0x8086 -#define IOAT_PCI_SID 0x8086 -#define IOAT_VER_1_2 0x12 /* Version 1.2 */ -#define IOAT_VER_2_0 0x20 /* Version 2.0 */ -#define IOAT_VER_3_0 0x30 /* Version 3.0 */ - -struct ioat_dma_descriptor { - uint32_t size; - uint32_t ctl; - uint64_t src_addr; - uint64_t dst_addr; - uint64_t next; - uint64_t rsv1; - uint64_t rsv2; - uint64_t user1; - uint64_t user2; -}; - -#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002 -#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004 -#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 -#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 -#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 -#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 -#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 -#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 -#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 -#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 - -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 - -#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 - -#endif diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h deleted file mode 100644 index 49bc277424f..00000000000 --- a/drivers/dma/ioatdma_registers.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef _IOAT_REGISTERS_H_ -#define _IOAT_REGISTERS_H_ - -#define IOAT_PCI_DMACTRL_OFFSET 0x48 -#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 -#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 - -#define IOAT_PCI_DEVICE_ID_OFFSET 0x02 -#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 -#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 - -/* MMIO Device Registers */ -#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ - -#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */ -#define IOAT_XFERCAP_4KB 12 -#define IOAT_XFERCAP_8KB 13 -#define IOAT_XFERCAP_16KB 14 -#define IOAT_XFERCAP_32KB 15 -#define IOAT_XFERCAP_32GB 0 - -#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */ -#define IOAT_GENCTRL_DEBUG_EN 0x01 - -#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */ -#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */ -#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */ -#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */ -#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ - -#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */ - -#define IOAT_VER_OFFSET 0x08 /* 8-bit */ -#define IOAT_VER_MAJOR_MASK 0xF0 -#define IOAT_VER_MINOR_MASK 0x0F -#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4) -#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK) - -#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ - -#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ -#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */ -#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ - -#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ -#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 - -#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ - -/* DMA Channel Registers */ -#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ -#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 -#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 -#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 -#define IOAT_CHANCTRL_ERR_INT_EN 0x0010 -#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 -#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 -#define IOAT_CHANCTRL_INT_DISABLE 0x0001 - -#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ -#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ -#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ - - -#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ -#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ -#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ - ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET) -#define IOAT1_CHANSTS_OFFSET_LOW 0x04 -#define IOAT2_CHANSTS_OFFSET_LOW 0x08 -#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ - ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW) -#define IOAT1_CHANSTS_OFFSET_HIGH 0x08 -#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C -#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ - ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) -#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F -#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 -#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 - - - -#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */ - -#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */ -#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000 -#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */ - -/* CB DCA Memory Space Registers */ -#define IOAT_DCAOFFSET_OFFSET 0x14 -/* CB_BAR + IOAT_DCAOFFSET value */ -#define IOAT_DCA_VER_OFFSET 0x00 -#define IOAT_DCA_VER_MAJOR_MASK 0xF0 -#define IOAT_DCA_VER_MINOR_MASK 0x0F - -#define IOAT_DCA_COMP_OFFSET 0x02 -#define IOAT_DCA_COMP_V1 0x1 - -#define IOAT_FSB_CAPABILITY_OFFSET 0x04 -#define IOAT_FSB_CAPABILITY_PREFETCH 0x1 - -#define IOAT_PCI_CAPABILITY_OFFSET 0x06 -#define IOAT_PCI_CAPABILITY_MEMWR 0x1 - -#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08 -#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1 - -#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A -#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1 - -#define IOAT_APICID_TAG_MAP_OFFSET 0x0C -#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F -#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0 -#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0 -#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4 -#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00 -#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8 -#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000 -#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12 -#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000 -#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16 -#define IOAT_APICID_TAG_CB2_VALID 0x8080808080 - -#define IOAT_DCA_GREQID_OFFSET 0x10 -#define IOAT_DCA_GREQID_SIZE 0x04 -#define IOAT_DCA_GREQID_MASK 0xFFFF -#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000 -#define IOAT_DCA_GREQID_VALID 0x20000000 -#define IOAT_DCA_GREQID_LASTID 0x80000000 - -#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 -#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 - -#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A -#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 - -#define IOAT3_CSI_CONTROL_OFFSET 0x0C -#define IOAT3_CSI_CONTROL_PREFETCH 0x1 - -#define IOAT3_PCI_CONTROL_OFFSET 0x0E -#define IOAT3_PCI_CONTROL_MEMWR 0x1 - -#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 -#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 -#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 - -#define IOAT3_DCA_GREQID_OFFSET 0x02 - -#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ -#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ -#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ - ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET) -#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C -#define IOAT2_CHAINADDR_OFFSET_LOW 0x10 -#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ - ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW) -#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10 -#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14 -#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ - ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH) - -#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ -#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */ -#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ - ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET) -#define IOAT_CHANCMD_RESET 0x20 -#define IOAT_CHANCMD_RESUME 0x10 -#define IOAT_CHANCMD_ABORT 0x08 -#define IOAT_CHANCMD_SUSPEND 0x04 -#define IOAT_CHANCMD_APPEND 0x02 -#define IOAT_CHANCMD_START 0x01 - -#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */ -#define IOAT_CHANCMP_OFFSET_LOW 0x18 -#define IOAT_CHANCMP_OFFSET_HIGH 0x1C - -#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */ -#define IOAT_CDAR_OFFSET_LOW 0x20 -#define IOAT_CDAR_OFFSET_HIGH 0x24 - -#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ -#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 -#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 -#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 -#define IOAT_CHANERR_CHANCMD_ERR 0x0020 -#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 -#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 -#define IOAT_CHANERR_READ_DATA_ERR 0x0100 -#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 -#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 -#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 -#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 -#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 -#define IOAT_CHANERR_SOFT_ERR 0x4000 -#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 - -#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ - -#endif /* _IOAT_REGISTERS_H_ */ -- cgit v1.2.3 From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 45 ++++++++++----------------------------------- 1 file changed, 10 insertions(+), 35 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6781e8f3c06..e002e0e0d05 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -934,49 +934,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. */ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); -- cgit v1.2.3 From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 4 ++++ drivers/dma/iop-adma.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index e002e0e0d05..cd5673d3043 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_xor); BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 6ff79a67269..4496bc60666 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", -- cgit v1.2.3 From 58691d64c44ae41ddf098ecb31e9a994026e3cff Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:27 -0700 Subject: dmatest: add pq support Test raid6 p+q operations with a simple "always multiply by 1" q calculation to fit into dmatest's current destination verification scheme. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/dmatest.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index a27c0fb1bc1..a5ee5413905 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -43,6 +43,11 @@ module_param(xor_sources, uint, S_IRUGO); MODULE_PARM_DESC(xor_sources, "Number of xor source buffers (default: 3)"); +static unsigned int pq_sources = 3; +module_param(pq_sources, uint, S_IRUGO); +MODULE_PARM_DESC(pq_sources, + "Number of p+q source buffers (default: 3)"); + /* * Initialization patterns. All bytes in the source buffer has bit 7 * set, all bytes in the destination buffer has bit 7 cleared. @@ -227,6 +232,7 @@ static int dmatest_func(void *data) dma_cookie_t cookie; enum dma_status status; enum dma_ctrl_flags flags; + u8 pq_coefs[pq_sources]; int ret; int src_cnt; int dst_cnt; @@ -243,6 +249,11 @@ static int dmatest_func(void *data) else if (thread->type == DMA_XOR) { src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ dst_cnt = 1; + } else if (thread->type == DMA_PQ) { + src_cnt = pq_sources | 1; /* force odd to ensure dst = src */ + dst_cnt = 2; + for (i = 0; i < pq_sources; i++) + pq_coefs[i] = 1; } else goto err_srcs; @@ -310,6 +321,15 @@ static int dmatest_func(void *data) dma_dsts[0] + dst_off, dma_srcs, xor_sources, len, flags); + else if (thread->type == DMA_PQ) { + dma_addr_t dma_pq[dst_cnt]; + + for (i = 0; i < dst_cnt; i++) + dma_pq[i] = dma_dsts[i] + dst_off; + tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs, + pq_sources, pq_coefs, + len, flags); + } if (!tx) { for (i = 0; i < src_cnt; i++) @@ -446,6 +466,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty op = "copy"; else if (type == DMA_XOR) op = "xor"; + else if (type == DMA_PQ) + op = "pq"; else return -EINVAL; @@ -501,6 +523,10 @@ static int dmatest_add_channel(struct dma_chan *chan) cnt = dmatest_add_threads(dtc, DMA_XOR); thread_count += cnt > 0 ?: 0; } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(dtc, DMA_PQ); + thread_count += cnt > 0 ?: 0; + } pr_info("dmatest: Started %u threads using %s\n", thread_count, dma_chan_name(chan)); -- cgit v1.2.3 From 507fbec4cff442ebce6706db34603bfb9cc3b5a9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:12:39 -0700 Subject: iop-adma: cleanup iop_adma_run_tx_complete_actions Replace 'desc->async_tx.' with 'tx->' [ Impact: pure cleanup ] Signed-off-by: Dan Williams --- drivers/dma/iop-adma.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 4496bc60666..ce45f3fb034 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -61,17 +61,18 @@ static dma_cookie_t iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, struct iop_adma_chan *iop_chan, dma_cookie_t cookie) { - BUG_ON(desc->async_tx.cookie < 0); - if (desc->async_tx.cookie > 0) { - cookie = desc->async_tx.cookie; - desc->async_tx.cookie = 0; + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie < 0); + if (tx->cookie > 0) { + cookie = tx->cookie; + tx->cookie = 0; /* call the callback (must not sleep or submit new * operations to this channel) */ - if (desc->async_tx.callback) - desc->async_tx.callback( - desc->async_tx.callback_param); + if (tx->callback) + tx->callback(tx->callback_param); /* unmap dma addresses * (unmap_single vs unmap_page?) @@ -81,7 +82,7 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, struct device *dev = &iop_chan->device->pdev->dev; u32 len = unmap->unmap_len; - enum dma_ctrl_flags flags = desc->async_tx.flags; + enum dma_ctrl_flags flags = tx->flags; u32 src_cnt; dma_addr_t addr; dma_addr_t dest; @@ -115,7 +116,7 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, } /* run dependent operations */ - dma_run_dependencies(&desc->async_tx); + dma_run_dependencies(tx); return cookie; } -- cgit v1.2.3 From 72be12f0c39df46832403cbfd82e132a883f5ddc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 13:38:29 -0700 Subject: iop-adma: fix lockdep false positive lockdep correctly identifies a potential recursive locking case for iop_chan->lock, but in the dependency submission case we expect that the same class will be acquired for both the parent dependency and the child channel. Signed-off-by: Dan Williams --- drivers/dma/iop-adma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index ce45f3fb034..9c752bd295e 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -288,7 +288,12 @@ static void iop_adma_tasklet(unsigned long data) { struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; - spin_lock(&iop_chan->lock); + /* lockdep will flag depedency submissions as potentially + * recursive locking, this is not the case as a dependency + * submission will never recurse a channels submit routine. + * There are checks in async_tx.c to prevent this. + */ + spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING); __iop_adma_slot_cleanup(iop_chan); spin_unlock(&iop_chan->lock); } -- cgit v1.2.3 From 7bf649aee8ac93ecc280f8745dcf8ec19d7b9fb1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Aug 2009 14:32:04 -0700 Subject: iop-adma: P+Q support for iop13xx adma engines iop33x support is not included because that engine is a bit more awkward to handle in that it can either be in xor mode or pq mode. The dmaengine/async_tx layers currently only comprehend static capabilities. Note iop13xx does not support hardware PQ continuation so the driver must handle the DMA_PREP_CONTINUE flag for operations across > 16 sources. From the comment for dma_maxpq: /* When an engine does not support native continuation we need 3 extra * source slots to reuse P and Q with the following coefficients: * 1/ {00} * P : remove P from Q', but use it as a source for P' * 2/ {01} * Q : use Q to continue Q' calculation * 3/ {00} * Q : subtract Q from P' to cancel (2) */ Signed-off-by: Dan Williams --- drivers/dma/iop-adma.c | 231 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 197 insertions(+), 34 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 9c752bd295e..5a0f4fe2ee6 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -57,6 +57,80 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) } } +static void +iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt; + dma_addr_t addr; + dma_addr_t dest; + + src_cnt = unmap->unmap_src_cnt; + dest = iop_desc_get_dest_addr(unmap, iop_chan); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + enum dma_data_direction dir; + + if (src_cnt > 1) /* is xor? */ + dir = DMA_BIDIRECTIONAL; + else + dir = DMA_FROM_DEVICE; + + dma_unmap_page(dev, dest, len, dir); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt); + if (addr == dest) + continue; + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + } + desc->group_head = NULL; +} + +static void +iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt = unmap->unmap_src_cnt; + dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan); + dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan); + int i; + + if (tx->flags & DMA_PREP_CONTINUE) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + dma_addr_t addr; + + for (i = 0; i < src_cnt; i++) { + addr = iop_desc_get_src_addr(unmap, iop_chan, i); + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + if (desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE); + dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE); + } + } + + desc->group_head = NULL; +} + + static dma_cookie_t iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, struct iop_adma_chan *iop_chan, dma_cookie_t cookie) @@ -78,40 +152,10 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, * (unmap_single vs unmap_page?) */ if (desc->group_head && desc->unmap_len) { - struct iop_adma_desc_slot *unmap = desc->group_head; - struct device *dev = - &iop_chan->device->pdev->dev; - u32 len = unmap->unmap_len; - enum dma_ctrl_flags flags = tx->flags; - u32 src_cnt; - dma_addr_t addr; - dma_addr_t dest; - - src_cnt = unmap->unmap_src_cnt; - dest = iop_desc_get_dest_addr(unmap, iop_chan); - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - enum dma_data_direction dir; - - if (src_cnt > 1) /* is xor? */ - dir = DMA_BIDIRECTIONAL; - else - dir = DMA_FROM_DEVICE; - - dma_unmap_page(dev, dest, len, dir); - } - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - while (src_cnt--) { - addr = iop_desc_get_src_addr(unmap, - iop_chan, - src_cnt); - if (addr == dest) - continue; - dma_unmap_page(dev, addr, len, - DMA_TO_DEVICE); - } - } - desc->group_head = NULL; + if (iop_desc_is_pq(desc)) + iop_desc_unmap_pq(iop_chan, desc); + else + iop_desc_unmap(iop_chan, desc); } } @@ -702,6 +746,118 @@ iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, return sw_desc ? &sw_desc->async_tx : NULL; } +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + int continue_srcs; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, + "%s src_cnt: %d len: %u flags: %lx\n", + __func__, src_cnt, len, flags); + + if (dmaf_p_disabled_continue(flags)) + continue_srcs = 1+src_cnt; + else if (dmaf_continue(flags)) + continue_srcs = 3+src_cnt; + else + continue_srcs = 0+src_cnt; + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + int i; + + g = sw_desc->group_head; + iop_desc_set_byte_count(g, iop_chan, len); + + /* even if P is disabled its destination address (bits + * [3:0]) must match Q. It is ok if P points to an + * invalid address, it won't be written. + */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1] & 0x7; + + iop_desc_set_pq_addr(g, dst); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + for (i = 0; i < src_cnt; i++) + iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); + + /* if we are continuing a previous operation factor in + * the old p and q values, see the comment for dma_maxpq + * in include/linux/dmaengine.h + */ + if (dmaf_p_disabled_continue(flags)) + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + else if (dmaf_continue(flags)) { + iop_desc_set_pq_src_addr(g, i++, dst[0], 0); + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + iop_desc_set_pq_src_addr(g, i++, dst[1], 0); + } + iop_desc_init_pq(g, i, flags); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, enum sum_check_flags *pqres, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", + __func__, src_cnt, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + /* for validate operations p and q are tagged onto the + * end of the source list + */ + int pq_idx = src_cnt; + + g = sw_desc->group_head; + iop_desc_init_pq_zero_sum(g, src_cnt+2, flags); + iop_desc_set_pq_zero_sum_byte_count(g, len); + g->pq_check_result = pqres; + pr_debug("\t%s: g->pq_check_result: %p\n", + __func__, g->pq_check_result); + sw_desc->unmap_src_cnt = src_cnt+2; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, + src[src_cnt], + scf[src_cnt]); + iop_desc_set_pq_zero_sum_addr(g, pq_idx, src); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + static void iop_adma_free_chan_resources(struct dma_chan *chan) { struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); @@ -1201,6 +1357,13 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) dma_dev->device_prep_dma_xor_val = iop_adma_prep_dma_xor_val; + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0); + dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq; + } + if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_pq_val = + iop_adma_prep_dma_pq_val; if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) dma_dev->device_prep_dma_interrupt = iop_adma_prep_dma_interrupt; -- cgit v1.2.3 From f6dbf651615900646fe0ba1ef5ce1027e5b4748d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:12:40 -0700 Subject: iop-adma: P+Q self test Even though the intent is to extend dmatest with P+Q tests there is still value in having an always-on sanity check to prevent an unintentionally broken driver from registering. This depends on raid6_pq.ko for verification, the side effect being that PQ capable channels will fail to register when raid6 is disabled. Signed-off-by: Dan Williams --- drivers/dma/iop-adma.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 181 insertions(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 5a0f4fe2ee6..f4c59e59f6c 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -1267,6 +1268,170 @@ out: return err; } +#ifdef CONFIG_MD_RAID6_PQ +static int __devinit +iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) +{ + /* combined sources, software pq results, and extra hw pq results */ + struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2]; + /* ptr to the extra hw pq buffers defined above */ + struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2]; + /* address conversion buffers (dma_map / page_address) */ + void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2]; + dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST]; + dma_addr_t pq_dest[2]; + + int i; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u32 zero_sum_result; + int err = 0; + struct device *dev; + + dev_dbg(device->common.dev, "%s\n", __func__); + + for (i = 0; i < ARRAY_SIZE(pq); i++) { + pq[i] = alloc_page(GFP_KERNEL); + if (!pq[i]) { + while (i--) + __free_page(pq[i]); + return -ENOMEM; + } + } + + /* Fill in src buffers */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { + pq_sw[i] = page_address(pq[i]); + memset(pq_sw[i], 0x11111111 * (1<common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + dev = dma_chan->device->dev; + + /* initialize the dests */ + memset(page_address(pq_hw[0]), 0 , PAGE_SIZE); + memset(page_address(pq_hw[1]), 0 , PAGE_SIZE); + + /* test pq */ + pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE); + pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src, + IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp, + PAGE_SIZE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw); + + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST], + page_address(pq_hw[0]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test p failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1], + page_address(pq_hw[1]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test q failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + /* test correct zero sum using the software generated pq values */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = ~0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 0) { + dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + + /* test incorrect zero sum */ + i = IOP_ADMA_NUM_SRC_TEST; + memset(pq_sw[i] + 100, 0, 100); + memset(pq_sw[i+1] + 200, 0, 200); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = 0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) { + dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + i = ARRAY_SIZE(pq); + while (i--) + __free_page(pq[i]); + return err; +} +#endif + static int __devexit iop_adma_remove(struct platform_device *dev) { struct iop_adma_device *device = platform_get_drvdata(dev); @@ -1417,13 +1582,28 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || - dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { ret = iop_adma_xor_val_self_test(adev); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_iop_chan; } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { + #ifdef CONFIG_MD_RAID6_PQ + ret = iop_adma_pq_zero_sum_self_test(adev); + dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); + #else + /* can not test raid6, so do not publish capability */ + dma_cap_clear(DMA_PQ, dma_dev->cap_mask); + dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask); + ret = 0; + #endif + if (ret) + goto err_free_iop_chan; + } + dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " "( %s%s%s%s%s%s%s%s%s%s)\n", dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", -- cgit v1.2.3 From 1f27adc2f050836c12deb4d99afe507636537a0b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:29:02 -0700 Subject: ioat: move definitions to dma.h Some of these defines may be useful outside of dma.c and the header is private so there are no namespace pollution concerns. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 14 -------------- drivers/dma/ioat/dma.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 648797e8329..16c080786a6 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -38,28 +38,14 @@ #include "registers.h" #include "hw.h" -#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) - -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) static int ioat_pending_level = 4; module_param(ioat_pending_level, int, 0644); MODULE_PARM_DESC(ioat_pending_level, "high-water mark for pushing ioat descriptors (default: 4)"); -#define RESET_DELAY msecs_to_jiffies(100) -#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) static void ioat_dma_chan_reset_part2(struct work_struct *work); static void ioat_dma_chan_watchdog(struct work_struct *work); -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - /* internal functions */ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index e80e787fe64..ccb400f5e27 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -43,6 +43,22 @@ enum ioat_interrupt { #define IOAT_DMA_DCA_ANY_CPU ~0 #define IOAT_WATCHDOG_PERIOD (2 * HZ) +#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) +#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) + +#define RESET_DELAY msecs_to_jiffies(100) +#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + /** * struct ioatdma_device - internal representation of a IOAT device -- cgit v1.2.3 From e6c0b69a43150c1a37cf342ce5faedf12583bf79 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:29:44 -0700 Subject: ioat: convert ioat_probe to pcim/devm The driver currently duplicates much of what these routines offer, so just use the common code. For example ->irq_mode tracks what interrupt mode was initialized, which duplicates the ->msix_enabled and ->msi_enabled handling in pcim_release. This also adds a check to the return value of dma_async_device_register, which can fail. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 130 ++++++++++++++++--------------------------------- drivers/dma/ioat/dma.h | 11 ----- drivers/dma/ioat/hw.h | 1 + drivers/dma/ioat/pci.c | 67 +++++++++---------------- 4 files changed, 68 insertions(+), 141 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 16c080786a6..65f8b7492a4 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -121,6 +121,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) u32 xfercap; int i; struct ioat_dma_chan *ioat_chan; + struct device *dev = &device->pdev->dev; /* * IOAT ver.3 workarounds @@ -164,7 +165,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) } #endif for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); + ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); if (!ioat_chan) { device->common.chancnt = i; break; @@ -1450,7 +1451,11 @@ MODULE_PARM_DESC(ioat_interrupt_style, static int ioat_dma_setup_interrupts(struct ioatdma_device *device) { struct ioat_dma_chan *ioat_chan; - int err, i, j, msixcnt; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; u8 intrctrl = 0; if (!strcmp(ioat_interrupt_style, "msix")) @@ -1461,8 +1466,7 @@ static int ioat_dma_setup_interrupts(struct ioatdma_device *device) goto msi; if (!strcmp(ioat_interrupt_style, "intx")) goto intx; - dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", - ioat_interrupt_style); + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); goto err_no_irq; msix: @@ -1471,55 +1475,55 @@ msix: for (i = 0; i < msixcnt; i++) device->msix_entries[i].entry = i; - err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); + err = pci_enable_msix(pdev, device->msix_entries, msixcnt); if (err < 0) goto msi; if (err > 0) goto msix_single_vector; for (i = 0; i < msixcnt; i++) { + msix = &device->msix_entries[i]; ioat_chan = ioat_lookup_chan_by_index(device, i); - err = request_irq(device->msix_entries[i].vector, - ioat_dma_do_interrupt_msix, - 0, "ioat-msix", ioat_chan); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", ioat_chan); if (err) { for (j = 0; j < i; j++) { + msix = &device->msix_entries[j]; ioat_chan = ioat_lookup_chan_by_index(device, j); - free_irq(device->msix_entries[j].vector, - ioat_chan); + devm_free_irq(dev, msix->vector, ioat_chan); } goto msix_single_vector; } } intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = msix_multi_vector; goto done; msix_single_vector: - device->msix_entries[0].entry = 0; - err = pci_enable_msix(device->pdev, device->msix_entries, 1); + msix = &device->msix_entries[0]; + msix->entry = 0; + err = pci_enable_msix(pdev, device->msix_entries, 1); if (err) goto msi; - err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, - 0, "ioat-msix", device); + err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, + "ioat-msix", device); if (err) { - pci_disable_msix(device->pdev); + pci_disable_msix(pdev); goto msi; } - device->irq_mode = msix_single_vector; goto done; msi: - err = pci_enable_msi(device->pdev); + err = pci_enable_msi(pdev); if (err) goto intx; - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - 0, "ioat-msi", device); + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", device); if (err) { - pci_disable_msi(device->pdev); + pci_disable_msi(pdev); goto intx; } /* @@ -1527,21 +1531,17 @@ msi: */ if (device->version == IOAT_VER_1_2) { u32 dmactrl; - pci_read_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, dmactrl); + pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); } - device->irq_mode = msi; goto done; intx: - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", device); if (err) goto err_no_irq; - device->irq_mode = intx; done: intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; @@ -1551,60 +1551,26 @@ done: err_no_irq: /* Disable all interrupt generation */ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - dev_err(&device->pdev->dev, "no usable interrupts\n"); - device->irq_mode = none; - return -1; + dev_err(dev, "no usable interrupts\n"); + return err; } -/** - * ioat_dma_remove_interrupts - remove whatever interrupts were set - * @device: ioat device - */ -static void ioat_dma_remove_interrupts(struct ioatdma_device *device) +static void ioat_disable_interrupts(struct ioatdma_device *device) { - struct ioat_dma_chan *ioat_chan; - int i; - /* Disable all interrupt generation */ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - - switch (device->irq_mode) { - case msix_multi_vector: - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - free_irq(device->msix_entries[i].vector, ioat_chan); - } - pci_disable_msix(device->pdev); - break; - case msix_single_vector: - free_irq(device->msix_entries[0].vector, device); - pci_disable_msix(device->pdev); - break; - case msi: - free_irq(device->pdev->irq, device); - pci_disable_msi(device->pdev); - break; - case intx: - free_irq(device->pdev->irq, device); - break; - case none: - dev_warn(&device->pdev->dev, - "call to %s without interrupts setup\n", __func__); - } - device->irq_mode = none; } struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) { int err; + struct device *dev = &pdev->dev; struct ioatdma_device *device; - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { + device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); + if (!device) err = -ENOMEM; - goto err_kzalloc; - } device->pdev = pdev; device->reg_base = iobase; device->version = readb(device->reg_base + IOAT_VER_OFFSET); @@ -1651,14 +1617,12 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, break; } - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine found," + dev_err(dev, "Intel(R) I/OAT DMA Engine found," " %d channels, device version 0x%02x, driver version %s\n", device->common.chancnt, device->version, IOAT_DMA_VERSION); if (!device->common.chancnt) { - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine problem found: " + dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: " "zero channels detected\n"); goto err_setup_interrupts; } @@ -1671,9 +1635,11 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, if (err) goto err_self_test; - ioat_set_tcp_copy_break(device); + err = dma_async_device_register(&device->common); + if (err) + goto err_self_test; - dma_async_device_register(&device->common); + ioat_set_tcp_copy_break(device); if (device->version != IOAT_VER_3_0) { INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); @@ -1684,16 +1650,12 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, return device; err_self_test: - ioat_dma_remove_interrupts(device); + ioat_disable_interrupts(device); err_setup_interrupts: pci_pool_destroy(device->completion_pool); err_completion_pool: pci_pool_destroy(device->dma_pool); err_dma_pool: - kfree(device); -err_kzalloc: - dev_err(&pdev->dev, - "Intel(R) I/OAT DMA Engine initialization failed\n"); return NULL; } @@ -1705,23 +1667,17 @@ void ioat_dma_remove(struct ioatdma_device *device) if (device->version != IOAT_VER_3_0) cancel_delayed_work(&device->work); - ioat_dma_remove_interrupts(device); + ioat_disable_interrupts(device); dma_async_device_unregister(&device->common); pci_pool_destroy(device->dma_pool); pci_pool_destroy(device->completion_pool); - iounmap(device->reg_base); - pci_release_regions(device->pdev); - pci_disable_device(device->pdev); - list_for_each_entry_safe(chan, _chan, &device->common.channels, device_node) { ioat_chan = to_ioat_chan(chan); list_del(&chan->device_node); - kfree(ioat_chan); } - kfree(device); } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index ccb400f5e27..5e8d7cfabc2 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -31,14 +31,6 @@ #define IOAT_DMA_VERSION "3.64" -enum ioat_interrupt { - none = 0, - msix_multi_vector = 1, - msix_single_vector = 2, - msi = 3, - intx = 4, -}; - #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 #define IOAT_WATCHDOG_PERIOD (2 * HZ) @@ -59,7 +51,6 @@ enum ioat_interrupt { */ #define NULL_DESC_BUFFER_SIZE 1 - /** * struct ioatdma_device - internal representation of a IOAT device * @pdev: PCI-Express device @@ -67,7 +58,6 @@ enum ioat_interrupt { * @dma_pool: for allocating DMA descriptors * @common: embedded struct dma_device * @version: version of ioatdma device - * @irq_mode: which style irq to use * @msix_entries: irq handlers * @idx: per channel data */ @@ -79,7 +69,6 @@ struct ioatdma_device { struct pci_pool *completion_pool; struct dma_device common; u8 version; - enum ioat_interrupt irq_mode; struct delayed_work work; struct msix_entry msix_entries[4]; struct ioat_dma_chan *idx[4]; diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index afa57eef86c..1438fa5c4d1 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -23,6 +23,7 @@ /* PCI Configuration Space Values */ #define IOAT_PCI_VID 0x8086 +#define IOAT_MMIO_BAR 0 /* CB device ID's */ #define IOAT_PCI_DID_5000 0x1A38 diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index d7948bfd8fb..982e38fd177 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -62,7 +62,6 @@ static struct pci_device_id ioat_pci_tbl[] = { struct ioat_device { struct pci_dev *pdev; - void __iomem *iobase; struct ioatdma_device *dma; struct dca_provider *dca; }; @@ -75,8 +74,10 @@ static int ioat_dca_enabled = 1; module_param(ioat_dca_enabled, int, 0644); MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +#define DRV_NAME "ioatdma" + static struct pci_driver ioat_pci_driver = { - .name = "ioatdma", + .name = DRV_NAME, .id_table = ioat_pci_tbl, .probe = ioat_probe, .remove = __devexit_p(ioat_remove), @@ -85,47 +86,42 @@ static struct pci_driver ioat_pci_driver = { static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + void __iomem * const *iomap; void __iomem *iobase; + struct device *dev = &pdev->dev; struct ioat_device *device; - unsigned long mmio_start, mmio_len; int err; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) - goto err_enable_device; + return err; - err = pci_request_regions(pdev, ioat_pci_driver.name); + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); if (err) - goto err_request_regions; + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) - goto err_set_dma_mask; + return err; err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) - goto err_set_dma_mask; - - mmio_start = pci_resource_start(pdev, 0); - mmio_len = pci_resource_len(pdev, 0); - iobase = ioremap(mmio_start, mmio_len); - if (!iobase) { - err = -ENOMEM; - goto err_ioremap; - } + return err; + + device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } device->pdev = pdev; pci_set_drvdata(pdev, device); - device->iobase = iobase; + iobase = iomap[IOAT_MMIO_BAR]; pci_set_master(pdev); @@ -146,28 +142,15 @@ static int __devinit ioat_probe(struct pci_dev *pdev, device->dca = ioat3_dca_init(pdev, iobase); break; default: - err = -ENODEV; - break; + return -ENODEV; } - if (!device->dma) - err = -ENODEV; - if (err) - goto err_version; + if (!device->dma) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } return 0; - -err_version: - kfree(device); -err_kzalloc: - iounmap(iobase); -err_ioremap: -err_set_dma_mask: - pci_release_regions(pdev); - pci_disable_device(pdev); -err_request_regions: -err_enable_device: - return err; } static void __devexit ioat_remove(struct pci_dev *pdev) @@ -185,8 +168,6 @@ static void __devexit ioat_remove(struct pci_dev *pdev) ioat_dma_remove(device->dma); device->dma = NULL; } - - kfree(device); } static int __init ioat_init_module(void) -- cgit v1.2.3 From bc3c70258526a635325f1f15138a96297879bc1a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:33:42 -0700 Subject: ioat: cleanup some long deref chains and 80 column collisions * reduce device->common. to dma-> in ioat_dma_{probe,remove,selftest} * ioat_lookup_chan_by_index to ioat_chan_by_index * multi-line function definitions * ioat_desc_sw.async_tx to ioat_desc_sw.txd * desc->txd. to tx-> in cleanup routine Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 304 +++++++++++++++++++++++-------------------------- drivers/dma/ioat/dma.h | 7 +- 2 files changed, 144 insertions(+), 167 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 65f8b7492a4..462dae62719 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -55,9 +55,8 @@ ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); static struct ioat_desc_sw * ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); -static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( - struct ioatdma_device *device, - int index) +static inline struct ioat_dma_chan * +ioat_chan_by_index(struct ioatdma_device *device, int index) { return device->idx[index]; } @@ -87,7 +86,7 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_bit(bit, &attnstatus, BITS_PER_LONG) { - ioat_chan = ioat_lookup_chan_by_index(instance, bit); + ioat_chan = ioat_chan_by_index(instance, bit); tasklet_schedule(&ioat_chan->cleanup_task); } @@ -205,8 +204,8 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) * descriptors to hw * @chan: DMA channel handle */ -static inline void __ioat1_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) +static inline void +__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat_chan) { ioat_chan->pending = 0; writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); @@ -223,8 +222,8 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) } } -static inline void __ioat2_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) +static inline void +__ioat2_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat_chan) { ioat_chan->pending = 0; writew(ioat_chan->dmacount, @@ -279,18 +278,18 @@ static void ioat_dma_chan_reset_part2(struct work_struct *work) desc = to_ioat_desc(ioat_chan->used_desc.prev); switch (ioat_chan->device->version) { case IOAT_VER_1_2: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, + writel(((u64) desc->txd.phys) >> 32, ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); break; case IOAT_VER_2_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, + writel(((u64) desc->txd.phys) >> 32, ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); /* tell the engine to go with what's left to be done */ @@ -299,7 +298,7 @@ static void ioat_dma_chan_reset_part2(struct work_struct *work) break; } - dev_err(&ioat_chan->device->pdev->dev, + dev_err(to_dev(ioat_chan), "chan%d reset - %d descs waiting, %d total desc\n", chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); @@ -322,7 +321,7 @@ static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) chansts = (ioat_chan->completion_virt->low & IOAT_CHANSTS_DMA_TRANSFER_STATUS); if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, + dev_err(to_dev(ioat_chan), "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", chan_num(ioat_chan), chansts, chanerr); writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); @@ -367,7 +366,7 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) unsigned long compl_desc_addr_hw; for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); + ioat_chan = ioat_chan_by_index(device, i); if (ioat_chan->device->version == IOAT_VER_1_2 /* have we started processing anything yet */ @@ -475,7 +474,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) len = first->len; src = first->src; dst = first->dst; - orig_flags = first->async_tx.flags; + orig_flags = first->txd.flags; new = first; spin_lock_bh(&ioat_chan->desc_lock); @@ -484,7 +483,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) do { copy = min_t(size_t, len, ioat_chan->xfercap); - async_tx_ack(&new->async_tx); + async_tx_ack(&new->txd); hw = new->hw; hw->size = copy; @@ -495,7 +494,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) /* chain together the physical address list for the HW */ wmb(); - prev->hw->next = (u64) new->async_tx.phys; + prev->hw->next = (u64) new->txd.phys; len -= copy; dst += copy; @@ -507,27 +506,26 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); + dev_err(to_dev(ioat_chan), "tx submit failed\n"); spin_unlock_bh(&ioat_chan->desc_lock); return -ENOMEM; } hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { + if (first->txd.callback) { hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; if (first != new) { /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; + new->txd.callback = first->txd.callback; + new->txd.callback_param + = first->txd.callback_param; + first->txd.callback = NULL; + first->txd.callback_param = NULL; } } new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ + new->txd.flags = orig_flags; /* client is in control of this ack */ /* store the original values for use in later cleanup */ if (new != first) { @@ -541,11 +539,11 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) cookie++; if (cookie < 0) cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; + ioat_chan->common.cookie = new->txd.cookie = cookie; /* write address into NextDescriptor field of last desc in chain */ to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = - first->async_tx.phys; + first->txd.phys; list_splice_tail(&new_chain, &ioat_chan->used_desc); ioat_chan->dmacount += desc_count; @@ -574,7 +572,7 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) len = first->len; src = first->src; dst = first->dst; - orig_flags = first->async_tx.flags; + orig_flags = first->txd.flags; new = first; /* @@ -584,7 +582,7 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) do { copy = min_t(size_t, len, ioat_chan->xfercap); - async_tx_ack(&new->async_tx); + async_tx_ack(&new->txd); hw = new->hw; hw->size = copy; @@ -599,27 +597,26 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); + dev_err(to_dev(ioat_chan), "tx submit failed\n"); spin_unlock_bh(&ioat_chan->desc_lock); return -ENOMEM; } hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { + if (first->txd.callback) { hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; if (first != new) { /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; + new->txd.callback = first->txd.callback; + new->txd.callback_param + = first->txd.callback_param; + first->txd.callback = NULL; + first->txd.callback_param = NULL; } } new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ + new->txd.flags = orig_flags; /* client is in control of this ack */ /* store the original values for use in later cleanup */ if (new != first) { @@ -633,7 +630,7 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) cookie++; if (cookie < 0) cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; + ioat_chan->common.cookie = new->txd.cookie = cookie; ioat_chan->dmacount += desc_count; ioat_chan->pending += desc_count; @@ -649,9 +646,8 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) * @ioat_chan: the channel supplying the memory pool for the descriptors * @flags: allocation flags */ -static struct ioat_desc_sw *ioat_dma_alloc_descriptor( - struct ioat_dma_chan *ioat_chan, - gfp_t flags) +static struct ioat_desc_sw * +ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat_chan, gfp_t flags) { struct ioat_dma_descriptor *desc; struct ioat_desc_sw *desc_sw; @@ -670,19 +666,19 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( } memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); + dma_async_tx_descriptor_init(&desc_sw->txd, &ioat_chan->common); switch (ioat_chan->device->version) { case IOAT_VER_1_2: - desc_sw->async_tx.tx_submit = ioat1_tx_submit; + desc_sw->txd.tx_submit = ioat1_tx_submit; break; case IOAT_VER_2_0: case IOAT_VER_3_0: - desc_sw->async_tx.tx_submit = ioat2_tx_submit; + desc_sw->txd.tx_submit = ioat2_tx_submit; break; } desc_sw->hw = desc; - desc_sw->async_tx.phys = phys; + desc_sw->txd.phys = phys; return desc_sw; } @@ -712,9 +708,9 @@ static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) /* circle link the hw descriptors */ desc = to_ioat_desc(ioat_chan->free_desc.next); - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; } } @@ -743,8 +739,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, - "CHANERR = %x, clearing\n", chanerr); + dev_err(to_dev(ioat_chan), "CHANERR = %x, clearing\n", chanerr); writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); } @@ -752,7 +747,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < ioat_initial_desc_count; i++) { desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, + dev_err(to_dev(ioat_chan), "Only %d initial descriptors\n", i); break; } @@ -819,14 +814,14 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) in_use_descs++; list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); + desc->txd.phys); kfree(desc); } list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) { list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); + desc->txd.phys); kfree(desc); } break; @@ -836,12 +831,12 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) ioat_chan->free_desc.next, node) { list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); + desc->txd.phys); kfree(desc); } desc = to_ioat_desc(ioat_chan->free_desc.next); pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); + desc->txd.phys); kfree(desc); INIT_LIST_HEAD(&ioat_chan->free_desc); INIT_LIST_HEAD(&ioat_chan->used_desc); @@ -855,8 +850,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) /* one is ok since we left it on there on purpose */ if (in_use_descs > 1) - dev_err(&ioat_chan->device->pdev->dev, - "Freeing %d in use descriptors!\n", + dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n", in_use_descs - 1); ioat_chan->last_completion = ioat_chan->completion_addr = 0; @@ -889,8 +883,7 @@ ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) /* try to get another desc */ new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); + dev_err(to_dev(ioat_chan), "alloc failed\n"); return NULL; } } @@ -936,16 +929,15 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) for (i = 16; i; i--) { desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); + dev_err(to_dev(ioat_chan), "alloc failed\n"); break; } list_add_tail(&desc->node, ioat_chan->used_desc.next); desc->hw->next - = to_ioat_desc(desc->node.next)->async_tx.phys; + = to_ioat_desc(desc->node.next)->txd.phys; to_ioat_desc(desc->node.prev)->hw->next - = desc->async_tx.phys; + = desc->txd.phys; ioat_chan->desccount++; } @@ -962,8 +954,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) return new; } -static struct ioat_desc_sw *ioat_dma_get_next_descriptor( - struct ioat_dma_chan *ioat_chan) +static struct ioat_desc_sw * +ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) { if (!ioat_chan) return NULL; @@ -978,12 +970,9 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor( return NULL; } -static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) +static struct dma_async_tx_descriptor * +ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); struct ioat_desc_sw *new; @@ -996,22 +985,19 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( new->len = len; new->dst = dma_dest; new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; + new->txd.flags = flags; + return &new->txd; } else { - dev_err(&ioat_chan->device->pdev->dev, + dev_err(to_dev(ioat_chan), "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); return NULL; } } -static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) +static struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); struct ioat_desc_sw *new; @@ -1028,11 +1014,11 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( new->len = len; new->dst = dma_dest; new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; + new->txd.flags = flags; + return &new->txd; } else { spin_unlock_bh(&ioat_chan->desc_lock); - dev_err(&ioat_chan->device->pdev->dev, + dev_err(to_dev(ioat_chan), "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); return NULL; @@ -1050,8 +1036,8 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) static void ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) { - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) pci_unmap_single(ioat_chan->device->pdev, pci_unmap_addr(desc, dst), pci_unmap_len(desc, len), @@ -1063,8 +1049,8 @@ ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) PCI_DMA_FROMDEVICE); } - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) pci_unmap_single(ioat_chan->device->pdev, pci_unmap_addr(desc, src), pci_unmap_len(desc, len), @@ -1088,6 +1074,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) dma_cookie_t cookie = 0; unsigned long desc_phys; struct ioat_desc_sw *latest_desc; + struct dma_async_tx_descriptor *tx; prefetch(ioat_chan->completion_virt); @@ -1111,8 +1098,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) if ((ioat_chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { - dev_err(&ioat_chan->device->pdev->dev, - "Channel halted, chanerr = %x\n", + dev_err(to_dev(ioat_chan), "Channel halted, chanerr = %x\n", readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); /* TODO do something to salvage the situation */ @@ -1145,38 +1131,38 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) case IOAT_VER_1_2: list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) { - + tx = &desc->txd; /* * Incoming DMA requests may use multiple descriptors, * due to exceeding xfercap, perhaps. If so, only the * last one will have a cookie, and require unmapping. */ - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; + if (tx->cookie) { + cookie = tx->cookie; ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } } - if (desc->async_tx.phys != phys_complete) { + if (tx->phys != phys_complete) { /* * a completed entry, but not the last, so clean * up if the client is done with the descriptor */ - if (async_tx_test_ack(&desc->async_tx)) { + if (async_tx_test_ack(tx)) { list_move_tail(&desc->node, &ioat_chan->free_desc); } else - desc->async_tx.cookie = 0; + tx->cookie = 0; } else { /* * last used desc. Do not remove, so we can * append from it, but don't look at it next * time, either */ - desc->async_tx.cookie = 0; + tx->cookie = 0; /* TODO check status bits? */ break; @@ -1191,10 +1177,11 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) /* work backwards to find latest finished desc */ desc = to_ioat_desc(ioat_chan->used_desc.next); + tx = &desc->txd; latest_desc = NULL; do { desc = to_ioat_desc(desc->node.prev); - desc_phys = (unsigned long)desc->async_tx.phys + desc_phys = (unsigned long)tx->phys & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; if (desc_phys == phys_complete) { latest_desc = desc; @@ -1203,19 +1190,18 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) } while (&desc->node != ioat_chan->used_desc.prev); if (latest_desc != NULL) { - /* work forwards to clear finished descriptors */ for (desc = to_ioat_desc(ioat_chan->used_desc.prev); &desc->node != latest_desc->node.next && &desc->node != ioat_chan->used_desc.next; desc = to_ioat_desc(desc->node.next)) { - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; - desc->async_tx.cookie = 0; + if (tx->cookie) { + cookie = tx->cookie; + tx->cookie = 0; ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } } } @@ -1245,10 +1231,9 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) * @done: if not %NULL, updated with last completed transaction * @used: if not %NULL, updated with last used transaction */ -static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, - dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) +static enum dma_status +ioat_dma_is_complete(struct dma_chan *chan, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); dma_cookie_t last_used; @@ -1290,7 +1275,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) desc = ioat_dma_get_next_descriptor(ioat_chan); if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, + dev_err(to_dev(ioat_chan), "Unable to start null desc - get next desc failed\n"); spin_unlock_bh(&ioat_chan->desc_lock); return; @@ -1303,15 +1288,15 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) desc->hw->size = NULL_DESC_BUFFER_SIZE; desc->hw->src_addr = 0; desc->hw->dst_addr = 0; - async_tx_ack(&desc->async_tx); + async_tx_ack(&desc->txd); switch (ioat_chan->device->version) { case IOAT_VER_1_2: desc->hw->next = 0; list_add_tail(&desc->node, &ioat_chan->used_desc); - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, + writel(((u64) desc->txd.phys) >> 32, ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); writeb(IOAT_CHANCMD_START, ioat_chan->reg_base @@ -1319,9 +1304,9 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) break; case IOAT_VER_2_0: case IOAT_VER_3_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, + writel(((u64) desc->txd.phys) >> 32, ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); ioat_chan->dmacount++; @@ -1352,6 +1337,8 @@ static int ioat_dma_self_test(struct ioatdma_device *device) int i; u8 *src; u8 *dest; + struct dma_device *dma = &device->common; + struct device *dev = &device->pdev->dev; struct dma_chan *dma_chan; struct dma_async_tx_descriptor *tx; dma_addr_t dma_dest, dma_src; @@ -1375,26 +1362,21 @@ static int ioat_dma_self_test(struct ioatdma_device *device) src[i] = (u8)i; /* Start copy, using first DMA channel */ - dma_chan = container_of(device->common.channels.next, - struct dma_chan, + dma_chan = container_of(dma->channels.next, struct dma_chan, device_node); - if (device->common.device_alloc_chan_resources(dma_chan) < 1) { - dev_err(&device->pdev->dev, - "selftest cannot allocate chan resource\n"); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); err = -ENODEV; goto out; } - dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, - DMA_TO_DEVICE); - dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, - DMA_FROM_DEVICE); + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, IOAT_TEST_SIZE, flags); if (!tx) { - dev_err(&device->pdev->dev, - "Self-test prep failed, disabling\n"); + dev_err(dev, "Self-test prep failed, disabling\n"); err = -ENODEV; goto free_resources; } @@ -1405,32 +1387,29 @@ static int ioat_dma_self_test(struct ioatdma_device *device) tx->callback_param = &cmp; cookie = tx->tx_submit(tx); if (cookie < 0) { - dev_err(&device->pdev->dev, - "Self-test setup failed, disabling\n"); + dev_err(dev, "Self-test setup failed, disabling\n"); err = -ENODEV; goto free_resources; } - device->common.device_issue_pending(dma_chan); + dma->device_issue_pending(dma_chan); tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); if (tmo == 0 || - device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) + dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { - dev_err(&device->pdev->dev, - "Self-test copy timed out, disabling\n"); + dev_err(dev, "Self-test copy timed out, disabling\n"); err = -ENODEV; goto free_resources; } if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(&device->pdev->dev, - "Self-test copy failed compare, disabling\n"); + dev_err(dev, "Self-test copy failed compare, disabling\n"); err = -ENODEV; goto free_resources; } free_resources: - device->common.device_free_chan_resources(dma_chan); + dma->device_free_chan_resources(dma_chan); out: kfree(src); kfree(dest); @@ -1483,15 +1462,14 @@ msix: for (i = 0; i < msixcnt; i++) { msix = &device->msix_entries[i]; - ioat_chan = ioat_lookup_chan_by_index(device, i); + ioat_chan = ioat_chan_by_index(device, i); err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt_msix, 0, "ioat-msix", ioat_chan); if (err) { for (j = 0; j < i; j++) { msix = &device->msix_entries[j]; - ioat_chan = - ioat_lookup_chan_by_index(device, j); + ioat_chan = ioat_chan_by_index(device, j); devm_free_irq(dev, msix->vector, ioat_chan); } goto msix_single_vector; @@ -1561,12 +1539,13 @@ static void ioat_disable_interrupts(struct ioatdma_device *device) writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); } -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase) +struct ioatdma_device * +ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) { int err; struct device *dev = &pdev->dev; struct ioatdma_device *device; + struct dma_device *dma; device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); if (!device) @@ -1574,6 +1553,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, device->pdev = pdev; device->reg_base = iobase; device->version = readb(device->reg_base + IOAT_VER_OFFSET); + dma = &device->common; /* DMA coherent memory pool for DMA descriptor allocations */ device->dma_pool = pci_pool_create("dma_desc_pool", pdev, @@ -1592,36 +1572,32 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, goto err_completion_pool; } - INIT_LIST_HEAD(&device->common.channels); + INIT_LIST_HEAD(&dma->channels); ioat_dma_enumerate_channels(device); - device->common.device_alloc_chan_resources = - ioat_dma_alloc_chan_resources; - device->common.device_free_chan_resources = - ioat_dma_free_chan_resources; - device->common.dev = &pdev->dev; + dma->device_alloc_chan_resources = ioat_dma_alloc_chan_resources; + dma->device_free_chan_resources = ioat_dma_free_chan_resources; + dma->dev = &pdev->dev; - dma_cap_set(DMA_MEMCPY, device->common.cap_mask); - device->common.device_is_tx_complete = ioat_dma_is_complete; + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->device_is_tx_complete = ioat_dma_is_complete; switch (device->version) { case IOAT_VER_1_2: - device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - device->common.device_issue_pending = - ioat1_dma_memcpy_issue_pending; + dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; break; case IOAT_VER_2_0: case IOAT_VER_3_0: - device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - device->common.device_issue_pending = - ioat2_dma_memcpy_issue_pending; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; break; } dev_err(dev, "Intel(R) I/OAT DMA Engine found," " %d channels, device version 0x%02x, driver version %s\n", - device->common.chancnt, device->version, IOAT_DMA_VERSION); + dma->chancnt, device->version, IOAT_DMA_VERSION); - if (!device->common.chancnt) { + if (!dma->chancnt) { dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: " "zero channels detected\n"); goto err_setup_interrupts; @@ -1635,7 +1611,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, if (err) goto err_self_test; - err = dma_async_device_register(&device->common); + err = dma_async_device_register(dma); if (err) goto err_self_test; @@ -1663,19 +1639,19 @@ void ioat_dma_remove(struct ioatdma_device *device) { struct dma_chan *chan, *_chan; struct ioat_dma_chan *ioat_chan; + struct dma_device *dma = &device->common; if (device->version != IOAT_VER_3_0) cancel_delayed_work(&device->work); ioat_disable_interrupts(device); - dma_async_device_unregister(&device->common); + dma_async_device_unregister(dma); pci_pool_destroy(device->dma_pool); pci_pool_destroy(device->completion_pool); - list_for_each_entry_safe(chan, _chan, - &device->common.channels, device_node) { + list_for_each_entry_safe(chan, _chan, &dma->channels, device_node) { ioat_chan = to_ioat_chan(chan); list_del(&chan->device_node); } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 5e8d7cfabc2..c5eabae4c1b 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -38,7 +38,8 @@ #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) +#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) @@ -123,7 +124,7 @@ struct ioat_dma_chan { * @node: this descriptor will either be on the free list, * or attached to a transaction list (async_tx.tx_list) * @tx_cnt: number of descriptors required to complete the transaction - * @async_tx: the generic software descriptor for all engines + * @txd: the generic software descriptor for all engines */ struct ioat_desc_sw { struct ioat_dma_descriptor *hw; @@ -132,7 +133,7 @@ struct ioat_desc_sw { size_t len; dma_addr_t src; dma_addr_t dst; - struct dma_async_tx_descriptor async_tx; + struct dma_async_tx_descriptor txd; }; static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) -- cgit v1.2.3 From b31b78f1ab7806759622b703357e39a21f757281 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:42:32 -0700 Subject: ioat: kill function prototype ifdef guards The only .c files that utilize these protected prototypes depend on CONFIG_INTEL_IOATDMA=y, so there is no value gained in providing empty prototypes. [ Impact: pure cleanup ] Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index c5eabae4c1b..6e27ddb1e98 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -153,19 +153,10 @@ static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) #endif } -#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase); void ioat_dma_remove(struct ioatdma_device *device); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -#else -#define ioat_dma_probe(pdev, iobase) NULL -#define ioat_dma_remove(device) do { } while (0) -#define ioat_dca_init(pdev, iobase) NULL -#define ioat2_dca_init(pdev, iobase) NULL -#define ioat3_dca_init(pdev, iobase) NULL -#endif - #endif /* IOATDMA_H */ -- cgit v1.2.3 From f2427e276ffec5ce599c6bc116e0927269a360ef Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:42:38 -0700 Subject: ioat: split ioat_dma_probe into core/version-specific routines Towards the removal of ioatdma_device.version split the initialization path into distinct versions. This conversion: 1/ moves version specific probe code to version specific routines 2/ removes the need for ioat_device 3/ turns off the ioat1 msi quirk if the device is reinitialized for intx Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 253 ++++++++++++++++++++++++++++++------------------- drivers/dma/ioat/dma.h | 23 ++--- drivers/dma/ioat/pci.c | 79 ++++++++------- 3 files changed, 200 insertions(+), 155 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 462dae62719..b7508041c6d 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -121,52 +121,21 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) int i; struct ioat_dma_chan *ioat_chan; struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; - /* - * IOAT ver.3 workarounds - */ - if (device->version == IOAT_VER_3_0) { - u32 chan_err_mask; - u16 dev_id; - u32 dmauncerrsts; - - /* - * Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - chan_err_mask = 0x3E07; - pci_write_config_dword(device->pdev, - IOAT_PCI_CHANERRMASK_INT_OFFSET, - chan_err_mask); - - /* - * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(device->pdev, - IOAT_PCI_DEVICE_ID_OFFSET, - &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - dmauncerrsts = 0x10; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - dmauncerrsts); - } - } - - device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { - device->common.chancnt--; - } + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; #endif - for (i = 0; i < device->common.chancnt; i++) { + for (i = 0; i < dma->chancnt; i++) { ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); if (!ioat_chan) { - device->common.chancnt = i; + dma->chancnt = i; break; } @@ -175,28 +144,20 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) ioat_chan->xfercap = xfercap; ioat_chan->desccount = 0; INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); - if (ioat_chan->device->version == IOAT_VER_2_0) - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | - IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - else if (ioat_chan->device->version == IOAT_VER_3_0) - writel(IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); spin_lock_init(&ioat_chan->cleanup_lock); spin_lock_init(&ioat_chan->desc_lock); INIT_LIST_HEAD(&ioat_chan->free_desc); INIT_LIST_HEAD(&ioat_chan->used_desc); /* This should be made common somewhere in dmaengine.c */ ioat_chan->common.device = &device->common; - list_add_tail(&ioat_chan->common.device_node, - &device->common.channels); + list_add_tail(&ioat_chan->common.device_node, &dma->channels); device->idx[i] = ioat_chan; tasklet_init(&ioat_chan->cleanup_task, ioat_dma_cleanup_tasklet, (unsigned long) ioat_chan); tasklet_disable(&ioat_chan->cleanup_task); } - return device->common.chancnt; + return dma->chancnt; } /** @@ -1504,15 +1465,6 @@ msi: pci_disable_msi(pdev); goto intx; } - /* - * CB 1.2 devices need a bit set in configuration space to enable MSI - */ - if (device->version == IOAT_VER_1_2) { - u32 dmactrl; - pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); - } goto done; intx: @@ -1522,6 +1474,8 @@ intx: goto err_no_irq; done: + if (device->intr_quirk) + device->intr_quirk(device); intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); return 0; @@ -1539,21 +1493,12 @@ static void ioat_disable_interrupts(struct ioatdma_device *device) writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); } -struct ioatdma_device * -ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) +static int ioat_probe(struct ioatdma_device *device) { - int err; + int err = -ENODEV; + struct dma_device *dma = &device->common; + struct pci_dev *pdev = device->pdev; struct device *dev = &pdev->dev; - struct ioatdma_device *device; - struct dma_device *dma; - - device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); - if (!device) - err = -ENOMEM; - device->pdev = pdev; - device->reg_base = iobase; - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - dma = &device->common; /* DMA coherent memory pool for DMA descriptor allocations */ device->dma_pool = pci_pool_create("dma_desc_pool", pdev, @@ -1572,26 +1517,13 @@ ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) goto err_completion_pool; } - INIT_LIST_HEAD(&dma->channels); ioat_dma_enumerate_channels(device); + dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->device_alloc_chan_resources = ioat_dma_alloc_chan_resources; dma->device_free_chan_resources = ioat_dma_free_chan_resources; - dma->dev = &pdev->dev; - - dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->device_is_tx_complete = ioat_dma_is_complete; - switch (device->version) { - case IOAT_VER_1_2: - dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; - break; - } + dma->dev = &pdev->dev; dev_err(dev, "Intel(R) I/OAT DMA Engine found," " %d channels, device version 0x%02x, driver version %s\n", @@ -1611,19 +1543,7 @@ ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) if (err) goto err_self_test; - err = dma_async_device_register(dma); - if (err) - goto err_self_test; - - ioat_set_tcp_copy_break(device); - - if (device->version != IOAT_VER_3_0) { - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, - WATCHDOG_DELAY); - } - - return device; + return 0; err_self_test: ioat_disable_interrupts(device); @@ -1632,7 +1552,142 @@ err_setup_interrupts: err_completion_pool: pci_pool_destroy(device->dma_pool); err_dma_pool: - return NULL; + return err; +} + +static int ioat_register(struct ioatdma_device *device) +{ + int err = dma_async_device_register(&device->common); + + if (err) { + ioat_disable_interrupts(device); + pci_pool_destroy(device->completion_pool); + pci_pool_destroy(device->dma_pool); + } + + return err; +} + +/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ +static void ioat1_intr_quirk(struct ioatdma_device *device) +{ + struct pci_dev *pdev = device->pdev; + u32 dmactrl; + + pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + if (pdev->msi_enabled) + dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; + else + dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; + pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); +} + +int ioat1_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + int err; + + device->intr_quirk = ioat1_intr_quirk; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(4096); + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat_dca_init(pdev, device->reg_base); + + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, WATCHDOG_DELAY); + + return err; +} + +int ioat2_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *chan; + struct ioat_dma_chan *ioat_chan; + int err; + + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(2048); + + list_for_each_entry(chan, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(chan); + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat2_dca_init(pdev, device->reg_base); + + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, WATCHDOG_DELAY); + + return err; +} + +int ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *chan; + struct ioat_dma_chan *ioat_chan; + int err; + u16 dev_id; + + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(chan, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(chan); + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return err; } void ioat_dma_remove(struct ioatdma_device *device) diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 6e27ddb1e98..1226e35f270 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -61,6 +61,8 @@ * @version: version of ioatdma device * @msix_entries: irq handlers * @idx: per channel data + * @dca: direct cache access context + * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) */ struct ioatdma_device { @@ -73,6 +75,8 @@ struct ioatdma_device { struct delayed_work work; struct msix_entry msix_entries[4]; struct ioat_dma_chan *idx[4]; + struct dca_provider *dca; + void (*intr_quirk)(struct ioatdma_device *device); }; /** @@ -136,25 +140,16 @@ struct ioat_desc_sw { struct dma_async_tx_descriptor txd; }; -static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) +static inline void ioat_set_tcp_copy_break(unsigned long copybreak) { #ifdef CONFIG_NET_DMA - switch (dev->version) { - case IOAT_VER_1_2: - sysctl_tcp_dma_copybreak = 4096; - break; - case IOAT_VER_2_0: - sysctl_tcp_dma_copybreak = 2048; - break; - case IOAT_VER_3_0: - sysctl_tcp_dma_copybreak = 262144; - break; - } + sysctl_tcp_dma_copybreak = copybreak; #endif } -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase); +int ioat1_dma_probe(struct ioatdma_device *dev, int dca); +int ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int ioat3_dma_probe(struct ioatdma_device *dev, int dca); void ioat_dma_remove(struct ioatdma_device *device); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 982e38fd177..55414d88ac1 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -60,14 +60,8 @@ static struct pci_device_id ioat_pci_tbl[] = { { 0, } }; -struct ioat_device { - struct pci_dev *pdev; - struct ioatdma_device *dma; - struct dca_provider *dca; -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id); +static int __devinit ioat_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id); static void __devexit ioat_remove(struct pci_dev *pdev); static int ioat_dca_enabled = 1; @@ -79,17 +73,28 @@ MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)" static struct pci_driver ioat_pci_driver = { .name = DRV_NAME, .id_table = ioat_pci_tbl, - .probe = ioat_probe, + .probe = ioat_pci_probe, .remove = __devexit_p(ioat_remove), }; -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { void __iomem * const *iomap; - void __iomem *iobase; struct device *dev = &pdev->dev; - struct ioat_device *device; + struct ioatdma_device *device; int err; err = pcim_enable_device(pdev); @@ -119,33 +124,24 @@ static int __devinit ioat_probe(struct pci_dev *pdev, if (!device) return -ENOMEM; - device->pdev = pdev; - pci_set_drvdata(pdev, device); - iobase = iomap[IOAT_MMIO_BAR]; - pci_set_master(pdev); - switch (readb(iobase + IOAT_VER_OFFSET)) { - case IOAT_VER_1_2: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat_dca_init(pdev, iobase); - break; - case IOAT_VER_2_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat2_dca_init(pdev, iobase); - break; - case IOAT_VER_3_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat3_dca_init(pdev, iobase); - break; - default: + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version == IOAT_VER_1_2) + err = ioat1_dma_probe(device, ioat_dca_enabled); + else if (device->version == IOAT_VER_2_0) + err = ioat2_dma_probe(device, ioat_dca_enabled); + else if (device->version >= IOAT_VER_3_0) + err = ioat3_dma_probe(device, ioat_dca_enabled); + else return -ENODEV; - } - if (!device->dma) { + if (err) { dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); return -ENODEV; } @@ -155,7 +151,10 @@ static int __devinit ioat_probe(struct pci_dev *pdev, static void __devexit ioat_remove(struct pci_dev *pdev) { - struct ioat_device *device = pci_get_drvdata(pdev); + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; dev_err(&pdev->dev, "Removing dma and dca services\n"); if (device->dca) { @@ -163,11 +162,7 @@ static void __devexit ioat_remove(struct pci_dev *pdev) free_dca_provider(device->dca); device->dca = NULL; } - - if (device->dma) { - ioat_dma_remove(device->dma); - device->dma = NULL; - } + ioat_dma_remove(device); } static int __init ioat_init_module(void) -- cgit v1.2.3 From 77867fff033ea549096c49d863c564ad7d8be36f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:44:04 -0700 Subject: ioat: fix type mismatch for ->dmacount ->dmacount tracks the sequence number of active descriptors. It is written to the DMACOUNT register to update the channel's view of pending descriptors in the chain. The register is 16-bits so ->dmacount should be unsigned and 16-bit as well. Also modify ->desccount to maintain alignment. This was never a problem in practice because we never compared dmacount values, but this is a bug waiting to happen. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 1226e35f270..9f0c853b6a7 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -102,8 +102,8 @@ struct ioat_dma_chan { struct delayed_work work; int pending; - int dmacount; - int desccount; + u16 dmacount; + u16 desccount; struct ioatdma_device *device; struct dma_chan common; -- cgit v1.2.3 From c7984f4e4e3af3bf8027d636283ea8658c7f80b9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:44:04 -0700 Subject: ioat: define descriptor control bit-field This cleans up a mess of and'ing and or'ing bit definitions, and allows simple assignments from the specified dma_ctrl_flags parameter. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 28 ++++++++++++++++------------ drivers/dma/ioat/hw.h | 38 ++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 32 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index b7508041c6d..4840d4805d8 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -472,9 +472,9 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) return -ENOMEM; } - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + hw->ctl_f.compl_write = 1; if (first->txd.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + hw->ctl_f.int_en = 1; if (first != new) { /* move callback into to last desc */ new->txd.callback = first->txd.callback; @@ -563,9 +563,9 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) return -ENOMEM; } - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + hw->ctl_f.compl_write = 1; if (first->txd.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + hw->ctl_f.int_en = 1; if (first != new) { /* move callback into to last desc */ new->txd.callback = first->txd.callback; @@ -878,7 +878,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) noop_desc = to_ioat_desc(ioat_chan->used_desc.next); /* set size to non-zero value (channel returns error when size is 0) */ noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; - noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; + noop_desc->hw->ctl = 0; + noop_desc->hw->ctl_f.null = 1; noop_desc->hw->src_addr = 0; noop_desc->hw->dst_addr = 0; @@ -1230,6 +1231,7 @@ ioat_dma_is_complete(struct dma_chan *chan, dma_cookie_t cookie, static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) { struct ioat_desc_sw *desc; + struct ioat_dma_descriptor *hw; spin_lock_bh(&ioat_chan->desc_lock); @@ -1242,17 +1244,19 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) return; } - desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL - | IOAT_DMA_DESCRIPTOR_CTL_INT_GN - | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; /* set size to non-zero value (channel returns error when size is 0) */ - desc->hw->size = NULL_DESC_BUFFER_SIZE; - desc->hw->src_addr = 0; - desc->hw->dst_addr = 0; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; async_tx_ack(&desc->txd); switch (ioat_chan->device->version) { case IOAT_VER_1_2: - desc->hw->next = 0; + hw->next = 0; list_add_tail(&desc->node, &ioat_chan->used_desc); writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 1438fa5c4d1..e13f3ed4776 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -40,7 +40,24 @@ struct ioat_dma_descriptor { uint32_t size; - uint32_t ctl; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int null:1; + unsigned int src_brk:1; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd2:13; + unsigned int op:8; + } ctl_f; + }; uint64_t src_addr; uint64_t dst_addr; uint64_t next; @@ -49,23 +66,4 @@ struct ioat_dma_descriptor { uint64_t user1; uint64_t user2; }; - -#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002 -#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004 -#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 -#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 -#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 -#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 -#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 -#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 -#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 -#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 - -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 - -#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 - #endif -- cgit v1.2.3 From a0587bcf3e64029a4da2a5666cad18df38db0d56 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:44:04 -0700 Subject: ioat1: move descriptor allocation from submit to prep The async_tx api assumes that after a successful ->prep a subsequent ->submit will not fail due to a lack of resources. This also fixes a bug in the allocation failure case. Previously the descriptors allocated prior to the allocation failure would not be returned to the free list. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 154 +++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 89 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 4840d4805d8..c4333be0760 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -420,95 +420,29 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *prev, *new; - struct ioat_dma_descriptor *hw; + struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); + struct ioat_desc_sw *first; + struct ioat_desc_sw *chain_tail; dma_cookie_t cookie; - LIST_HEAD(new_chain); - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->txd.flags; - new = first; spin_lock_bh(&ioat_chan->desc_lock); - prev = to_ioat_desc(ioat_chan->used_desc.prev); - prefetch(prev->hw); - do { - copy = min_t(size_t, len, ioat_chan->xfercap); - - async_tx_ack(&new->txd); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - hw->next = 0; - - /* chain together the physical address list for the HW */ - wmb(); - prev->hw->next = (u64) new->txd.phys; - - len -= copy; - dst += copy; - src += copy; - - list_add_tail(&new->node, &new_chain); - desc_count++; - prev = new; - } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); - - if (!new) { - dev_err(to_dev(ioat_chan), "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return -ENOMEM; - } - - hw->ctl_f.compl_write = 1; - if (first->txd.callback) { - hw->ctl_f.int_en = 1; - if (first != new) { - /* move callback into to last desc */ - new->txd.callback = first->txd.callback; - new->txd.callback_param - = first->txd.callback_param; - first->txd.callback = NULL; - first->txd.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->txd.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - /* cookie incr and addition to used_list must be atomic */ cookie = ioat_chan->common.cookie; cookie++; if (cookie < 0) cookie = 1; - ioat_chan->common.cookie = new->txd.cookie = cookie; + ioat_chan->common.cookie = tx->cookie = cookie; /* write address into NextDescriptor field of last desc in chain */ - to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = - first->txd.phys; - list_splice_tail(&new_chain, &ioat_chan->used_desc); - - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; + first = to_ioat_desc(tx->tx_list.next); + chain_tail = to_ioat_desc(ioat_chan->used_desc.prev); + /* make descriptor updates globally visible before chaining */ + wmb(); + chain_tail->hw->next = first->txd.phys; + list_splice_tail_init(&tx->tx_list, &ioat_chan->used_desc); + + ioat_chan->dmacount += desc->tx_cnt; + ioat_chan->pending += desc->tx_cnt; if (ioat_chan->pending >= ioat_pending_level) __ioat1_dma_memcpy_issue_pending(ioat_chan); spin_unlock_bh(&ioat_chan->desc_lock); @@ -937,24 +871,66 @@ ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *new; + struct ioat_desc_sw *desc; + size_t copy; + LIST_HEAD(chain); + dma_addr_t src = dma_src; + dma_addr_t dest = dma_dest; + size_t total_len = len; + struct ioat_dma_descriptor *hw = NULL; + int tx_cnt = 0; spin_lock_bh(&ioat_chan->desc_lock); - new = ioat_dma_get_next_descriptor(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); + desc = ioat_dma_get_next_descriptor(ioat_chan); + do { + if (!desc) + break; - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->txd.flags = flags; - return &new->txd; - } else { + tx_cnt++; + copy = min_t(size_t, len, ioat_chan->xfercap); + + hw = desc->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dest; + + list_add_tail(&desc->node, &chain); + + len -= copy; + dest += copy; + src += copy; + if (len) { + struct ioat_desc_sw *next; + + async_tx_ack(&desc->txd); + next = ioat_dma_get_next_descriptor(ioat_chan); + hw->next = next ? next->txd.phys : 0; + desc = next; + } else + hw->next = 0; + } while (len); + + if (!desc) { dev_err(to_dev(ioat_chan), "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + list_splice(&chain, &ioat_chan->free_desc); + spin_unlock_bh(&ioat_chan->desc_lock); return NULL; } + spin_unlock_bh(&ioat_chan->desc_lock); + + desc->txd.flags = flags; + desc->tx_cnt = tx_cnt; + desc->src = dma_src; + desc->dst = dma_dest; + desc->len = total_len; + list_splice(&chain, &desc->txd.tx_list); + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + + return &desc->txd; } static struct dma_async_tx_descriptor * -- cgit v1.2.3 From a6a39ca1badbeafc16941fcf2c1010c8c65c8ddc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:44:05 -0700 Subject: ioat: fix self test interrupts If a callback is to be attached to a descriptor the channel needs to know at ->prep time so it can set the interrupt enable bit. This is in preparation for moving descriptor ioat2 descriptor preparation from ->submit to ->prep. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index c4333be0760..cc5c557ddc8 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1313,7 +1313,8 @@ static int ioat_dma_self_test(struct ioatdma_device *device) dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); - flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE | + DMA_PREP_INTERRUPT; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, IOAT_TEST_SIZE, flags); if (!tx) { -- cgit v1.2.3 From dcbc853af6f0c056088e4df0794d9bf36184809e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:44:50 -0700 Subject: ioat: prepare the code for ioat[12]_dma_chan split Prepare the code for the conversion of the ioat2 linked-list-ring into a native ring buffer. After this conversion ioat2 channels will share less of the ioat1 infrastructure, but there will still be places where sharing is possible. struct ioat_chan_common is created to house the channel attributes that will remain common between ioat1 and ioat2 channels. For every routine that accesses both common and hardware specific fields the old unified 'ioat_chan' pointer is split into an 'ioat' and 'chan' pointer. Where 'chan' references common fields and 'ioat' the hardware/version specific. [ Impact: pure structure member movement/variable renames, no logic changes ] Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 711 +++++++++++++++++++++++++------------------------ drivers/dma/ioat/dma.h | 49 ++-- 2 files changed, 390 insertions(+), 370 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index cc5c557ddc8..2e81e0c76e6 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -47,15 +47,15 @@ static void ioat_dma_chan_reset_part2(struct work_struct *work); static void ioat_dma_chan_watchdog(struct work_struct *work); /* internal functions */ -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); +static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat); +static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat); static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat); static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat); -static inline struct ioat_dma_chan * +static inline struct ioat_chan_common * ioat_chan_by_index(struct ioatdma_device *device, int index) { return device->idx[index]; @@ -69,7 +69,7 @@ ioat_chan_by_index(struct ioatdma_device *device, int index) static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) { struct ioatdma_device *instance = data; - struct ioat_dma_chan *ioat_chan; + struct ioat_chan_common *chan; unsigned long attnstatus; int bit; u8 intrctrl; @@ -86,8 +86,8 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_bit(bit, &attnstatus, BITS_PER_LONG) { - ioat_chan = ioat_chan_by_index(instance, bit); - tasklet_schedule(&ioat_chan->cleanup_task); + chan = ioat_chan_by_index(instance, bit); + tasklet_schedule(&chan->cleanup_task); } writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); @@ -101,9 +101,9 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) */ static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) { - struct ioat_dma_chan *ioat_chan = data; + struct ioat_chan_common *chan = data; - tasklet_schedule(&ioat_chan->cleanup_task); + tasklet_schedule(&chan->cleanup_task); return IRQ_HANDLED; } @@ -119,7 +119,8 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) u8 xfercap_scale; u32 xfercap; int i; - struct ioat_dma_chan *ioat_chan; + struct ioat_chan_common *chan; + struct ioat_dma_chan *ioat; struct device *dev = &device->pdev->dev; struct dma_device *dma = &device->common; @@ -133,29 +134,30 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) dma->chancnt--; #endif for (i = 0; i < dma->chancnt; i++) { - ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); - if (!ioat_chan) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) { dma->chancnt = i; break; } - ioat_chan->device = device; - ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); - ioat_chan->xfercap = xfercap; - ioat_chan->desccount = 0; - INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); - spin_lock_init(&ioat_chan->cleanup_lock); - spin_lock_init(&ioat_chan->desc_lock); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); + chan = &ioat->base; + chan->device = device; + chan->reg_base = device->reg_base + (0x80 * (i + 1)); + ioat->xfercap = xfercap; + ioat->desccount = 0; + INIT_DELAYED_WORK(&chan->work, ioat_dma_chan_reset_part2); + spin_lock_init(&chan->cleanup_lock); + spin_lock_init(&ioat->desc_lock); + INIT_LIST_HEAD(&ioat->free_desc); + INIT_LIST_HEAD(&ioat->used_desc); /* This should be made common somewhere in dmaengine.c */ - ioat_chan->common.device = &device->common; - list_add_tail(&ioat_chan->common.device_node, &dma->channels); - device->idx[i] = ioat_chan; - tasklet_init(&ioat_chan->cleanup_task, + chan->common.device = &device->common; + list_add_tail(&chan->common.device_node, &dma->channels); + device->idx[i] = chan; + tasklet_init(&chan->cleanup_task, ioat_dma_cleanup_tasklet, - (unsigned long) ioat_chan); - tasklet_disable(&ioat_chan->cleanup_task); + (unsigned long) ioat); + tasklet_disable(&chan->cleanup_task); } return dma->chancnt; } @@ -166,39 +168,42 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) * @chan: DMA channel handle */ static inline void -__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat_chan) +__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) { - ioat_chan->pending = 0; - writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); + void __iomem *reg_base = ioat->base.reg_base; + + ioat->pending = 0; + writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); } static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_dma_chan *ioat = to_ioat_chan(chan); - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); + if (ioat->pending > 0) { + spin_lock_bh(&ioat->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); } } static inline void -__ioat2_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat_chan) +__ioat2_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) { - ioat_chan->pending = 0; - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + void __iomem *reg_base = ioat->base.reg_base; + + ioat->pending = 0; + writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); } static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_dma_chan *ioat = to_ioat_chan(chan); - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); + if (ioat->pending > 0) { + spin_lock_bh(&ioat->desc_lock); + __ioat2_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); } } @@ -208,84 +213,88 @@ static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) */ static void ioat_dma_chan_reset_part2(struct work_struct *work) { - struct ioat_dma_chan *ioat_chan = - container_of(work, struct ioat_dma_chan, work.work); + struct ioat_chan_common *chan; + struct ioat_dma_chan *ioat; struct ioat_desc_sw *desc; - spin_lock_bh(&ioat_chan->cleanup_lock); - spin_lock_bh(&ioat_chan->desc_lock); + chan = container_of(work, struct ioat_chan_common, work.work); + ioat = container_of(chan, struct ioat_dma_chan, base); + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->desc_lock); - ioat_chan->completion_virt->low = 0; - ioat_chan->completion_virt->high = 0; - ioat_chan->pending = 0; + chan->completion_virt->low = 0; + chan->completion_virt->high = 0; + ioat->pending = 0; /* * count the descriptors waiting, and be sure to do it * right for both the CB1 line and the CB2 ring */ - ioat_chan->dmacount = 0; - if (ioat_chan->used_desc.prev) { - desc = to_ioat_desc(ioat_chan->used_desc.prev); + ioat->dmacount = 0; + if (ioat->used_desc.prev) { + desc = to_ioat_desc(ioat->used_desc.prev); do { - ioat_chan->dmacount++; + ioat->dmacount++; desc = to_ioat_desc(desc->node.next); - } while (&desc->node != ioat_chan->used_desc.next); + } while (&desc->node != ioat->used_desc.next); } /* * write the new starting descriptor address * this puts channel engine into ARMED state */ - desc = to_ioat_desc(ioat_chan->used_desc.prev); - switch (ioat_chan->device->version) { + desc = to_ioat_desc(ioat->used_desc.prev); + switch (chan->device->version) { case IOAT_VER_1_2: writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); writel(((u64) desc->txd.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + writeb(IOAT_CHANCMD_START, chan->reg_base + + IOAT_CHANCMD_OFFSET(chan->device->version)); break; case IOAT_VER_2_0: writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); writel(((u64) desc->txd.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); /* tell the engine to go with what's left to be done */ - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + writew(ioat->dmacount, + chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); break; } - dev_err(to_dev(ioat_chan), + dev_err(to_dev(chan), "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + chan_num(chan), ioat->dmacount, ioat->desccount); - spin_unlock_bh(&ioat_chan->desc_lock); - spin_unlock_bh(&ioat_chan->cleanup_lock); + spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&chan->cleanup_lock); } /** * ioat_dma_reset_channel - restart a channel - * @ioat_chan: IOAT DMA channel handle + * @ioat: IOAT DMA channel handle */ -static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) +static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat) { + struct ioat_chan_common *chan = &ioat->base; + void __iomem *reg_base = chan->reg_base; u32 chansts, chanerr; - if (!ioat_chan->used_desc.prev) + if (!ioat->used_desc.prev) return; - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - chansts = (ioat_chan->completion_virt->low + chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); + chansts = (chan->completion_virt->low & IOAT_CHANSTS_DMA_TRANSFER_STATUS); if (chanerr) { - dev_err(to_dev(ioat_chan), + dev_err(to_dev(chan), "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(ioat_chan), chansts, chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + chan_num(chan), chansts, chanerr); + writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); } /* @@ -296,15 +305,14 @@ static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) * while we're waiting. */ - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->pending = INT_MIN; + spin_lock_bh(&ioat->desc_lock); + ioat->pending = INT_MIN; writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - spin_unlock_bh(&ioat_chan->desc_lock); + reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + spin_unlock_bh(&ioat->desc_lock); /* schedule the 2nd half instead of sleeping a long time */ - schedule_delayed_work(&ioat_chan->work, RESET_DELAY); + schedule_delayed_work(&chan->work, RESET_DELAY); } /** @@ -314,7 +322,8 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) { struct ioatdma_device *device = container_of(work, struct ioatdma_device, work.work); - struct ioat_dma_chan *ioat_chan; + struct ioat_dma_chan *ioat; + struct ioat_chan_common *chan; int i; union { @@ -327,23 +336,21 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) unsigned long compl_desc_addr_hw; for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_chan_by_index(device, i); + chan = ioat_chan_by_index(device, i); + ioat = container_of(chan, struct ioat_dma_chan, base); - if (ioat_chan->device->version == IOAT_VER_1_2 + if (chan->device->version == IOAT_VER_1_2 /* have we started processing anything yet */ - && ioat_chan->last_completion + && chan->last_completion /* have we completed any since last watchdog cycle? */ - && (ioat_chan->last_completion == - ioat_chan->watchdog_completion) + && (chan->last_completion == chan->watchdog_completion) /* has TCP stuck on one cookie since last watchdog? */ - && (ioat_chan->watchdog_tcp_cookie == - ioat_chan->watchdog_last_tcp_cookie) - && (ioat_chan->watchdog_tcp_cookie != - ioat_chan->completed_cookie) + && (chan->watchdog_tcp_cookie == chan->watchdog_last_tcp_cookie) + && (chan->watchdog_tcp_cookie != chan->completed_cookie) /* is there something in the chain to be processed? */ /* CB1 chain always has at least the last one processed */ - && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) - && ioat_chan->pending == 0) { + && (ioat->used_desc.prev != ioat->used_desc.next) + && ioat->pending == 0) { /* * check CHANSTS register for completed @@ -360,10 +367,10 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) * try resetting the channel */ - completion_hw.low = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); - completion_hw.high = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); + completion_hw.low = readl(chan->reg_base + + IOAT_CHANSTS_OFFSET_LOW(chan->device->version)); + completion_hw.high = readl(chan->reg_base + + IOAT_CHANSTS_OFFSET_HIGH(chan->device->version)); #if (BITS_PER_LONG == 64) compl_desc_addr_hw = completion_hw.full @@ -374,15 +381,15 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) #endif if ((compl_desc_addr_hw != 0) - && (compl_desc_addr_hw != ioat_chan->watchdog_completion) - && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { - ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; - ioat_chan->completion_virt->low = completion_hw.low; - ioat_chan->completion_virt->high = completion_hw.high; + && (compl_desc_addr_hw != chan->watchdog_completion) + && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) { + chan->last_compl_desc_addr_hw = compl_desc_addr_hw; + chan->completion_virt->low = completion_hw.low; + chan->completion_virt->high = completion_hw.high; } else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; + ioat_dma_reset_channel(ioat); + chan->watchdog_completion = 0; + chan->last_compl_desc_addr_hw = 0; } /* @@ -393,25 +400,22 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) * else * try resetting the channel */ - } else if (ioat_chan->device->version == IOAT_VER_2_0 - && ioat_chan->used_desc.prev - && ioat_chan->last_completion - && ioat_chan->last_completion == ioat_chan->watchdog_completion) { + } else if (chan->device->version == IOAT_VER_2_0 + && ioat->used_desc.prev + && chan->last_completion + && chan->last_completion == chan->watchdog_completion) { - if (ioat_chan->pending < ioat_pending_level) - ioat2_dma_memcpy_issue_pending(&ioat_chan->common); + if (ioat->pending < ioat_pending_level) + ioat2_dma_memcpy_issue_pending(&chan->common); else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; + ioat_dma_reset_channel(ioat); + chan->watchdog_completion = 0; } } else { - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_completion - = ioat_chan->last_completion; + chan->last_compl_desc_addr_hw = 0; + chan->watchdog_completion = chan->last_completion; } - - ioat_chan->watchdog_last_tcp_cookie = - ioat_chan->watchdog_tcp_cookie; + chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; } schedule_delayed_work(&device->work, WATCHDOG_DELAY); @@ -419,40 +423,42 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct dma_chan *c = tx->chan; + struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); struct ioat_desc_sw *first; struct ioat_desc_sw *chain_tail; dma_cookie_t cookie; - spin_lock_bh(&ioat_chan->desc_lock); + spin_lock_bh(&ioat->desc_lock); /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; + cookie = c->cookie; cookie++; if (cookie < 0) cookie = 1; - ioat_chan->common.cookie = tx->cookie = cookie; + c->cookie = cookie; + tx->cookie = cookie; /* write address into NextDescriptor field of last desc in chain */ first = to_ioat_desc(tx->tx_list.next); - chain_tail = to_ioat_desc(ioat_chan->used_desc.prev); + chain_tail = to_ioat_desc(ioat->used_desc.prev); /* make descriptor updates globally visible before chaining */ wmb(); chain_tail->hw->next = first->txd.phys; - list_splice_tail_init(&tx->tx_list, &ioat_chan->used_desc); + list_splice_tail_init(&tx->tx_list, &ioat->used_desc); - ioat_chan->dmacount += desc->tx_cnt; - ioat_chan->pending += desc->tx_cnt; - if (ioat_chan->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); + ioat->dmacount += desc->tx_cnt; + ioat->pending += desc->tx_cnt; + if (ioat->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); return cookie; } static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_dma_chan *ioat = to_ioat_chan(tx->chan); struct ioat_desc_sw *first = tx_to_ioat_desc(tx); struct ioat_desc_sw *new; struct ioat_dma_descriptor *hw; @@ -471,11 +477,11 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) new = first; /* - * ioat_chan->desc_lock is still in force in version 2 path + * ioat->desc_lock is still in force in version 2 path * it gets unlocked at end of this function */ do { - copy = min_t(size_t, len, ioat_chan->xfercap); + copy = min_t(size_t, len, ioat->xfercap); async_tx_ack(&new->txd); @@ -489,11 +495,11 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) dst += copy; src += copy; desc_count++; - } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); + } while (len && (new = ioat2_dma_get_next_descriptor(ioat))); if (!new) { - dev_err(to_dev(ioat_chan), "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); + dev_err(to_dev(&ioat->base), "tx submit failed\n"); + spin_unlock_bh(&ioat->desc_lock); return -ENOMEM; } @@ -521,35 +527,35 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) } /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; + cookie = ioat->base.common.cookie; cookie++; if (cookie < 0) cookie = 1; - ioat_chan->common.cookie = new->txd.cookie = cookie; + ioat->base.common.cookie = new->txd.cookie = cookie; - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= ioat_pending_level) - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); + ioat->dmacount += desc_count; + ioat->pending += desc_count; + if (ioat->pending >= ioat_pending_level) + __ioat2_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); return cookie; } /** * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat_chan: the channel supplying the memory pool for the descriptors + * @ioat: the channel supplying the memory pool for the descriptors * @flags: allocation flags */ static struct ioat_desc_sw * -ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat_chan, gfp_t flags) +ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) { struct ioat_dma_descriptor *desc; struct ioat_desc_sw *desc_sw; struct ioatdma_device *ioatdma_device; dma_addr_t phys; - ioatdma_device = to_ioatdma_device(ioat_chan->common.device); + ioatdma_device = ioat->base.device; desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); if (unlikely(!desc)) return NULL; @@ -561,8 +567,8 @@ ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat_chan, gfp_t flags) } memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc_sw->txd, &ioat_chan->common); - switch (ioat_chan->device->version) { + dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); + switch (ioatdma_device->version) { case IOAT_VER_1_2: desc_sw->txd.tx_submit = ioat1_tx_submit; break; @@ -585,26 +591,26 @@ MODULE_PARM_DESC(ioat_initial_desc_count, /** * ioat2_dma_massage_chan_desc - link the descriptors into a circle - * @ioat_chan: the channel to be massaged + * @ioat: the channel to be massaged */ -static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) +static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat) { struct ioat_desc_sw *desc, *_desc; /* setup used_desc */ - ioat_chan->used_desc.next = ioat_chan->free_desc.next; - ioat_chan->used_desc.prev = NULL; + ioat->used_desc.next = ioat->free_desc.next; + ioat->used_desc.prev = NULL; /* pull free_desc out of the circle so that every node is a hw * descriptor, but leave it pointing to the list */ - ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; - ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; + ioat->free_desc.prev->next = ioat->free_desc.next; + ioat->free_desc.next->prev = ioat->free_desc.prev; /* circle link the hw descriptors */ - desc = to_ioat_desc(ioat_chan->free_desc.next); + desc = to_ioat_desc(ioat->free_desc.next); desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; - list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { + list_for_each_entry_safe(desc, _desc, ioat->free_desc.next, node) { desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; } } @@ -613,9 +619,10 @@ static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors * @chan: the channel to be filled out */ -static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) +static int ioat_dma_alloc_chan_resources(struct dma_chan *c) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; struct ioat_desc_sw *desc; u16 chanctrl; u32 chanerr; @@ -623,89 +630,87 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) LIST_HEAD(tmp_list); /* have we already been set up? */ - if (!list_empty(&ioat_chan->free_desc)) - return ioat_chan->desccount; + if (!list_empty(&ioat->free_desc)) + return ioat->desccount; /* Setup register to interrupt and write completion status on error */ chanctrl = IOAT_CHANCTRL_ERR_INT_EN | IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN; - writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + writew(chanctrl, chan->reg_base + IOAT_CHANCTRL_OFFSET); - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); if (chanerr) { - dev_err(to_dev(ioat_chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); } /* Allocate descriptors */ for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); + desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); if (!desc) { - dev_err(to_dev(ioat_chan), - "Only %d initial descriptors\n", i); + dev_err(to_dev(chan), "Only %d initial descriptors\n", i); break; } list_add_tail(&desc->node, &tmp_list); } - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->desccount = i; - list_splice(&tmp_list, &ioat_chan->free_desc); - if (ioat_chan->device->version != IOAT_VER_1_2) - ioat2_dma_massage_chan_desc(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); + spin_lock_bh(&ioat->desc_lock); + ioat->desccount = i; + list_splice(&tmp_list, &ioat->free_desc); + if (chan->device->version != IOAT_VER_1_2) + ioat2_dma_massage_chan_desc(ioat); + spin_unlock_bh(&ioat->desc_lock); /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - ioat_chan->completion_virt = - pci_pool_alloc(ioat_chan->device->completion_pool, - GFP_KERNEL, - &ioat_chan->completion_addr); - memset(ioat_chan->completion_virt, 0, - sizeof(*ioat_chan->completion_virt)); - writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) ioat_chan->completion_addr) >> 32, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - tasklet_enable(&ioat_chan->cleanup_task); - ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ - return ioat_chan->desccount; + chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, + &chan->completion_addr); + memset(chan->completion_virt, 0, + sizeof(*chan->completion_virt)); + writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_addr) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + tasklet_enable(&chan->cleanup_task); + ioat_dma_start_null_desc(ioat); /* give chain to dma device */ + return ioat->desccount; } /** * ioat_dma_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned */ -static void ioat_dma_free_chan_resources(struct dma_chan *chan) +static void ioat_dma_free_chan_resources(struct dma_chan *c) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device); + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *ioatdma_device = chan->device; struct ioat_desc_sw *desc, *_desc; int in_use_descs = 0; /* Before freeing channel resources first check * if they have been previously allocated for this channel. */ - if (ioat_chan->desccount == 0) + if (ioat->desccount == 0) return; - tasklet_disable(&ioat_chan->cleanup_task); - ioat_dma_memcpy_cleanup(ioat_chan); + tasklet_disable(&chan->cleanup_task); + ioat_dma_memcpy_cleanup(ioat); /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. */ writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); mdelay(100); - spin_lock_bh(&ioat_chan->desc_lock); - switch (ioat_chan->device->version) { + spin_lock_bh(&ioat->desc_lock); + switch (chan->device->version) { case IOAT_VER_1_2: list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { + &ioat->used_desc, node) { in_use_descs++; list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, @@ -713,7 +718,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) kfree(desc); } list_for_each_entry_safe(desc, _desc, - &ioat_chan->free_desc, node) { + &ioat->free_desc, node) { list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, desc->txd.phys); @@ -723,62 +728,61 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) case IOAT_VER_2_0: case IOAT_VER_3_0: list_for_each_entry_safe(desc, _desc, - ioat_chan->free_desc.next, node) { + ioat->free_desc.next, node) { list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, desc->txd.phys); kfree(desc); } - desc = to_ioat_desc(ioat_chan->free_desc.next); + desc = to_ioat_desc(ioat->free_desc.next); pci_pool_free(ioatdma_device->dma_pool, desc->hw, desc->txd.phys); kfree(desc); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); + INIT_LIST_HEAD(&ioat->free_desc); + INIT_LIST_HEAD(&ioat->used_desc); break; } - spin_unlock_bh(&ioat_chan->desc_lock); + spin_unlock_bh(&ioat->desc_lock); pci_pool_free(ioatdma_device->completion_pool, - ioat_chan->completion_virt, - ioat_chan->completion_addr); + chan->completion_virt, + chan->completion_addr); /* one is ok since we left it on there on purpose */ if (in_use_descs > 1) - dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n", + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", in_use_descs - 1); - ioat_chan->last_completion = ioat_chan->completion_addr = 0; - ioat_chan->pending = 0; - ioat_chan->dmacount = 0; - ioat_chan->desccount = 0; - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_tcp_cookie = - ioat_chan->watchdog_last_tcp_cookie = 0; + chan->last_completion = chan->completion_addr = 0; + chan->watchdog_completion = 0; + chan->last_compl_desc_addr_hw = 0; + chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0; + ioat->pending = 0; + ioat->dmacount = 0; + ioat->desccount = 0; } /** - * ioat_dma_get_next_descriptor - return the next available descriptor - * @ioat_chan: IOAT DMA channel handle + * ioat1_dma_get_next_descriptor - return the next available descriptor + * @ioat: IOAT DMA channel handle * * Gets the next descriptor from the chain, and must be called with the * channel's desc_lock held. Allocates more descriptors if the channel * has run out. */ static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) { struct ioat_desc_sw *new; - if (!list_empty(&ioat_chan->free_desc)) { - new = to_ioat_desc(ioat_chan->free_desc.next); + if (!list_empty(&ioat->free_desc)) { + new = to_ioat_desc(ioat->free_desc.next); list_del(&new->node); } else { /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); if (!new) { - dev_err(to_dev(ioat_chan), "alloc failed\n"); + dev_err(to_dev(&ioat->base), "alloc failed\n"); return NULL; } } @@ -788,7 +792,7 @@ ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) } static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat) { struct ioat_desc_sw *new; @@ -801,15 +805,15 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) * linking in a new set of descriptors, since the device * has probably already read the pointer to it */ - if (ioat_chan->used_desc.prev && - ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { + if (ioat->used_desc.prev && + ioat->used_desc.next == ioat->used_desc.prev->prev) { struct ioat_desc_sw *desc; struct ioat_desc_sw *noop_desc; int i; /* set up the noop descriptor */ - noop_desc = to_ioat_desc(ioat_chan->used_desc.next); + noop_desc = to_ioat_desc(ioat->used_desc.next); /* set size to non-zero value (channel returns error when size is 0) */ noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; noop_desc->hw->ctl = 0; @@ -817,60 +821,61 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) noop_desc->hw->src_addr = 0; noop_desc->hw->dst_addr = 0; - ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; - ioat_chan->pending++; - ioat_chan->dmacount++; + ioat->used_desc.next = ioat->used_desc.next->next; + ioat->pending++; + ioat->dmacount++; /* try to get a few more descriptors */ for (i = 16; i; i--) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + desc = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); if (!desc) { - dev_err(to_dev(ioat_chan), "alloc failed\n"); + dev_err(to_dev(&ioat->base), + "alloc failed\n"); break; } - list_add_tail(&desc->node, ioat_chan->used_desc.next); + list_add_tail(&desc->node, ioat->used_desc.next); desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; to_ioat_desc(desc->node.prev)->hw->next = desc->txd.phys; - ioat_chan->desccount++; + ioat->desccount++; } - ioat_chan->used_desc.next = noop_desc->node.next; + ioat->used_desc.next = noop_desc->node.next; } - new = to_ioat_desc(ioat_chan->used_desc.next); + new = to_ioat_desc(ioat->used_desc.next); prefetch(new); - ioat_chan->used_desc.next = new->node.next; + ioat->used_desc.next = new->node.next; - if (ioat_chan->used_desc.prev == NULL) - ioat_chan->used_desc.prev = &new->node; + if (ioat->used_desc.prev == NULL) + ioat->used_desc.prev = &new->node; prefetch(new->hw); return new; } static struct ioat_desc_sw * -ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat) { - if (!ioat_chan) + if (!ioat) return NULL; - switch (ioat_chan->device->version) { + switch (ioat->base.device->version) { case IOAT_VER_1_2: - return ioat1_dma_get_next_descriptor(ioat_chan); + return ioat1_dma_get_next_descriptor(ioat); case IOAT_VER_2_0: case IOAT_VER_3_0: - return ioat2_dma_get_next_descriptor(ioat_chan); + return ioat2_dma_get_next_descriptor(ioat); } return NULL; } static struct dma_async_tx_descriptor * -ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, +ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_desc_sw *desc; size_t copy; LIST_HEAD(chain); @@ -880,14 +885,14 @@ ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, struct ioat_dma_descriptor *hw = NULL; int tx_cnt = 0; - spin_lock_bh(&ioat_chan->desc_lock); - desc = ioat_dma_get_next_descriptor(ioat_chan); + spin_lock_bh(&ioat->desc_lock); + desc = ioat_dma_get_next_descriptor(ioat); do { if (!desc) break; tx_cnt++; - copy = min_t(size_t, len, ioat_chan->xfercap); + copy = min_t(size_t, len, ioat->xfercap); hw = desc->hw; hw->size = copy; @@ -904,7 +909,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, struct ioat_desc_sw *next; async_tx_ack(&desc->txd); - next = ioat_dma_get_next_descriptor(ioat_chan); + next = ioat_dma_get_next_descriptor(ioat); hw->next = next ? next->txd.phys : 0; desc = next; } else @@ -912,14 +917,16 @@ ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, } while (len); if (!desc) { - dev_err(to_dev(ioat_chan), + struct ioat_chan_common *chan = &ioat->base; + + dev_err(to_dev(chan), "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - list_splice(&chain, &ioat_chan->free_desc); - spin_unlock_bh(&ioat_chan->desc_lock); + chan_num(chan), ioat->dmacount, ioat->desccount); + list_splice(&chain, &ioat->free_desc); + spin_unlock_bh(&ioat->desc_lock); return NULL; } - spin_unlock_bh(&ioat_chan->desc_lock); + spin_unlock_bh(&ioat->desc_lock); desc->txd.flags = flags; desc->tx_cnt = tx_cnt; @@ -934,17 +941,17 @@ ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, } static struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, +ioat2_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_desc_sw *new; - spin_lock_bh(&ioat_chan->desc_lock); - new = ioat2_dma_get_next_descriptor(ioat_chan); + spin_lock_bh(&ioat->desc_lock); + new = ioat2_dma_get_next_descriptor(ioat); /* - * leave ioat_chan->desc_lock set in ioat 2 path + * leave ioat->desc_lock set in ioat 2 path * it will get unlocked at end of tx_submit */ @@ -955,10 +962,12 @@ ioat2_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, new->txd.flags = flags; return &new->txd; } else { - spin_unlock_bh(&ioat_chan->desc_lock); - dev_err(to_dev(ioat_chan), + struct ioat_chan_common *chan = &ioat->base; + + spin_unlock_bh(&ioat->desc_lock); + dev_err(to_dev(chan), "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + chan_num(chan), ioat->dmacount, ioat->desccount); return NULL; } } @@ -968,20 +977,20 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) struct ioat_dma_chan *chan = (void *)data; ioat_dma_memcpy_cleanup(chan); writew(IOAT_CHANCTRL_INT_DISABLE, - chan->reg_base + IOAT_CHANCTRL_OFFSET); + chan->base.reg_base + IOAT_CHANCTRL_OFFSET); } static void -ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) +ioat_dma_unmap(struct ioat_chan_common *chan, struct ioat_desc_sw *desc) { if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_single(chan->device->pdev, pci_unmap_addr(desc, dst), pci_unmap_len(desc, len), PCI_DMA_FROMDEVICE); else - pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_page(chan->device->pdev, pci_unmap_addr(desc, dst), pci_unmap_len(desc, len), PCI_DMA_FROMDEVICE); @@ -989,12 +998,12 @@ ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_single(chan->device->pdev, pci_unmap_addr(desc, src), pci_unmap_len(desc, len), PCI_DMA_TODEVICE); else - pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_page(chan->device->pdev, pci_unmap_addr(desc, src), pci_unmap_len(desc, len), PCI_DMA_TODEVICE); @@ -1005,8 +1014,9 @@ ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) * ioat_dma_memcpy_cleanup - cleanup up finished descriptors * @chan: ioat channel to be cleaned up */ -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) +static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) { + struct ioat_chan_common *chan = &ioat->base; unsigned long phys_complete; struct ioat_desc_sw *desc, *_desc; dma_cookie_t cookie = 0; @@ -1014,9 +1024,9 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) struct ioat_desc_sw *latest_desc; struct dma_async_tx_descriptor *tx; - prefetch(ioat_chan->completion_virt); + prefetch(chan->completion_virt); - if (!spin_trylock_bh(&ioat_chan->cleanup_lock)) + if (!spin_trylock_bh(&chan->cleanup_lock)) return; /* The completion writeback can happen at any time, @@ -1026,49 +1036,47 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) #if (BITS_PER_LONG == 64) phys_complete = - ioat_chan->completion_virt->full + chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; #else - phys_complete = - ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; + phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; #endif - if ((ioat_chan->completion_virt->full + if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { - dev_err(to_dev(ioat_chan), "Channel halted, chanerr = %x\n", - readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); + dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", + readl(chan->reg_base + IOAT_CHANERR_OFFSET)); /* TODO do something to salvage the situation */ } - if (phys_complete == ioat_chan->last_completion) { - spin_unlock_bh(&ioat_chan->cleanup_lock); + if (phys_complete == chan->last_completion) { + spin_unlock_bh(&chan->cleanup_lock); /* * perhaps we're stuck so hard that the watchdog can't go off? * try to catch it after 2 seconds */ - if (ioat_chan->device->version != IOAT_VER_3_0) { + if (chan->device->version != IOAT_VER_3_0) { if (time_after(jiffies, - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); - ioat_chan->last_completion_time = jiffies; + chan->last_completion_time + HZ*WATCHDOG_DELAY)) { + ioat_dma_chan_watchdog(&(chan->device->work.work)); + chan->last_completion_time = jiffies; } } return; } - ioat_chan->last_completion_time = jiffies; + chan->last_completion_time = jiffies; cookie = 0; - if (!spin_trylock_bh(&ioat_chan->desc_lock)) { - spin_unlock_bh(&ioat_chan->cleanup_lock); + if (!spin_trylock_bh(&ioat->desc_lock)) { + spin_unlock_bh(&chan->cleanup_lock); return; } - switch (ioat_chan->device->version) { + switch (chan->device->version) { case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { + list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { tx = &desc->txd; /* * Incoming DMA requests may use multiple descriptors, @@ -1077,7 +1085,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) */ if (tx->cookie) { cookie = tx->cookie; - ioat_dma_unmap(ioat_chan, desc); + ioat_dma_unmap(chan, desc); if (tx->callback) { tx->callback(tx->callback_param); tx->callback = NULL; @@ -1091,7 +1099,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) */ if (async_tx_test_ack(tx)) { list_move_tail(&desc->node, - &ioat_chan->free_desc); + &ioat->free_desc); } else tx->cookie = 0; } else { @@ -1110,11 +1118,11 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) case IOAT_VER_2_0: case IOAT_VER_3_0: /* has some other thread has already cleaned up? */ - if (ioat_chan->used_desc.prev == NULL) + if (ioat->used_desc.prev == NULL) break; /* work backwards to find latest finished desc */ - desc = to_ioat_desc(ioat_chan->used_desc.next); + desc = to_ioat_desc(ioat->used_desc.next); tx = &desc->txd; latest_desc = NULL; do { @@ -1125,18 +1133,18 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) latest_desc = desc; break; } - } while (&desc->node != ioat_chan->used_desc.prev); + } while (&desc->node != ioat->used_desc.prev); if (latest_desc != NULL) { /* work forwards to clear finished descriptors */ - for (desc = to_ioat_desc(ioat_chan->used_desc.prev); + for (desc = to_ioat_desc(ioat->used_desc.prev); &desc->node != latest_desc->node.next && - &desc->node != ioat_chan->used_desc.next; + &desc->node != ioat->used_desc.next; desc = to_ioat_desc(desc->node.next)) { if (tx->cookie) { cookie = tx->cookie; tx->cookie = 0; - ioat_dma_unmap(ioat_chan, desc); + ioat_dma_unmap(chan, desc); if (tx->callback) { tx->callback(tx->callback_param); tx->callback = NULL; @@ -1145,21 +1153,21 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) } /* move used.prev up beyond those that are finished */ - if (&desc->node == ioat_chan->used_desc.next) - ioat_chan->used_desc.prev = NULL; + if (&desc->node == ioat->used_desc.next) + ioat->used_desc.prev = NULL; else - ioat_chan->used_desc.prev = &desc->node; + ioat->used_desc.prev = &desc->node; } break; } - spin_unlock_bh(&ioat_chan->desc_lock); + spin_unlock_bh(&ioat->desc_lock); - ioat_chan->last_completion = phys_complete; + chan->last_completion = phys_complete; if (cookie != 0) - ioat_chan->completed_cookie = cookie; + chan->completed_cookie = cookie; - spin_unlock_bh(&ioat_chan->cleanup_lock); + spin_unlock_bh(&chan->cleanup_lock); } /** @@ -1170,17 +1178,18 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) * @used: if not %NULL, updated with last used transaction */ static enum dma_status -ioat_dma_is_complete(struct dma_chan *chan, dma_cookie_t cookie, +ioat_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used) { - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; dma_cookie_t last_used; dma_cookie_t last_complete; enum dma_status ret; - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; - ioat_chan->watchdog_tcp_cookie = cookie; + last_used = c->cookie; + last_complete = chan->completed_cookie; + chan->watchdog_tcp_cookie = cookie; if (done) *done = last_complete; @@ -1191,10 +1200,10 @@ ioat_dma_is_complete(struct dma_chan *chan, dma_cookie_t cookie, if (ret == DMA_SUCCESS) return ret; - ioat_dma_memcpy_cleanup(ioat_chan); + ioat_dma_memcpy_cleanup(ioat); - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; + last_used = c->cookie; + last_complete = chan->completed_cookie; if (done) *done = last_complete; @@ -1204,19 +1213,20 @@ ioat_dma_is_complete(struct dma_chan *chan, dma_cookie_t cookie, return dma_async_is_complete(cookie, last_complete, last_used); } -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) +static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat) { + struct ioat_chan_common *chan = &ioat->base; struct ioat_desc_sw *desc; struct ioat_dma_descriptor *hw; - spin_lock_bh(&ioat_chan->desc_lock); + spin_lock_bh(&ioat->desc_lock); - desc = ioat_dma_get_next_descriptor(ioat_chan); + desc = ioat_dma_get_next_descriptor(ioat); if (!desc) { - dev_err(to_dev(ioat_chan), + dev_err(to_dev(chan), "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); + spin_unlock_bh(&ioat->desc_lock); return; } @@ -1230,31 +1240,31 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) hw->src_addr = 0; hw->dst_addr = 0; async_tx_ack(&desc->txd); - switch (ioat_chan->device->version) { + switch (chan->device->version) { case IOAT_VER_1_2: hw->next = 0; - list_add_tail(&desc->node, &ioat_chan->used_desc); + list_add_tail(&desc->node, &ioat->used_desc); writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); writel(((u64) desc->txd.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + writeb(IOAT_CHANCMD_START, chan->reg_base + + IOAT_CHANCMD_OFFSET(chan->device->version)); break; case IOAT_VER_2_0: case IOAT_VER_3_0: writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); writel(((u64) desc->txd.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - ioat_chan->dmacount++; - __ioat2_dma_memcpy_issue_pending(ioat_chan); + ioat->dmacount++; + __ioat2_dma_memcpy_issue_pending(ioat); break; } - spin_unlock_bh(&ioat_chan->desc_lock); + spin_unlock_bh(&ioat->desc_lock); } /* @@ -1371,7 +1381,7 @@ MODULE_PARM_DESC(ioat_interrupt_style, */ static int ioat_dma_setup_interrupts(struct ioatdma_device *device) { - struct ioat_dma_chan *ioat_chan; + struct ioat_chan_common *chan; struct pci_dev *pdev = device->pdev; struct device *dev = &pdev->dev; struct msix_entry *msix; @@ -1404,15 +1414,15 @@ msix: for (i = 0; i < msixcnt; i++) { msix = &device->msix_entries[i]; - ioat_chan = ioat_chan_by_index(device, i); + chan = ioat_chan_by_index(device, i); err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt_msix, 0, - "ioat-msix", ioat_chan); + "ioat-msix", chan); if (err) { for (j = 0; j < i; j++) { msix = &device->msix_entries[j]; - ioat_chan = ioat_chan_by_index(device, j); - devm_free_irq(dev, msix->vector, ioat_chan); + chan = ioat_chan_by_index(device, j); + devm_free_irq(dev, msix->vector, chan); } goto msix_single_vector; } @@ -1594,8 +1604,8 @@ int ioat2_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; struct dma_device *dma; - struct dma_chan *chan; - struct ioat_dma_chan *ioat_chan; + struct dma_chan *c; + struct ioat_chan_common *chan; int err; dma = &device->common; @@ -1607,10 +1617,10 @@ int ioat2_dma_probe(struct ioatdma_device *device, int dca) return err; ioat_set_tcp_copy_break(2048); - list_for_each_entry(chan, &dma->channels, device_node) { - ioat_chan = to_ioat_chan(chan); + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + chan->reg_base + IOAT_DCACTRL_OFFSET); } err = ioat_register(device); @@ -1629,8 +1639,8 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; struct dma_device *dma; - struct dma_chan *chan; - struct ioat_dma_chan *ioat_chan; + struct dma_chan *c; + struct ioat_chan_common *chan; int err; u16 dev_id; @@ -1656,10 +1666,10 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) return err; ioat_set_tcp_copy_break(262144); - list_for_each_entry(chan, &dma->channels, device_node) { - ioat_chan = to_ioat_chan(chan); + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); writel(IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + chan->reg_base + IOAT_DCACTRL_OFFSET); } err = ioat_register(device); @@ -1673,8 +1683,6 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) void ioat_dma_remove(struct ioatdma_device *device) { - struct dma_chan *chan, *_chan; - struct ioat_dma_chan *ioat_chan; struct dma_device *dma = &device->common; if (device->version != IOAT_VER_3_0) @@ -1687,9 +1695,6 @@ void ioat_dma_remove(struct ioatdma_device *device) pci_pool_destroy(device->dma_pool); pci_pool_destroy(device->completion_pool); - list_for_each_entry_safe(chan, _chan, &dma->channels, device_node) { - ioat_chan = to_ioat_chan(chan); - list_del(&chan->device_node); - } + INIT_LIST_HEAD(&dma->channels); } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 9f0c853b6a7..5b31db73ad8 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -35,7 +35,6 @@ #define IOAT_DMA_DCA_ANY_CPU ~0 #define IOAT_WATCHDOG_PERIOD (2 * HZ) -#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) @@ -74,37 +73,24 @@ struct ioatdma_device { u8 version; struct delayed_work work; struct msix_entry msix_entries[4]; - struct ioat_dma_chan *idx[4]; + struct ioat_chan_common *idx[4]; struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); }; -/** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - +struct ioat_chan_common { void __iomem *reg_base; - dma_cookie_t completed_cookie; unsigned long last_completion; unsigned long last_completion_time; - size_t xfercap; /* XFERCAP register value expanded out */ - spinlock_t cleanup_lock; - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; + dma_cookie_t completed_cookie; unsigned long watchdog_completion; int watchdog_tcp_cookie; u32 watchdog_last_tcp_cookie; struct delayed_work work; - int pending; - u16 dmacount; - u16 desccount; - struct ioatdma_device *device; struct dma_chan common; @@ -120,6 +106,35 @@ struct ioat_dma_chan { struct tasklet_struct cleanup_task; }; +/** + * struct ioat_dma_chan - internal representation of a DMA channel + */ +struct ioat_dma_chan { + struct ioat_chan_common base; + + size_t xfercap; /* XFERCAP register value expanded out */ + + spinlock_t desc_lock; + struct list_head free_desc; + struct list_head used_desc; + + int pending; + u16 dmacount; + u16 desccount; +}; + +static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) +{ + return container_of(c, struct ioat_chan_common, common); +} + +static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat_dma_chan, base); +} + /* wrapper around hardware descriptor format + additional software fields */ /** -- cgit v1.2.3 From 5cbafa65b92ee4f5b8ba915cddf94b91f186b989 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 26 Aug 2009 13:01:44 -0700 Subject: ioat2,3: convert to a true ring buffer Replace the current linked list munged into a ring with a native ring buffer implementation. The benefit of this approach is reduced overhead as many parameters can be derived from ring position with simple pointer comparisons and descriptor allocation/freeing becomes just a manipulation of head/tail pointers. It requires a contiguous allocation for the software descriptor information. Since this arrangement is significantly different from the ioat1 chain, move ioat2,3 support into its own file and header. Common routines are exported from driver/dma/ioat/dma.[ch]. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/Makefile | 2 +- drivers/dma/ioat/dma.c | 874 ++++++++++------------------------------------ drivers/dma/ioat/dma.h | 50 ++- drivers/dma/ioat/dma_v2.c | 750 +++++++++++++++++++++++++++++++++++++++ drivers/dma/ioat/dma_v2.h | 131 +++++++ drivers/dma/ioat/pci.c | 1 + 6 files changed, 1122 insertions(+), 686 deletions(-) create mode 100644 drivers/dma/ioat/dma_v2.c create mode 100644 drivers/dma/ioat/dma_v2.h (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile index 2ce3d3a4270..205a639e84d 100644 --- a/drivers/dma/ioat/Makefile +++ b/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-objs := pci.o dma.o dca.o +ioatdma-objs := pci.o dma.o dma_v2.o dca.o diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 2e81e0c76e6..64b4d75059a 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -38,28 +38,14 @@ #include "registers.h" #include "hw.h" -static int ioat_pending_level = 4; +int ioat_pending_level = 4; module_param(ioat_pending_level, int, 0644); MODULE_PARM_DESC(ioat_pending_level, "high-water mark for pushing ioat descriptors (default: 4)"); -static void ioat_dma_chan_reset_part2(struct work_struct *work); -static void ioat_dma_chan_watchdog(struct work_struct *work); - /* internal functions */ -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat); -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat); - -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat); -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat); - -static inline struct ioat_chan_common * -ioat_chan_by_index(struct ioatdma_device *device, int index) -{ - return device->idx[index]; -} +static void ioat1_cleanup(struct ioat_dma_chan *ioat); +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); /** * ioat_dma_do_interrupt - handler used for single vector interrupt mode @@ -108,18 +94,38 @@ static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) return IRQ_HANDLED; } -static void ioat_dma_cleanup_tasklet(unsigned long data); +static void ioat1_cleanup_tasklet(unsigned long data); + +/* common channel initialization */ +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + work_func_t work_fn, void (*tasklet)(unsigned long), + unsigned long tasklet_data) +{ + struct dma_device *dma = &device->common; + + chan->device = device; + chan->reg_base = device->reg_base + (0x80 * (idx + 1)); + INIT_DELAYED_WORK(&chan->work, work_fn); + spin_lock_init(&chan->cleanup_lock); + chan->common.device = dma; + list_add_tail(&chan->common.device_node, &dma->channels); + device->idx[idx] = chan; + tasklet_init(&chan->cleanup_task, tasklet, tasklet_data); + tasklet_disable(&chan->cleanup_task); +} + +static void ioat1_reset_part2(struct work_struct *work); /** - * ioat_dma_enumerate_channels - find and initialize the device's channels + * ioat1_dma_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated */ -static int ioat_dma_enumerate_channels(struct ioatdma_device *device) +static int ioat1_enumerate_channels(struct ioatdma_device *device) { u8 xfercap_scale; u32 xfercap; int i; - struct ioat_chan_common *chan; struct ioat_dma_chan *ioat; struct device *dev = &device->pdev->dev; struct dma_device *dma = &device->common; @@ -135,31 +141,20 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) #endif for (i = 0; i < dma->chancnt; i++) { ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) { - dma->chancnt = i; + if (!ioat) break; - } - chan = &ioat->base; - chan->device = device; - chan->reg_base = device->reg_base + (0x80 * (i + 1)); + ioat_init_channel(device, &ioat->base, i, + ioat1_reset_part2, + ioat1_cleanup_tasklet, + (unsigned long) ioat); ioat->xfercap = xfercap; - ioat->desccount = 0; - INIT_DELAYED_WORK(&chan->work, ioat_dma_chan_reset_part2); - spin_lock_init(&chan->cleanup_lock); spin_lock_init(&ioat->desc_lock); INIT_LIST_HEAD(&ioat->free_desc); INIT_LIST_HEAD(&ioat->used_desc); - /* This should be made common somewhere in dmaengine.c */ - chan->common.device = &device->common; - list_add_tail(&chan->common.device_node, &dma->channels); - device->idx[i] = chan; - tasklet_init(&chan->cleanup_task, - ioat_dma_cleanup_tasklet, - (unsigned long) ioat); - tasklet_disable(&chan->cleanup_task); } - return dma->chancnt; + dma->chancnt = i; + return i; } /** @@ -187,35 +182,16 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) } } -static inline void -__ioat2_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) -{ - void __iomem *reg_base = ioat->base.reg_base; - - ioat->pending = 0; - writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); -} - -static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(chan); - - if (ioat->pending > 0) { - spin_lock_bh(&ioat->desc_lock); - __ioat2_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); - } -} - - /** - * ioat_dma_chan_reset_part2 - reinit the channel after a reset + * ioat1_reset_part2 - reinit the channel after a reset */ -static void ioat_dma_chan_reset_part2(struct work_struct *work) +static void ioat1_reset_part2(struct work_struct *work) { struct ioat_chan_common *chan; struct ioat_dma_chan *ioat; struct ioat_desc_sw *desc; + int dmacount; + bool start_null = false; chan = container_of(work, struct ioat_chan_common, work.work); ioat = container_of(chan, struct ioat_dma_chan, base); @@ -226,26 +202,22 @@ static void ioat_dma_chan_reset_part2(struct work_struct *work) chan->completion_virt->high = 0; ioat->pending = 0; - /* - * count the descriptors waiting, and be sure to do it - * right for both the CB1 line and the CB2 ring - */ - ioat->dmacount = 0; + /* count the descriptors waiting */ + dmacount = 0; if (ioat->used_desc.prev) { desc = to_ioat_desc(ioat->used_desc.prev); do { - ioat->dmacount++; + dmacount++; desc = to_ioat_desc(desc->node.next); } while (&desc->node != ioat->used_desc.next); } - /* - * write the new starting descriptor address - * this puts channel engine into ARMED state - */ - desc = to_ioat_desc(ioat->used_desc.prev); - switch (chan->device->version) { - case IOAT_VER_1_2: + if (dmacount) { + /* + * write the new starting descriptor address + * this puts channel engine into ARMED state + */ + desc = to_ioat_desc(ioat->used_desc.prev); writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); writel(((u64) desc->txd.phys) >> 32, @@ -253,32 +225,24 @@ static void ioat_dma_chan_reset_part2(struct work_struct *work) writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - break; - case IOAT_VER_2_0: - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - /* tell the engine to go with what's left to be done */ - writew(ioat->dmacount, - chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + } else + start_null = true; + spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&chan->cleanup_lock); - break; - } dev_err(to_dev(chan), "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, ioat->desccount); + chan_num(chan), dmacount, ioat->desccount); - spin_unlock_bh(&ioat->desc_lock); - spin_unlock_bh(&chan->cleanup_lock); + if (start_null) + ioat1_dma_start_null_desc(ioat); } /** - * ioat_dma_reset_channel - restart a channel + * ioat1_reset_channel - restart a channel * @ioat: IOAT DMA channel handle */ -static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat) +static void ioat1_reset_channel(struct ioat_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; void __iomem *reg_base = chan->reg_base; @@ -316,9 +280,9 @@ static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat) } /** - * ioat_dma_chan_watchdog - watch for stuck channels + * ioat1_chan_watchdog - watch for stuck channels */ -static void ioat_dma_chan_watchdog(struct work_struct *work) +static void ioat1_chan_watchdog(struct work_struct *work) { struct ioatdma_device *device = container_of(work, struct ioatdma_device, work.work); @@ -339,16 +303,15 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) chan = ioat_chan_by_index(device, i); ioat = container_of(chan, struct ioat_dma_chan, base); - if (chan->device->version == IOAT_VER_1_2 - /* have we started processing anything yet */ - && chan->last_completion - /* have we completed any since last watchdog cycle? */ + if (/* have we started processing anything yet */ + chan->last_completion + /* have we completed any since last watchdog cycle? */ && (chan->last_completion == chan->watchdog_completion) - /* has TCP stuck on one cookie since last watchdog? */ + /* has TCP stuck on one cookie since last watchdog? */ && (chan->watchdog_tcp_cookie == chan->watchdog_last_tcp_cookie) && (chan->watchdog_tcp_cookie != chan->completed_cookie) - /* is there something in the chain to be processed? */ - /* CB1 chain always has at least the last one processed */ + /* is there something in the chain to be processed? */ + /* CB1 chain always has at least the last one processed */ && (ioat->used_desc.prev != ioat->used_desc.next) && ioat->pending == 0) { @@ -387,34 +350,15 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) chan->completion_virt->low = completion_hw.low; chan->completion_virt->high = completion_hw.high; } else { - ioat_dma_reset_channel(ioat); + ioat1_reset_channel(ioat); chan->watchdog_completion = 0; chan->last_compl_desc_addr_hw = 0; } - - /* - * for version 2.0 if there are descriptors yet to be processed - * and the last completed hasn't changed since the last watchdog - * if they haven't hit the pending level - * issue the pending to push them through - * else - * try resetting the channel - */ - } else if (chan->device->version == IOAT_VER_2_0 - && ioat->used_desc.prev - && chan->last_completion - && chan->last_completion == chan->watchdog_completion) { - - if (ioat->pending < ioat_pending_level) - ioat2_dma_memcpy_issue_pending(&chan->common); - else { - ioat_dma_reset_channel(ioat); - chan->watchdog_completion = 0; - } } else { chan->last_compl_desc_addr_hw = 0; chan->watchdog_completion = chan->last_completion; } + chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; } @@ -447,7 +391,6 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) chain_tail->hw->next = first->txd.phys; list_splice_tail_init(&tx->tx_list, &ioat->used_desc); - ioat->dmacount += desc->tx_cnt; ioat->pending += desc->tx_cnt; if (ioat->pending >= ioat_pending_level) __ioat1_dma_memcpy_issue_pending(ioat); @@ -456,92 +399,6 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) return cookie; } -static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->txd.flags; - new = first; - - /* - * ioat->desc_lock is still in force in version 2 path - * it gets unlocked at end of this function - */ - do { - copy = min_t(size_t, len, ioat->xfercap); - - async_tx_ack(&new->txd); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - desc_count++; - } while (len && (new = ioat2_dma_get_next_descriptor(ioat))); - - if (!new) { - dev_err(to_dev(&ioat->base), "tx submit failed\n"); - spin_unlock_bh(&ioat->desc_lock); - return -ENOMEM; - } - - hw->ctl_f.compl_write = 1; - if (first->txd.callback) { - hw->ctl_f.int_en = 1; - if (first != new) { - /* move callback into to last desc */ - new->txd.callback = first->txd.callback; - new->txd.callback_param - = first->txd.callback_param; - first->txd.callback = NULL; - first->txd.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->txd.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat->base.common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat->base.common.cookie = new->txd.cookie = cookie; - - ioat->dmacount += desc_count; - ioat->pending += desc_count; - if (ioat->pending >= ioat_pending_level) - __ioat2_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); - - return cookie; -} - /** * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair * @ioat: the channel supplying the memory pool for the descriptors @@ -567,17 +424,9 @@ ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) } memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); - switch (ioatdma_device->version) { - case IOAT_VER_1_2: - desc_sw->txd.tx_submit = ioat1_tx_submit; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - desc_sw->txd.tx_submit = ioat2_tx_submit; - break; - } + dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); + desc_sw->txd.tx_submit = ioat1_tx_submit; desc_sw->hw = desc; desc_sw->txd.phys = phys; @@ -587,39 +436,12 @@ ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) static int ioat_initial_desc_count = 256; module_param(ioat_initial_desc_count, int, 0644); MODULE_PARM_DESC(ioat_initial_desc_count, - "initial descriptors per channel (default: 256)"); - -/** - * ioat2_dma_massage_chan_desc - link the descriptors into a circle - * @ioat: the channel to be massaged - */ -static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat) -{ - struct ioat_desc_sw *desc, *_desc; - - /* setup used_desc */ - ioat->used_desc.next = ioat->free_desc.next; - ioat->used_desc.prev = NULL; - - /* pull free_desc out of the circle so that every node is a hw - * descriptor, but leave it pointing to the list - */ - ioat->free_desc.prev->next = ioat->free_desc.next; - ioat->free_desc.next->prev = ioat->free_desc.prev; - - /* circle link the hw descriptors */ - desc = to_ioat_desc(ioat->free_desc.next); - desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; - list_for_each_entry_safe(desc, _desc, ioat->free_desc.next, node) { - desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; - } -} - + "ioat1: initial descriptors per channel (default: 256)"); /** - * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors + * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors * @chan: the channel to be filled out */ -static int ioat_dma_alloc_chan_resources(struct dma_chan *c) +static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) { struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_chan_common *chan = &ioat->base; @@ -657,8 +479,6 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *c) spin_lock_bh(&ioat->desc_lock); ioat->desccount = i; list_splice(&tmp_list, &ioat->free_desc); - if (chan->device->version != IOAT_VER_1_2) - ioat2_dma_massage_chan_desc(ioat); spin_unlock_bh(&ioat->desc_lock); /* allocate a completion writeback area */ @@ -674,15 +494,15 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *c) chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); tasklet_enable(&chan->cleanup_task); - ioat_dma_start_null_desc(ioat); /* give chain to dma device */ + ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ return ioat->desccount; } /** - * ioat_dma_free_chan_resources - release all the descriptors + * ioat1_dma_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned */ -static void ioat_dma_free_chan_resources(struct dma_chan *c) +static void ioat1_dma_free_chan_resources(struct dma_chan *c) { struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_chan_common *chan = &ioat->base; @@ -697,7 +517,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *c) return; tasklet_disable(&chan->cleanup_task); - ioat_dma_memcpy_cleanup(ioat); + ioat1_cleanup(ioat); /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. @@ -707,40 +527,20 @@ static void ioat_dma_free_chan_resources(struct dma_chan *c) mdelay(100); spin_lock_bh(&ioat->desc_lock); - switch (chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat->used_desc, node) { - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - list_for_each_entry_safe(desc, _desc, - ioat->free_desc.next, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - desc = to_ioat_desc(ioat->free_desc.next); + list_for_each_entry_safe(desc, _desc, + &ioat->used_desc, node) { + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat->free_desc, node) { + list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, desc->txd.phys); kfree(desc); - INIT_LIST_HEAD(&ioat->free_desc); - INIT_LIST_HEAD(&ioat->used_desc); - break; } spin_unlock_bh(&ioat->desc_lock); @@ -758,7 +558,6 @@ static void ioat_dma_free_chan_resources(struct dma_chan *c) chan->last_compl_desc_addr_hw = 0; chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0; ioat->pending = 0; - ioat->dmacount = 0; ioat->desccount = 0; } @@ -791,86 +590,6 @@ ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) return new; } -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat) -{ - struct ioat_desc_sw *new; - - /* - * used.prev points to where to start processing - * used.next points to next free descriptor - * if used.prev == NULL, there are none waiting to be processed - * if used.next == used.prev.prev, there is only one free descriptor, - * and we need to use it to as a noop descriptor before - * linking in a new set of descriptors, since the device - * has probably already read the pointer to it - */ - if (ioat->used_desc.prev && - ioat->used_desc.next == ioat->used_desc.prev->prev) { - - struct ioat_desc_sw *desc; - struct ioat_desc_sw *noop_desc; - int i; - - /* set up the noop descriptor */ - noop_desc = to_ioat_desc(ioat->used_desc.next); - /* set size to non-zero value (channel returns error when size is 0) */ - noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; - noop_desc->hw->ctl = 0; - noop_desc->hw->ctl_f.null = 1; - noop_desc->hw->src_addr = 0; - noop_desc->hw->dst_addr = 0; - - ioat->used_desc.next = ioat->used_desc.next->next; - ioat->pending++; - ioat->dmacount++; - - /* try to get a few more descriptors */ - for (i = 16; i; i--) { - desc = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); - if (!desc) { - dev_err(to_dev(&ioat->base), - "alloc failed\n"); - break; - } - list_add_tail(&desc->node, ioat->used_desc.next); - - desc->hw->next - = to_ioat_desc(desc->node.next)->txd.phys; - to_ioat_desc(desc->node.prev)->hw->next - = desc->txd.phys; - ioat->desccount++; - } - - ioat->used_desc.next = noop_desc->node.next; - } - new = to_ioat_desc(ioat->used_desc.next); - prefetch(new); - ioat->used_desc.next = new->node.next; - - if (ioat->used_desc.prev == NULL) - ioat->used_desc.prev = &new->node; - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw * -ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat) -{ - if (!ioat) - return NULL; - - switch (ioat->base.device->version) { - case IOAT_VER_1_2: - return ioat1_dma_get_next_descriptor(ioat); - case IOAT_VER_2_0: - case IOAT_VER_3_0: - return ioat2_dma_get_next_descriptor(ioat); - } - return NULL; -} - static struct dma_async_tx_descriptor * ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) @@ -886,7 +605,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, int tx_cnt = 0; spin_lock_bh(&ioat->desc_lock); - desc = ioat_dma_get_next_descriptor(ioat); + desc = ioat1_dma_get_next_descriptor(ioat); do { if (!desc) break; @@ -909,7 +628,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, struct ioat_desc_sw *next; async_tx_ack(&desc->txd); - next = ioat_dma_get_next_descriptor(ioat); + next = ioat1_dma_get_next_descriptor(ioat); hw->next = next ? next->txd.phys : 0; desc = next; } else @@ -920,8 +639,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, struct ioat_chan_common *chan = &ioat->base; dev_err(to_dev(chan), - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, ioat->desccount); + "chan%d - get_next_desc failed\n", chan_num(chan)); list_splice(&chain, &ioat->free_desc); spin_unlock_bh(&ioat->desc_lock); return NULL; @@ -940,94 +658,43 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, return &desc->txd; } -static struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat->desc_lock); - new = ioat2_dma_get_next_descriptor(ioat); - - /* - * leave ioat->desc_lock set in ioat 2 path - * it will get unlocked at end of tx_submit - */ - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->txd.flags = flags; - return &new->txd; - } else { - struct ioat_chan_common *chan = &ioat->base; - - spin_unlock_bh(&ioat->desc_lock); - dev_err(to_dev(chan), - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, ioat->desccount); - return NULL; - } -} - -static void ioat_dma_cleanup_tasklet(unsigned long data) +static void ioat1_cleanup_tasklet(unsigned long data) { struct ioat_dma_chan *chan = (void *)data; - ioat_dma_memcpy_cleanup(chan); + ioat1_cleanup(chan); writew(IOAT_CHANCTRL_INT_DISABLE, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); } -static void -ioat_dma_unmap(struct ioat_chan_common *chan, struct ioat_desc_sw *desc) +static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, + int direction, enum dma_ctrl_flags flags, bool dst) { - if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - pci_unmap_single(chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - else - pci_unmap_page(chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - } - - if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - pci_unmap_single(chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - else - pci_unmap_page(chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - } + if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || + (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) + pci_unmap_single(pdev, addr, len, direction); + else + pci_unmap_page(pdev, addr, len, direction); } -/** - * ioat_dma_memcpy_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - */ -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) + +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw) { - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; - struct ioat_desc_sw *desc, *_desc; - dma_cookie_t cookie = 0; - unsigned long desc_phys; - struct ioat_desc_sw *latest_desc; - struct dma_async_tx_descriptor *tx; + struct pci_dev *pdev = chan->device->pdev; + size_t offset = len - hw->size; - prefetch(chan->completion_virt); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) + ioat_unmap(pdev, hw->src_addr - offset, len, + PCI_DMA_TODEVICE, flags, 0); +} + +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) +{ + unsigned long phys_complete; /* The completion writeback can happen at any time, so reads by the driver need to be atomic operations @@ -1051,18 +718,37 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) /* TODO do something to salvage the situation */ } + return phys_complete; +} + +/** + * ioat1_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + struct ioat_desc_sw *desc, *_desc; + dma_cookie_t cookie = 0; + struct dma_async_tx_descriptor *tx; + + prefetch(chan->completion_virt); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + phys_complete = ioat_get_current_completion(chan); if (phys_complete == chan->last_completion) { spin_unlock_bh(&chan->cleanup_lock); /* * perhaps we're stuck so hard that the watchdog can't go off? * try to catch it after 2 seconds */ - if (chan->device->version != IOAT_VER_3_0) { - if (time_after(jiffies, - chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat_dma_chan_watchdog(&(chan->device->work.work)); - chan->last_completion_time = jiffies; - } + if (time_after(jiffies, + chan->last_completion_time + HZ*WATCHDOG_DELAY)) { + ioat1_chan_watchdog(&(chan->device->work.work)); + chan->last_completion_time = jiffies; } return; } @@ -1074,91 +760,42 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) return; } - switch (chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { - tx = &desc->txd; - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. - */ - if (tx->cookie) { - cookie = tx->cookie; - ioat_dma_unmap(chan, desc); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } + list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { + tx = &desc->txd; + /* + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. + */ + if (tx->cookie) { + cookie = tx->cookie; + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } + } - if (tx->phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(tx)) { - list_move_tail(&desc->node, - &ioat->free_desc); - } else - tx->cookie = 0; - } else { - /* - * last used desc. Do not remove, so we can - * append from it, but don't look at it next - * time, either - */ + if (tx->phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (async_tx_test_ack(tx)) + list_move_tail(&desc->node, &ioat->free_desc); + else tx->cookie = 0; + } else { + /* + * last used desc. Do not remove, so we can + * append from it, but don't look at it next + * time, either + */ + tx->cookie = 0; - /* TODO check status bits? */ - break; - } - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - /* has some other thread has already cleaned up? */ - if (ioat->used_desc.prev == NULL) + /* TODO check status bits? */ break; - - /* work backwards to find latest finished desc */ - desc = to_ioat_desc(ioat->used_desc.next); - tx = &desc->txd; - latest_desc = NULL; - do { - desc = to_ioat_desc(desc->node.prev); - desc_phys = (unsigned long)tx->phys - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; - if (desc_phys == phys_complete) { - latest_desc = desc; - break; - } - } while (&desc->node != ioat->used_desc.prev); - - if (latest_desc != NULL) { - /* work forwards to clear finished descriptors */ - for (desc = to_ioat_desc(ioat->used_desc.prev); - &desc->node != latest_desc->node.next && - &desc->node != ioat->used_desc.next; - desc = to_ioat_desc(desc->node.next)) { - if (tx->cookie) { - cookie = tx->cookie; - tx->cookie = 0; - ioat_dma_unmap(chan, desc); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - } - - /* move used.prev up beyond those that are finished */ - if (&desc->node == ioat->used_desc.next) - ioat->used_desc.prev = NULL; - else - ioat->used_desc.prev = &desc->node; } - break; } spin_unlock_bh(&ioat->desc_lock); @@ -1170,50 +807,21 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) spin_unlock_bh(&chan->cleanup_lock); } -/** - * ioat_dma_is_complete - poll the status of a IOAT DMA transaction - * @chan: IOAT DMA channel handle - * @cookie: DMA transaction identifier - * @done: if not %NULL, updated with last completed transaction - * @used: if not %NULL, updated with last used transaction - */ static enum dma_status -ioat_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) { struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - dma_cookie_t last_used; - dma_cookie_t last_complete; - enum dma_status ret; - - last_used = c->cookie; - last_complete = chan->completed_cookie; - chan->watchdog_tcp_cookie = cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - ret = dma_async_is_complete(cookie, last_complete, last_used); - if (ret == DMA_SUCCESS) - return ret; - ioat_dma_memcpy_cleanup(ioat); + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; - last_used = c->cookie; - last_complete = chan->completed_cookie; + ioat1_cleanup(ioat); - if (done) - *done = last_complete; - if (used) - *used = last_used; - - return dma_async_is_complete(cookie, last_complete, last_used); + return ioat_is_complete(c, cookie, done, used); } -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat) +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; struct ioat_desc_sw *desc; @@ -1221,7 +829,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat) spin_lock_bh(&ioat->desc_lock); - desc = ioat_dma_get_next_descriptor(ioat); + desc = ioat1_dma_get_next_descriptor(ioat); if (!desc) { dev_err(to_dev(chan), @@ -1240,30 +848,16 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat) hw->src_addr = 0; hw->dst_addr = 0; async_tx_ack(&desc->txd); - switch (chan->device->version) { - case IOAT_VER_1_2: - hw->next = 0; - list_add_tail(&desc->node, &ioat->used_desc); + hw->next = 0; + list_add_tail(&desc->node, &ioat->used_desc); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, chan->reg_base - + IOAT_CHANCMD_OFFSET(chan->device->version)); - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->txd.phys) >> 32, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - ioat->dmacount++; - __ioat2_dma_memcpy_issue_pending(ioat); - break; - } + writeb(IOAT_CHANCMD_START, chan->reg_base + + IOAT_CHANCMD_OFFSET(chan->device->version)); spin_unlock_bh(&ioat->desc_lock); } @@ -1484,7 +1078,7 @@ static void ioat_disable_interrupts(struct ioatdma_device *device) writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); } -static int ioat_probe(struct ioatdma_device *device) +int ioat_probe(struct ioatdma_device *device) { int err = -ENODEV; struct dma_device *dma = &device->common; @@ -1503,17 +1097,15 @@ static int ioat_probe(struct ioatdma_device *device) device->completion_pool = pci_pool_create("completion_pool", pdev, sizeof(u64), SMP_CACHE_BYTES, SMP_CACHE_BYTES); + if (!device->completion_pool) { err = -ENOMEM; goto err_completion_pool; } - ioat_dma_enumerate_channels(device); + device->enumerate_channels(device); dma_cap_set(DMA_MEMCPY, dma->cap_mask); - dma->device_alloc_chan_resources = ioat_dma_alloc_chan_resources; - dma->device_free_chan_resources = ioat_dma_free_chan_resources; - dma->device_is_tx_complete = ioat_dma_is_complete; dma->dev = &pdev->dev; dev_err(dev, "Intel(R) I/OAT DMA Engine found," @@ -1546,7 +1138,7 @@ err_dma_pool: return err; } -static int ioat_register(struct ioatdma_device *device) +int ioat_register(struct ioatdma_device *device) { int err = dma_async_device_register(&device->common); @@ -1580,9 +1172,13 @@ int ioat1_dma_probe(struct ioatdma_device *device, int dca) int err; device->intr_quirk = ioat1_intr_quirk; + device->enumerate_channels = ioat1_enumerate_channels; dma = &device->common; dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; + dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; + dma->device_free_chan_resources = ioat1_dma_free_chan_resources; + dma->device_is_tx_complete = ioat1_dma_is_complete; err = ioat_probe(device); if (err) @@ -1594,93 +1190,12 @@ int ioat1_dma_probe(struct ioatdma_device *device, int dca) if (dca) device->dca = ioat_dca_init(pdev, device->reg_base); - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, WATCHDOG_DELAY); - - return err; -} - -int ioat2_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; - - err = ioat_probe(device); - if (err) - return err; - ioat_set_tcp_copy_break(2048); - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - if (dca) - device->dca = ioat2_dca_init(pdev, device->reg_base); - - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + INIT_DELAYED_WORK(&device->work, ioat1_chan_watchdog); schedule_delayed_work(&device->work, WATCHDOG_DELAY); return err; } -int ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - u16 dev_id; - - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; - - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - - err = ioat_probe(device); - if (err) - return err; - ioat_set_tcp_copy_break(262144); - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return err; -} - void ioat_dma_remove(struct ioatdma_device *device) { struct dma_device *dma = &device->common; @@ -1697,4 +1212,3 @@ void ioat_dma_remove(struct ioatdma_device *device) INIT_LIST_HEAD(&dma->channels); } - diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 5b31db73ad8..84065dfa4d4 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -62,6 +62,7 @@ * @idx: per channel data * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) + * @enumerate_channels: hw version specific channel enumeration */ struct ioatdma_device { @@ -76,6 +77,7 @@ struct ioatdma_device { struct ioat_chan_common *idx[4]; struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); + int (*enumerate_channels)(struct ioatdma_device *device); }; struct ioat_chan_common { @@ -106,6 +108,7 @@ struct ioat_chan_common { struct tasklet_struct cleanup_task; }; + /** * struct ioat_dma_chan - internal representation of a DMA channel */ @@ -119,7 +122,6 @@ struct ioat_dma_chan { struct list_head used_desc; int pending; - u16 dmacount; u16 desccount; }; @@ -135,6 +137,33 @@ static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) return container_of(chan, struct ioat_dma_chan, base); } +/** + * ioat_is_complete - poll the status of an ioat transaction + * @c: channel handle + * @cookie: transaction identifier + * @done: if set, updated with last completed transaction + * @used: if set, updated with last used transaction + */ +static inline enum dma_status +ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat_chan_common *chan = to_chan_common(c); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + last_used = c->cookie; + last_complete = chan->completed_cookie; + chan->watchdog_tcp_cookie = cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + /* wrapper around hardware descriptor format + additional software fields */ /** @@ -162,11 +191,22 @@ static inline void ioat_set_tcp_copy_break(unsigned long copybreak) #endif } +static inline struct ioat_chan_common * +ioat_chan_by_index(struct ioatdma_device *device, int index) +{ + return device->idx[index]; +} + +int ioat_probe(struct ioatdma_device *device); +int ioat_register(struct ioatdma_device *device); int ioat1_dma_probe(struct ioatdma_device *dev, int dca); -int ioat2_dma_probe(struct ioatdma_device *dev, int dca); -int ioat3_dma_probe(struct ioatdma_device *dev, int dca); void ioat_dma_remove(struct ioatdma_device *device); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + work_func_t work_fn, void (*tasklet)(unsigned long), + unsigned long tasklet_data); +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw); #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c new file mode 100644 index 00000000000..49ba1c73d95 --- /dev/null +++ b/drivers/dma/ioat/dma_v2.c @@ -0,0 +1,750 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine (versions >= 2), which + * does asynchronous data movement and checksumming operations. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +static int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); + +static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) +{ + void * __iomem reg_base = ioat->base.reg_base; + + ioat->pending = 0; + ioat->dmacount += ioat2_ring_pending(ioat); + ioat->issued = ioat->head; + /* make descriptor updates globally visible before notifying channel */ + wmb(); + writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + +} + +static void ioat2_issue_pending(struct dma_chan *chan) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); + + spin_lock_bh(&ioat->ring_lock); + if (ioat->pending == 1) + __ioat2_issue_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +/** + * ioat2_update_pending - log pending descriptors + * @ioat: ioat2+ channel + * + * set pending to '1' unless pending is already set to '2', pending == 2 + * indicates that submission is temporarily blocked due to an in-flight + * reset. If we are already above the ioat_pending_level threshold then + * just issue pending. + * + * called with ring_lock held + */ +static void ioat2_update_pending(struct ioat2_dma_chan *ioat) +{ + if (unlikely(ioat->pending == 2)) + return; + else if (ioat2_ring_pending(ioat) > ioat_pending_level) + __ioat2_issue_pending(ioat); + else + ioat->pending = 1; +} + +static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + void __iomem *reg_base = ioat->base.reg_base; + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + int idx; + + if (ioat2_ring_space(ioat) < 1) { + dev_err(to_dev(&ioat->base), + "Unable to start null desc - ring full\n"); + return; + } + + idx = ioat2_desc_alloc(ioat, 1); + desc = ioat2_get_ring_ent(ioat, idx); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, + reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->txd.phys) >> 32, + reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + __ioat2_issue_pending(ioat); +} + +static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + spin_lock_bh(&ioat->ring_lock); + __ioat2_start_null_desc(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +static void ioat2_cleanup(struct ioat2_dma_chan *ioat); + +/** + * ioat2_reset_part2 - reinit the channel after a reset + */ +static void ioat2_reset_part2(struct work_struct *work) +{ + struct ioat_chan_common *chan; + struct ioat2_dma_chan *ioat; + + chan = container_of(work, struct ioat_chan_common, work.work); + ioat = container_of(chan, struct ioat2_dma_chan, base); + + /* ensure that ->tail points to the stalled descriptor + * (ioat->pending is set to 2 at this point so no new + * descriptors will be issued while we perform this cleanup) + */ + ioat2_cleanup(ioat); + + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->ring_lock); + + /* set the tail to be re-issued */ + ioat->issued = ioat->tail; + ioat->dmacount = 0; + + if (ioat2_ring_pending(ioat)) { + struct ioat_ring_ent *desc; + + desc = ioat2_get_ring_ent(ioat, ioat->tail); + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->txd.phys) >> 32, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + __ioat2_issue_pending(ioat); + } else + __ioat2_start_null_desc(ioat); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); + + dev_info(to_dev(chan), + "chan%d reset - %d descs waiting, %d total desc\n", + chan_num(chan), ioat->dmacount, 1 << ioat->alloc_order); +} + +/** + * ioat2_reset_channel - restart a channel + * @ioat: IOAT DMA channel handle + */ +static void ioat2_reset_channel(struct ioat2_dma_chan *ioat) +{ + u32 chansts, chanerr; + struct ioat_chan_common *chan = &ioat->base; + u16 active; + + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + spin_unlock_bh(&ioat->ring_lock); + if (!active) + return; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + chansts = (chan->completion_virt->low + & IOAT_CHANSTS_DMA_TRANSFER_STATUS); + if (chanerr) { + dev_err(to_dev(chan), + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(chan), chansts, chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + spin_lock_bh(&ioat->ring_lock); + ioat->pending = 2; + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + + IOAT_CHANCMD_OFFSET(chan->device->version)); + spin_unlock_bh(&ioat->ring_lock); + schedule_delayed_work(&chan->work, RESET_DELAY); +} + +/** + * ioat2_chan_watchdog - watch for stuck channels + */ +static void ioat2_chan_watchdog(struct work_struct *work) +{ + struct ioatdma_device *device = + container_of(work, struct ioatdma_device, work.work); + struct ioat2_dma_chan *ioat; + struct ioat_chan_common *chan; + u16 active; + int i; + + for (i = 0; i < device->common.chancnt; i++) { + chan = ioat_chan_by_index(device, i); + ioat = container_of(chan, struct ioat2_dma_chan, base); + + /* + * for version 2.0 if there are descriptors yet to be processed + * and the last completed hasn't changed since the last watchdog + * if they haven't hit the pending level + * issue the pending to push them through + * else + * try resetting the channel + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + spin_unlock_bh(&ioat->ring_lock); + + if (active && + chan->last_completion && + chan->last_completion == chan->watchdog_completion) { + + if (ioat->pending == 1) + ioat2_issue_pending(&chan->common); + else { + ioat2_reset_channel(ioat); + chan->watchdog_completion = 0; + } + } else { + chan->last_compl_desc_addr_hw = 0; + chan->watchdog_completion = chan->last_completion; + } + chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; + } + schedule_delayed_work(&device->work, WATCHDOG_DELAY); +} + +/** + * ioat2_cleanup - clean finished descriptors (advance tail pointer) + * @chan: ioat channel to be cleaned up + */ +static void ioat2_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + struct dma_async_tx_descriptor *tx; + + prefetch(chan->completion_virt); + + spin_lock_bh(&chan->cleanup_lock); + phys_complete = ioat_get_current_completion(chan); + if (phys_complete == chan->last_completion) { + spin_unlock_bh(&chan->cleanup_lock); + /* + * perhaps we're stuck so hard that the watchdog can't go off? + * try to catch it after WATCHDOG_DELAY seconds + */ + if (chan->device->version < IOAT_VER_3_0) { + unsigned long tmo; + + tmo = chan->last_completion_time + HZ*WATCHDOG_DELAY; + if (time_after(jiffies, tmo)) { + ioat2_chan_watchdog(&(chan->device->work.work)); + chan->last_completion_time = jiffies; + } + } + return; + } + chan->last_completion_time = jiffies; + + spin_lock_bh(&ioat->ring_lock); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + tx = &desc->txd; + if (tx->cookie) { + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + chan->completed_cookie = tx->cookie; + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + spin_unlock_bh(&ioat->ring_lock); + + chan->last_completion = phys_complete; + + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat2_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat2_cleanup(ioat); + writew(IOAT_CHANCTRL_INT_DISABLE, + ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +/** + * ioat2_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +static int ioat2_enumerate_channels(struct ioatdma_device *device) +{ + struct ioat2_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + if (xfercap_log == 0) + return 0; + + /* FIXME which i/oat version is i7300? */ +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i, + ioat2_reset_part2, + ioat2_cleanup_tasklet, + (unsigned long) ioat); + ioat->xfercap_log = xfercap_log; + spin_lock_init(&ioat->ring_lock); + } + dma->chancnt = i; + return i; +} + +static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + dma_cookie_t cookie = c->cookie; + + cookie++; + if (cookie < 0) + cookie = 1; + tx->cookie = cookie; + c->cookie = cookie; + ioat2_update_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); + + return cookie; +} + +static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) +{ + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *dma; + dma_addr_t phys; + + dma = to_ioatdma_device(chan->device); + hw = pci_pool_alloc(dma->dma_pool, GFP_KERNEL, &phys); + if (!hw) + return NULL; + memset(hw, 0, sizeof(*hw)); + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + pci_pool_free(dma->dma_pool, hw, phys); + return NULL; + } + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat2_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} + +static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *dma; + + dma = to_ioatdma_device(chan->device); + pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); + kfree(desc); +} + +/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring + * @chan: channel to be initialized + */ +static int ioat2_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent **ring; + u16 chanctrl; + u32 chanerr; + int descs; + int i; + + /* have we already been set up? */ + if (ioat->ring) + return 1 << ioat->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + chanctrl = IOAT_CHANCTRL_ERR_INT_EN | IOAT_CHANCTRL_ANY_ERR_ABORT_EN | + IOAT_CHANCTRL_ERR_COMPLETION_EN; + writew(chanctrl, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, + &chan->completion_addr); + if (!chan->completion_virt) + return -ENOMEM; + + memset(chan->completion_virt, 0, + sizeof(*chan->completion_virt)); + writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_addr) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + ioat->alloc_order = ioat_get_alloc_order(); + descs = 1 << ioat->alloc_order; + + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), GFP_KERNEL); + if (!ring) + return -ENOMEM; + for (i = 0; i < descs; i++) { + ring[i] = ioat2_alloc_ring_ent(c); + if (!ring[i]) { + while (i--) + ioat2_free_ring_ent(ring[i], c); + kfree(ring); + return -ENOMEM; + } + } + + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + spin_lock_bh(&ioat->ring_lock); + ioat->ring = ring; + ioat->head = 0; + ioat->issued = 0; + ioat->tail = 0; + ioat->pending = 0; + spin_unlock_bh(&ioat->ring_lock); + + tasklet_enable(&chan->cleanup_task); + ioat2_start_null_desc(ioat); + + return descs; +} + +/** + * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops + * @idx: gets starting descriptor index on successful allocation + * @ioat: ioat2,3 channel (ring) to operate on + * @num_descs: allocation length + */ +static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +{ + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&ioat->ring_lock); + if (unlikely(ioat2_ring_space(ioat) < num_descs)) { + if (printk_ratelimit()) + dev_dbg(to_dev(chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, + ioat->issued); + spin_unlock_bh(&ioat->ring_lock); + + /* do direct reclaim in the allocation failure case */ + ioat2_cleanup(ioat); + + return -ENOMEM; + } + + dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + + *idx = ioat2_desc_alloc(ioat, num_descs); + return 0; /* with ioat->ring_lock held */ +} + +static struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs; + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + for (i = 0; i < num_descs; i++) { + size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + } + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + +/** + * ioat2_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat2_free_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *ioatdma_device = chan->device; + struct ioat_ring_ent *desc; + const u16 total_descs = 1 << ioat->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat->ring) + return; + + tasklet_disable(&chan->cleanup_task); + ioat2_cleanup(ioat); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat->ring_lock); + descs = ioat2_ring_space(ioat); + for (i = 0; i < descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->head + i); + ioat2_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + ioat2_free_ring_ent(desc, c); + } + + kfree(ioat->ring); + ioat->ring = NULL; + ioat->alloc_order = 0; + pci_pool_free(ioatdma_device->completion_pool, + chan->completion_virt, + chan->completion_addr); + spin_unlock_bh(&ioat->ring_lock); + + chan->last_completion = 0; + chan->completion_addr = 0; + ioat->pending = 0; + ioat->dmacount = 0; + chan->watchdog_completion = 0; + chan->last_compl_desc_addr_hw = 0; + chan->watchdog_tcp_cookie = 0; + chan->watchdog_last_tcp_cookie = 0; +} + +static enum dma_status +ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat2_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +int ioat2_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + + device->enumerate_channels = ioat2_enumerate_channels; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat2_is_complete; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(2048); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat2_dca_init(pdev, device->reg_base); + + INIT_DELAYED_WORK(&device->work, ioat2_chan_watchdog); + schedule_delayed_work(&device->work, WATCHDOG_DELAY); + + return err; +} + +int ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + u16 dev_id; + + device->enumerate_channels = ioat2_enumerate_channels; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat2_is_complete; + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return err; +} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h new file mode 100644 index 00000000000..94a553eacdb --- /dev/null +++ b/drivers/dma/ioat/dma_v2.h @@ -0,0 +1,131 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_V2_H +#define IOATDMA_V2_H + +#include +#include "dma.h" +#include "hw.h" + + +extern int ioat_pending_level; + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) + +/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes + * @base: common ioat channel parameters + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @pending: lock free indicator for issued != head + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @ring: software ring buffer implementation of hardware ring + * @ring_lock: protects ring attributes + */ +struct ioat2_dma_chan { + struct ioat_chan_common base; + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + int pending; + struct ioat_ring_ent **ring; + spinlock_t ring_lock; +}; + +static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat2_dma_chan, base); +} + +static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat) +{ + return (1 << ioat->alloc_order) - 1; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat); +} + +static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat) +{ + u16 num_descs = ioat2_ring_mask(ioat) + 1; + u16 active = ioat2_ring_active(ioat); + + BUG_ON(active > num_descs); + + return num_descs - active; +} + +/* assumes caller already checked space */ +static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len) +{ + ioat->head += len; + return ioat->head - len; +} + +static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) +{ + u16 num_descs = len >> ioat->xfercap_log; + + num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); + return num_descs; +} + +struct ioat_ring_ent { + struct ioat_dma_descriptor *hw; + struct dma_async_tx_descriptor txd; + size_t len; +}; + +static inline struct ioat_ring_ent * +ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) +{ + return ioat->ring[idx & ioat2_ring_mask(ioat)]; +} + +int ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int ioat3_dma_probe(struct ioatdma_device *dev, int dca); +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +#endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 55414d88ac1..c4e43226925 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -31,6 +31,7 @@ #include #include #include "dma.h" +#include "dma_v2.h" #include "registers.h" #include "hw.h" -- cgit v1.2.3 From 38e12f64a165e83617c21dae3c15972fd8d639f5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:00:46 -0700 Subject: ioat1: kill unused unmap parameters The unified ioat1/ioat2 ioat_dma_unmap() implementation derives the source and dest addresses from the unmap descriptor. There is no longer a need to track this information in struct ioat_desc_sw. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 2 -- drivers/dma/ioat/dma.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 64b4d75059a..696d4de3bb8 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -648,8 +648,6 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, desc->txd.flags = flags; desc->tx_cnt = tx_cnt; - desc->src = dma_src; - desc->dst = dma_dest; desc->len = total_len; list_splice(&chain, &desc->txd.tx_list); hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 84065dfa4d4..fa15e77652a 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -179,8 +179,6 @@ struct ioat_desc_sw { struct list_head node; int tx_cnt; size_t len; - dma_addr_t src; - dma_addr_t dst; struct dma_async_tx_descriptor txd; }; -- cgit v1.2.3 From 6df9183a153291a2585a8dfe67597fc18c201147 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:00:55 -0700 Subject: ioat: add some dev_dbg() calls Provide some output for debugging the driver. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 29 ++++++++++++++++++++++++++--- drivers/dma/ioat/dma.h | 28 ++++++++++++++++++++++++++++ drivers/dma/ioat/dma_v2.c | 25 ++++++++++++++++++++++++- drivers/dma/ioat/dma_v2.h | 3 +++ 4 files changed, 81 insertions(+), 4 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 696d4de3bb8..edf4f5e5de7 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -134,6 +134,7 @@ static int ioat1_enumerate_channels(struct ioatdma_device *device) dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); + dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) @@ -167,6 +168,8 @@ __ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) { void __iomem *reg_base = ioat->base.reg_base; + dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", + __func__, ioat->pending); ioat->pending = 0; writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); } @@ -251,6 +254,7 @@ static void ioat1_reset_channel(struct ioat_dma_chan *ioat) if (!ioat->used_desc.prev) return; + dev_dbg(to_dev(chan), "%s\n", __func__); chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); chansts = (chan->completion_virt->low & IOAT_CHANSTS_DMA_TRANSFER_STATUS); @@ -382,6 +386,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) cookie = 1; c->cookie = cookie; tx->cookie = cookie; + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); /* write address into NextDescriptor field of last desc in chain */ first = to_ioat_desc(tx->tx_list.next); @@ -390,6 +395,8 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) wmb(); chain_tail->hw->next = first->txd.phys; list_splice_tail_init(&tx->tx_list, &ioat->used_desc); + dump_desc_dbg(ioat, chain_tail); + dump_desc_dbg(ioat, first); ioat->pending += desc->tx_cnt; if (ioat->pending >= ioat_pending_level) @@ -429,6 +436,7 @@ ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) desc_sw->txd.tx_submit = ioat1_tx_submit; desc_sw->hw = desc; desc_sw->txd.phys = phys; + set_desc_id(desc_sw, -1); return desc_sw; } @@ -474,6 +482,7 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) dev_err(to_dev(chan), "Only %d initial descriptors\n", i); break; } + set_desc_id(desc, i); list_add_tail(&desc->node, &tmp_list); } spin_lock_bh(&ioat->desc_lock); @@ -495,6 +504,8 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) tasklet_enable(&chan->cleanup_task); ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, ioat->desccount); return ioat->desccount; } @@ -527,8 +538,10 @@ static void ioat1_dma_free_chan_resources(struct dma_chan *c) mdelay(100); spin_lock_bh(&ioat->desc_lock); - list_for_each_entry_safe(desc, _desc, - &ioat->used_desc, node) { + list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { + dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", + __func__, desc_id(desc)); + dump_desc_dbg(ioat, desc); in_use_descs++; list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, @@ -585,7 +598,8 @@ ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) return NULL; } } - + dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", + __func__, desc_id(new)); prefetch(new->hw); return new; } @@ -630,6 +644,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, async_tx_ack(&desc->txd); next = ioat1_dma_get_next_descriptor(ioat); hw->next = next ? next->txd.phys : 0; + dump_desc_dbg(ioat, desc); desc = next; } else hw->next = 0; @@ -652,6 +667,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, list_splice(&chain, &desc->txd.tx_list); hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); return &desc->txd; } @@ -707,6 +723,9 @@ unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; #endif + dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + (unsigned long long) phys_complete); + if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { @@ -758,6 +777,8 @@ static void ioat1_cleanup(struct ioat_dma_chan *ioat) return; } + dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n", + __func__, phys_complete); list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { tx = &desc->txd; /* @@ -765,6 +786,7 @@ static void ioat1_cleanup(struct ioat_dma_chan *ioat) * due to exceeding xfercap, perhaps. If so, only the * last one will have a cookie, and require unmapping. */ + dump_desc_dbg(ioat, desc); if (tx->cookie) { cookie = tx->cookie; ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); @@ -848,6 +870,7 @@ static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) async_tx_ack(&desc->txd); hw->next = 0; list_add_tail(&desc->node, &ioat->used_desc); + dump_desc_dbg(ioat, desc); writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index fa15e77652a..9f9edc2cd07 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -173,6 +173,7 @@ ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, * or attached to a transaction list (async_tx.tx_list) * @tx_cnt: number of descriptors required to complete the transaction * @txd: the generic software descriptor for all engines + * @id: identifier for debug */ struct ioat_desc_sw { struct ioat_dma_descriptor *hw; @@ -180,8 +181,35 @@ struct ioat_desc_sw { int tx_cnt; size_t len; struct dma_async_tx_descriptor txd; + #ifdef DEBUG + int id; + #endif }; +#ifdef DEBUG +#define set_desc_id(desc, i) ((desc)->id = (i)) +#define desc_id(desc) ((desc)->id) +#else +#define set_desc_id(desc, i) +#define desc_id(desc) (0) +#endif + +static inline void +__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, + struct dma_async_tx_descriptor *tx, int id) +{ + struct device *dev = to_dev(chan); + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" + " ctl: %#x (op: %d int_en: %d compl: %d)\n", id, + (unsigned long long) tx->phys, + (unsigned long long) hw->next, tx->cookie, tx->flags, + hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); +} + +#define dump_desc_dbg(c, d) \ + ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + static inline void ioat_set_tcp_copy_break(unsigned long copybreak) { #ifdef CONFIG_NET_DMA diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 49ba1c73d95..58881860f40 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -54,7 +54,9 @@ static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) /* make descriptor updates globally visible before notifying channel */ wmb(); writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - + dev_dbg(to_dev(&ioat->base), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); } static void ioat2_issue_pending(struct dma_chan *chan) @@ -101,6 +103,8 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) return; } + dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); idx = ioat2_desc_alloc(ioat, 1); desc = ioat2_get_ring_ent(ioat, idx); @@ -118,6 +122,7 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) reg_base + IOAT2_CHAINADDR_OFFSET_LOW); writel(((u64) desc->txd.phys) >> 32, reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + dump_desc_dbg(ioat, desc); __ioat2_issue_pending(ioat); } @@ -154,6 +159,10 @@ static void ioat2_reset_part2(struct work_struct *work) ioat->issued = ioat->tail; ioat->dmacount = 0; + dev_dbg(to_dev(&ioat->base), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); + if (ioat2_ring_pending(ioat)) { struct ioat_ring_ent *desc; @@ -221,6 +230,8 @@ static void ioat2_chan_watchdog(struct work_struct *work) u16 active; int i; + dev_dbg(&device->pdev->dev, "%s\n", __func__); + for (i = 0; i < device->common.chancnt; i++) { chan = ioat_chan_by_index(device, i); ioat = container_of(chan, struct ioat2_dma_chan, base); @@ -295,11 +306,15 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) spin_lock_bh(&ioat->ring_lock); + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + active = ioat2_ring_active(ioat); for (i = 0; i < active && !seen_current; i++) { prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); desc = ioat2_get_ring_ent(ioat, ioat->tail + i); tx = &desc->txd; + dump_desc_dbg(ioat, desc); if (tx->cookie) { ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); chan->completed_cookie = tx->cookie; @@ -348,6 +363,7 @@ static int ioat2_enumerate_channels(struct ioatdma_device *device) xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); if (xfercap_log == 0) return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); /* FIXME which i/oat version is i7300? */ #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL @@ -381,6 +397,8 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) cookie = 1; tx->cookie = cookie; c->cookie = cookie; + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + ioat2_update_pending(ioat); spin_unlock_bh(&ioat->ring_lock); @@ -480,6 +498,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) kfree(ring); return -ENOMEM; } + set_desc_id(ring[i], i); } /* link descs */ @@ -571,12 +590,14 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, len -= copy; dst += copy; src += copy; + dump_desc_dbg(ioat, desc); } desc->txd.flags = flags; desc->len = total_len; hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); /* we leave the channel locked to ensure in order submission */ return &desc->txd; @@ -614,6 +635,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) spin_lock_bh(&ioat->ring_lock); descs = ioat2_ring_space(ioat); + dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); for (i = 0; i < descs; i++) { desc = ioat2_get_ring_ent(ioat, ioat->head + i); ioat2_free_ring_ent(desc, c); @@ -625,6 +647,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) for (i = 0; i < total_descs - descs; i++) { desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); ioat2_free_ring_ent(desc, c); } diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 94a553eacdb..c72ccb5dfd5 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -116,6 +116,9 @@ struct ioat_ring_ent { struct ioat_dma_descriptor *hw; struct dma_async_tx_descriptor txd; size_t len; + #ifdef DEBUG + int id; + #endif }; static inline struct ioat_ring_ent * -- cgit v1.2.3 From 4fb9b9e8d55880523db550043dfb204696dd0422 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:01:04 -0700 Subject: ioat: cleanup completion status reads The cleanup path makes an effort to only perform an atomic read of the 64-bit completion address. However in the 32-bit case it does not matter if we read the upper-32 and lower-32 non-atomically because the upper-32 will always be zero. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 78 ++++++++++++++++---------------------------- drivers/dma/ioat/dma.h | 10 ++---- drivers/dma/ioat/dma_v2.c | 25 +++++++------- drivers/dma/ioat/registers.h | 8 ++--- 4 files changed, 46 insertions(+), 75 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index edf4f5e5de7..08417ad4edc 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -201,8 +201,7 @@ static void ioat1_reset_part2(struct work_struct *work) spin_lock_bh(&chan->cleanup_lock); spin_lock_bh(&ioat->desc_lock); - chan->completion_virt->low = 0; - chan->completion_virt->high = 0; + *chan->completion = 0; ioat->pending = 0; /* count the descriptors waiting */ @@ -256,8 +255,7 @@ static void ioat1_reset_channel(struct ioat_dma_chan *ioat) dev_dbg(to_dev(chan), "%s\n", __func__); chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); - chansts = (chan->completion_virt->low - & IOAT_CHANSTS_DMA_TRANSFER_STATUS); + chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; if (chanerr) { dev_err(to_dev(chan), "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", @@ -293,14 +291,8 @@ static void ioat1_chan_watchdog(struct work_struct *work) struct ioat_dma_chan *ioat; struct ioat_chan_common *chan; int i; - - union { - u64 full; - struct { - u32 low; - u32 high; - }; - } completion_hw; + u64 completion; + u32 completion_low; unsigned long compl_desc_addr_hw; for (i = 0; i < device->common.chancnt; i++) { @@ -334,25 +326,24 @@ static void ioat1_chan_watchdog(struct work_struct *work) * try resetting the channel */ - completion_hw.low = readl(chan->reg_base + + /* we need to read the low address first as this + * causes the chipset to latch the upper bits + * for the subsequent read + */ + completion_low = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(chan->device->version)); - completion_hw.high = readl(chan->reg_base + + completion = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(chan->device->version)); -#if (BITS_PER_LONG == 64) - compl_desc_addr_hw = - completion_hw.full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - compl_desc_addr_hw = - completion_hw.low & IOAT_LOW_COMPLETION_MASK; -#endif + completion <<= 32; + completion |= completion_low; + compl_desc_addr_hw = completion & + IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; if ((compl_desc_addr_hw != 0) && (compl_desc_addr_hw != chan->watchdog_completion) && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) { chan->last_compl_desc_addr_hw = compl_desc_addr_hw; - chan->completion_virt->low = completion_hw.low; - chan->completion_virt->high = completion_hw.high; + *chan->completion = completion; } else { ioat1_reset_channel(ioat); chan->watchdog_completion = 0; @@ -492,14 +483,12 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, - &chan->completion_addr); - memset(chan->completion_virt, 0, - sizeof(*chan->completion_virt)); - writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_addr) >> 32, + writel(((u64) chan->completion_dma) >> 32, chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); tasklet_enable(&chan->cleanup_task); @@ -558,15 +547,16 @@ static void ioat1_dma_free_chan_resources(struct dma_chan *c) spin_unlock_bh(&ioat->desc_lock); pci_pool_free(ioatdma_device->completion_pool, - chan->completion_virt, - chan->completion_addr); + chan->completion, + chan->completion_dma); /* one is ok since we left it on there on purpose */ if (in_use_descs > 1) dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", in_use_descs - 1); - chan->last_completion = chan->completion_addr = 0; + chan->last_completion = 0; + chan->completion_dma = 0; chan->watchdog_completion = 0; chan->last_compl_desc_addr_hw = 0; chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0; @@ -709,25 +699,15 @@ void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) { unsigned long phys_complete; + u64 completion; - /* The completion writeback can happen at any time, - so reads by the driver need to be atomic operations - The descriptor physical addresses are limited to 32-bits - when the CPU can only do a 32-bit mov */ - -#if (BITS_PER_LONG == 64) - phys_complete = - chan->completion_virt->full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; -#endif + completion = *chan->completion; + phys_complete = completion & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, (unsigned long long) phys_complete); - if ((chan->completion_virt->full - & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == + if ((completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", readl(chan->reg_base + IOAT_CHANERR_OFFSET)); @@ -750,7 +730,7 @@ static void ioat1_cleanup(struct ioat_dma_chan *ioat) dma_cookie_t cookie = 0; struct dma_async_tx_descriptor *tx; - prefetch(chan->completion_virt); + prefetch(chan->completion); if (!spin_trylock_bh(&chan->cleanup_lock)) return; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 9f9edc2cd07..5fd6e2de84d 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -96,14 +96,8 @@ struct ioat_chan_common { struct ioatdma_device *device; struct dma_chan common; - dma_addr_t completion_addr; - union { - u64 full; /* HW completion writeback */ - struct { - u32 low; - u32 high; - }; - } *completion_virt; + dma_addr_t completion_dma; + u64 *completion; unsigned long last_compl_desc_addr_hw; struct tasklet_struct cleanup_task; }; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 58881860f40..ca113424934 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -200,8 +200,7 @@ static void ioat2_reset_channel(struct ioat2_dma_chan *ioat) return; chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - chansts = (chan->completion_virt->low - & IOAT_CHANSTS_DMA_TRANSFER_STATUS); + chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; if (chanerr) { dev_err(to_dev(chan), "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", @@ -281,7 +280,7 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) int i; struct dma_async_tx_descriptor *tx; - prefetch(chan->completion_virt); + prefetch(chan->completion); spin_lock_bh(&chan->cleanup_lock); phys_complete = ioat_get_current_completion(chan); @@ -470,17 +469,15 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, - &chan->completion_addr); - if (!chan->completion_virt) + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + if (!chan->completion) return -ENOMEM; - memset(chan->completion_virt, 0, - sizeof(*chan->completion_virt)); - writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_addr) >> 32, + writel(((u64) chan->completion_dma) >> 32, chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); ioat->alloc_order = ioat_get_alloc_order(); @@ -655,12 +652,12 @@ static void ioat2_free_chan_resources(struct dma_chan *c) ioat->ring = NULL; ioat->alloc_order = 0; pci_pool_free(ioatdma_device->completion_pool, - chan->completion_virt, - chan->completion_addr); + chan->completion, + chan->completion_dma); spin_unlock_bh(&ioat->ring_lock); chan->last_completion = 0; - chan->completion_addr = 0; + chan->completion_dma = 0; ioat->pending = 0; ioat->dmacount = 0; chan->watchdog_completion = 0; diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 49bc277424f..a83c7332125 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -94,10 +94,10 @@ #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) -#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F -#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 -#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) +#define IOAT_CHANSTS_SOFT_ERR 0x10ULL +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x7ULL #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 -- cgit v1.2.3 From bb3207863014c7310593146f11fbc6573eab43c8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:01:14 -0700 Subject: ioat: ignore reserved bits for chancnt and xfercap Don't trust that the reserved bits are always zero, also sanity check the returned value. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 7 +++++++ drivers/dma/ioat/dma_v2.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 08417ad4edc..5173ba97ba3 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -132,7 +132,14 @@ static int ioat1_enumerate_channels(struct ioatdma_device *device) INIT_LIST_HEAD(&dma->channels); dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_scale &= 0x1f; /* bits [4:0] valid */ xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index ca113424934..137cf879265 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -359,7 +359,14 @@ static int ioat2_enumerate_channels(struct ioatdma_device *device) INIT_LIST_HEAD(&dma->channels); dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ if (xfercap_log == 0) return 0; dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); -- cgit v1.2.3 From f6ab95b55735fa03cad8d0f966647e5df206e207 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:01:21 -0700 Subject: ioat: preserve chanctrl bits when re-arming interrupts The register write in ioat_dma_cleanup_tasklet is unfortunate in two ways: 1/ It clears the extra 'enable' bits that we set at alloc_chan_resources time 2/ It gives the impression that it disables interrupts when it is in fact re-arming interrupts [ Impact: fix, persist the value of the chanctrl register when re-arming ] Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 10 +++------- drivers/dma/ioat/dma_v2.c | 8 ++------ drivers/dma/ioat/registers.h | 6 +++++- 3 files changed, 10 insertions(+), 14 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 5173ba97ba3..6dd0af194b8 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -452,7 +452,6 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_desc_sw *desc; - u16 chanctrl; u32 chanerr; int i; LIST_HEAD(tmp_list); @@ -462,10 +461,7 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) return ioat->desccount; /* Setup register to interrupt and write completion status on error */ - chanctrl = IOAT_CHANCTRL_ERR_INT_EN | - IOAT_CHANCTRL_ANY_ERR_ABORT_EN | - IOAT_CHANCTRL_ERR_COMPLETION_EN; - writew(chanctrl, chan->reg_base + IOAT_CHANCTRL_OFFSET); + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); if (chanerr) { @@ -672,9 +668,9 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, static void ioat1_cleanup_tasklet(unsigned long data) { struct ioat_dma_chan *chan = (void *)data; + ioat1_cleanup(chan); - writew(IOAT_CHANCTRL_INT_DISABLE, - chan->base.reg_base + IOAT_CHANCTRL_OFFSET); + writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); } static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 137cf879265..2f34f290041 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -341,8 +341,7 @@ static void ioat2_cleanup_tasklet(unsigned long data) struct ioat2_dma_chan *ioat = (void *) data; ioat2_cleanup(ioat); - writew(IOAT_CHANCTRL_INT_DISABLE, - ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } /** @@ -454,7 +453,6 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; - u16 chanctrl; u32 chanerr; int descs; int i; @@ -464,9 +462,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) return 1 << ioat->alloc_order; /* Setup register to interrupt and write completion status on error */ - chanctrl = IOAT_CHANCTRL_ERR_INT_EN | IOAT_CHANCTRL_ANY_ERR_ABORT_EN | - IOAT_CHANCTRL_ERR_COMPLETION_EN; - writew(chanctrl, chan->reg_base + IOAT_CHANCTRL_OFFSET); + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); if (chanerr) { diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index a83c7332125..4380f6fbf05 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -75,7 +75,11 @@ #define IOAT_CHANCTRL_ERR_INT_EN 0x0010 #define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 -#define IOAT_CHANCTRL_INT_DISABLE 0x0001 +#define IOAT_CHANCTRL_INT_REARM 0x0001 +#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ + IOAT_CHANCTRL_ERR_COMPLETION_EN |\ + IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\ + IOAT_CHANCTRL_ERR_INT_EN) #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ #define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ -- cgit v1.2.3 From 345d852391cf3fdc73f23a9ca522c6e7b5eb5a52 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:01:30 -0700 Subject: ioat: ___devinit annotate the initialization paths Mark all single use initialization routines with __devinit. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dca.c | 9 ++++++--- drivers/dma/ioat/dma.c | 12 ++++++------ drivers/dma/ioat/dma.h | 11 ++++++----- drivers/dma/ioat/dma_v2.c | 4 ++-- drivers/dma/ioat/dma_v2.h | 8 ++++---- 5 files changed, 24 insertions(+), 20 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index af1c762dd9d..69d02615c4d 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = { }; -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) return slots; } -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) return slots; } -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 6dd0af194b8..abc96c4c079 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -870,7 +870,7 @@ static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) */ #define IOAT_TEST_SIZE 2000 -static void ioat_dma_test_callback(void *dma_async_param) +static void __devinit ioat_dma_test_callback(void *dma_async_param) { struct completion *cmp = dma_async_param; @@ -881,7 +881,7 @@ static void ioat_dma_test_callback(void *dma_async_param) * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. * @device: device to be tested */ -static int ioat_dma_self_test(struct ioatdma_device *device) +static int __devinit ioat_dma_self_test(struct ioatdma_device *device) { int i; u8 *src; @@ -1082,7 +1082,7 @@ static void ioat_disable_interrupts(struct ioatdma_device *device) writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); } -int ioat_probe(struct ioatdma_device *device) +int __devinit ioat_probe(struct ioatdma_device *device) { int err = -ENODEV; struct dma_device *dma = &device->common; @@ -1142,7 +1142,7 @@ err_dma_pool: return err; } -int ioat_register(struct ioatdma_device *device) +int __devinit ioat_register(struct ioatdma_device *device) { int err = dma_async_device_register(&device->common); @@ -1169,7 +1169,7 @@ static void ioat1_intr_quirk(struct ioatdma_device *device) pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); } -int ioat1_dma_probe(struct ioatdma_device *device, int dca) +int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; struct dma_device *dma; @@ -1200,7 +1200,7 @@ int ioat1_dma_probe(struct ioatdma_device *device, int dca) return err; } -void ioat_dma_remove(struct ioatdma_device *device) +void __devexit ioat_dma_remove(struct ioatdma_device *device) { struct dma_device *dma = &device->common; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 5fd6e2de84d..e47083b52ee 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -217,11 +217,12 @@ ioat_chan_by_index(struct ioatdma_device *device, int index) return device->idx[index]; } -int ioat_probe(struct ioatdma_device *device); -int ioat_register(struct ioatdma_device *device); -int ioat1_dma_probe(struct ioatdma_device *dev, int dca); -void ioat_dma_remove(struct ioatdma_device *device); -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); +int __devinit ioat_probe(struct ioatdma_device *device); +int __devinit ioat_register(struct ioatdma_device *device); +int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); +void __devexit ioat_dma_remove(struct ioatdma_device *device); +struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, + void __iomem *iobase); unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx, diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 2f34f290041..1aa2974e7a9 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -683,7 +683,7 @@ ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, return ioat_is_complete(c, cookie, done, used); } -int ioat2_dma_probe(struct ioatdma_device *device, int dca) +int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; struct dma_device *dma; @@ -722,7 +722,7 @@ int ioat2_dma_probe(struct ioatdma_device *device, int dca) return err; } -int ioat3_dma_probe(struct ioatdma_device *device, int dca) +int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; struct dma_device *dma; diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index c72ccb5dfd5..bdde5373cf6 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -127,8 +127,8 @@ ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) return ioat->ring[idx & ioat2_ring_mask(ioat)]; } -int ioat2_dma_probe(struct ioatdma_device *dev, int dca); -int ioat3_dma_probe(struct ioatdma_device *dev, int dca); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); +struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); #endif /* IOATDMA_V2_H */ -- cgit v1.2.3 From ad643f54c8514998333bc6c7b201fda2267496be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:01:38 -0700 Subject: ioat1: trim ioat_dma_desc_sw Save 4 bytes per software descriptor by transmitting tx_cnt in an unused portion of the hardware descriptor. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 4 ++-- drivers/dma/ioat/dma.h | 2 -- drivers/dma/ioat/hw.h | 6 +++++- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index abc96c4c079..f59b6f42f86 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -396,7 +396,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) dump_desc_dbg(ioat, chain_tail); dump_desc_dbg(ioat, first); - ioat->pending += desc->tx_cnt; + ioat->pending += desc->hw->tx_cnt; if (ioat->pending >= ioat_pending_level) __ioat1_dma_memcpy_issue_pending(ioat); spin_unlock_bh(&ioat->desc_lock); @@ -655,11 +655,11 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, spin_unlock_bh(&ioat->desc_lock); desc->txd.flags = flags; - desc->tx_cnt = tx_cnt; desc->len = total_len; list_splice(&chain, &desc->txd.tx_list); hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); hw->ctl_f.compl_write = 1; + hw->tx_cnt = tx_cnt; dump_desc_dbg(ioat, desc); return &desc->txd; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index e47083b52ee..ec851cf5345 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -165,14 +165,12 @@ ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, * @hw: hardware DMA descriptor * @node: this descriptor will either be on the free list, * or attached to a transaction list (async_tx.tx_list) - * @tx_cnt: number of descriptors required to complete the transaction * @txd: the generic software descriptor for all engines * @id: identifier for debug */ struct ioat_desc_sw { struct ioat_dma_descriptor *hw; struct list_head node; - int tx_cnt; size_t len; struct dma_async_tx_descriptor txd; #ifdef DEBUG diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index e13f3ed4776..7481fb13ce0 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -63,7 +63,11 @@ struct ioat_dma_descriptor { uint64_t next; uint64_t rsv1; uint64_t rsv2; - uint64_t user1; + /* store some driver data in an unused portion of the descriptor */ + union { + uint64_t user1; + uint64_t tx_cnt; + }; uint64_t user2; }; #endif -- cgit v1.2.3 From 09c8a5b85e5f1e74a19bdd7c85547429d51df1cd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:01:49 -0700 Subject: ioat: switch watchdog and reset handler from workqueue to timer In order to support dynamic resizing of the descriptor ring or polling for a descriptor in the presence of a hung channel the reset handler needs to make progress while in a non-preemptible context. The current workqueue implementation precludes polling channel reset completion under spin_lock(). This conversion also allows us to return to opportunistic cleanup in the ioat2 case as the timer implementation guarantees at least one cleanup after every descriptor is submitted. This means the worst case completion latency becomes the timer frequency (for exceptional circumstances), but with the benefit of avoiding busy waiting when the lock is contended. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 351 +++++++++++++++++-------------------------- drivers/dma/ioat/dma.h | 112 +++++++++++--- drivers/dma/ioat/dma_v2.c | 321 +++++++++++++++++---------------------- drivers/dma/ioat/dma_v2.h | 10 ++ drivers/dma/ioat/registers.h | 22 +-- 5 files changed, 388 insertions(+), 428 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index f59b6f42f86..17a518d0386 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -99,23 +99,26 @@ static void ioat1_cleanup_tasklet(unsigned long data); /* common channel initialization */ void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx, - work_func_t work_fn, void (*tasklet)(unsigned long), - unsigned long tasklet_data) + void (*timer_fn)(unsigned long), + void (*tasklet)(unsigned long), + unsigned long ioat) { struct dma_device *dma = &device->common; chan->device = device; chan->reg_base = device->reg_base + (0x80 * (idx + 1)); - INIT_DELAYED_WORK(&chan->work, work_fn); spin_lock_init(&chan->cleanup_lock); chan->common.device = dma; list_add_tail(&chan->common.device_node, &dma->channels); device->idx[idx] = chan; - tasklet_init(&chan->cleanup_task, tasklet, tasklet_data); + init_timer(&chan->timer); + chan->timer.function = timer_fn; + chan->timer.data = ioat; + tasklet_init(&chan->cleanup_task, tasklet, ioat); tasklet_disable(&chan->cleanup_task); } -static void ioat1_reset_part2(struct work_struct *work); +static void ioat1_timer_event(unsigned long data); /** * ioat1_dma_enumerate_channels - find and initialize the device's channels @@ -153,7 +156,7 @@ static int ioat1_enumerate_channels(struct ioatdma_device *device) break; ioat_init_channel(device, &ioat->base, i, - ioat1_reset_part2, + ioat1_timer_event, ioat1_cleanup_tasklet, (unsigned long) ioat); ioat->xfercap = xfercap; @@ -192,61 +195,6 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) } } -/** - * ioat1_reset_part2 - reinit the channel after a reset - */ -static void ioat1_reset_part2(struct work_struct *work) -{ - struct ioat_chan_common *chan; - struct ioat_dma_chan *ioat; - struct ioat_desc_sw *desc; - int dmacount; - bool start_null = false; - - chan = container_of(work, struct ioat_chan_common, work.work); - ioat = container_of(chan, struct ioat_dma_chan, base); - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->desc_lock); - - *chan->completion = 0; - ioat->pending = 0; - - /* count the descriptors waiting */ - dmacount = 0; - if (ioat->used_desc.prev) { - desc = to_ioat_desc(ioat->used_desc.prev); - do { - dmacount++; - desc = to_ioat_desc(desc->node.next); - } while (&desc->node != ioat->used_desc.next); - } - - if (dmacount) { - /* - * write the new starting descriptor address - * this puts channel engine into ARMED state - */ - desc = to_ioat_desc(ioat->used_desc.prev); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, chan->reg_base - + IOAT_CHANCMD_OFFSET(chan->device->version)); - } else - start_null = true; - spin_unlock_bh(&ioat->desc_lock); - spin_unlock_bh(&chan->cleanup_lock); - - dev_err(to_dev(chan), - "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(chan), dmacount, ioat->desccount); - - if (start_null) - ioat1_dma_start_null_desc(ioat); -} - /** * ioat1_reset_channel - restart a channel * @ioat: IOAT DMA channel handle @@ -257,12 +205,9 @@ static void ioat1_reset_channel(struct ioat_dma_chan *ioat) void __iomem *reg_base = chan->reg_base; u32 chansts, chanerr; - if (!ioat->used_desc.prev) - return; - - dev_dbg(to_dev(chan), "%s\n", __func__); + dev_warn(to_dev(chan), "reset\n"); chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); - chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; + chansts = *chan->completion & IOAT_CHANSTS_STATUS; if (chanerr) { dev_err(to_dev(chan), "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", @@ -278,93 +223,11 @@ static void ioat1_reset_channel(struct ioat_dma_chan *ioat) * while we're waiting. */ - spin_lock_bh(&ioat->desc_lock); ioat->pending = INT_MIN; writeb(IOAT_CHANCMD_RESET, reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - spin_unlock_bh(&ioat->desc_lock); - - /* schedule the 2nd half instead of sleeping a long time */ - schedule_delayed_work(&chan->work, RESET_DELAY); -} - -/** - * ioat1_chan_watchdog - watch for stuck channels - */ -static void ioat1_chan_watchdog(struct work_struct *work) -{ - struct ioatdma_device *device = - container_of(work, struct ioatdma_device, work.work); - struct ioat_dma_chan *ioat; - struct ioat_chan_common *chan; - int i; - u64 completion; - u32 completion_low; - unsigned long compl_desc_addr_hw; - - for (i = 0; i < device->common.chancnt; i++) { - chan = ioat_chan_by_index(device, i); - ioat = container_of(chan, struct ioat_dma_chan, base); - - if (/* have we started processing anything yet */ - chan->last_completion - /* have we completed any since last watchdog cycle? */ - && (chan->last_completion == chan->watchdog_completion) - /* has TCP stuck on one cookie since last watchdog? */ - && (chan->watchdog_tcp_cookie == chan->watchdog_last_tcp_cookie) - && (chan->watchdog_tcp_cookie != chan->completed_cookie) - /* is there something in the chain to be processed? */ - /* CB1 chain always has at least the last one processed */ - && (ioat->used_desc.prev != ioat->used_desc.next) - && ioat->pending == 0) { - - /* - * check CHANSTS register for completed - * descriptor address. - * if it is different than completion writeback, - * it is not zero - * and it has changed since the last watchdog - * we can assume that channel - * is still working correctly - * and the problem is in completion writeback. - * update completion writeback - * with actual CHANSTS value - * else - * try resetting the channel - */ - - /* we need to read the low address first as this - * causes the chipset to latch the upper bits - * for the subsequent read - */ - completion_low = readl(chan->reg_base + - IOAT_CHANSTS_OFFSET_LOW(chan->device->version)); - completion = readl(chan->reg_base + - IOAT_CHANSTS_OFFSET_HIGH(chan->device->version)); - completion <<= 32; - completion |= completion_low; - compl_desc_addr_hw = completion & - IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; - - if ((compl_desc_addr_hw != 0) - && (compl_desc_addr_hw != chan->watchdog_completion) - && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) { - chan->last_compl_desc_addr_hw = compl_desc_addr_hw; - *chan->completion = completion; - } else { - ioat1_reset_channel(ioat); - chan->watchdog_completion = 0; - chan->last_compl_desc_addr_hw = 0; - } - } else { - chan->last_compl_desc_addr_hw = 0; - chan->watchdog_completion = chan->last_completion; - } - - chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; - } - - schedule_delayed_work(&device->work, WATCHDOG_DELAY); + set_bit(IOAT_RESET_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + RESET_DELAY); } static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) @@ -372,6 +235,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) struct dma_chan *c = tx->chan; struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); + struct ioat_chan_common *chan = &ioat->base; struct ioat_desc_sw *first; struct ioat_desc_sw *chain_tail; dma_cookie_t cookie; @@ -396,6 +260,9 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) dump_desc_dbg(ioat, chain_tail); dump_desc_dbg(ioat, first); + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat->pending += desc->hw->tx_cnt; if (ioat->pending >= ioat_pending_level) __ioat1_dma_memcpy_issue_pending(ioat); @@ -520,6 +387,7 @@ static void ioat1_dma_free_chan_resources(struct dma_chan *c) return; tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); ioat1_cleanup(ioat); /* Delay 100ms after reset to allow internal DMA logic to quiesce @@ -560,9 +428,6 @@ static void ioat1_dma_free_chan_resources(struct dma_chan *c) chan->last_completion = 0; chan->completion_dma = 0; - chan->watchdog_completion = 0; - chan->last_compl_desc_addr_hw = 0; - chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0; ioat->pending = 0; ioat->desccount = 0; } @@ -705,15 +570,15 @@ unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) u64 completion; completion = *chan->completion; - phys_complete = completion & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; + phys_complete = ioat_chansts_to_addr(completion); dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, (unsigned long long) phys_complete); - if ((completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == - IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { + if (is_ioat_halted(completion)) { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", - readl(chan->reg_base + IOAT_CHANERR_OFFSET)); + chanerr); /* TODO do something to salvage the situation */ } @@ -721,48 +586,31 @@ unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) return phys_complete; } -/** - * ioat1_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat) +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + unsigned long *phys_complete) { - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; - struct ioat_desc_sw *desc, *_desc; - dma_cookie_t cookie = 0; - struct dma_async_tx_descriptor *tx; - - prefetch(chan->completion); - - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; + *phys_complete = ioat_get_current_completion(chan); + if (*phys_complete == chan->last_completion) + return false; + clear_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - phys_complete = ioat_get_current_completion(chan); - if (phys_complete == chan->last_completion) { - spin_unlock_bh(&chan->cleanup_lock); - /* - * perhaps we're stuck so hard that the watchdog can't go off? - * try to catch it after 2 seconds - */ - if (time_after(jiffies, - chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat1_chan_watchdog(&(chan->device->work.work)); - chan->last_completion_time = jiffies; - } - return; - } - chan->last_completion_time = jiffies; + return true; +} - cookie = 0; - if (!spin_trylock_bh(&ioat->desc_lock)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } +static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct list_head *_desc, *n; + struct dma_async_tx_descriptor *tx; dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n", __func__, phys_complete); - list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { + list_for_each_safe(_desc, n, &ioat->used_desc) { + struct ioat_desc_sw *desc; + + prefetch(n); + desc = list_entry(_desc, typeof(*desc), node); tx = &desc->txd; /* * Incoming DMA requests may use multiple descriptors, @@ -771,7 +619,8 @@ static void ioat1_cleanup(struct ioat_dma_chan *ioat) */ dump_desc_dbg(ioat, desc); if (tx->cookie) { - cookie = tx->cookie; + chan->completed_cookie = tx->cookie; + tx->cookie = 0; ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); if (tx->callback) { tx->callback(tx->callback_param); @@ -786,27 +635,110 @@ static void ioat1_cleanup(struct ioat_dma_chan *ioat) */ if (async_tx_test_ack(tx)) list_move_tail(&desc->node, &ioat->free_desc); - else - tx->cookie = 0; } else { /* * last used desc. Do not remove, so we can - * append from it, but don't look at it next - * time, either + * append from it. */ - tx->cookie = 0; + + /* if nothing else is pending, cancel the + * completion timeout + */ + if (n == &ioat->used_desc) { + dev_dbg(to_dev(chan), + "%s cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + } /* TODO check status bits? */ break; } } + chan->last_completion = phys_complete; +} + +/** + * ioat1_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + * + * To prevent lock contention we defer cleanup when the locks are + * contended with a terminal timeout that forces cleanup and catches + * completion notification errors. + */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->desc_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&chan->cleanup_lock); +} - chan->last_completion = phys_complete; - if (cookie != 0) - chan->completed_cookie = cookie; +static void ioat1_timer_event(unsigned long data) +{ + struct ioat_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + + spin_lock_bh(&chan->cleanup_lock); + if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat->desc_lock); + + /* restart active descriptors */ + desc = to_ioat_desc(ioat->used_desc.prev); + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); + + ioat->pending = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&ioat->desc_lock); + } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + + spin_lock_bh(&ioat->desc_lock); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat1_reset_channel(ioat); + else { + u64 status = ioat_chansts(chan); + + /* manually update the last completion address */ + if (ioat_chansts_to_addr(status) != 0) + *chan->completion = status; + + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->desc_lock); + } spin_unlock_bh(&chan->cleanup_lock); } @@ -855,13 +787,8 @@ static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) list_add_tail(&desc->node, &ioat->used_desc); dump_desc_dbg(ioat, desc); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, chan->reg_base - + IOAT_CHANCMD_OFFSET(chan->device->version)); + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); spin_unlock_bh(&ioat->desc_lock); } @@ -1194,9 +1121,6 @@ int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) if (dca) device->dca = ioat_dca_init(pdev, device->reg_base); - INIT_DELAYED_WORK(&device->work, ioat1_chan_watchdog); - schedule_delayed_work(&device->work, WATCHDOG_DELAY); - return err; } @@ -1204,9 +1128,6 @@ void __devexit ioat_dma_remove(struct ioatdma_device *device) { struct dma_device *dma = &device->common; - if (device->version != IOAT_VER_3_0) - cancel_delayed_work(&device->work); - ioat_disable_interrupts(device); dma_async_device_unregister(dma); diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index ec851cf5345..dbfccac3e80 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -23,6 +23,7 @@ #include #include "hw.h" +#include "registers.h" #include #include #include @@ -33,7 +34,6 @@ #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 -#define IOAT_WATCHDOG_PERIOD (2 * HZ) #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) @@ -42,9 +42,6 @@ #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) -#define RESET_DELAY msecs_to_jiffies(100) -#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) - /* * workaround for IOAT ver.3.0 null descriptor issue * (channel returns error when size is 0) @@ -72,7 +69,6 @@ struct ioatdma_device { struct pci_pool *completion_pool; struct dma_device common; u8 version; - struct delayed_work work; struct msix_entry msix_entries[4]; struct ioat_chan_common *idx[4]; struct dca_provider *dca; @@ -81,24 +77,21 @@ struct ioatdma_device { }; struct ioat_chan_common { + struct dma_chan common; void __iomem *reg_base; - unsigned long last_completion; - unsigned long last_completion_time; - spinlock_t cleanup_lock; dma_cookie_t completed_cookie; - unsigned long watchdog_completion; - int watchdog_tcp_cookie; - u32 watchdog_last_tcp_cookie; - struct delayed_work work; - + unsigned long state; + #define IOAT_COMPLETION_PENDING 0 + #define IOAT_COMPLETION_ACK 1 + #define IOAT_RESET_PENDING 2 + struct timer_list timer; + #define COMPLETION_TIMEOUT msecs_to_jiffies(100) + #define RESET_DELAY msecs_to_jiffies(100) struct ioatdma_device *device; - struct dma_chan common; - dma_addr_t completion_dma; u64 *completion; - unsigned long last_compl_desc_addr_hw; struct tasklet_struct cleanup_task; }; @@ -148,7 +141,6 @@ ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, last_used = c->cookie; last_complete = chan->completed_cookie; - chan->watchdog_tcp_cookie = cookie; if (done) *done = last_complete; @@ -215,6 +207,85 @@ ioat_chan_by_index(struct ioatdma_device *device, int index) return device->idx[index]; } +static inline u64 ioat_chansts(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u64 status; + u32 status_lo; + + /* We need to read the low address first as this causes the + * chipset to latch the upper bits for the subsequent read + */ + status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status <<= 32; + status |= status_lo; + + return status; +} + +static inline void ioat_start(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline u64 ioat_chansts_to_addr(u64 status) +{ + return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +} + +static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +{ + return readl(chan->reg_base + IOAT_CHANERR_OFFSET); +} + +static inline void ioat_suspend(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); +} + +static inline bool is_ioat_active(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); +} + +static inline bool is_ioat_idle(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE); +} + +static inline bool is_ioat_halted(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED); +} + +static inline bool is_ioat_suspended(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED); +} + +/* channel was fatally programmed */ +static inline bool is_ioat_bug(unsigned long err) +{ + return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR| + IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR| + IOAT_CHANERR_LENGTH_ERR)); +} + int __devinit ioat_probe(struct ioatdma_device *device); int __devinit ioat_register(struct ioatdma_device *device); int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); @@ -224,8 +295,11 @@ struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx, - work_func_t work_fn, void (*tasklet)(unsigned long), - unsigned long tasklet_data); + void (*timer_fn)(unsigned long), + void (*tasklet)(unsigned long), + unsigned long ioat); void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, size_t len, struct ioat_dma_descriptor *hw); +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + unsigned long *phys_complete); #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 1aa2974e7a9..72e59a0d0f2 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -49,7 +49,7 @@ static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) void * __iomem reg_base = ioat->base.reg_base; ioat->pending = 0; - ioat->dmacount += ioat2_ring_pending(ioat); + ioat->dmacount += ioat2_ring_pending(ioat);; ioat->issued = ioat->head; /* make descriptor updates globally visible before notifying channel */ wmb(); @@ -92,7 +92,6 @@ static void ioat2_update_pending(struct ioat2_dma_chan *ioat) static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) { - void __iomem *reg_base = ioat->base.reg_base; struct ioat_ring_ent *desc; struct ioat_dma_descriptor *hw; int idx; @@ -118,10 +117,7 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) hw->src_addr = 0; hw->dst_addr = 0; async_tx_ack(&desc->txd); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + ioat2_set_chainaddr(ioat, desc->txd.phys); dump_desc_dbg(ioat, desc); __ioat2_issue_pending(ioat); } @@ -133,177 +129,14 @@ static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) spin_unlock_bh(&ioat->ring_lock); } -static void ioat2_cleanup(struct ioat2_dma_chan *ioat); - -/** - * ioat2_reset_part2 - reinit the channel after a reset - */ -static void ioat2_reset_part2(struct work_struct *work) -{ - struct ioat_chan_common *chan; - struct ioat2_dma_chan *ioat; - - chan = container_of(work, struct ioat_chan_common, work.work); - ioat = container_of(chan, struct ioat2_dma_chan, base); - - /* ensure that ->tail points to the stalled descriptor - * (ioat->pending is set to 2 at this point so no new - * descriptors will be issued while we perform this cleanup) - */ - ioat2_cleanup(ioat); - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->ring_lock); - - /* set the tail to be re-issued */ - ioat->issued = ioat->tail; - ioat->dmacount = 0; - - dev_dbg(to_dev(&ioat->base), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); - - if (ioat2_ring_pending(ioat)) { - struct ioat_ring_ent *desc; - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - __ioat2_issue_pending(ioat); - } else - __ioat2_start_null_desc(ioat); - - spin_unlock_bh(&ioat->ring_lock); - spin_unlock_bh(&chan->cleanup_lock); - - dev_info(to_dev(chan), - "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, 1 << ioat->alloc_order); -} - -/** - * ioat2_reset_channel - restart a channel - * @ioat: IOAT DMA channel handle - */ -static void ioat2_reset_channel(struct ioat2_dma_chan *ioat) +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) { - u32 chansts, chanerr; struct ioat_chan_common *chan = &ioat->base; - u16 active; - - spin_lock_bh(&ioat->ring_lock); - active = ioat2_ring_active(ioat); - spin_unlock_bh(&ioat->ring_lock); - if (!active) - return; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; - if (chanerr) { - dev_err(to_dev(chan), - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(chan), chansts, chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - } - - spin_lock_bh(&ioat->ring_lock); - ioat->pending = 2; - writeb(IOAT_CHANCMD_RESET, - chan->reg_base - + IOAT_CHANCMD_OFFSET(chan->device->version)); - spin_unlock_bh(&ioat->ring_lock); - schedule_delayed_work(&chan->work, RESET_DELAY); -} - -/** - * ioat2_chan_watchdog - watch for stuck channels - */ -static void ioat2_chan_watchdog(struct work_struct *work) -{ - struct ioatdma_device *device = - container_of(work, struct ioatdma_device, work.work); - struct ioat2_dma_chan *ioat; - struct ioat_chan_common *chan; - u16 active; - int i; - - dev_dbg(&device->pdev->dev, "%s\n", __func__); - - for (i = 0; i < device->common.chancnt; i++) { - chan = ioat_chan_by_index(device, i); - ioat = container_of(chan, struct ioat2_dma_chan, base); - - /* - * for version 2.0 if there are descriptors yet to be processed - * and the last completed hasn't changed since the last watchdog - * if they haven't hit the pending level - * issue the pending to push them through - * else - * try resetting the channel - */ - spin_lock_bh(&ioat->ring_lock); - active = ioat2_ring_active(ioat); - spin_unlock_bh(&ioat->ring_lock); - - if (active && - chan->last_completion && - chan->last_completion == chan->watchdog_completion) { - - if (ioat->pending == 1) - ioat2_issue_pending(&chan->common); - else { - ioat2_reset_channel(ioat); - chan->watchdog_completion = 0; - } - } else { - chan->last_compl_desc_addr_hw = 0; - chan->watchdog_completion = chan->last_completion; - } - chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; - } - schedule_delayed_work(&device->work, WATCHDOG_DELAY); -} - -/** - * ioat2_cleanup - clean finished descriptors (advance tail pointer) - * @chan: ioat channel to be cleaned up - */ -static void ioat2_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; + struct dma_async_tx_descriptor *tx; struct ioat_ring_ent *desc; bool seen_current = false; u16 active; int i; - struct dma_async_tx_descriptor *tx; - - prefetch(chan->completion); - - spin_lock_bh(&chan->cleanup_lock); - phys_complete = ioat_get_current_completion(chan); - if (phys_complete == chan->last_completion) { - spin_unlock_bh(&chan->cleanup_lock); - /* - * perhaps we're stuck so hard that the watchdog can't go off? - * try to catch it after WATCHDOG_DELAY seconds - */ - if (chan->device->version < IOAT_VER_3_0) { - unsigned long tmo; - - tmo = chan->last_completion_time + HZ*WATCHDOG_DELAY; - if (time_after(jiffies, tmo)) { - ioat2_chan_watchdog(&(chan->device->work.work)); - chan->last_completion_time = jiffies; - } - } - return; - } - chan->last_completion_time = jiffies; - - spin_lock_bh(&ioat->ring_lock); dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued); @@ -329,10 +162,42 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) } ioat->tail += i; BUG_ON(!seen_current); /* no active descs have written a completion? */ - spin_unlock_bh(&ioat->ring_lock); chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + } +} + +/** + * ioat2_cleanup - clean finished descriptors (advance tail pointer) + * @chan: ioat channel to be cleaned up + */ +static void ioat2_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); spin_unlock_bh(&chan->cleanup_lock); } @@ -344,6 +209,90 @@ static void ioat2_cleanup_tasklet(unsigned long data) writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } +static void __restart_chan(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + + /* set the tail to be re-issued */ + ioat->issued = ioat->tail; + ioat->dmacount = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); + + if (ioat2_ring_pending(ioat)) { + struct ioat_ring_ent *desc; + + desc = ioat2_get_ring_ent(ioat, ioat->tail); + ioat2_set_chainaddr(ioat, desc->txd.phys); + __ioat2_issue_pending(ioat); + } else + __ioat2_start_null_desc(ioat); +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + status = ioat_chansts(chan); + cpu_relax(); + } + + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __restart_chan(ioat); +} + +static void ioat2_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; + + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + BUG_ON(is_ioat_bug(chanerr)); + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat2_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } + spin_unlock_bh(&chan->cleanup_lock); +} + /** * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated @@ -381,7 +330,7 @@ static int ioat2_enumerate_channels(struct ioatdma_device *device) break; ioat_init_channel(device, &ioat->base, i, - ioat2_reset_part2, + ioat2_timer_event, ioat2_cleanup_tasklet, (unsigned long) ioat); ioat->xfercap_log = xfercap_log; @@ -395,6 +344,7 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) { struct dma_chan *c = tx->chan; struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; dma_cookie_t cookie = c->cookie; cookie++; @@ -404,6 +354,8 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) c->cookie = cookie; dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); ioat2_update_pending(ioat); spin_unlock_bh(&ioat->ring_lock); @@ -543,9 +495,18 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d ioat->issued); spin_unlock_bh(&ioat->ring_lock); - /* do direct reclaim in the allocation failure case */ - ioat2_cleanup(ioat); - + /* progress reclaim in the allocation failure case we + * may be called under bh_disabled so we need to trigger + * the timer event directly + */ + spin_lock_bh(&chan->cleanup_lock); + if (jiffies > chan->timer.expires && + timer_pending(&chan->timer)) { + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&chan->cleanup_lock); + ioat2_timer_event((unsigned long) ioat); + } else + spin_unlock_bh(&chan->cleanup_lock); return -ENOMEM; } @@ -624,6 +585,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) return; tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); ioat2_cleanup(ioat); /* Delay 100ms after reset to allow internal DMA logic to quiesce @@ -663,10 +625,6 @@ static void ioat2_free_chan_resources(struct dma_chan *c) chan->completion_dma = 0; ioat->pending = 0; ioat->dmacount = 0; - chan->watchdog_completion = 0; - chan->last_compl_desc_addr_hw = 0; - chan->watchdog_tcp_cookie = 0; - chan->watchdog_last_tcp_cookie = 0; } static enum dma_status @@ -716,9 +674,6 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) if (dca) device->dca = ioat2_dca_init(pdev, device->reg_base); - INIT_DELAYED_WORK(&device->work, ioat2_chan_watchdog); - schedule_delayed_work(&device->work, WATCHDOG_DELAY); - return err; } diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index bdde5373cf6..73b04a2eb4b 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -127,6 +127,16 @@ ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) return ioat->ring[idx & ioat2_ring_mask(ioat)]; } +static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 4380f6fbf05..e4334a19538 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -101,11 +101,11 @@ #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) #define IOAT_CHANSTS_SOFT_ERR 0x10ULL #define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x7ULL -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 +#define IOAT_CHANSTS_STATUS 0x7ULL +#define IOAT_CHANSTS_ACTIVE 0x0 +#define IOAT_CHANSTS_DONE 0x1 +#define IOAT_CHANSTS_SUSPENDED 0x2 +#define IOAT_CHANSTS_HALTED 0x3 @@ -208,18 +208,18 @@ #define IOAT_CDAR_OFFSET_HIGH 0x24 #define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ -#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 -#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 +#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001 +#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002 +#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004 +#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008 #define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 #define IOAT_CHANERR_CHANCMD_ERR 0x0020 #define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 #define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 #define IOAT_CHANERR_READ_DATA_ERR 0x0100 #define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 -#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 -#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 +#define IOAT_CHANERR_CONTROL_ERR 0x0400 +#define IOAT_CHANERR_LENGTH_ERR 0x0800 #define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 #define IOAT_CHANERR_SOFT_ERR 0x4000 -- cgit v1.2.3 From a309218acee8606f7e235da20cc826eb06d9b0f6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:02:01 -0700 Subject: ioat2,3: dynamically resize descriptor ring Increment the allocation order of the descriptor ring every time we run out of descriptors up to a maximum of allocation order specified by the module parameter 'ioat_max_alloc_order'. After each idle period decrement the allocation order to a minimum order of 'ioat_ring_alloc_order' (i.e. the default ring size, tunable as a module parameter). Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.h | 1 + drivers/dma/ioat/dma_v2.c | 215 +++++++++++++++++++++++++++++++++++++++------- drivers/dma/ioat/dma_v2.h | 2 + 3 files changed, 187 insertions(+), 31 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index dbfccac3e80..d9d6a7e3cd7 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -88,6 +88,7 @@ struct ioat_chan_common { #define IOAT_RESET_PENDING 2 struct timer_list timer; #define COMPLETION_TIMEOUT msecs_to_jiffies(100) + #define IDLE_TIMEOUT msecs_to_jiffies(2000) #define RESET_DELAY msecs_to_jiffies(100) struct ioatdma_device *device; dma_addr_t completion_dma; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 72e59a0d0f2..460b7730133 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -43,6 +43,10 @@ static int ioat_ring_alloc_order = 8; module_param(ioat_ring_alloc_order, int, 0644); MODULE_PARM_DESC(ioat_ring_alloc_order, "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); +static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat2+: upper limit for dynamic ring resizing (default: n=16)"); static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) { @@ -168,6 +172,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", __func__); clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); } } @@ -253,6 +258,8 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) __restart_chan(ioat); } +static bool reshape_ring(struct ioat2_dma_chan *ioat, int order); + static void ioat2_timer_event(unsigned long data) { struct ioat2_dma_chan *ioat = (void *) data; @@ -289,6 +296,23 @@ static void ioat2_timer_event(unsigned long data) mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); } spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); } spin_unlock_bh(&chan->cleanup_lock); } @@ -362,7 +386,7 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) return cookie; } -static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) +static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) { struct ioat_dma_descriptor *hw; struct ioat_ring_ent *desc; @@ -370,12 +394,12 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) dma_addr_t phys; dma = to_ioatdma_device(chan->device); - hw = pci_pool_alloc(dma->dma_pool, GFP_KERNEL, &phys); + hw = pci_pool_alloc(dma->dma_pool, flags, &phys); if (!hw) return NULL; memset(hw, 0, sizeof(*hw)); - desc = kzalloc(sizeof(*desc), GFP_KERNEL); + desc = kzalloc(sizeof(*desc), flags); if (!desc) { pci_pool_free(dma->dma_pool, hw, phys); return NULL; @@ -397,6 +421,42 @@ static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *cha kfree(desc); } +static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) +{ + struct ioat_ring_ent **ring; + int descs = 1 << order; + int i; + + if (order > ioat_get_max_alloc_order()) + return NULL; + + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat2_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat2_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); + } + + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; +} + /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring * @chan: channel to be initialized */ @@ -406,8 +466,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; u32 chanerr; - int descs; - int i; + int order; /* have we already been set up? */ if (ioat->ring) @@ -435,32 +494,10 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) writel(((u64) chan->completion_dma) >> 32, chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - ioat->alloc_order = ioat_get_alloc_order(); - descs = 1 << ioat->alloc_order; - - /* allocate the array to hold the software ring */ - ring = kcalloc(descs, sizeof(*ring), GFP_KERNEL); + order = ioat_get_alloc_order(); + ring = ioat2_alloc_ring(c, order, GFP_KERNEL); if (!ring) return -ENOMEM; - for (i = 0; i < descs; i++) { - ring[i] = ioat2_alloc_ring_ent(c); - if (!ring[i]) { - while (i--) - ioat2_free_ring_ent(ring[i], c); - kfree(ring); - return -ENOMEM; - } - set_desc_id(ring[i], i); - } - - /* link descs */ - for (i = 0; i < descs-1; i++) { - struct ioat_ring_ent *next = ring[i+1]; - struct ioat_dma_descriptor *hw = ring[i]->hw; - - hw->next = next->txd.phys; - } - ring[i]->hw->next = ring[0]->txd.phys; spin_lock_bh(&ioat->ring_lock); ioat->ring = ring; @@ -468,12 +505,120 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) ioat->issued = 0; ioat->tail = 0; ioat->pending = 0; + ioat->alloc_order = order; spin_unlock_bh(&ioat->ring_lock); tasklet_enable(&chan->cleanup_task); ioat2_start_null_desc(ioat); - return descs; + return 1 << ioat->alloc_order; +} + +static bool reshape_ring(struct ioat2_dma_chan *ioat, int order) +{ + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct ioat_chan_common *chan = &ioat->base; + struct dma_chan *c = &chan->common; + const u16 curr_size = ioat2_ring_mask(ioat) + 1; + const u16 active = ioat2_ring_active(ioat); + const u16 new_size = 1 << order; + struct ioat_ring_ent **ring; + u16 i; + + if (order > ioat_get_max_alloc_order()) + return false; + + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring + */ + if (active >= new_size) + return false; + + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; + + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ioat2_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } + + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; + + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat2_get_ring_ent(ioat, ioat->tail+i); + ioat2_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; + } + + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, new_size); + + kfree(ioat->ring); + ioat->ring = ring; + ioat->alloc_order = order; + + return true; } /** @@ -487,7 +632,15 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d struct ioat_chan_common *chan = &ioat->base; spin_lock_bh(&ioat->ring_lock); - if (unlikely(ioat2_ring_space(ioat) < num_descs)) { + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + while (unlikely(ioat2_ring_space(ioat) <= num_descs)) { + if (reshape_ring(ioat, ioat->alloc_order + 1) && + ioat2_ring_space(ioat) > num_descs) + break; + if (printk_ratelimit()) dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n", diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 73b04a2eb4b..9baa3d6065f 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -37,6 +37,8 @@ extern int ioat_pending_level; #define IOAT_MAX_ORDER 16 #define ioat_get_alloc_order() \ (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) /* struct ioat2_dma_chan - ioat v2 / v3 channel attributes * @base: common ioat channel parameters -- cgit v1.2.3 From 4b652f0db3be891c7b76b109c3b55003b920fc96 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 12:02:15 -0700 Subject: net_dma: poll for a descriptor after allocation failure Handle descriptor allocation failures by polling for a descriptor. The driver will force forward progress when polled. In the best case this polling interval will be the time it takes for one dma memcpy transaction to complete. In the worst case, channel hang, we will need to wait 100ms for the cleanup watchdog to fire (ioatdma driver). Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/iovlock.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c index 9f6fe46a9b8..c0a272c7368 100644 --- a/drivers/dma/iovlock.c +++ b/drivers/dma/iovlock.c @@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, iov_byte_offset, kdata, copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } len -= copy; iov[iovec_idx].iov_len -= copy; @@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, page, offset, copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } len -= copy; iov[iovec_idx].iov_len -= copy; -- cgit v1.2.3 From 138f4c359d23d2ec38d18bd70dd9613ae515fe93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:51 -0700 Subject: dmaengine, async_tx: add a "no channel switch" allocator Channel switching is problematic for some dmaengine drivers as the architecture precludes separating the ->prep from ->submit. In these cases the driver can select ASYNC_TX_DISABLE_CHANNEL_SWITCH to modify the async_tx allocator to only return channels that support all of the required asynchronous operations. For example MD_RAID456=y selects support for asynchronous xor, xor validate, pq, pq validate, and memcpy. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=y any channel with all these capabilities is marked DMA_ASYNC_TX allowing async_tx_find_channel() to quickly locate compatible channels with the guarantee that dependency chains will remain on one channel. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=n async_tx_find_channel() may select channels that lead to operation chains that need to cross channel boundaries using the async_tx channel switch capability. Signed-off-by: Dan Williams --- drivers/dma/Kconfig | 4 ++++ drivers/dma/dmaengine.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 912a51b5cbd..ddcd9793b25 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -17,11 +17,15 @@ if DMADEVICES comment "DMA Devices" +config ASYNC_TX_DISABLE_CHANNEL_SWITCH + bool + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 select DMA_ENGINE select DCA + select ASYNC_TX_DISABLE_CHANNEL_SWITCH help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 96598479eec..d5bc628d207 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -608,6 +608,40 @@ void dmaengine_put(void) } EXPORT_SYMBOL(dmaengine_put); +static bool device_has_all_tx_types(struct dma_device *device) +{ + /* A device that satisfies this test has channels that will never cause + * an async_tx channel switch event as all possible operation types can + * be handled. + */ + #ifdef CONFIG_ASYNC_TX_DMA + if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) + if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + if (!dma_has_cap(DMA_XOR, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + if (!dma_has_cap(DMA_PQ, device->cap_mask)) + return false; + #endif + + return true; +} + static int get_dma_id(struct dma_device *device) { int rc; @@ -665,6 +699,12 @@ int dma_async_device_register(struct dma_device *device) BUG_ON(!device->device_issue_pending); BUG_ON(!device->dev); + /* note: this only matters in the + * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case + */ + if (device_has_all_tx_types(device)) + dma_cap_set(DMA_ASYNC_TX, device->cap_mask); + idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); if (!idr_ref) return -ENOMEM; -- cgit v1.2.3 From 9308add6ea4fedeba37b0d7c4630a542bd34f214 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:52 -0700 Subject: dmaengine: cleanup unused transaction types No drivers currently implement these operation types, so they can be deleted. Signed-off-by: Dan Williams --- drivers/dma/iop-adma.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 4496bc60666..cecb6d657d5 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1256,15 +1256,12 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) } dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " - "( %s%s%s%s%s%s%s%s%s%s)\n", + "( %s%s%s%s%s%s%s)\n", dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", - dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", - dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", - dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); -- cgit v1.2.3 From 83544ae9f3991bfc7d5e0fe9a3008cd05a8d57b7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:53 -0700 Subject: dmaengine, async_tx: support alignment checks Some engines have transfer size and address alignment restrictions. Add a per-operation alignment property to struct dma_device that the async routines and dmatest can use to check alignment capabilities. Signed-off-by: Dan Williams --- drivers/dma/dmatest.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 58e49e41c7a..a3722a7384b 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -288,6 +288,7 @@ static int dmatest_func(void *data) dma_addr_t dma_dsts[dst_cnt]; struct completion cmp; unsigned long tmo = msecs_to_jiffies(3000); + u8 align = 0; total_tests++; @@ -295,6 +296,18 @@ static int dmatest_func(void *data) src_off = dmatest_random() % (test_buf_size - len + 1); dst_off = dmatest_random() % (test_buf_size - len + 1); + /* honor alignment restrictions */ + if (thread->type == DMA_MEMCPY) + align = dev->copy_align; + else if (thread->type == DMA_XOR) + align = dev->xor_align; + else if (thread->type == DMA_PQ) + align = dev->pq_align; + + len = (len >> align) << align; + src_off = (src_off >> align) << align; + dst_off = (dst_off >> align) << align; + dmatest_init_srcs(thread->srcs, src_off, len); dmatest_init_dsts(thread->dsts, dst_off, len); @@ -311,6 +324,7 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); } + if (thread->type == DMA_MEMCPY) tx = dev->device_prep_dma_memcpy(chan, dma_dsts[0] + dst_off, -- cgit v1.2.3 From 128f2d567f906d38b11d993d8d97b9b988848e26 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:53 -0700 Subject: ioat2+: add fence support In preparation for adding more operation types to the ioat3 path the driver needs to honor the DMA_PREP_FENCE flag. For example the async_tx api will hand xor->memcpy->xor chains to the driver with the 'fence' flag set on the first xor and the memcpy operation. This flag in turn sets the 'fence' flag in the descriptor control field telling the hardware that future descriptors in the chain depend on the result of the current descriptor, so wait for all writes to complete before starting the next operation. Note that ioat1 does not prefetch the descriptor chain, so does not require/support fenced operations. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 460b7730133..568923c5dde 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -710,6 +710,7 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, desc->txd.flags = flags; desc->len = total_len; hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); hw->ctl_f.compl_write = 1; dump_desc_dbg(ioat, desc); /* we leave the channel locked to ensure in order submission */ -- cgit v1.2.3 From 2aec048cdc4a5a81163a42a61df903f76a27e737 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:54 -0700 Subject: ioat3: hardware version 3.2 register / descriptor definitions ioat3.2 adds raid5 and raid6 offload capabilities. Signed-off-by: Tom Picard Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.h | 2 +- drivers/dma/ioat/dma_v2.h | 26 +++++++- drivers/dma/ioat/hw.h | 142 +++++++++++++++++++++++++++++++++++++++++++ drivers/dma/ioat/registers.h | 17 ++++++ 4 files changed, 185 insertions(+), 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index d9d6a7e3cd7..0d94e7804c1 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -155,7 +155,7 @@ ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, /** * struct ioat_desc_sw - wrapper around hardware descriptor - * @hw: hardware DMA descriptor + * @hw: hardware DMA descriptor (for memcpy) * @node: this descriptor will either be on the free list, * or attached to a transaction list (async_tx.tx_list) * @txd: the generic software descriptor for all engines diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 9baa3d6065f..ed4bb82a283 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -114,8 +114,32 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len return num_descs; } +/** + * struct ioat_ring_ent - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @fill: hardware fill descriptor + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor + * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @id: identifier for debug + */ + struct ioat_ring_ent { - struct ioat_dma_descriptor *hw; + union { + struct ioat_dma_descriptor *hw; + struct ioat_fill_descriptor *fill; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; struct dma_async_tx_descriptor txd; size_t len; #ifdef DEBUG diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 7481fb13ce0..99afb12bd40 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -37,6 +37,7 @@ #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ +#define IOAT_VER_3_2 0x32 /* Version 3.2 */ struct ioat_dma_descriptor { uint32_t size; @@ -55,6 +56,7 @@ struct ioat_dma_descriptor { unsigned int dest_dca:1; unsigned int hint:1; unsigned int rsvd2:13; + #define IOAT_OP_COPY 0x00 unsigned int op:8; } ctl_f; }; @@ -70,4 +72,144 @@ struct ioat_dma_descriptor { }; uint64_t user2; }; + +struct ioat_fill_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int rsvd:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int rsvd2:2; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int rsvd4:15; + #define IOAT_OP_FILL 0x01 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_data; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t next_dst_addr; + uint64_t user1; + uint64_t user2; +}; + +struct ioat_xor_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd:13; + #define IOAT_OP_XOR 0x87 + #define IOAT_OP_XOR_VAL 0x88 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; +}; + +struct ioat_xor_ext_descriptor { + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t next; + uint64_t rsvd[4]; +}; + +struct ioat_pq_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:11; + #define IOAT_OP_PQ 0x89 + #define IOAT_OP_PQ_VAL 0x8a + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint8_t coef[8]; + uint64_t q_addr; +}; + +struct ioat_pq_ext_descriptor { + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t next; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t rsvd[2]; +}; + +struct ioat_pq_update_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:3; + unsigned int coef:8; + #define IOAT_OP_PQ_UP 0x8b + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t p_src; + uint64_t q_src; + uint64_t q_addr; +}; + +struct ioat_raw_descriptor { + uint64_t field[8]; +}; #endif diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index e4334a19538..85d04b8c563 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -64,6 +64,20 @@ #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ #define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 +#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002 +#define IOAT_DEVICE_MEMORY_BYPASS 0x0004 +#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008 + +#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */ +#define IOAT_CAP_PAGE_BREAK 0x00000001 +#define IOAT_CAP_CRC 0x00000002 +#define IOAT_CAP_SKIP_MARKER 0x00000004 +#define IOAT_CAP_DCA 0x00000010 +#define IOAT_CAP_CRC_MOVE 0x00000020 +#define IOAT_CAP_FILL_BLOCK 0x00000040 +#define IOAT_CAP_APIC 0x00000080 +#define IOAT_CAP_XOR 0x00000100 +#define IOAT_CAP_PQ 0x00000200 #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ @@ -224,6 +238,9 @@ #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 #define IOAT_CHANERR_SOFT_ERR 0x4000 #define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 +#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000 +#define IOAT_CHANERR_XOR_Q_ERR 0x20000 +#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000 #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ -- cgit v1.2.3 From bf40a6869c9198bdf56fe173961feb89e9f0d961 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:55 -0700 Subject: ioat3: split ioat3 support to its own file, add memset Up until this point the driver for Intel(R) QuickData Technology engines, specification versions 2 and 3, were mostly identical save for a few quirks. Version 3.2 hardware adds many new capabilities (like raid offload support) requiring some infrastructure that is not relevant for v2. For better code organization of the new funcionality move v3 and v3.2 support to its own file dma_v3.c, and export some routines from the base files (dma.c and dma_v2.c) that can be reused directly. The first new capability included in this code reorganization is support for v3.2 memset operations. Signed-off-by: Dan Williams --- drivers/dma/ioat/Makefile | 2 +- drivers/dma/ioat/dma.c | 11 -- drivers/dma/ioat/dma.h | 16 ++ drivers/dma/ioat/dma_v2.c | 94 +++--------- drivers/dma/ioat/dma_v2.h | 13 ++ drivers/dma/ioat/dma_v3.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/dma/ioat/pci.c | 2 +- 7 files changed, 421 insertions(+), 84 deletions(-) create mode 100644 drivers/dma/ioat/dma_v3.c (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile index 205a639e84d..8997d3fb905 100644 --- a/drivers/dma/ioat/Makefile +++ b/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-objs := pci.o dma.o dma_v2.o dca.o +ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 17a518d0386..70262c0131d 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -538,17 +538,6 @@ static void ioat1_cleanup_tasklet(unsigned long data) writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); } -static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, - int direction, enum dma_ctrl_flags flags, bool dst) -{ - if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || - (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) - pci_unmap_single(pdev, addr, len, direction); - else - pci_unmap_page(pdev, addr, len, direction); -} - - void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, size_t len, struct ioat_dma_descriptor *hw) { diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 0d94e7804c1..c6d58bf541d 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -60,6 +60,10 @@ * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) * @enumerate_channels: hw version specific channel enumeration + * @cleanup_tasklet: select between the v2 and v3 cleanup routines + * @timer_fn: select between the v2 and v3 timer watchdog routines + * + * Note: the v3 cleanup routine supports raid operations */ struct ioatdma_device { @@ -74,6 +78,8 @@ struct ioatdma_device { struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); + void (*cleanup_tasklet)(unsigned long data); + void (*timer_fn)(unsigned long data); }; struct ioat_chan_common { @@ -287,6 +293,16 @@ static inline bool is_ioat_bug(unsigned long err) IOAT_CHANERR_LENGTH_ERR)); } +static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, + int direction, enum dma_ctrl_flags flags, bool dst) +{ + if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || + (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) + pci_unmap_single(pdev, addr, len, direction); + else + pci_unmap_page(pdev, addr, len, direction); +} + int __devinit ioat_probe(struct ioatdma_device *device); int __devinit ioat_register(struct ioatdma_device *device); int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 568923c5dde..7492e9165e0 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -39,7 +39,7 @@ #include "registers.h" #include "hw.h" -static int ioat_ring_alloc_order = 8; +int ioat_ring_alloc_order = 8; module_param(ioat_ring_alloc_order, int, 0644); MODULE_PARM_DESC(ioat_ring_alloc_order, "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); @@ -63,7 +63,7 @@ static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); } -static void ioat2_issue_pending(struct dma_chan *chan) +void ioat2_issue_pending(struct dma_chan *chan) { struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); @@ -214,7 +214,7 @@ static void ioat2_cleanup_tasklet(unsigned long data) writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } -static void __restart_chan(struct ioat2_dma_chan *ioat) +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; @@ -255,11 +255,9 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) if (ioat_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); - __restart_chan(ioat); + __ioat2_restart_chan(ioat); } -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order); - static void ioat2_timer_event(unsigned long data) { struct ioat2_dma_chan *ioat = (void *) data; @@ -321,7 +319,7 @@ static void ioat2_timer_event(unsigned long data) * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated */ -static int ioat2_enumerate_channels(struct ioatdma_device *device) +int ioat2_enumerate_channels(struct ioatdma_device *device) { struct ioat2_dma_chan *ioat; struct device *dev = &device->pdev->dev; @@ -354,8 +352,8 @@ static int ioat2_enumerate_channels(struct ioatdma_device *device) break; ioat_init_channel(device, &ioat->base, i, - ioat2_timer_event, - ioat2_cleanup_tasklet, + device->timer_fn, + device->cleanup_tasklet, (unsigned long) ioat); ioat->xfercap_log = xfercap_log; spin_lock_init(&ioat->ring_lock); @@ -460,7 +458,7 @@ static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gf /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring * @chan: channel to be initialized */ -static int ioat2_alloc_chan_resources(struct dma_chan *c) +int ioat2_alloc_chan_resources(struct dma_chan *c) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; @@ -514,7 +512,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) return 1 << ioat->alloc_order; } -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order) +bool reshape_ring(struct ioat2_dma_chan *ioat, int order) { /* reshape differs from normal ring allocation in that we want * to allocate a new software ring while only @@ -627,7 +625,7 @@ static bool reshape_ring(struct ioat2_dma_chan *ioat, int order) * @ioat: ioat2,3 channel (ring) to operate on * @num_descs: allocation length */ -static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) { struct ioat_chan_common *chan = &ioat->base; @@ -655,9 +653,11 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d spin_lock_bh(&chan->cleanup_lock); if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) { + struct ioatdma_device *device = chan->device; + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); spin_unlock_bh(&chan->cleanup_lock); - ioat2_timer_event((unsigned long) ioat); + device->timer_fn((unsigned long) ioat); } else spin_unlock_bh(&chan->cleanup_lock); return -ENOMEM; @@ -670,7 +670,7 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d return 0; /* with ioat->ring_lock held */ } -static struct dma_async_tx_descriptor * +struct dma_async_tx_descriptor * ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) { @@ -722,11 +722,11 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, * ioat2_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned */ -static void ioat2_free_chan_resources(struct dma_chan *c) +void ioat2_free_chan_resources(struct dma_chan *c) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *ioatdma_device = chan->device; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *desc; const u16 total_descs = 1 << ioat->alloc_order; int descs; @@ -740,7 +740,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) tasklet_disable(&chan->cleanup_task); del_timer_sync(&chan->timer); - ioat2_cleanup(ioat); + device->cleanup_tasklet((unsigned long) ioat); /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. @@ -770,8 +770,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) kfree(ioat->ring); ioat->ring = NULL; ioat->alloc_order = 0; - pci_pool_free(ioatdma_device->completion_pool, - chan->completion, + pci_pool_free(device->completion_pool, chan->completion, chan->completion_dma); spin_unlock_bh(&ioat->ring_lock); @@ -781,16 +780,17 @@ static void ioat2_free_chan_resources(struct dma_chan *c) ioat->dmacount = 0; } -static enum dma_status +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioatdma_device *device = ioat->base.device; if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) return DMA_SUCCESS; - ioat2_cleanup(ioat); + device->cleanup_tasklet((unsigned long) ioat); return ioat_is_complete(c, cookie, done, used); } @@ -804,6 +804,8 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) int err; device->enumerate_channels = ioat2_enumerate_channels; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; @@ -830,53 +832,3 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) return err; } - -int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - u16 dev_id; - - device->enumerate_channels = ioat2_enumerate_channels; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_is_tx_complete = ioat2_is_complete; - - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - - err = ioat_probe(device); - if (err) - return err; - ioat_set_tcp_copy_break(262144); - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return err; -} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index ed4bb82a283..bde57ddf555 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -27,6 +27,7 @@ extern int ioat_pending_level; +extern int ioat_ring_alloc_order; /* * workaround for IOAT ver.3.0 null descriptor issue @@ -167,4 +168,16 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs); +int ioat2_enumerate_channels(struct ioatdma_device *device); +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +void ioat2_issue_pending(struct dma_chan *chan); +int ioat2_alloc_chan_resources(struct dma_chan *c); +void ioat2_free_chan_resources(struct dma_chan *c); +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used); +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); +bool reshape_ring(struct ioat2_dma_chan *ioat, int order); #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c new file mode 100644 index 00000000000..b223d66b97e --- /dev/null +++ b/drivers/dma/ioat/dma_v3.c @@ -0,0 +1,367 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * BSD LICENSE + * + * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Support routines for v3+ hardware + */ + +#include +#include +#include +#include "registers.h" +#include "hw.h" +#include "dma.h" +#include "dma_v2.h" + +static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc) +{ + struct ioat_chan_common *chan = &ioat->base; + struct pci_dev *pdev = chan->device->pdev; + size_t len = desc->len; + size_t offset = len - desc->hw->size; + struct dma_async_tx_descriptor *tx = &desc->txd; + enum dma_ctrl_flags flags = tx->flags; + + switch (desc->hw->ctl_f.op) { + case IOAT_OP_COPY: + ioat_dma_unmap(chan, flags, len, desc->hw); + break; + case IOAT_OP_FILL: { + struct ioat_fill_descriptor *hw = desc->fill; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + default: + dev_err(&pdev->dev, "%s: unknown op type: %#x\n", + __func__, desc->hw->ctl_f.op); + } +} + + +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; + + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); + tx = &desc->txd; + if (tx->cookie) { + chan->completed_cookie = tx->cookie; + ioat3_dma_unmap(ioat, desc); + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +static void ioat3_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat3_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat3_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + status = ioat_chansts(chan); + cpu_relax(); + } + + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +static void ioat3_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; + + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + BUG_ON(is_ioat_bug(chanerr)); + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat3_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +static enum dma_status +ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat3_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_fill_descriptor *fill; + int num_descs; + u64 src_data = (0x0101010101010101ULL) * (value & 0xff); + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + for (i = 0; i < num_descs; i++) { + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + fill = desc->fill; + + fill->size = xfer_size; + fill->src_data = src_data; + fill->dst_addr = dest; + fill->ctl = 0; + fill->ctl_f.op = IOAT_OP_FILL; + + len -= xfer_size; + dest += xfer_size; + dump_desc_dbg(ioat, desc); + } + + desc->txd.flags = flags; + desc->len = total_len; + fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + fill->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + u16 dev_id; + u32 cap; + + device->enumerate_channels = ioat2_enumerate_channels; + device->cleanup_tasklet = ioat3_cleanup_tasklet; + device->timer_fn = ioat3_timer_event; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat3_is_complete; + cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + if (cap & IOAT_CAP_FILL_BLOCK) { + dma_cap_set(DMA_MEMSET, dma->cap_mask); + dma->device_prep_dma_memset = ioat3_prep_memset_lock; + } + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return 0; +} diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index c4e43226925..0f3ec6e97fe 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -36,7 +36,7 @@ #include "hw.h" MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); static struct pci_device_id ioat_pci_tbl[] = { -- cgit v1.2.3 From 5669e31c5a4874f1634bc0ffba268a6e2fa0cdd2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:56 -0700 Subject: ioat: add 'ioat' sysfs attributes Export driver attributes for diagnostic purposes: 'ring_size': total number of descriptors available to the engine 'ring_active': number of descriptors in-flight 'capabilities': supported operation types for this channel 'version': Intel(R) QuickData specfication revision This also allows some chattiness to be removed from the driver startup as this information is now available via sysfs. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 120 +++++++++++++++++++++++++++++++++++++++++++--- drivers/dma/ioat/dma.h | 12 +++++ drivers/dma/ioat/dma_v2.c | 33 +++++++++++++ drivers/dma/ioat/dma_v2.h | 1 + drivers/dma/ioat/dma_v3.c | 3 ++ drivers/dma/ioat/pci.c | 3 ++ 6 files changed, 166 insertions(+), 6 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 70262c0131d..cb08f810849 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -263,6 +263,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat->active += desc->hw->tx_cnt; ioat->pending += desc->hw->tx_cnt; if (ioat->pending >= ioat_pending_level) __ioat1_dma_memcpy_issue_pending(ioat); @@ -611,6 +612,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete) chan->completed_cookie = tx->cookie; tx->cookie = 0; ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + ioat->active -= desc->hw->tx_cnt; if (tx->callback) { tx->callback(tx->callback_param); tx->callback = NULL; @@ -1028,13 +1030,8 @@ int __devinit ioat_probe(struct ioatdma_device *device) dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->dev = &pdev->dev; - dev_err(dev, "Intel(R) I/OAT DMA Engine found," - " %d channels, device version 0x%02x, driver version %s\n", - dma->chancnt, device->version, IOAT_DMA_VERSION); - if (!dma->chancnt) { - dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: " - "zero channels detected\n"); + dev_err(dev, "zero channels detected\n"); goto err_setup_interrupts; } @@ -1085,6 +1082,113 @@ static void ioat1_intr_quirk(struct ioatdma_device *device) pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); } +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->desccount); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->active); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *device = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + device->version >> 4, device->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static struct attribute *ioat1_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioat_chan_common *chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + chan = container_of(kobj, struct ioat_chan_common, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&chan->common, page); +} + +struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +static struct kobj_type ioat1_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat1_attrs, +}; + +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (err) { + dev_warn(to_dev(chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { + kobject_del(&chan->kobj); + kobject_put(&chan->kobj); + } + } +} + int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1107,6 +1211,8 @@ int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) err = ioat_register(device); if (err) return err; + ioat_kobject_add(device, &ioat1_ktype); + if (dca) device->dca = ioat_dca_init(pdev, device->reg_base); @@ -1119,6 +1225,8 @@ void __devexit ioat_dma_remove(struct ioatdma_device *device) ioat_disable_interrupts(device); + ioat_kobject_del(device); + dma_async_device_unregister(dma); pci_pool_destroy(device->dma_pool); diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index c6d58bf541d..c2939b28918 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -92,6 +92,7 @@ struct ioat_chan_common { #define IOAT_COMPLETION_PENDING 0 #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 + #define IOAT_KOBJ_INIT_FAIL 3 struct timer_list timer; #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) @@ -100,8 +101,13 @@ struct ioat_chan_common { dma_addr_t completion_dma; u64 *completion; struct tasklet_struct cleanup_task; + struct kobject kobj; }; +struct ioat_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct dma_chan *, char *); +}; /** * struct ioat_dma_chan - internal representation of a DMA channel @@ -117,6 +123,7 @@ struct ioat_dma_chan { int pending; u16 desccount; + u16 active; }; static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) @@ -319,4 +326,9 @@ void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, size_t len, struct ioat_dma_descriptor *hw); bool ioat_cleanup_preamble(struct ioat_chan_common *chan, unsigned long *phys_complete); +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *device); +extern struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 7492e9165e0..80ce32de8d3 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -795,6 +795,36 @@ ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, return ioat_is_complete(c, cookie, done, used); } +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat2_ring_active(ioat)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat2_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat2_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat2_attrs, +}; + int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -827,6 +857,9 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) err = ioat_register(device); if (err) return err; + + ioat_kobject_add(device, &ioat2_ktype); + if (dca) device->dca = ioat2_dca_init(pdev, device->reg_base); diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index bde57ddf555..fa030f8e1f2 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -180,4 +180,5 @@ enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used); void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); bool reshape_ring(struct ioat2_dma_chan *ioat, int order); +extern struct kobj_type ioat2_ktype; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index b223d66b97e..22af78ec257 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -360,6 +360,9 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) err = ioat_register(device); if (err) return err; + + ioat_kobject_add(device, &ioat2_ktype); + if (dca) device->dca = ioat3_dca_init(pdev, device->reg_base); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 0f3ec6e97fe..626a508a4f8 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -168,6 +168,9 @@ static void __devexit ioat_remove(struct pci_dev *pdev) static int __init ioat_init_module(void) { + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + return pci_register_driver(&ioat_pci_driver); } module_init(ioat_init_module); -- cgit v1.2.3 From e61dacaeb3918cd00cd642e8fb0828324ac59819 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:57 -0700 Subject: ioat3: enable dca for completion writes Tag completion writes for direct cache access to reduce the latency of checking for descriptor completions. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 3 ++- drivers/dma/ioat/registers.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 22af78ec257..0913d11e09e 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -167,7 +167,8 @@ static void ioat3_cleanup_tasklet(unsigned long data) struct ioat2_dma_chan *ioat = (void *) data; ioat3_cleanup(ioat); - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); + writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN, + ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 85d04b8c563..97d26ea6d72 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -84,6 +84,7 @@ /* DMA Channel Registers */ #define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ #define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200 #define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 #define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 #define IOAT_CHANCTRL_ERR_INT_EN 0x0010 -- cgit v1.2.3 From b094ad3be564e7cc59cca4ff0256550d3a55dd3b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:57 -0700 Subject: ioat3: xor support ioat3.2 adds xor offload support for up to 8 sources. It can also perform an xor-zero-sum operation to validate whether all given sources sum to zero, without writing to a destination. Xor descriptors differ from memcpy in that one operation may require multiple descriptors depending on the number of sources. When the number of sources exceeds 5 an extended descriptor is needed. These descriptors need to be accounted for when updating the DMA_COUNT register. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 2 +- drivers/dma/ioat/dma_v2.h | 3 + drivers/dma/ioat/dma_v3.c | 218 ++++++++++++++++++++++++++++++++++++++++++- drivers/dma/ioat/registers.h | 2 + 4 files changed, 222 insertions(+), 3 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 80ce32de8d3..ee295d48ba2 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -48,7 +48,7 @@ module_param(ioat_ring_max_alloc_order, int, 0644); MODULE_PARM_DESC(ioat_ring_max_alloc_order, "ioat2+: upper limit for dynamic ring resizing (default: n=16)"); -static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) { void * __iomem reg_base = ioat->base.reg_base; diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index fa030f8e1f2..e23027d3dcb 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -127,6 +127,7 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len * @raw: hardware raw (un-typed) descriptor * @txd: the generic software descriptor for all engines * @len: total transaction length for unmap + * @result: asynchronous result of validate operations * @id: identifier for debug */ @@ -143,6 +144,7 @@ struct ioat_ring_ent { }; struct dma_async_tx_descriptor txd; size_t len; + enum sum_check_flags *result; #ifdef DEBUG int id; #endif @@ -180,5 +182,6 @@ enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used); void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); bool reshape_ring(struct ioat2_dma_chan *ioat, int order); +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); extern struct kobj_type ioat2_ktype; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 0913d11e09e..957c205f91d 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -64,8 +64,33 @@ #include "dma.h" #include "dma_v2.h" +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor descriptor + */ +static const u8 xor_idx_to_desc __read_mostly = 0xd0; +static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; + +static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + return raw->field[xor_idx_to_field[idx]]; +} + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, - struct ioat_ring_ent *desc) + struct ioat_ring_ent *desc, int idx) { struct ioat_chan_common *chan = &ioat->base; struct pci_dev *pdev = chan->device->pdev; @@ -86,13 +111,71 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, PCI_DMA_FROMDEVICE, flags, 1); break; } + case IOAT_OP_XOR_VAL: + case IOAT_OP_XOR: { + struct ioat_xor_descriptor *xor = desc->xor; + struct ioat_ring_ent *ext; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 5) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + xor_ex = ext->xor_ex; + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = xor_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* dest is a source in xor validate operations */ + if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_TODEVICE, flags, 1); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } default: dev_err(&pdev->dev, "%s: unknown op type: %#x\n", __func__, desc->hw->ctl_f.op); } } +static bool desc_has_ext(struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } + + return false; +} + +/** + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean + * + * The difference from the dma_v2.c __cleanup() is that this routine + * handles extended descriptors and dma-unmapping raid operations. + */ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) { struct ioat_chan_common *chan = &ioat->base; @@ -114,7 +197,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) tx = &desc->txd; if (tx->cookie) { chan->completed_cookie = tx->cookie; - ioat3_dma_unmap(ioat, desc); + ioat3_dma_unmap(ioat, desc, ioat->tail + i); tx->cookie = 0; if (tx->callback) { tx->callback(tx->callback_param); @@ -124,6 +207,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) if (tx->phys == phys_complete) seen_current = true; + + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } } ioat->tail += i; BUG_ON(!seen_current); /* no active descs have written a completion? */ @@ -309,6 +398,121 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, return &desc->txd; } +static struct dma_async_tx_descriptor * +__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + int num_descs; + int with_ext; + int i; + u16 idx; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) + /* pass */; + else + return NULL; + for (i = 0; i < num_descs; i += 1 + with_ext) { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + int s; + + desc = ioat2_get_ring_ent(ioat, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat, desc); + } + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -333,6 +537,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma_cap_set(DMA_MEMSET, dma->cap_mask); dma->device_prep_dma_memset = ioat3_prep_memset_lock; } + if (cap & IOAT_CAP_XOR) { + dma->max_xor = 8; + dma->xor_align = 2; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_xor_val; + } /* -= IOAT ver.3 workarounds =- */ /* Write CHANERRMSK_INT with 3E07h to mask out the errors diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 97d26ea6d72..63038e18ab0 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -243,6 +243,8 @@ #define IOAT_CHANERR_XOR_Q_ERR 0x20000 #define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000 +#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR) + #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ #endif /* _IOAT_REGISTERS_H_ */ -- cgit v1.2.3 From 9de6fc717bdc574cf5faf9d46ce0f9d6265c7952 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:58 -0700 Subject: ioat3: xor self test This adds a hardware specific self test to be called from ioat_probe. In the ioat3 case we will have tests for all the different raid operations, while ioat1 and ioat2 will continue to just test memcpy. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 5 +- drivers/dma/ioat/dma.h | 4 +- drivers/dma/ioat/dma_v2.c | 1 + drivers/dma/ioat/dma_v3.c | 275 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 282 insertions(+), 3 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index cb08f810849..32a757be75c 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -799,7 +799,7 @@ static void __devinit ioat_dma_test_callback(void *dma_async_param) * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. * @device: device to be tested */ -static int __devinit ioat_dma_self_test(struct ioatdma_device *device) +int __devinit ioat_dma_self_test(struct ioatdma_device *device) { int i; u8 *src; @@ -1039,7 +1039,7 @@ int __devinit ioat_probe(struct ioatdma_device *device) if (err) goto err_setup_interrupts; - err = ioat_dma_self_test(device); + err = device->self_test(device); if (err) goto err_self_test; @@ -1197,6 +1197,7 @@ int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) device->intr_quirk = ioat1_intr_quirk; device->enumerate_channels = ioat1_enumerate_channels; + device->self_test = ioat_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index c2939b28918..0e37e426c72 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -62,10 +62,10 @@ * @enumerate_channels: hw version specific channel enumeration * @cleanup_tasklet: select between the v2 and v3 cleanup routines * @timer_fn: select between the v2 and v3 timer watchdog routines + * @self_test: hardware version specific self test for each supported op type * * Note: the v3 cleanup routine supports raid operations */ - struct ioatdma_device { struct pci_dev *pdev; void __iomem *reg_base; @@ -80,6 +80,7 @@ struct ioatdma_device { int (*enumerate_channels)(struct ioatdma_device *device); void (*cleanup_tasklet)(unsigned long data); void (*timer_fn)(unsigned long data); + int (*self_test)(struct ioatdma_device *device); }; struct ioat_chan_common { @@ -313,6 +314,7 @@ static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, int __devinit ioat_probe(struct ioatdma_device *device); int __devinit ioat_register(struct ioatdma_device *device); int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat_dma_self_test(struct ioatdma_device *device); void __devexit ioat_dma_remove(struct ioatdma_device *device); struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index ee295d48ba2..12c64e1a7e3 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -836,6 +836,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) device->enumerate_channels = ioat2_enumerate_channels; device->cleanup_tasklet = ioat2_cleanup_tasklet; device->timer_fn = ioat2_timer_event; + device->self_test = ioat_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 957c205f91d..927c08b0886 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -513,6 +513,280 @@ ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, src_cnt - 1, len, flags); } +static void __devinit ioat3_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_addr, dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOAT_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT); + + if (!tx) { + dev_err(dev, "Self-test xor prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test xor setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test xor timed out\n"); + err = -ENODEV; + goto free_resources; + } + + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_err(dev, "Self-test xor failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE); + + /* skip validate if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + /* validate the sources with the destintation page */ + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + xor_val_srcs[i] = xor_srcs[i]; + xor_val_srcs[i] = dest; + + xor_val_result = 1; + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + /* skip memset if the capability is not present */ + if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) + goto free_resources; + + /* test memset */ + dma_addr = dma_map_page(dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, + DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test memset prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test memset setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test memset timed out\n"); + err = -ENODEV; + goto free_resources; + } + + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { + u32 *ptr = page_address(dest); + if (ptr[i]) { + dev_err(dev, "Self-test memset failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + + /* test for non-zero parity sum */ + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) +{ + int rc = ioat_dma_self_test(device); + + if (rc) + return rc; + + rc = ioat_xor_val_self_test(device); + if (rc) + return rc; + + return 0; +} + int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -526,6 +800,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) device->enumerate_channels = ioat2_enumerate_channels; device->cleanup_tasklet = ioat3_cleanup_tasklet; device->timer_fn = ioat3_timer_event; + device->self_test = ioat3_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; -- cgit v1.2.3 From d69d235b7da2778891640ee95efcd68075978904 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:59 -0700 Subject: ioat3: pq support ioat3.2 adds support for raid6 syndrome generation (xor sum of galois field multiplication products) using up to 8 sources. It can also perform an pq-zero-sum operation to validate whether the syndrome for a given set of sources matches a previously computed syndrome. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 265 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 264 insertions(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 927c08b0886..ca2af0fa1c3 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -69,10 +69,12 @@ #define src_cnt_to_hw(x) ((x) - 2) /* provide a lookup table for setting the source address in the base or - * extended descriptor of an xor descriptor + * extended descriptor of an xor or pq descriptor */ static const u8 xor_idx_to_desc __read_mostly = 0xd0; static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc __read_mostly = 0xf8; +static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) { @@ -89,6 +91,23 @@ static void xor_set_src(struct ioat_raw_descriptor *descs[2], raw->field[xor_idx_to_field[idx]] = addr + offset; } +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, int idx) { @@ -148,6 +167,58 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, PCI_DMA_FROMDEVICE, flags, 1); break; } + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ: { + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_ring_ent *ext; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 3) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + pq_ex = ext->pq_ex; + } + + /* in the 'continue' case don't unmap the dests as sources */ + if (dmaf_p_disabled_continue(flags)) + src_cnt--; + else if (dmaf_continue(flags)) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = pq_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* the dests are sources in pq validate operations */ + if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + } + break; + } default: dev_err(&pdev->dev, "%s: unknown op type: %#x\n", __func__, desc->hw->ctl_f.op); @@ -164,6 +235,12 @@ static bool desc_has_ext(struct ioat_ring_ent *desc) if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; + + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; } return false; @@ -513,6 +590,182 @@ ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, src_cnt - 1, len, flags); } +static void +dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(&ioat->base); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + int num_descs; + int with_ext; + int i, s; + u16 idx; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + + dev_dbg(to_dev(chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources + */ + if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) + /* pass */; + else + return NULL; + for (i = 0; i < num_descs; i += 1 + with_ext) { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat, desc, ext); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + /* handle the single source multiply case from the raid6 + * recovery path + */ + if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + } else + return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, + len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + static void __devinit ioat3_dma_test_callback(void *dma_async_param) { struct completion *cmp = dma_async_param; @@ -822,6 +1075,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma_cap_set(DMA_XOR_VAL, dma->cap_mask); dma->device_prep_dma_xor_val = ioat3_prep_xor_val; } + if (cap & IOAT_CAP_PQ) { + dma_set_maxpq(dma, 8, 0); + dma->pq_align = 2; + + dma_cap_set(DMA_PQ, dma->cap_mask); + dma->device_prep_dma_pq = ioat3_prep_pq; + + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + } /* -= IOAT ver.3 workarounds =- */ /* Write CHANERRMSK_INT with 3E07h to mask out the errors -- cgit v1.2.3 From ae786624c27411c1d38823f640b39f3d97412d5a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:43:00 -0700 Subject: ioat3: support xor via pq descriptors If a platform advertises pq capabilities, but not xor, then use ioat3_prep_pqxor and ioat3_prep_pqxor_val to simulate xor support. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index ca2af0fa1c3..bb57491f3fb 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -766,6 +766,44 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, flags); } +static struct dma_async_tx_descriptor * +ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + memset(scf, 0, src_cnt); + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[0] = dst; + pq[1] = ~0; + + return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[0] = src[0]; + pq[1] = ~0; + + return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, + len, flags); +} + static void __devinit ioat3_dma_test_callback(void *dma_async_param) { struct completion *cmp = dma_async_param; @@ -1084,6 +1122,17 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma_cap_set(DMA_PQ_VAL, dma->cap_mask); dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + + if (!(cap & IOAT_CAP_XOR)) { + dma->max_xor = 8; + dma->xor_align = 2; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_pqxor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; + } } /* -= IOAT ver.3 workarounds =- */ -- cgit v1.2.3 From 58c8649e0e25de511c4a66ce3fa38891e2ec4e9e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:43:00 -0700 Subject: ioat3: interrupt descriptor support The async_tx api uses the DMA_INTERRUPT operation type to terminate a chain of issued operations with a callback routine. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index bb57491f3fb..ff4afdc8e59 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -120,7 +120,8 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, switch (desc->hw->ctl_f.op) { case IOAT_OP_COPY: - ioat_dma_unmap(chan, flags, len, desc->hw); + if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ + ioat_dma_unmap(chan, flags, len, desc->hw); break; case IOAT_OP_FILL: { struct ioat_fill_descriptor *hw = desc->fill; @@ -804,6 +805,38 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, len, flags); } +static struct dma_async_tx_descriptor * +ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + u16 idx; + + if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) + desc = ioat2_get_ring_ent(ioat, idx); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + static void __devinit ioat3_dma_test_callback(void *dma_async_param) { struct completion *cmp = dma_async_param; @@ -1098,6 +1131,10 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; dma->device_free_chan_resources = ioat2_free_chan_resources; dma->device_is_tx_complete = ioat3_is_complete; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; + cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); if (cap & IOAT_CAP_FILL_BLOCK) { dma_cap_set(DMA_MEMSET, dma->cap_mask); -- cgit v1.2.3 From b265b11fc1a0bd6ae5a7fde12e374583a52ab326 Mon Sep 17 00:00:00 2001 From: Tom Picard Date: Tue, 8 Sep 2009 17:43:01 -0700 Subject: ioat3: ioat3.2 pci ids for Jasper Forest Jasper Forest introduces raid offload support via ioat3.2 support. When raid offload is enabled two (out of 8 channels) will report raid5/raid6 offload capabilities. The remaining channels will only report ioat3.0 capabilities (memcpy). Signed-off-by: Tom Picard Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 626a508a4f8..6c1aac5b359 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -58,6 +58,19 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + { 0, } }; -- cgit v1.2.3 From e3232714d465c42ac631929b990f5e35e2d8a955 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:43:02 -0700 Subject: ioat3: segregate raid engines The cleanup routine for the raid cases imposes extra checks for handling raid descriptors and extended descriptors. If the channel does not support raid it can avoid this extra overhead by using the ioat2 cleanup path. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 4 ++-- drivers/dma/ioat/dma_v2.h | 2 ++ drivers/dma/ioat/dma_v3.c | 25 ++++++++++++++++++------- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 12c64e1a7e3..7bbbd83d12e 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -206,7 +206,7 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) spin_unlock_bh(&chan->cleanup_lock); } -static void ioat2_cleanup_tasklet(unsigned long data) +void ioat2_cleanup_tasklet(unsigned long data) { struct ioat2_dma_chan *ioat = (void *) data; @@ -258,7 +258,7 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) __ioat2_restart_chan(ioat); } -static void ioat2_timer_event(unsigned long data) +void ioat2_timer_event(unsigned long data) { struct ioat2_dma_chan *ioat = (void *) data; struct ioat_chan_common *chan = &ioat->base; diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index e23027d3dcb..246e646b190 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -183,5 +183,7 @@ enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); bool reshape_ring(struct ioat2_dma_chan *ioat, int order); void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); +void ioat2_cleanup_tasklet(unsigned long data); +void ioat2_timer_event(unsigned long data); extern struct kobj_type ioat2_ktype; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index ff4afdc8e59..3686dddf6bf 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1117,30 +1117,25 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) struct dma_device *dma; struct dma_chan *c; struct ioat_chan_common *chan; + bool is_raid_device = false; int err; u16 dev_id; u32 cap; device->enumerate_channels = ioat2_enumerate_channels; - device->cleanup_tasklet = ioat3_cleanup_tasklet; - device->timer_fn = ioat3_timer_event; device->self_test = ioat3_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_is_tx_complete = ioat3_is_complete; dma_cap_set(DMA_INTERRUPT, dma->cap_mask); dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); - if (cap & IOAT_CAP_FILL_BLOCK) { - dma_cap_set(DMA_MEMSET, dma->cap_mask); - dma->device_prep_dma_memset = ioat3_prep_memset_lock; - } if (cap & IOAT_CAP_XOR) { + is_raid_device = true; dma->max_xor = 8; dma->xor_align = 2; @@ -1151,6 +1146,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = ioat3_prep_xor_val; } if (cap & IOAT_CAP_PQ) { + is_raid_device = true; dma_set_maxpq(dma, 8, 0); dma->pq_align = 2; @@ -1171,6 +1167,21 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; } } + if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { + dma_cap_set(DMA_MEMSET, dma->cap_mask); + dma->device_prep_dma_memset = ioat3_prep_memset_lock; + } + + + if (is_raid_device) { + dma->device_is_tx_complete = ioat3_is_complete; + device->cleanup_tasklet = ioat3_cleanup_tasklet; + device->timer_fn = ioat3_timer_event; + } else { + dma->device_is_tx_complete = ioat2_is_complete; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; + } /* -= IOAT ver.3 workarounds =- */ /* Write CHANERRMSK_INT with 3E07h to mask out the errors -- cgit v1.2.3 From 6506cbca6b5b36d682bd39afcbf3f575c81dddb6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 8 Sep 2009 17:43:03 -0700 Subject: Add MODULE_DEVICE_TABLE() so ioatdma module is autoloaded The ioatdma module is missing aliases for the PCI devices it supports, so it is not autoloaded on boot. Add a MODULE_DEVICE_TABLE() to get these aliases. Signed-off-by: Roland Dreier Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 6c1aac5b359..3b2f40e9b3b 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -73,6 +73,7 @@ static struct pci_device_id ioat_pci_tbl[] = { { 0, } }; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); -- cgit v1.2.3 From a6417dd58d6832f123f36c6f22c63ec1ab62ce1c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 8 Sep 2009 17:43:03 -0700 Subject: I/OAT: Convert to PCI_VDEVICE() Trivial cleanup to make the PCI ID table easier to read. [dan.j.williams@intel.com: extended to v3.2 devices] Signed-off-by: Roland Dreier Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 3b2f40e9b3b..b77d3a2864a 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -41,35 +41,35 @@ MODULE_AUTHOR("Intel Corporation"); static struct pci_device_id ioat_pci_tbl[] = { /* I/OAT v1 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, /* I/OAT v2 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, /* I/OAT v3 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, /* I/OAT v3.2 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, { 0, } }; -- cgit v1.2.3 From e0bd0f8cb09cf3ccac1425f0f3a6705106c4d65c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:02 -0700 Subject: dw_dmac: implement a private tx_list Drop dw_dmac's use of tx_list from struct dma_async_tx_descriptor in preparation for removal of this field. Cc: Haavard Skinnemoen Signed-off-by: Dan Williams --- drivers/dma/dw_dmac.c | 19 ++++++++++--------- drivers/dma/dw_dmac_regs.h | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 98c9a847bf5..514ef7d71bc 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) { struct dw_desc *child; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dma_sync_single_for_cpu(chan2parent(&dwc->chan), child->txd.phys, sizeof(child->lli), DMA_TO_DEVICE); @@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) dwc_sync_desc_for_cpu(dwc, desc); spin_lock_bh(&dwc->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dwc->chan), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &dwc->free_list); + list_splice_init(&desc->tx_list, &dwc->free_list); dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dwc->free_list); spin_unlock_bh(&dwc->lock); @@ -209,7 +209,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) param = txd->callback_param; dwc_sync_desc_for_cpu(dwc, desc); - list_splice_init(&txd->tx_list, &dwc->free_list); + list_splice_init(&desc->tx_list, &dwc->free_list); list_move(&desc->desc_node, &dwc->free_list); /* @@ -289,7 +289,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) /* This one is currently in progress */ return; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (child->lli.llp == llp) /* Currently in progress */ return; @@ -356,7 +356,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) dev_printk(KERN_CRIT, chan2dev(&dwc->chan), " cookie: %d\n", bad_desc->txd.cookie); dwc_dump_lli(dwc, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) dwc_dump_lli(dwc, &child->lli); /* Pretend the descriptor completed successfully */ @@ -608,7 +608,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, prev->txd.phys, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; } @@ -700,7 +700,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -746,7 +746,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -902,6 +902,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) break; } + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, chan); desc->txd.tx_submit = dwc_tx_submit; desc->txd.flags = DMA_CTRL_ACK; diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 13a58076703..d9a939f67f4 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -217,6 +217,7 @@ struct dw_desc { /* THEN values for driver housekeeping */ struct list_head desc_node; + struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; }; -- cgit v1.2.3 From eda34234578fd822c950fd06b5c5ff7ac08b3001 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:02 -0700 Subject: fsldma: implement a private tx_list Drop fsldma's use of tx_list from struct dma_async_tx_descriptor in preparation for removal of this field. Cc: Li Yang Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 16 +++++++++------- drivers/dma/fsldma.h | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index ef87a898414..73dd7482319 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -326,7 +326,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable) static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) { struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); - struct fsl_desc_sw *desc; + struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); + struct fsl_desc_sw *child; unsigned long flags; dma_cookie_t cookie; @@ -334,7 +335,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&fsl_chan->desc_lock, flags); cookie = fsl_chan->common.cookie; - list_for_each_entry(desc, &tx->tx_list, node) { + list_for_each_entry(child, &desc->tx_list, node) { cookie++; if (cookie < 0) cookie = 1; @@ -343,8 +344,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) } fsl_chan->common.cookie = cookie; - append_ld_queue(fsl_chan, tx_to_fsl_desc(tx)); - list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev); + append_ld_queue(fsl_chan, desc); + list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev); spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); @@ -366,6 +367,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); if (desc_sw) { memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); + INIT_LIST_HEAD(&desc_sw->tx_list); dma_async_tx_descriptor_init(&desc_sw->async_tx, &fsl_chan->common); desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; @@ -455,7 +457,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags) new->async_tx.flags = flags; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &new->async_tx.tx_list); + list_add_tail(&new->node, &new->tx_list); /* Set End-of-link to the last link descriptor of new list*/ set_ld_eol(fsl_chan, new); @@ -513,7 +515,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( dma_dest += copy; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->async_tx.tx_list); + list_add_tail(&new->node, &first->tx_list); } while (len); new->async_tx.flags = flags; /* client is in control of this ack */ @@ -528,7 +530,7 @@ fail: if (!first) return NULL; - list = &first->async_tx.tx_list; + list = &first->tx_list; list_for_each_entry_safe_reverse(new, prev, list, node) { list_del(&new->node); dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index dc7f2686579..4493afed53f 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -90,6 +90,7 @@ struct fsl_dma_ld_hw { struct fsl_desc_sw { struct fsl_dma_ld_hw hw; struct list_head node; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; struct list_head *ld; void *priv; -- cgit v1.2.3 From 308136d1abcb2d759bac40ed4f5d42ac4af59d8b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:02 -0700 Subject: iop-adma: implement a private tx_list Drop iop-adma's use of tx_list from struct dma_async_tx_descriptor in preparation for removal of this field. Signed-off-by: Dan Williams --- drivers/dma/iop-adma.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 2f052265122..9f6c16f8e2b 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -370,7 +370,7 @@ retry: } alloc_tail->group_head = alloc_start; alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->async_tx.tx_list); + list_splice(&chain, &alloc_tail->tx_list); iop_chan->last_used = last_used; iop_desc_clear_next_desc(alloc_start); iop_desc_clear_next_desc(alloc_tail); @@ -429,7 +429,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) old_chain_tail = list_entry(iop_chan->chain.prev, struct iop_adma_desc_slot, chain_node); - list_splice_init(&sw_desc->async_tx.tx_list, + list_splice_init(&sw_desc->tx_list, &old_chain_tail->chain_node); /* fix up the hardware chain */ @@ -496,6 +496,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = iop_adma_tx_submit; + INIT_LIST_HEAD(&slot->tx_list); INIT_LIST_HEAD(&slot->chain_node); INIT_LIST_HEAD(&slot->slot_node); hw_desc = (char *) iop_chan->device->dma_desc_pool; @@ -1296,7 +1297,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) if (sw_desc) { grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); async_tx_ack(&sw_desc->async_tx); iop_desc_init_memcpy(grp_start, 0); iop_desc_set_byte_count(grp_start, iop_chan, 0); @@ -1352,7 +1353,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); if (sw_desc) { grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); async_tx_ack(&sw_desc->async_tx); iop_desc_init_null_xor(grp_start, 2, 0); iop_desc_set_byte_count(grp_start, iop_chan, 0); -- cgit v1.2.3 From ea25968a32a621b02c3715d6b649f0c6ef53c24e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:02 -0700 Subject: ioat: implement a private tx_list Drop ioatdma's use of tx_list from struct dma_async_tx_descriptor in preparation for removal of this field. Cc: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 7 ++++--- drivers/dma/ioat/dma.h | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 17a518d0386..21527b89590 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -251,12 +251,12 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); /* write address into NextDescriptor field of last desc in chain */ - first = to_ioat_desc(tx->tx_list.next); + first = to_ioat_desc(desc->tx_list.next); chain_tail = to_ioat_desc(ioat->used_desc.prev); /* make descriptor updates globally visible before chaining */ wmb(); chain_tail->hw->next = first->txd.phys; - list_splice_tail_init(&tx->tx_list, &ioat->used_desc); + list_splice_tail_init(&desc->tx_list, &ioat->used_desc); dump_desc_dbg(ioat, chain_tail); dump_desc_dbg(ioat, first); @@ -297,6 +297,7 @@ ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) memset(desc, 0, sizeof(*desc)); + INIT_LIST_HEAD(&desc_sw->tx_list); dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); desc_sw->txd.tx_submit = ioat1_tx_submit; desc_sw->hw = desc; @@ -521,7 +522,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, desc->txd.flags = flags; desc->len = total_len; - list_splice(&chain, &desc->txd.tx_list); + list_splice(&chain, &desc->tx_list); hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); hw->ctl_f.compl_write = 1; hw->tx_cnt = tx_cnt; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index d9d6a7e3cd7..8966fa5453a 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -157,7 +157,7 @@ ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, * struct ioat_desc_sw - wrapper around hardware descriptor * @hw: hardware DMA descriptor * @node: this descriptor will either be on the free list, - * or attached to a transaction list (async_tx.tx_list) + * or attached to a transaction list (tx_list) * @txd: the generic software descriptor for all engines * @id: identifier for debug */ @@ -165,6 +165,7 @@ struct ioat_desc_sw { struct ioat_dma_descriptor *hw; struct list_head node; size_t len; + struct list_head tx_list; struct dma_async_tx_descriptor txd; #ifdef DEBUG int id; -- cgit v1.2.3 From 64203b67274680e95e0c2eec935a22fc94e9ecb5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:03 -0700 Subject: mv_xor: implement a private tx_list Drop mv_xor's use of tx_list from struct dma_async_tx_descriptor in preparation for removal of this field. Cc: Saeed Bishara Signed-off-by: Dan Williams --- drivers/dma/mv_xor.c | 7 ++++--- drivers/dma/mv_xor.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 3f23eabe09f..466ab10c1ff 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -517,7 +517,7 @@ retry: } alloc_tail->group_head = alloc_start; alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->async_tx.tx_list); + list_splice(&chain, &alloc_tail->tx_list); mv_chan->last_used = last_used; mv_desc_clear_next_desc(alloc_start); mv_desc_clear_next_desc(alloc_tail); @@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = mv_desc_assign_cookie(mv_chan, sw_desc); if (list_empty(&mv_chan->chain)) - list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); + list_splice_init(&sw_desc->tx_list, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, chain_node); - list_splice_init(&grp_start->async_tx.tx_list, + list_splice_init(&grp_start->tx_list, &old_chain_tail->chain_node); if (!mv_can_chain(grp_start)) @@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) slot->async_tx.tx_submit = mv_xor_tx_submit; INIT_LIST_HEAD(&slot->chain_node); INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->tx_list); hw_desc = (char *) mv_chan->device->dma_desc_pool; slot->async_tx.phys = (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 06cafe1ef52..977b592e976 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -126,9 +126,8 @@ struct mv_xor_chan { * @idx: pool index * @unmap_src_cnt: number of xor sources * @unmap_len: transaction bytecount + * @tx_list: list of slots that make up a multi-descriptor transaction * @async_tx: support for the async_tx api - * @group_list: list of slots that make up a multi-descriptor transaction - * for example transfer lengths larger than the supported hw max * @xor_check_result: result of zero sum * @crc32_result: result crc calculation */ @@ -145,6 +144,7 @@ struct mv_xor_desc_slot { u16 unmap_src_cnt; u32 value; size_t unmap_len; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; union { u32 *xor_check_result; -- cgit v1.2.3 From 285a3c71640ad7101b7237b8fbaa4ead22c6551c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:03 -0700 Subject: at_hdmac: implement a private tx_list Drop at_hdmac's use of tx_list from struct dma_async_tx_descriptor in preparation for removal of this field. Cc: Nicolas Ferre Signed-off-by: Dan Williams --- drivers/dma/at_hdmac.c | 17 +++++++++-------- drivers/dma/at_hdmac_regs.h | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 9a1e5fb412e..da4e8b710a9 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); if (desc) { memset(desc, 0, sizeof(struct at_desc)); + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, chan); /* txd.flags will be overwritten in prep functions */ desc->txd.flags = DMA_CTRL_ACK; @@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) struct at_desc *child; spin_lock_bh(&atchan->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&atchan->chan_common), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &atchan->free_list); + list_splice_init(&desc->tx_list, &atchan->free_list); dev_vdbg(chan2dev(&atchan->chan_common), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &atchan->free_list); @@ -247,7 +248,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) param = txd->callback_param; /* move children to free_list */ - list_splice_init(&txd->tx_list, &atchan->free_list); + list_splice_init(&desc->tx_list, &atchan->free_list); /* move myself to free_list */ list_move(&desc->desc_node, &atchan->free_list); @@ -334,7 +335,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan) /* This one is currently in progress */ return; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (!(child->lli.ctrla & ATC_DONE)) /* Currently in progress */ return; @@ -407,7 +408,7 @@ static void atc_handle_error(struct at_dma_chan *atchan) dev_crit(chan2dev(&atchan->chan_common), " cookie: %d\n", bad_desc->txd.cookie); atc_dump_lli(atchan, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) atc_dump_lli(atchan, &child->lli); /* Pretend the descriptor completed successfully */ @@ -587,7 +588,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; } @@ -687,7 +688,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -729,7 +730,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 4c972afc49e..495457e3dc4 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -165,6 +165,7 @@ struct at_desc { struct at_lli lli; /* THEN values for driver housekeeping */ + struct list_head tx_list; struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; -- cgit v1.2.3 From 1979b186b80449ac6574d97c254b694c8a99b703 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:03 -0700 Subject: txx9dmac: implement a private tx_list Drop txx9dmac's use of tx_list from struct dma_async_tx_descriptor in preparation for removal of this field. Cc: Atsushi Nemoto Signed-off-by: Dan Williams --- drivers/dma/txx9dmac.c | 24 +++++++++++------------- drivers/dma/txx9dmac.h | 1 + 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index 88dab52926f..197c7bc3789 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc) static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) { - if (!list_empty(&desc->txd.tx_list)) - desc = list_entry(desc->txd.tx_list.prev, - struct txx9dmac_desc, desc_node); + if (!list_empty(&desc->tx_list)) + desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node); return desc; } @@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc, desc = kzalloc(sizeof(*desc), flags); if (!desc) return NULL; + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, &dc->chan); desc->txd.tx_submit = txx9dmac_tx_submit; /* txd.flags will be overwritten in prep funcs */ @@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc, struct txx9dmac_dev *ddev = dc->ddev; struct txx9dmac_desc *child; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dma_sync_single_for_cpu(chan2parent(&dc->chan), child->txd.phys, ddev->descsize, DMA_TO_DEVICE); @@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc, txx9dmac_sync_desc_for_cpu(dc, desc); spin_lock_bh(&dc->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dc->chan), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &dc->free_list); + list_splice_init(&desc->tx_list, &dc->free_list); dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dc->free_list); @@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, param = txd->callback_param; txx9dmac_sync_desc_for_cpu(dc, desc); - list_splice_init(&txd->tx_list, &dc->free_list); + list_splice_init(&desc->tx_list, &dc->free_list); list_move(&desc->desc_node, &dc->free_list); if (!ds) { @@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr) "Bad descriptor submitted for DMA! (cookie: %d)\n", bad_desc->txd.cookie); txx9dmac_dump_desc(dc, &bad_desc->hwdesc); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) txx9dmac_dump_desc(dc, &child->hwdesc); /* Pretend the descriptor completed successfully */ txx9dmac_descriptor_complete(dc, bad_desc); @@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc) return; } - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (desc_read_CHAR(dc, child) == chain) { /* Currently in progress */ if (csr & TXX9_DMA_CSR_ABCHC) @@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dma_sync_single_for_device(chan2parent(&dc->chan), prev->txd.phys, ddev->descsize, DMA_TO_DEVICE); - list_add_tail(&desc->desc_node, - &first->txd.tx_list); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } @@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->txd.phys, ddev->descsize, DMA_TO_DEVICE); - list_add_tail(&desc->desc_node, - &first->txd.tx_list); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h index c907ff01d27..365d42366b9 100644 --- a/drivers/dma/txx9dmac.h +++ b/drivers/dma/txx9dmac.h @@ -231,6 +231,7 @@ struct txx9dmac_desc { /* THEN values for driver housekeeping */ struct list_head desc_node ____cacheline_aligned; + struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; }; -- cgit v1.2.3 From 0803172778901e24a75ab074798d98c2b7411559 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:04 -0700 Subject: dmaengine: kill tx_list The tx_list attribute of struct dma_async_tx_descriptor is common to most, but not all dma driver implementations. None of the upper level code (dmaengine/async_tx) uses it, so allow drivers to implement it locally if they need it. This saves sizeof(struct list_head) bytes for drivers that do not manage descriptors with a linked list (e.g.: ioatdma v2,3). Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 5a87384ea4f..562d182eae6 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -933,7 +933,6 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, { tx->chan = chan; spin_lock_init(&tx->lock); - INIT_LIST_HEAD(&tx->tx_list); } EXPORT_SYMBOL(dma_async_tx_descriptor_init); -- cgit v1.2.3 From 162b96e63e518aa6ff029ce23de12d7f027483bf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:04 -0700 Subject: ioat2,3: cacheline align software descriptor allocations All the necessary fields for handling an ioat2,3 ring entry can fit into one cacheline. Move ->len prior to ->txd in struct ioat_ring_ent, and move allocation of these entries to a hw-cache-aligned kmem cache to reduce the number of cachelines dirtied for descriptor management. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 5 +++-- drivers/dma/ioat/dma_v2.h | 3 ++- drivers/dma/ioat/pci.c | 16 +++++++++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 460b7730133..fa3d6db6624 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -399,11 +399,12 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t f return NULL; memset(hw, 0, sizeof(*hw)); - desc = kzalloc(sizeof(*desc), flags); + desc = kmem_cache_alloc(ioat2_cache, flags); if (!desc) { pci_pool_free(dma->dma_pool, hw, phys); return NULL; } + memset(desc, 0, sizeof(*desc)); dma_async_tx_descriptor_init(&desc->txd, chan); desc->txd.tx_submit = ioat2_tx_submit_unlock; @@ -418,7 +419,7 @@ static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *cha dma = to_ioatdma_device(chan->device); pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); - kfree(desc); + kmem_cache_free(ioat2_cache, desc); } static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 9baa3d6065f..ac00adc8197 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -116,8 +116,8 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len struct ioat_ring_ent { struct ioat_dma_descriptor *hw; - struct dma_async_tx_descriptor txd; size_t len; + struct dma_async_tx_descriptor txd; #ifdef DEBUG int id; #endif @@ -143,4 +143,5 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +extern struct kmem_cache *ioat2_cache; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index c4e43226925..61086c6bbf4 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -69,6 +69,8 @@ static int ioat_dca_enabled = 1; module_param(ioat_dca_enabled, int, 0644); MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +struct kmem_cache *ioat2_cache; + #define DRV_NAME "ioatdma" static struct pci_driver ioat_pci_driver = { @@ -168,12 +170,24 @@ static void __devexit ioat_remove(struct pci_dev *pdev) static int __init ioat_init_module(void) { - return pci_register_driver(&ioat_pci_driver); + int err; + + ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat2_cache) + return -ENOMEM; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + kmem_cache_destroy(ioat2_cache); + + return err; } module_init(ioat_init_module); static void __exit ioat_exit_module(void) { pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat2_cache); } module_exit(ioat_exit_module); -- cgit v1.2.3 From e6c7ecb64e08ef346cb7062b4a5421f00bc602bd Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Tue, 8 Sep 2009 17:53:04 -0700 Subject: fsldma: split apart external pause and request count features When using the Freescale DMA controller in external control mode, both the request count and external pause bits need to be setup correctly. This was being done with the same function. The 83xx controller lacks the external pause feature, but has a similar feature called external start. This feature requires that the request count bits be setup correctly. Split the function into two parts, to make it possible to use the external start feature on the 83xx controller. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 45 +++++++++++++++++++++++++++++---------------- drivers/dma/fsldma.h | 3 ++- 2 files changed, 31 insertions(+), 17 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 73dd7482319..7a0cb6064f8 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -280,28 +280,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size) } /** - * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * fsl_chan_set_request_count - Set DMA Request Count for external control * @fsl_chan : Freescale DMA channel - * @size : Pause control size, 0 for disable external pause control. - * The maximum is 1024. + * @size : Number of bytes to transfer in a single request + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA request count is how many bytes are allowed to transfer before + * pausing the channel, after which a new assertion of DREQ# resumes channel + * operation. * - * The Freescale DMA channel can be controlled by the external - * signal DREQ#. The pause control size is how many bytes are allowed - * to transfer before pausing the channel, after which a new assertion - * of DREQ# resumes channel operation. + * A size of 0 disables external pause control. The maximum size is 1024. */ -static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size) +static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size) { - if (size > 1024) - return; + BUG_ON(size > 1024); + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) + | ((__ilog2(size) << 24) & 0x0f000000), + 32); +} - if (size) { - DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, - DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) - | ((__ilog2(size) << 24) & 0x0f000000), - 32); +/** + * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * @fsl_chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA Request Count feature should be used in addition to this feature + * to set the number of bytes to transfer before pausing the channel. + */ +static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable) +{ + if (enable) fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; - } else + else fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; } @@ -885,6 +897,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; + new_fsl_chan->set_request_count = fsl_chan_set_request_count; } spin_lock_init(&new_fsl_chan->desc_lock); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index 4493afed53f..0df14cbb8ca 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -144,10 +144,11 @@ struct fsl_dma_chan { struct tasklet_struct tasklet; u32 feature; - void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size); + void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable); void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); + void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size); }; #define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) -- cgit v1.2.3 From bbea0b6e0d214ef1511b9c6ccf3af26b38f0af7d Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Tue, 8 Sep 2009 17:53:04 -0700 Subject: fsldma: Add DMA_SLAVE support Use the DMA_SLAVE capability of the DMAEngine API to copy/from a scatterlist into an arbitrary list of hardware address/length pairs. This allows a single DMA transaction to copy data from several different devices into a scatterlist at the same time. This also adds support to enable some controller-specific features such as external start and external pause for a DMA transaction. [dan.j.williams@intel.com: rebased on tx_list movement] Signed-off-by: Ira W. Snyder Acked-by: Li Yang Acked-by: Kumar Gala Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 7a0cb6064f8..296f9e747fa 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -34,6 +34,7 @@ #include #include +#include #include "fsldma.h" static void dma_init(struct fsl_dma_chan *fsl_chan) @@ -551,6 +552,229 @@ fail: return NULL; } +/** + * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: DMAEngine flags + * + * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the + * DMA_SLAVE API, this gets the device-specific information from the + * chan->private variable. + */ +static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_data_direction direction, unsigned long flags) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL; + struct fsl_dma_slave *slave; + struct list_head *tx_list; + size_t copy; + + int i; + struct scatterlist *sg; + size_t sg_used; + size_t hw_used; + struct fsl_dma_hw_addr *hw; + dma_addr_t dma_dst, dma_src; + + if (!chan) + return NULL; + + if (!chan->private) + return NULL; + + fsl_chan = to_fsl_chan(chan); + slave = chan->private; + + if (list_empty(&slave->addresses)) + return NULL; + + hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry); + hw_used = 0; + + /* + * Build the hardware transaction to copy from the scatterlist to + * the hardware, or from the hardware to the scatterlist + * + * If you are copying from the hardware to the scatterlist and it + * takes two hardware entries to fill an entire page, then both + * hardware entries will be coalesced into the same page + * + * If you are copying from the scatterlist to the hardware and a + * single page can fill two hardware entries, then the data will + * be read out of the page into the first hardware entry, and so on + */ + for_each_sg(sgl, sg, sg_len, i) { + sg_used = 0; + + /* Loop until the entire scatterlist entry is used */ + while (sg_used < sg_dma_len(sg)) { + + /* + * If we've used up the current hardware address/length + * pair, we need to load a new one + * + * This is done in a while loop so that descriptors with + * length == 0 will be skipped + */ + while (hw_used >= hw->length) { + + /* + * If the current hardware entry is the last + * entry in the list, we're finished + */ + if (list_is_last(&hw->entry, &slave->addresses)) + goto finished; + + /* Get the next hardware address/length pair */ + hw = list_entry(hw->entry.next, + struct fsl_dma_hw_addr, entry); + hw_used = 0; + } + + /* Allocate the link descriptor from DMA pool */ + new = fsl_dma_alloc_descriptor(fsl_chan); + if (!new) { + dev_err(fsl_chan->dev, "No free memory for " + "link descriptor\n"); + goto fail; + } +#ifdef FSL_DMA_LD_DEBUG + dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new); +#endif + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the DMA controller limit + */ + copy = min_t(size_t, sg_dma_len(sg) - sg_used, + hw->length - hw_used); + copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT); + + /* + * DMA_FROM_DEVICE + * from the hardware to the scatterlist + * + * DMA_TO_DEVICE + * from the scatterlist to the hardware + */ + if (direction == DMA_FROM_DEVICE) { + dma_src = hw->address + hw_used; + dma_dst = sg_dma_address(sg) + sg_used; + } else { + dma_src = sg_dma_address(sg) + sg_used; + dma_dst = hw->address + hw_used; + } + + /* Fill in the descriptor */ + set_desc_cnt(fsl_chan, &new->hw, copy); + set_desc_src(fsl_chan, &new->hw, dma_src); + set_desc_dest(fsl_chan, &new->hw, dma_dst); + + /* + * If this is not the first descriptor, chain the + * current descriptor after the previous descriptor + */ + if (!first) { + first = new; + } else { + set_desc_next(fsl_chan, &prev->hw, + new->async_tx.phys); + } + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + sg_used += copy; + hw_used += copy; + + /* Insert the link descriptor into the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } + } + +finished: + + /* All of the hardware address/length pairs had length == 0 */ + if (!first || !new) + return NULL; + + new->async_tx.flags = flags; + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(fsl_chan, new); + + /* Enable extra controller features */ + if (fsl_chan->set_src_loop_size) + fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size); + + if (fsl_chan->set_dest_loop_size) + fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size); + + if (fsl_chan->toggle_ext_start) + fsl_chan->toggle_ext_start(fsl_chan, slave->external_start); + + if (fsl_chan->toggle_ext_pause) + fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause); + + if (fsl_chan->set_request_count) + fsl_chan->set_request_count(fsl_chan, slave->request_count); + + return &first->async_tx; + +fail: + /* If first was not set, then we failed to allocate the very first + * descriptor, and we're done */ + if (!first) + return NULL; + + /* + * First is set, so all of the descriptors we allocated have been added + * to first->tx_list, INCLUDING "first" itself. Therefore we + * must traverse the list backwards freeing each descriptor in turn + * + * We're re-using variables for the loop, oh well + */ + tx_list = &first->tx_list; + list_for_each_entry_safe_reverse(new, prev, tx_list, node) { + list_del_init(&new->node); + dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); + } + + return NULL; +} + +static void fsl_dma_device_terminate_all(struct dma_chan *chan) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *desc, *tmp; + unsigned long flags; + + if (!chan) + return; + + fsl_chan = to_fsl_chan(chan); + + /* Halt the DMA engine */ + dma_halt(fsl_chan); + + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + + /* Remove and free all of the descriptors in the LD queue */ + list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) { + list_del(&desc->node); + dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys); + } + + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); +} + /** * fsl_dma_update_completed_cookie - Update the completed cookie. * @fsl_chan : Freescale DMA channel @@ -977,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); + dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; fdev->common.device_is_tx_complete = fsl_dma_is_complete; fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; + fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg; + fdev->common.device_terminate_all = fsl_dma_device_terminate_all; fdev->common.dev = &dev->dev; fdev->irq = irq_of_parse_and_map(dev->node, 0); -- cgit v1.2.3 From 657a77fa7284d8ae28dfa48f1dc5d919bf5b2843 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Tue, 8 Sep 2009 17:53:05 -0700 Subject: dmaengine: Move all map_sg/unmap_sg for slave channel to its client Dan Williams wrote: ... DMA-slave clients request specific channels and know the hardware details at a low level, so it should not be too high an expectation to push dma mapping responsibility to the client. Also this patch includes DMA_COMPL_{SRC,DEST}_UNMAP_SINGLE support for dw_dmac driver. Acked-by: Maciej Sosnowski Acked-by: Nicolas Ferre Signed-off-by: Atsushi Nemoto Signed-off-by: Dan Williams --- drivers/dma/at_hdmac.c | 43 ++++++++++++++++++++++--------------------- drivers/dma/dw_dmac.c | 31 +++++++++++++++++++------------ 2 files changed, 41 insertions(+), 33 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index da4e8b710a9..0aeb578a24e 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -253,25 +253,28 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) list_move(&desc->desc_node, &atchan->free_list); /* unmap dma addresses */ - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&atchan->chan_common), - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - else - dma_unmap_page(chan2parent(&atchan->chan_common), - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - } - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&atchan->chan_common), - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); - else - dma_unmap_page(chan2parent(&atchan->chan_common), - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); + if (!atchan->chan_common.private) { + struct device *parent = chan2parent(&atchan->chan_common); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + } } /* @@ -647,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = atslave->reg_width; - sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); - ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 514ef7d71bc..8fb74828036 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -212,16 +212,25 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) list_splice_init(&desc->tx_list, &dwc->free_list); list_move(&desc->desc_node, &dwc->free_list); - /* - * We use dma_unmap_page() regardless of how the buffers were - * mapped before they were submitted... - */ - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) - dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar, - desc->len, DMA_FROM_DEVICE); - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) - dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar, - desc->len, DMA_TO_DEVICE); + if (!dwc->chan.private) { + struct device *parent = chan2parent(&dwc->chan); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + } + } /* * The API requires that no submissions are done from a @@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = dws->reg_width; prev = first = NULL; - sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); - switch (direction) { case DMA_TO_DEVICE: ctllo = (DWC_DEFAULT_CTLLO -- cgit v1.2.3 From d8902adcc1a9fd484c8cb5e575152e32192c1ff8 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Mon, 7 Sep 2009 03:26:23 +0000 Subject: dmaengine: sh: Add Support SuperH DMA Engine driver This supported all DMA channels, and it was tested in SH7722, SH7780, SH7785 and SH7763. This can not use with SH DMA API. Signed-off-by: Nobuhiro Iwamatsu Reviewed-by: Matt Fleming Acked-by: Maciej Sosnowski Acked-by: Paul Mundt Signed-off-by: Dan Williams --- drivers/dma/Kconfig | 8 + drivers/dma/Makefile | 1 + drivers/dma/shdma.c | 786 +++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/dma/shdma.h | 64 +++++ 4 files changed, 859 insertions(+) create mode 100644 drivers/dma/shdma.c create mode 100644 drivers/dma/shdma.h (limited to 'drivers/dma') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index fe1f3717b1f..3230a780c3d 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -101,6 +101,14 @@ config TXX9_DMAC Support the TXx9 SoC internal DMA controller. This can be integrated in chips such as the Toshiba TX4927/38/39. +config SH_DMAE + tristate "Renesas SuperH DMAC support" + depends on SUPERH && SH_DMA + depends on !SH_DMA_API + select DMA_ENGINE + help + Enable support for the Renesas SuperH DMA controllers. + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 8f115e93b4a..eca71ba78ae 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_MX3_IPU) += ipu/ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_SH_DMAE) += shdma.o diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c new file mode 100644 index 00000000000..b3b065c4e5c --- /dev/null +++ b/drivers/dma/shdma.c @@ -0,0 +1,786 @@ +/* + * Renesas SuperH DMA Engine support + * + * base is drivers/dma/flsdma.c + * + * Copyright (C) 2009 Nobuhiro Iwamatsu + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * - DMA of SuperH does not have Hardware DMA chain mode. + * - MAX DMA size is 16MB. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "shdma.h" + +/* DMA descriptor control */ +#define DESC_LAST (-1) +#define DESC_COMP (1) +#define DESC_NCOMP (0) + +#define NR_DESCS_PER_CHANNEL 32 +/* + * Define the default configuration for dual address memory-memory transfer. + * The 0x400 value represents auto-request, external->external. + * + * And this driver set 4byte burst mode. + * If you want to change mode, you need to change RS_DEFAULT of value. + * (ex 1byte burst mode -> (RS_DUAL & ~TS_32) + */ +#define RS_DEFAULT (RS_DUAL) + +#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) +static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) +{ + ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); +} + +static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg) +{ + return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); +} + +static void dmae_init(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = RS_DEFAULT; /* default is DUAL mode */ + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +/* + * Reset DMA controller + * + * SH7780 has two DMAOR register + */ +static void sh_dmae_ctl_stop(int id) +{ + unsigned short dmaor = dmaor_read_reg(id); + + dmaor &= ~(DMAOR_NMIF | DMAOR_AE); + dmaor_write_reg(id, dmaor); +} + +static int sh_dmae_rst(int id) +{ + unsigned short dmaor; + + sh_dmae_ctl_stop(id); + dmaor = (dmaor_read_reg(id)|DMAOR_INIT); + + dmaor_write_reg(id, dmaor); + if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) { + pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n"); + return -EINVAL; + } + return 0; +} + +static int dmae_is_idle(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + if (chcr & CHCR_DE) { + if (!(chcr & CHCR_TE)) + return -EBUSY; /* working */ + } + return 0; /* waiting */ +} + +static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; +} + +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) +{ + sh_dmae_writel(sh_chan, hw.sar, SAR); + sh_dmae_writel(sh_chan, hw.dar, DAR); + sh_dmae_writel(sh_chan, + (hw.tcr >> calc_xmit_shift(sh_chan)), TCR); +} + +static void dmae_start(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + chcr |= (CHCR_DE|CHCR_IE); + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +static void dmae_halt(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE); + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val) +{ + int ret = dmae_is_idle(sh_chan); + /* When DMA was working, can not set data to CHCR */ + if (ret) + return ret; + + sh_dmae_writel(sh_chan, val, CHCR); + return 0; +} + +#define DMARS1_ADDR 0x04 +#define DMARS2_ADDR 0x08 +#define DMARS_SHIFT 8 +#define DMARS_CHAN_MSK 0x01 +static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) +{ + u32 addr; + int shift = 0; + int ret = dmae_is_idle(sh_chan); + if (ret) + return ret; + + if (sh_chan->id & DMARS_CHAN_MSK) + shift = DMARS_SHIFT; + + switch (sh_chan->id) { + /* DMARS0 */ + case 0: + case 1: + addr = SH_DMARS_BASE; + break; + /* DMARS1 */ + case 2: + case 3: + addr = (SH_DMARS_BASE + DMARS1_ADDR); + break; + /* DMARS2 */ + case 4: + case 5: + addr = (SH_DMARS_BASE + DMARS2_ADDR); + break; + default: + return -EINVAL; + } + + ctrl_outw((val << shift) | + (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)), + addr); + + return 0; +} + +static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct sh_desc *desc = tx_to_sh_desc(tx); + struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&sh_chan->desc_lock); + + cookie = sh_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + + /* If desc only in the case of 1 */ + if (desc->async_tx.cookie != -EBUSY) + desc->async_tx.cookie = cookie; + sh_chan->common.cookie = desc->async_tx.cookie; + + list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); + + spin_unlock_bh(&sh_chan->desc_lock); + + return cookie; +} + +static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc, *_desc, *ret = NULL; + + spin_lock_bh(&sh_chan->desc_lock); + list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { + if (async_tx_test_ack(&desc->async_tx)) { + list_del(&desc->node); + ret = desc; + break; + } + } + spin_unlock_bh(&sh_chan->desc_lock); + + return ret; +} + +static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) +{ + if (desc) { + spin_lock_bh(&sh_chan->desc_lock); + + list_splice_init(&desc->tx_list, &sh_chan->ld_free); + list_add(&desc->node, &sh_chan->ld_free); + + spin_unlock_bh(&sh_chan->desc_lock); + } +} + +static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc; + + spin_lock_bh(&sh_chan->desc_lock); + while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { + spin_unlock_bh(&sh_chan->desc_lock); + desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL); + if (!desc) { + spin_lock_bh(&sh_chan->desc_lock); + break; + } + dma_async_tx_descriptor_init(&desc->async_tx, + &sh_chan->common); + desc->async_tx.tx_submit = sh_dmae_tx_submit; + desc->async_tx.flags = DMA_CTRL_ACK; + INIT_LIST_HEAD(&desc->tx_list); + sh_dmae_put_desc(sh_chan, desc); + + spin_lock_bh(&sh_chan->desc_lock); + sh_chan->descs_allocated++; + } + spin_unlock_bh(&sh_chan->desc_lock); + + return sh_chan->descs_allocated; +} + +/* + * sh_dma_free_chan_resources - Free all resources of the channel. + */ +static void sh_dmae_free_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc, *_desc; + LIST_HEAD(list); + + BUG_ON(!list_empty(&sh_chan->ld_queue)); + spin_lock_bh(&sh_chan->desc_lock); + + list_splice_init(&sh_chan->ld_free, &list); + sh_chan->descs_allocated = 0; + + spin_unlock_bh(&sh_chan->desc_lock); + + list_for_each_entry_safe(desc, _desc, &list, node) + kfree(desc); +} + +static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct sh_dmae_chan *sh_chan; + struct sh_desc *first = NULL, *prev = NULL, *new; + size_t copy_size; + + if (!chan) + return NULL; + + if (!len) + return NULL; + + sh_chan = to_sh_chan(chan); + + do { + /* Allocate the link descriptor from DMA pool */ + new = sh_dmae_get_desc(sh_chan); + if (!new) { + dev_err(sh_chan->dev, + "No free memory for link descriptor\n"); + goto err_get_desc; + } + + copy_size = min(len, (size_t)SH_DMA_TCR_MAX); + + new->hw.sar = dma_src; + new->hw.dar = dma_dest; + new->hw.tcr = copy_size; + if (!first) + first = new; + + new->mark = DESC_NCOMP; + async_tx_ack(&new->async_tx); + + prev = new; + len -= copy_size; + dma_src += copy_size; + dma_dest += copy_size; + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; /* Last desc */ + + return &first->async_tx; + +err_get_desc: + sh_dmae_put_desc(sh_chan, first); + return NULL; + +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function clean up the ld_queue of DMA channel. + */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc, *_desc; + + spin_lock_bh(&sh_chan->desc_lock); + list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { + dma_async_tx_callback callback; + void *callback_param; + + /* non send data */ + if (desc->mark == DESC_NCOMP) + break; + + /* send data sesc */ + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + + /* Remove from ld_queue list */ + list_splice_init(&desc->tx_list, &sh_chan->ld_free); + + dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", + desc); + + list_move(&desc->node, &sh_chan->ld_free); + /* Run the link descriptor callback function */ + if (callback) { + spin_unlock_bh(&sh_chan->desc_lock); + dev_dbg(sh_chan->dev, "link descriptor %p callback\n", + desc); + callback(callback_param); + spin_lock_bh(&sh_chan->desc_lock); + } + } + spin_unlock_bh(&sh_chan->desc_lock); +} + +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) +{ + struct list_head *ld_node; + struct sh_dmae_regs hw; + + /* DMA work check */ + if (dmae_is_idle(sh_chan)) + return; + + /* Find the first un-transfer desciptor */ + for (ld_node = sh_chan->ld_queue.next; + (ld_node != &sh_chan->ld_queue) + && (to_sh_desc(ld_node)->mark == DESC_COMP); + ld_node = ld_node->next) + cpu_relax(); + + if (ld_node != &sh_chan->ld_queue) { + /* Get the ld start address from ld_queue */ + hw = to_sh_desc(ld_node)->hw; + dmae_set_reg(sh_chan, hw); + dmae_start(sh_chan); + } +} + +static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + sh_chan_xfer_ld_queue(sh_chan); +} + +static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + sh_dmae_chan_ld_cleanup(sh_chan); + + last_used = chan->cookie; + last_complete = sh_chan->completed_cookie; + if (last_complete == -EBUSY) + last_complete = last_used; + + if (done) + *done = last_complete; + + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static irqreturn_t sh_dmae_interrupt(int irq, void *data) +{ + irqreturn_t ret = IRQ_NONE; + struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + if (chcr & CHCR_TE) { + /* DMA stop */ + dmae_halt(sh_chan); + + ret = IRQ_HANDLED; + tasklet_schedule(&sh_chan->tasklet); + } + + return ret; +} + +#if defined(CONFIG_CPU_SH4) +static irqreturn_t sh_dmae_err(int irq, void *data) +{ + int err = 0; + struct sh_dmae_device *shdev = (struct sh_dmae_device *)data; + + /* IRQ Multi */ + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + int cnt = 0; + switch (irq) { +#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ) + case DMTE6_IRQ: + cnt++; +#endif + case DMTE0_IRQ: + if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) { + disable_irq(irq); + return IRQ_HANDLED; + } + default: + return IRQ_NONE; + } + } else { + /* reset dma controller */ + err = sh_dmae_rst(0); + if (err) + return err; + if (shdev->pdata.mode & SHDMA_DMAOR1) { + err = sh_dmae_rst(1); + if (err) + return err; + } + disable_irq(irq); + return IRQ_HANDLED; + } +} +#endif + +static void dmae_do_tasklet(unsigned long data) +{ + struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; + struct sh_desc *desc, *_desc, *cur_desc = NULL; + u32 sar_buf = sh_dmae_readl(sh_chan, SAR); + list_for_each_entry_safe(desc, _desc, + &sh_chan->ld_queue, node) { + if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { + cur_desc = desc; + break; + } + } + + if (cur_desc) { + switch (cur_desc->async_tx.cookie) { + case 0: /* other desc data */ + break; + case -EBUSY: /* last desc */ + sh_chan->completed_cookie = + cur_desc->async_tx.cookie; + break; + default: /* first desc ( 0 < )*/ + sh_chan->completed_cookie = + cur_desc->async_tx.cookie - 1; + break; + } + cur_desc->mark = DESC_COMP; + } + /* Next desc */ + sh_chan_xfer_ld_queue(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan); +} + +static unsigned int get_dmae_irq(unsigned int id) +{ + unsigned int irq = 0; + if (id < ARRAY_SIZE(dmte_irq_map)) + irq = dmte_irq_map[id]; + return irq; +} + +static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id) +{ + int err; + unsigned int irq = get_dmae_irq(id); + unsigned long irqflags = IRQF_DISABLED; + struct sh_dmae_chan *new_sh_chan; + + /* alloc channel */ + new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL); + if (!new_sh_chan) { + dev_err(shdev->common.dev, "No free memory for allocating " + "dma channels!\n"); + return -ENOMEM; + } + + new_sh_chan->dev = shdev->common.dev; + new_sh_chan->id = id; + + /* Init DMA tasklet */ + tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet, + (unsigned long)new_sh_chan); + + /* Init the channel */ + dmae_init(new_sh_chan); + + spin_lock_init(&new_sh_chan->desc_lock); + + /* Init descripter manage list */ + INIT_LIST_HEAD(&new_sh_chan->ld_queue); + INIT_LIST_HEAD(&new_sh_chan->ld_free); + + /* copy struct dma_device */ + new_sh_chan->common.device = &shdev->common; + + /* Add the channel to DMA device channel list */ + list_add_tail(&new_sh_chan->common.device_node, + &shdev->common.channels); + shdev->common.chancnt++; + + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + irqflags = IRQF_SHARED; +#if defined(DMTE6_IRQ) + if (irq >= DMTE6_IRQ) + irq = DMTE6_IRQ; + else +#endif + irq = DMTE0_IRQ; + } + + snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id), + "sh-dmae%d", new_sh_chan->id); + + /* set up channel irq */ + err = request_irq(irq, &sh_dmae_interrupt, + irqflags, new_sh_chan->dev_id, new_sh_chan); + if (err) { + dev_err(shdev->common.dev, "DMA channel %d request_irq error " + "with return %d\n", id, err); + goto err_no_irq; + } + + /* CHCR register control function */ + new_sh_chan->set_chcr = dmae_set_chcr; + /* DMARS register control function */ + new_sh_chan->set_dmars = dmae_set_dmars; + + shdev->chan[id] = new_sh_chan; + return 0; + +err_no_irq: + /* remove from dmaengine device node */ + list_del(&new_sh_chan->common.device_node); + kfree(new_sh_chan); + return err; +} + +static void sh_dmae_chan_remove(struct sh_dmae_device *shdev) +{ + int i; + + for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) { + if (shdev->chan[i]) { + struct sh_dmae_chan *shchan = shdev->chan[i]; + if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) + free_irq(dmte_irq_map[i], shchan); + + list_del(&shchan->common.device_node); + kfree(shchan); + shdev->chan[i] = NULL; + } + } + shdev->common.chancnt = 0; +} + +static int __init sh_dmae_probe(struct platform_device *pdev) +{ + int err = 0, cnt, ecnt; + unsigned long irqflags = IRQF_DISABLED; +#if defined(CONFIG_CPU_SH4) + int eirq[] = { DMAE0_IRQ, +#if defined(DMAE1_IRQ) + DMAE1_IRQ +#endif + }; +#endif + struct sh_dmae_device *shdev; + + shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL); + if (!shdev) { + dev_err(&pdev->dev, "No enough memory\n"); + err = -ENOMEM; + goto shdev_err; + } + + /* get platform data */ + if (!pdev->dev.platform_data) + goto shdev_err; + + /* platform data */ + memcpy(&shdev->pdata, pdev->dev.platform_data, + sizeof(struct sh_dmae_pdata)); + + /* reset dma controller */ + err = sh_dmae_rst(0); + if (err) + goto rst_err; + + /* SH7780/85/23 has DMAOR1 */ + if (shdev->pdata.mode & SHDMA_DMAOR1) { + err = sh_dmae_rst(1); + if (err) + goto rst_err; + } + + INIT_LIST_HEAD(&shdev->common.channels); + + dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); + shdev->common.device_alloc_chan_resources + = sh_dmae_alloc_chan_resources; + shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources; + shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy; + shdev->common.device_is_tx_complete = sh_dmae_is_complete; + shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending; + shdev->common.dev = &pdev->dev; + +#if defined(CONFIG_CPU_SH4) + /* Non Mix IRQ mode SH7722/SH7730 etc... */ + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + irqflags = IRQF_SHARED; + eirq[0] = DMTE0_IRQ; +#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ) + eirq[1] = DMTE6_IRQ; +#endif + } + + for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) { + err = request_irq(eirq[ecnt], sh_dmae_err, + irqflags, "DMAC Address Error", shdev); + if (err) { + dev_err(&pdev->dev, "DMA device request_irq" + "error (irq %d) with return %d\n", + eirq[ecnt], err); + goto eirq_err; + } + } +#endif /* CONFIG_CPU_SH4 */ + + /* Create DMA Channel */ + for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) { + err = sh_dmae_chan_probe(shdev, cnt); + if (err) + goto chan_probe_err; + } + + platform_set_drvdata(pdev, shdev); + dma_async_device_register(&shdev->common); + + return err; + +chan_probe_err: + sh_dmae_chan_remove(shdev); + +eirq_err: + for (ecnt-- ; ecnt >= 0; ecnt--) + free_irq(eirq[ecnt], shdev); + +rst_err: + kfree(shdev); + +shdev_err: + return err; +} + +static int __exit sh_dmae_remove(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + + dma_async_device_unregister(&shdev->common); + + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + free_irq(DMTE0_IRQ, shdev); +#if defined(DMTE6_IRQ) + free_irq(DMTE6_IRQ, shdev); +#endif + } + + /* channel data remove */ + sh_dmae_chan_remove(shdev); + + if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) { + free_irq(DMAE0_IRQ, shdev); +#if defined(DMAE1_IRQ) + free_irq(DMAE1_IRQ, shdev); +#endif + } + kfree(shdev); + + return 0; +} + +static void sh_dmae_shutdown(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + sh_dmae_ctl_stop(0); + if (shdev->pdata.mode & SHDMA_DMAOR1) + sh_dmae_ctl_stop(1); +} + +static struct platform_driver sh_dmae_driver = { + .remove = __exit_p(sh_dmae_remove), + .shutdown = sh_dmae_shutdown, + .driver = { + .name = "sh-dma-engine", + }, +}; + +static int __init sh_dmae_init(void) +{ + return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe); +} +module_init(sh_dmae_init); + +static void __exit sh_dmae_exit(void) +{ + platform_driver_unregister(&sh_dmae_driver); +} +module_exit(sh_dmae_exit); + +MODULE_AUTHOR("Nobuhiro Iwamatsu "); +MODULE_DESCRIPTION("Renesas SH DMA Engine driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h new file mode 100644 index 00000000000..2b4bc15a2c0 --- /dev/null +++ b/drivers/dma/shdma.h @@ -0,0 +1,64 @@ +/* + * Renesas SuperH DMA Engine support + * + * Copyright (C) 2009 Nobuhiro Iwamatsu + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef __DMA_SHDMA_H +#define __DMA_SHDMA_H + +#include +#include +#include + +#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ + +struct sh_dmae_regs { + u32 sar; /* SAR / source address */ + u32 dar; /* DAR / destination address */ + u32 tcr; /* TCR / transfer count */ +}; + +struct sh_desc { + struct list_head tx_list; + struct sh_dmae_regs hw; + struct list_head node; + struct dma_async_tx_descriptor async_tx; + int mark; +}; + +struct sh_dmae_chan { + dma_cookie_t completed_cookie; /* The maximum cookie completed */ + spinlock_t desc_lock; /* Descriptor operation lock */ + struct list_head ld_queue; /* Link descriptors queue */ + struct list_head ld_free; /* Link descriptors free */ + struct dma_chan common; /* DMA common channel */ + struct device *dev; /* Channel device */ + struct tasklet_struct tasklet; /* Tasklet */ + int descs_allocated; /* desc count */ + int id; /* Raw id of this channel */ + char dev_id[16]; /* unique name per DMAC of channel */ + + /* Set chcr */ + int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs); + /* Set DMA resource */ + int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res); +}; + +struct sh_dmae_device { + struct dma_device common; + struct sh_dmae_chan *chan[MAX_DMA_CHANNELS]; + struct sh_dmae_pdata pdata; +}; + +#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common) +#define to_sh_desc(lh) container_of(lh, struct sh_desc, node) +#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx) + +#endif /* __DMA_SHDMA_H */ -- cgit v1.2.3 From 9a8de639f35ca3951b910d5e3a2f92f4cf3afc8f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 15:06:10 -0700 Subject: async_tx: remove HIGHMEM64G restriction This restriction prevented ASYNC_TX_DMA from being enabled on platform configurations where DMA address conversion could not be performed in place on the stack. Since commit 04ce9ab3 ("async_xor: permit callers to pass in a 'dma/page scribble' region") the async_tx api now either uses a caller provided 'scribble' buffer, or performs the conversion in place when sizeof(dma_addr_t) <= sizeof(struct page *). Signed-off-by: Dan Williams --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3230a780c3d..5903a88351b 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -128,7 +128,7 @@ config NET_DMA config ASYNC_TX_DMA bool "Async_tx: Offload support for the async_tx api" - depends on DMA_ENGINE && !HIGHMEM64G + depends on DMA_ENGINE help This allows the async_tx api to take advantage of offload engines for memcpy, memset, xor, and raid6 p+q operations. If your platform has -- cgit v1.2.3 From 1a5aeeecd550ee4344cfba1791f1134739b16dc6 Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Thu, 10 Sep 2009 15:05:58 +0200 Subject: dca: registering requesters in multiple dca domains This patch enables DCA support on multiple-IOH/multiple-IIO architectures. It modifies dca module by replacing single dca_providers list with dca_domains list, each domain containing separate list of providers. This approach lets dca driver manage multiple domains, i.e. sets of providers and requesters mapped back to the same PCI root complex device. The driver takes care to register each requester to a provider from the same domain. Signed-off-by: Dan Williams Signed-off-by: Maciej Sosnowski --- drivers/dma/ioat/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index c788fa26647..d545fae30f3 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -175,7 +175,7 @@ static void __devexit ioat_remove(struct pci_dev *pdev) dev_err(&pdev->dev, "Removing dma and dca services\n"); if (device->dca) { - unregister_dca_provider(device->dca); + unregister_dca_provider(device->dca, &pdev->dev); free_dca_provider(device->dca); device->dca = NULL; } -- cgit v1.2.3 From 3208ca52f3bfa36914c44db207d0a34071f9897f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 10 Sep 2009 11:27:36 -0700 Subject: ioat: driver version 4.0 A new ring implementation and the addition of raid functionality constitutes a bump in the driver major version number. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 6a675a2a2d1..c14fdfeb7f3 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -30,7 +30,7 @@ #include #include -#define IOAT_DMA_VERSION "3.64" +#define IOAT_DMA_VERSION "4.00" #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 -- cgit v1.2.3 From 376ec37667b510453f5a62fcd95d762786e6a0a9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Sep 2009 15:16:50 -0700 Subject: ioat2: clarify ring size limits With the addition of ioat_max_alloc_order it is not clear what the maximum allocation order is, so document that in the modinfo. Also take an opportunity to kill a stray semicolon. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 5d6ac49e0d3..8fd0b59f190 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -42,18 +42,19 @@ int ioat_ring_alloc_order = 8; module_param(ioat_ring_alloc_order, int, 0644); MODULE_PARM_DESC(ioat_ring_alloc_order, - "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); + "ioat2+: allocate 2^n descriptors per channel" + " (default: 8 max: 16)"); static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; module_param(ioat_ring_max_alloc_order, int, 0644); MODULE_PARM_DESC(ioat_ring_max_alloc_order, - "ioat2+: upper limit for dynamic ring resizing (default: n=16)"); + "ioat2+: upper limit for ring size (default: 16)"); void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) { void * __iomem reg_base = ioat->base.reg_base; ioat->pending = 0; - ioat->dmacount += ioat2_ring_pending(ioat);; + ioat->dmacount += ioat2_ring_pending(ioat); ioat->issued = ioat->head; /* make descriptor updates globally visible before notifying channel */ wmb(); -- cgit v1.2.3 From f477f5b3316f39c841aa121a219b82b3a56e7da7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 21 Sep 2009 09:17:58 -0700 Subject: drivers/dma/ioat/dma_v2.c: fix warnings drivers/dma/ioat/dma_v2.c: In function 'ioat2_dma_prep_memcpy_lock': drivers/dma/ioat/dma_v2.c:680: warning: 'hw' may be used uninitialized in this function drivers/dma/ioat/dma_v2.c:681: warning: 'desc' may be used uninitialized in this function Cc: Maciej Sosnowski Signed-off-by: Andrew Morton Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 8fd0b59f190..96ffab7d37a 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -692,7 +692,8 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, /* pass */; else return NULL; - for (i = 0; i < num_descs; i++) { + i = 0; + do { size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); desc = ioat2_get_ring_ent(ioat, idx + i); @@ -707,7 +708,7 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, dst += copy; src += copy; dump_desc_dbg(ioat, desc); - } + } while (++i < num_descs); desc->txd.flags = flags; desc->len = total_len; -- cgit v1.2.3 From cdef57dbb618608bfffda2fc32c8d0a4012a1d3a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 21 Sep 2009 09:22:29 -0700 Subject: ioat3: fix uninitialized var warnings drivers/dma/ioat/dma_v3.c: In function 'ioat3_prep_memset_lock': drivers/dma/ioat/dma_v3.c:439: warning: 'fill' may be used uninitialized in this function drivers/dma/ioat/dma_v3.c:437: warning: 'desc' may be used uninitialized in this function drivers/dma/ioat/dma_v3.c: In function '__ioat3_prep_xor_lock': drivers/dma/ioat/dma_v3.c:489: warning: 'xor' may be used uninitialized in this function drivers/dma/ioat/dma_v3.c:486: warning: 'desc' may be used uninitialized in this function drivers/dma/ioat/dma_v3.c: In function '__ioat3_prep_pq_lock': drivers/dma/ioat/dma_v3.c:631: warning: 'pq' may be used uninitialized in this function drivers/dma/ioat/dma_v3.c:628: warning: 'desc' may be used uninitialized in this function gcc-4.0, unlike gcc-4.3, does not see that these variables are initialized before use. Convert the descriptor loops to do-while make this initialization apparent. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 3686dddf6bf..35d1e33afd5 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -448,7 +448,8 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, /* pass */; else return NULL; - for (i = 0; i < num_descs; i++) { + i = 0; + do { size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); desc = ioat2_get_ring_ent(ioat, idx + i); @@ -463,7 +464,7 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, len -= xfer_size; dest += xfer_size; dump_desc_dbg(ioat, desc); - } + } while (++i < num_descs); desc->txd.flags = flags; desc->len = total_len; @@ -518,7 +519,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, /* pass */; else return NULL; - for (i = 0; i < num_descs; i += 1 + with_ext) { + i = 0; + do { struct ioat_raw_descriptor *descs[2]; size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); int s; @@ -546,7 +548,7 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, len -= xfer_size; offset += xfer_size; dump_desc_dbg(ioat, desc); - } + } while ((i += 1 + with_ext) < num_descs); /* last xor descriptor carries the unmap parameters and fence bit */ desc->txd.flags = flags; @@ -664,7 +666,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, /* pass */; else return NULL; - for (i = 0; i < num_descs; i += 1 + with_ext) { + i = 0; + do { struct ioat_raw_descriptor *descs[2]; size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); @@ -703,7 +706,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, len -= xfer_size; offset += xfer_size; - } + } while ((i += 1 + with_ext) < num_descs); /* last pq descriptor carries the unmap parameters and fence bit */ desc->txd.flags = flags; -- cgit v1.2.3