From 871a8623d3b40221ad1103aff715dfee0aa4dacf Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 17 Mar 2017 18:30:07 -0500 Subject: i40iw: Receive netdev events post INET_NOTIFIER state Netdev notification events are de-registered only when all client iwdev instances are removed. If a single client is closed and re-opened, netdev events could arrive even before the Control Queue-Pair (CQP) is created, causing a NULL pointer dereference crash in i40iw_get_cqp_request. Fix this by allowing netdev event notification only after we have reached the INET_NOTIFIER state with respect to device initialization. Reported-by: Stefan Assmann Signed-off-by: Shiraz Saleem Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0f5d43d1f5fc..70c3e9e79508 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -160,6 +160,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; upper_dev = netdev_master_upper_dev_get(netdev); if (netdev != event_netdev) @@ -214,6 +217,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; if (netdev != event_netdev) return NOTIFY_DONE; @@ -260,6 +266,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * if (!iwhdl) return NOTIFY_DONE; iwdev = &iwhdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; p = (__be32 *)neigh->primary_key; i40iw_copy_ip_ntohl(local_ipaddr, p); if (neigh->nud_state & NUD_VALID) { -- cgit v1.2.3 From 86f46aba8d1ac3ed0904542158a9b9cb9c7a143c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Mar 2017 22:00:52 +0200 Subject: IB/core: Protect against self-requeue of a cq work item We need to make sure that the cq work item does not run when we are destroying the cq. Unlike flush_work, cancel_work_sync protects against self-requeue of the work item (which we can do in ib_cq_poll_work). Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche -- Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index e95510117a6d..2746d2eb3d52 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -196,7 +196,7 @@ void ib_free_cq(struct ib_cq *cq) irq_poll_disable(&cq->iop); break; case IB_POLL_WORKQUEUE: - flush_work(&cq->work); + cancel_work_sync(&cq->work); break; default: WARN_ON_ONCE(1); -- cgit v1.2.3 From cb8864559631754ac93d5734b165ccd0cad4728c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Mar 2017 11:34:20 -0700 Subject: infiniband: Fix alignment of mmap cookies to support VIPT caching When vmalloc_user is used to create memory that is supposed to be mmap'd to user space, it is necessary for the mmap cookie (eg the offset) to be aligned to SHMLBA. This creates a situation where all virtual mappings of the same physical page share the same virtual cache index and guarantees VIPT coherence. Otherwise the cache is non-coherent and the kernel will not see writes by userspace when reading the shared page (or vice-versa). Reported-by: Josh Beavers Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mmap.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_mmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index e202b8142759..6b712eecbd37 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, spin_lock_irq(&rdi->mmap_offset_lock); if (rdi->mmap_offset == 0) - rdi->mmap_offset = PAGE_SIZE; + rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->offset = rdi->mmap_offset; - rdi->mmap_offset += size; + rdi->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_irq(&rdi->mmap_offset_lock); INIT_LIST_HEAD(&ip->pending_mmaps); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index c572a4c09359..bd812e00988e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, spin_lock_bh(&rxe->mmap_offset_lock); if (rxe->mmap_offset == 0) - rxe->mmap_offset = PAGE_SIZE; + rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->info.offset = rxe->mmap_offset; - rxe->mmap_offset += size; + rxe->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_bh(&rxe->mmap_offset_lock); -- cgit v1.2.3 From 6332dee83d8eab80d6d502cc51135b998fe6df79 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Wed, 22 Feb 2017 17:22:56 -0800 Subject: RDMA/vmw_pvrdma: Cleanup unused variables Removed the unused nreq and redundant index variables. Moved hardcoded async and cq ring pages number to macro. Reported-by: Yuval Shaia Signed-off-by: Adit Ranadive Reviewed-by: Aditya Sarwade Tested-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 2 ++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 4 ++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 33 ++++++++++---------------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 3cd96c1b9502..dbf61c37835c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -69,6 +69,8 @@ */ #define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 +#define PVRDMA_NUM_RING_PAGES 4 + struct pvrdma_dev; struct pvrdma_page_dir { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 100bea5c42ff..e77476ca70ac 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -858,7 +858,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev->dsr->resp_slot_dma = (u64)slot_dma; /* Async event ring */ - dev->dsr->async_ring_pages.num_pages = 4; + dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; ret = pvrdma_page_dir_init(dev, &dev->async_pdir, dev->dsr->async_ring_pages.num_pages, true); if (ret) @@ -867,7 +867,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; /* CQ notification ring */ - dev->dsr->cq_ring_pages.num_pages = 4; + dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, dev->dsr->cq_ring_pages.num_pages, true); if (ret) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index dbbfd35e7da7..3ffbb2d42170 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -554,13 +554,13 @@ out: return ret; } -static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n) +static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n) { return pvrdma_page_dir_get_ptr(&qp->pdir, qp->sq.offset + n * qp->sq.wqe_size); } -static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n) +static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n) { return pvrdma_page_dir_get_ptr(&qp->pdir, qp->rq.offset + n * qp->rq.wqe_size); @@ -598,9 +598,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned long flags; struct pvrdma_sq_wqe_hdr *wqe_hdr; struct pvrdma_sge *sge; - int i, index; - int nreq; - int ret; + int i, ret; /* * In states lower than RTS, we can fail immediately. In other states, @@ -613,9 +611,8 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); - for (nreq = 0; wr; nreq++, wr = wr->next) { - unsigned int tail; + while (wr) { + unsigned int tail = 0; if (unlikely(!pvrdma_idx_ring_has_space( qp->sq.ring, qp->sq.wqe_cnt, &tail))) { @@ -680,7 +677,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index); + wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail); memset(wqe_hdr, 0, sizeof(*wqe_hdr)); wqe_hdr->wr_id = wr->wr_id; wqe_hdr->num_sge = wr->num_sge; @@ -771,12 +768,11 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, /* Make sure wqe is written before index update */ smp_wmb(); - index++; - if (unlikely(index >= qp->sq.wqe_cnt)) - index = 0; /* Update shared sq ring */ pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); + + wr = wr->next; } ret = 0; @@ -806,7 +802,6 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct pvrdma_qp *qp = to_vqp(ibqp); struct pvrdma_rq_wqe_hdr *wqe_hdr; struct pvrdma_sge *sge; - int index, nreq; int ret = 0; int i; @@ -821,9 +816,8 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); - for (nreq = 0; wr; nreq++, wr = wr->next) { - unsigned int tail; + while (wr) { + unsigned int tail = 0; if (unlikely(wr->num_sge > qp->rq.max_sg || wr->num_sge < 0)) { @@ -843,7 +837,7 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, goto out; } - wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index); + wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail); wqe_hdr->wr_id = wr->wr_id; wqe_hdr->num_sge = wr->num_sge; wqe_hdr->total_len = 0; @@ -859,12 +853,11 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, /* Make sure wqe is written before index update */ smp_wmb(); - index++; - if (unlikely(index >= qp->rq.wqe_cnt)) - index = 0; /* Update shared rq ring */ pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); + + wr = wr->next; } spin_unlock_irqrestore(&qp->rq.lock, flags); -- cgit v1.2.3 From e51c2fb0331cb3440d7dc83ee78019ee8c7bb366 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Wed, 22 Feb 2017 17:22:57 -0800 Subject: RDMA/vmw_pvrdma: Dont hardcode QP header page Moved the header page count to a macro. Reported-by: Yuval Shaia Signed-off-by: Adit Ranadive Reviewed-by: Aditya Sarwade Tested-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 1 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index dbf61c37835c..9fbe22d3467b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -70,6 +70,7 @@ #define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 #define PVRDMA_NUM_RING_PAGES 4 +#define PVRDMA_QP_NUM_HEADER_PAGES 1 struct pvrdma_dev; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 3ffbb2d42170..30062aad3af1 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -170,8 +170,9 @@ static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap, sizeof(struct pvrdma_sge) * qp->sq.max_sg); /* Note: one extra page for the header. */ - qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size + - PAGE_SIZE - 1) / PAGE_SIZE; + qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES + + (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) / + PAGE_SIZE; return 0; } @@ -288,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, qp->npages = qp->npages_send + qp->npages_recv; /* Skip header page. */ - qp->sq.offset = PAGE_SIZE; + qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE; /* Recv queue pages are after send pages. */ qp->rq.offset = qp->npages_send * PAGE_SIZE; @@ -341,7 +342,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); cmd->access_flags = IB_ACCESS_LOCAL_WRITE; cmd->total_chunks = qp->npages; - cmd->send_chunks = qp->npages_send - 1; + cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES; cmd->pdir_dma = qp->pdir.dir_dma; dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n", -- cgit v1.2.3 From b172679b0d3b93a6197b2ff892794e7178e3c448 Mon Sep 17 00:00:00 2001 From: Aditya Sarwade Date: Wed, 22 Feb 2017 17:22:58 -0800 Subject: RDMA/vmw_pvrdma: Activate device on ethernet link up Restore device state when ethernet link changes to active. Acked-by: George Zhang Acked-by: Jorgen Hansen Acked-by: Bryan Tan Signed-off-by: Aditya Sarwade Signed-off-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index e69d6f3cae32..09078ccfaec7 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -132,7 +132,7 @@ enum pvrdma_pci_resource { enum pvrdma_device_ctl { PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */ - PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */ + PVRDMA_DEVICE_CTL_UNQUIESCE, /* Unquiesce device. */ PVRDMA_DEVICE_CTL_RESET, /* Reset device. */ }; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e77476ca70ac..34ebc7615411 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -56,7 +56,7 @@ #include "pvrdma.h" #define DRV_NAME "vmw_pvrdma" -#define DRV_VERSION "1.0.0.0-k" +#define DRV_VERSION "1.0.1.0-k" static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); @@ -660,7 +660,16 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); break; case NETDEV_UP: - pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + pvrdma_write_reg(dev, PVRDMA_REG_CTL, + PVRDMA_DEVICE_CTL_UNQUIESCE); + + mb(); + + if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) + dev_err(&dev->pdev->dev, + "failed to activate device during link up\n"); + else + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; default: dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", -- cgit v1.2.3 From ded260235308f340b979258a4c736e06ba12c747 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Mar 2017 08:21:52 +0300 Subject: IB/rxe: double free on error "goto err;" has it's own kfree_skb() call so it's a double free. We only need to free on the "goto exit;" path. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index dbfde0dc6ff7..9f95f50b2909 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -729,11 +729,11 @@ next_wqe: ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; - kfree_skb(skb); rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { + kfree_skb(skb); rxe_run_task(&qp->req.task, 1); goto exit; } -- cgit v1.2.3 From 004d18ea99b0f3b7b3d5790e4dc5ca7d5238706d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Feb 2017 13:40:16 +0300 Subject: RDMA/ocrdma: fix a type issue in ocrdma_put_pd_num() We want to return zero on success or negative error codes. The type should be int and not u8. Signed-off-by: Dan Carpenter Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bc9fb144e57b..c52edeafd616 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -372,7 +372,7 @@ static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id, return 0; } -static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id, +static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id, bool dpp_pool) { int status; -- cgit v1.2.3 From a1c5dd13228a1f9e5087375f9702422dfc2adbf1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 28 Feb 2017 21:42:53 +0200 Subject: IB/rxe: Update documentation link All Soft-RoCE (rxe) is handled now in rdma-core user space library, so the documentation. The patch below updates the documentation link to that new location. Reported-by: Josh Beavers Signed-off-by: Leon Romanovsky Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 7d1ac27ed251..6332dedc11e8 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -22,4 +22,4 @@ config RDMA_RXE To configure and work with soft-RoCE driver please use the following wiki page under "configure Soft-RoCE (RXE)" section: - https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home + https://github.com/linux-rdma/rdma-core/blob/master/Documentation/rxe.md -- cgit v1.2.3 From 0957c29f78af7d890c4ac506eda8f76bfc5a137a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Mar 2017 22:56:53 +0000 Subject: IB/core: Restore I/O MMU, s390 and powerpc support Avoid that the following error message is reported on the console while loading an RDMA driver with I/O MMU support enabled: DMAR: Allocating domain for mlx5_0 failed Ensure that DMA mapping operations that use to_pci_dev() to access to struct pci_dev see the correct PCI device. E.g. the s390 and powerpc DMA mapping operations use to_pci_dev() even with I/O MMU support disabled. This patch preserves the following changes of the DMA mapping updates patch series: - Introduction of dma_virt_ops. - Removal of ib_device.dma_ops. - Removal of struct ib_dma_mapping_ops. - Removal of an if-statement from each ib_dma_*() operation. - IB HW drivers no longer set dma_device directly. Reported-by: Sebastian Ott Reported-by: Parav Pandit Fixes: commit 99db9494035f ("IB/core: Remove ib_device.dma_device") Signed-off-by: Bart Van Assche Reviewed-by: parav@mellanox.com Tested-by: parav@mellanox.com Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 26 ++++++++++++++++++++------ include/rdma/ib_verbs.h | 30 +++++++++++++++++------------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 593d2ce6ec7c..addf869045cc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -336,12 +336,26 @@ int ib_register_device(struct ib_device *device, struct device *parent = device->dev.parent; WARN_ON_ONCE(!parent); - if (!device->dev.dma_ops) - device->dev.dma_ops = parent->dma_ops; - if (!device->dev.dma_mask) - device->dev.dma_mask = parent->dma_mask; - if (!device->dev.coherent_dma_mask) - device->dev.coherent_dma_mask = parent->coherent_dma_mask; + WARN_ON_ONCE(device->dma_device); + if (device->dev.dma_ops) { + /* + * The caller provided custom DMA operations. Copy the + * DMA-related fields that are used by e.g. dma_alloc_coherent() + * into device->dev. + */ + device->dma_device = &device->dev; + if (!device->dev.dma_mask) + device->dev.dma_mask = parent->dma_mask; + if (!device->dev.coherent_dma_mask) + device->dev.coherent_dma_mask = + parent->coherent_dma_mask; + } else { + /* + * The caller did not provide custom DMA operations. Use the + * DMA mapping operations of the parent device. + */ + device->dma_device = parent; + } mutex_lock(&device_mutex); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f1813c13687..99e4423eb2b8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1863,6 +1863,9 @@ struct ib_port_immutable { }; struct ib_device { + /* Do not access @dma_device directly from ULP nor from HW drivers. */ + struct device *dma_device; + char name[IB_DEVICE_NAME_MAX]; struct list_head event_handler_list; @@ -3007,7 +3010,7 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { - return dma_mapping_error(&dev->dev, dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** @@ -3021,7 +3024,7 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { - return dma_map_single(&dev->dev, cpu_addr, size, direction); + return dma_map_single(dev->dma_device, cpu_addr, size, direction); } /** @@ -3035,7 +3038,7 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(&dev->dev, addr, size, direction); + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3052,7 +3055,7 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { - return dma_map_page(&dev->dev, page, offset, size, direction); + return dma_map_page(dev->dma_device, page, offset, size, direction); } /** @@ -3066,7 +3069,7 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(&dev->dev, addr, size, direction); + dma_unmap_page(dev->dma_device, addr, size, direction); } /** @@ -3080,7 +3083,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(&dev->dev, sg, nents, direction); + return dma_map_sg(dev->dma_device, sg, nents, direction); } /** @@ -3094,7 +3097,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(&dev->dev, sg, nents, direction); + dma_unmap_sg(dev->dma_device, sg, nents, direction); } static inline int ib_dma_map_sg_attrs(struct ib_device *dev, @@ -3102,7 +3105,8 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, @@ -3110,7 +3114,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry @@ -3152,7 +3156,7 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(&dev->dev, addr, size, dir); + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -3167,7 +3171,7 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(&dev->dev, addr, size, dir); + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /** @@ -3182,7 +3186,7 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev, dma_addr_t *dma_handle, gfp_t flag) { - return dma_alloc_coherent(&dev->dev, size, dma_handle, flag); + return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); } /** @@ -3196,7 +3200,7 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle); + dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } /** -- cgit v1.2.3 From 812755d69efcf7c12fded57983d456647a0bbadd Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 24 Feb 2017 03:28:13 +0300 Subject: uapi: fix rdma/mlx5-abi.h userspace compilation errors Consistently use types from linux/types.h to fix the following rdma/mlx5-abi.h userspace compilation errors: /usr/include/rdma/mlx5-abi.h:69:25: error: 'u64' undeclared here (not in a function) MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, /usr/include/rdma/mlx5-abi.h:69:29: error: expected ',' or '}' before numeric constant MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, Include to fix the following rdma/mlx5-abi.h userspace compilation error: /usr/include/rdma/mlx5-abi.h:286:12: error: 'ETH_ALEN' undeclared here (not in a function) __u8 dmac[ETH_ALEN]; Signed-off-by: Dmitry V. Levin Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index da7cd62bace7..0b3d30837a9f 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -34,6 +34,7 @@ #define MLX5_ABI_USER_H #include +#include /* For ETH_ALEN. */ enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, @@ -66,7 +67,7 @@ struct mlx5_ib_alloc_ucontext_req { }; enum mlx5_lib_caps { - MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, + MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0, }; struct mlx5_ib_alloc_ucontext_req_v2 { -- cgit v1.2.3 From 9fcd67d1772c43d2f23e8fca56acc7219e991676 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Fri, 24 Feb 2017 15:38:26 +0100 Subject: IB/rxe: increment msn only when completing a request According to C9-147, MSN should only be incremented when the last packet of a multi packet request has been received. "Logically, the requester associates a sequential Send Sequence Number (SSN) with each WQE posted to the send queue. The SSN bears a one- to-one relationship to the MSN returned by the responder in each re- sponse packet. Therefore, when the requester receives a response, it in- terprets the MSN as representing the SSN of the most recent request completed by the responder to determine which send WQE(s) can be completed." Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: David Marchand Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index d404a8aba7af..c9dd385ce62e 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -813,18 +813,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) WARN_ON_ONCE(1); } - /* We successfully processed this new request. */ - qp->resp.msn++; - /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) + if (pkt->mask & RXE_COMP_MASK) { + /* We successfully processed this new request. */ + qp->resp.msn++; return RESPST_COMPLETE; - else if (qp_type(qp) == IB_QPT_RC) + } else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; -- cgit v1.2.3 From fedd9e1f75828992ace5833379116f38c66ad7bb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 16 Mar 2017 18:57:00 +0200 Subject: IB/cq: Don't process more than the given budget The caller might not want this overhead. Reviewed-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/cq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 2746d2eb3d52..f2ae75fa3128 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget) { int i, n, completed = 0; - while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) { + /* + * budget might be (-1) if the caller does not + * want to bound this call, thus we need unsigned + * minimum here. + */ + while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH, + budget - completed), cq->wc)) > 0) { for (i = 0; i < n; i++) { struct ib_wc *wc = &cq->wc[i]; -- cgit v1.2.3 From b7363e67b23e04c23c2a99437feefac7292a88bc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Mar 2017 22:03:17 +0200 Subject: IB/device: Convert ib-comp-wq to be CPU-bound This workqueue is used by our storage target mode ULPs via the new CQ API. Recent observations when working with very high-end flash storage devices reveal that UNBOUND workqueue threads can migrate between cpu cores and even numa nodes (although some numa locality is accounted for). While this attribute can be useful in some workloads, it does not fit in very nicely with the normal run-to-completion model we usually use in our target-mode ULPs and the block-mq irq<->cpu affinity facilities. The whole block-mq concept is that the completion will land on the same cpu where the submission was performed. The fact that our submitter thread is migrating cpus can break this locality. We assume that as a target mode ULP, we will serve multiple initiators/clients and we can spread the load enough without having to use unbound kworkers. Also, while we're at it, expose this workqueue via sysfs which is harmless and can be useful for debug. Signed-off-by: Sagi Grimberg Reviewed-by: Bart Van Assche -- Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index addf869045cc..7c9e34d679d3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1029,8 +1029,7 @@ static int __init ib_core_init(void) return -ENOMEM; ib_comp_wq = alloc_workqueue("ib-comp-wq", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - WQ_UNBOUND_MAX_ACTIVE); + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); if (!ib_comp_wq) { ret = -ENOMEM; goto err; -- cgit v1.2.3 From ea174c9573b0e0c8bc1a7a90fe9360ccb7aa9cbb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 20:16:33 +0200 Subject: RDMA/iser: Fix possible mr leak on device removal event When the rdma device is removed, we must cleanup all the rdma resources within the DEVICE_REMOVAL event handler to let the device teardown gracefully. When this happens with live I/O, some memory regions are occupied. Thus, track them too and dereg all the mr's. We are safe with mr access by iscsi_iser_cleanup_task. Reported-by: Raju Rangoju Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Max Gurtovoy Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ drivers/infiniband/ulp/iser/iser_verbs.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 9d0b22ad58c1..c1ae4aeae2f9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -430,6 +430,7 @@ struct iser_fr_desc { struct list_head list; struct iser_reg_resources rsc; struct iser_pi_context *pi_ctx; + struct list_head all_list; }; /** @@ -443,6 +444,7 @@ struct iser_fr_pool { struct list_head list; spinlock_t lock; int size; + struct list_head all_list; }; /** diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 30b622f2ab73..c538a38c91ce 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, int i, ret; INIT_LIST_HEAD(&fr_pool->list); + INIT_LIST_HEAD(&fr_pool->all_list); spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { @@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, } list_add_tail(&desc->list, &fr_pool->list); + list_add_tail(&desc->all_list, &fr_pool->all_list); fr_pool->size++; } @@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) struct iser_fr_desc *desc, *tmp; int i = 0; - if (list_empty(&fr_pool->list)) + if (list_empty(&fr_pool->all_list)) return; iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) { - list_del(&desc->list); + list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) { + list_del(&desc->all_list); iser_free_reg_res(&desc->rsc); if (desc->pi_ctx) iser_free_pi_ctx(desc->pi_ctx); -- cgit v1.2.3 From f6aafac184a3e46e919769dd4faa8bf0dc436534 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:18:45 +0100 Subject: IB/qib: fix false-postive maybe-uninitialized warning aarch64-linux-gcc-7 complains about code it doesn't fully understand: drivers/infiniband/hw/qib/qib_iba7322.c: In function 'qib_7322_txchk_change': include/asm-generic/bitops/non-atomic.h:105:35: error: 'shadow' may be used uninitialized in this function [-Werror=maybe-uninitialized] The code is right, and despite trying hard, I could not come up with a version that I liked better than just adding a fake initialization here to shut up the warning. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Signed-off-by: Arnd Bergmann Acked-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba7322.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 12c4208fd701..af9f596bb68b 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -7068,7 +7068,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, unsigned long flags; while (wait) { - unsigned long shadow; + unsigned long shadow = 0; int cstart, previ = -1; /* -- cgit v1.2.3