author    Stephen Rothwell <sfr@canb.auug.org.au> 2017-05-01 10:37:57 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au> 2017-05-01 10:37:57 +1000
commit    6b762260b8b89fed6187d1bbcdafcfd44d65cc1d (patch)
tree      99c96b304687843ba55a469a70854a912ead9588
parent    1c85247fa78a168be76588ad99d048fec4069d11 (diff)
parent    2196f2716292c39b35c86f5238ed16a8978f9ad4 (diff)
Merge remote-tracking branch 'rdma/for-next'
Updates for 4.12 kernel merge window

- idr usage and locking changes
- build fix for hns
- ipoib debug path record file fix
- hfi1 updates
- core RDMA netdev addition
- Intel VNIC driver addition
- Enhanced accelerators for IPoIB addition
- Debug cleanups in cxgb3/4
- Trivial cleanups from SF Markus Elfring
- Misc rxe fixes from Mellanox
- Misc ipoib fixes from Mellanox
- Lots of mlx4/mlx5 changes from Mellanox
- Misc fixes across the RDMA subsystem
- ODP paging fixes and improvements
- qedr updates
- hfi1 updates
- OPA port info patches

# gpg: Signature made Sat 29 Apr 2017 09:30:17 AEST
# gpg: using RSA key B826A3330E572FDD
# gpg: Can't check signature: No public key
-rw-r--r-- Documentation/infiniband/opa_vnic.txt | 153
-rw-r--r-- MAINTAINERS | 16
-rw-r--r-- drivers/infiniband/Kconfig | 1
-rw-r--r-- drivers/infiniband/core/Makefile | 3
-rw-r--r-- drivers/infiniband/core/addr.c | 4
-rw-r--r-- drivers/infiniband/core/cma.c | 79
-rw-r--r-- drivers/infiniband/core/device.c | 33
-rw-r--r-- drivers/infiniband/core/fmr_pool.c | 49
-rw-r--r-- drivers/infiniband/core/mad.c | 3
-rw-r--r-- drivers/infiniband/core/rdma_core.c | 627
-rw-r--r-- drivers/infiniband/core/rdma_core.h | 78
-rw-r--r-- drivers/infiniband/core/sa_query.c | 521
-rw-r--r-- drivers/infiniband/core/sysfs.c | 6
-rw-r--r-- drivers/infiniband/core/umem.c | 17
-rw-r--r-- drivers/infiniband/core/umem_odp.c | 81
-rw-r--r-- drivers/infiniband/core/uverbs.h | 69
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c | 1423
-rw-r--r-- drivers/infiniband/core/uverbs_main.c | 508
-rw-r--r-- drivers/infiniband/core/uverbs_std_types.c | 275
-rw-r--r-- drivers/infiniband/core/verbs.c | 8
-rw-r--r-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 22
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_dbg.c | 35
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.c | 201
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.h | 7
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_resource.c | 25
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch.c | 19
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cm.c | 269
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cm.h | 18
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cq.c | 21
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_ev.c | 26
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_mem.c | 2
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 116
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.h | 9
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_qp.c | 67
-rw-r--r-- drivers/infiniband/hw/cxgb4/cm.c | 393
-rw-r--r-- drivers/infiniband/hw/cxgb4/cq.c | 79
-rw-r--r-- drivers/infiniband/hw/cxgb4/device.c | 141
-rw-r--r-- drivers/infiniband/hw/cxgb4/ev.c | 39
-rw-r--r-- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 48
-rw-r--r-- drivers/infiniband/hw/cxgb4/mem.c | 44
-rw-r--r-- drivers/infiniband/hw/cxgb4/provider.c | 42
-rw-r--r-- drivers/infiniband/hw/cxgb4/qp.c | 96
-rw-r--r-- drivers/infiniband/hw/cxgb4/resource.c | 64
-rw-r--r-- drivers/infiniband/hw/cxgb4/t4.h | 24
-rw-r--r-- drivers/infiniband/hw/hfi1/Makefile | 2
-rw-r--r-- drivers/infiniband/hw/hfi1/aspm.h | 15
-rw-r--r-- drivers/infiniband/hw/hfi1/chip.c | 590
-rw-r--r-- drivers/infiniband/hw/hfi1/chip.h | 20
-rw-r--r-- drivers/infiniband/hw/hfi1/common.h | 15
-rw-r--r-- drivers/infiniband/hw/hfi1/debugfs.c | 238
-rw-r--r-- drivers/infiniband/hw/hfi1/debugfs.h | 62
-rw-r--r-- drivers/infiniband/hw/hfi1/driver.c | 124
-rw-r--r-- drivers/infiniband/hw/hfi1/file_ops.c | 31
-rw-r--r-- drivers/infiniband/hw/hfi1/firmware.c | 14
-rw-r--r-- drivers/infiniband/hw/hfi1/hfi.h | 95
-rw-r--r-- drivers/infiniband/hw/hfi1/init.c | 69
-rw-r--r-- drivers/infiniband/hw/hfi1/intr.c | 27
-rw-r--r-- drivers/infiniband/hw/hfi1/mad.c | 91
-rw-r--r-- drivers/infiniband/hw/hfi1/pcie.c | 2
-rw-r--r-- drivers/infiniband/hw/hfi1/pio.c | 19
-rw-r--r-- drivers/infiniband/hw/hfi1/pio.h | 34
-rw-r--r-- drivers/infiniband/hw/hfi1/rc.c | 31
-rw-r--r-- drivers/infiniband/hw/hfi1/ruc.c | 59
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.c | 43
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.h | 46
-rw-r--r-- drivers/infiniband/hw/hfi1/sysfs.c | 4
-rw-r--r-- drivers/infiniband/hw/hfi1/trace.c | 5
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 8
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_misc.h | 48
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_rc.h | 7
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_tx.h | 43
-rw-r--r-- drivers/infiniband/hw/hfi1/uc.c | 8
-rw-r--r-- drivers/infiniband/hw/hfi1/ud.c | 18
-rw-r--r-- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 16
-rw-r--r-- drivers/infiniband/hw/hfi1/user_pages.c | 5
-rw-r--r-- drivers/infiniband/hw/hfi1/user_sdma.c | 22
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.c | 119
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.h | 11
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic.h | 184
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic_main.c | 907
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic_sdma.c | 323
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_cmd.c | 6
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_cq.c | 3
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_mr.c | 20
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_qp.c | 3
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_cm.c | 5
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_utils.c | 10
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 10
-rw-r--r-- drivers/infiniband/hw/mlx4/cq.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c | 33
-rw-r--r-- drivers/infiniband/hw/mlx4/mcg.c | 3
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c | 6
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx4/srq.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx5/cmd.c | 11
-rw-r--r-- drivers/infiniband/hw/mlx5/cmd.h | 2
-rw-r--r-- drivers/infiniband/hw/mlx5/cq.c | 21
-rw-r--r-- drivers/infiniband/hw/mlx5/main.c | 397
-rw-r--r-- drivers/infiniband/hw/mlx5/mem.c | 13
-rw-r--r-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8
-rw-r--r-- drivers/infiniband/hw/mlx5/mr.c | 6
-rw-r--r-- drivers/infiniband/hw/mlx5/odp.c | 344
-rw-r--r-- drivers/infiniband/hw/mlx5/qp.c | 7
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cmd.c | 12
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c | 5
-rw-r--r-- drivers/infiniband/hw/nes/nes_hw.c | 7
-rw-r--r-- drivers/infiniband/hw/nes/nes_mgt.c | 5
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c | 9
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 15
-rw-r--r-- drivers/infiniband/hw/qedr/main.c | 88
-rw-r--r-- drivers/infiniband/hw/qedr/qedr.h | 5
-rw-r--r-- drivers/infiniband/hw/qedr/verbs.c | 193
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba6120.c | 10
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba7220.c | 5
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba7322.c | 10
-rw-r--r-- drivers/infiniband/hw/qib/qib_init.c | 15
-rw-r--r-- drivers/infiniband/hw/qib/qib_rc.c | 10
-rw-r--r-- drivers/infiniband/hw/qib/qib_ruc.c | 5
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.c | 15
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_common_util.h | 38
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 1
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 46
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c | 2
-rw-r--r-- drivers/infiniband/sw/rdmavt/ah.c | 2
-rw-r--r-- drivers/infiniband/sw/rdmavt/cq.c | 3
-rw-r--r-- drivers/infiniband/sw/rdmavt/mcast.c | 61
-rw-r--r-- drivers/infiniband/sw/rdmavt/mr.c | 57
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c | 32
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace.h | 4
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_cq.h | 127
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_rc.h | 109
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_tx.h | 34
-rw-r--r-- drivers/infiniband/sw/rxe/Kconfig | 1
-rw-r--r-- drivers/infiniband/sw/rxe/Makefile | 3
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.c | 6
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.h | 20
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_comp.c | 14
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_hw_counters.c | 78
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_hw_counters.h | 61
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_icrc.c | 6
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_loc.h | 4
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_mr.c | 14
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_net.c | 83
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_qp.c | 21
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_recv.c | 7
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_req.c | 4
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_resp.c | 5
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.c | 16
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.h | 9
-rw-r--r-- drivers/infiniband/ulp/Makefile | 1
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h | 40
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 67
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 6
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 7
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 358
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 432
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 59
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 13
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 64
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 12
-rw-r--r-- drivers/infiniband/ulp/iser/iser_initiator.c | 2
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/Kconfig | 8
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/Makefile | 7
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | 475
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 489
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 187
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h | 329
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 389
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 1053
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 390
-rw-r--r-- include/linux/mlx4/device.h | 3
-rw-r--r-- include/linux/mlx5/mlx5_ifc.h | 24
-rw-r--r-- include/net/addrconf.h | 22
-rw-r--r-- include/rdma/ib_hdrs.h | 66
-rw-r--r-- include/rdma/ib_mad.h | 27
-rw-r--r-- include/rdma/ib_pack.h | 2
-rw-r--r-- include/rdma/ib_sa.h | 13
-rw-r--r-- include/rdma/ib_umem.h | 8
-rw-r--r-- include/rdma/ib_umem_odp.h | 6
-rw-r--r-- include/rdma/ib_verbs.h | 112
-rw-r--r-- include/rdma/opa_port_info.h | 3
-rw-r--r-- include/rdma/opa_vnic.h | 141
-rw-r--r-- include/rdma/rdma_vt.h | 3
-rw-r--r-- include/rdma/rdmavt_qp.h | 14
-rw-r--r-- include/rdma/uverbs_std_types.h | 114
-rw-r--r-- include/rdma/uverbs_types.h | 172
-rw-r--r-- include/uapi/linux/pci_regs.h | 1
-rw-r--r-- include/uapi/rdma/ib_user_verbs.h | 11
-rw-r--r-- include/uapi/rdma/vmw_pvrdma-abi.h | 4
190 files changed, 12222 insertions, 4221 deletions
diff --git a/Documentation/infiniband/opa_vnic.txt b/Documentation/infiniband/opa_vnic.txt
new file mode 100644
index 000000000000..282e17be798a
--- /dev/null
+++ b/Documentation/infiniband/opa_vnic.txt
@@ -0,0 +1,153 @@
+The Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+feature supports Ethernet functionality over the Omni-Path fabric by
+encapsulating Ethernet packets between HFI nodes.
+
+Architecture
+=============
+The exchange of Omni-Path encapsulated Ethernet packets involves one or
+more virtual Ethernet switches overlaid on the Omni-Path fabric topology.
+A subset of HFI nodes on the Omni-Path fabric is permitted to exchange
+encapsulated Ethernet packets across a particular virtual Ethernet switch.
+The virtual Ethernet switches are logical abstractions achieved by
+configuring the HFI nodes on the fabric for header generation and
+processing. In the simplest configuration, all HFI nodes across the
+fabric exchange encapsulated Ethernet packets over a single virtual
+Ethernet switch. A virtual Ethernet switch is effectively an independent
+Ethernet network. The configuration is performed by an Ethernet Manager
+(EM), which is part of the trusted Fabric Manager (FM) application. HFI
+nodes can have multiple VNICs, each connected to a different virtual
+Ethernet switch. The diagram below presents a case of two virtual
+Ethernet switches with two HFI nodes.
+
+ +-------------------+
+ | Subnet/ |
+ | Ethernet |
+ | Manager |
+ +-------------------+
+ / /
+ / /
+ / /
+ / /
++-----------------------------+ +------------------------------+
+| Virtual Ethernet Switch | | Virtual Ethernet Switch |
+| +---------+ +---------+ | | +---------+ +---------+ |
+| | VPORT | | VPORT | | | | VPORT | | VPORT | |
++--+---------+----+---------+-+ +-+---------+----+---------+---+
+ | \ / |
+ | \ / |
+ | \/ |
+ | / \ |
+ | / \ |
+ +-----------+------------+ +-----------+------------+
+ | VNIC | VNIC | | VNIC | VNIC |
+ +-----------+------------+ +-----------+------------+
+ | HFI | | HFI |
+ +------------------------+ +------------------------+
+
+
+The Omni-Path encapsulated Ethernet packet format is described below.
+
+Bits Field
+------------------------------------
+Quad Word 0:
+0-19 SLID (lower 20 bits)
+20-30 Length (in Quad Words)
+31 BECN bit
+32-51 DLID (lower 20 bits)
+52-56 SC (Service Class)
+57-59 RC (Routing Control)
+60 FECN bit
+61-62 L2 (=10, 16B format)
+63 LT (=1, Link Transfer Head Flit)
+
+Quad Word 1:
+0-7 L4 type (=0x78 ETHERNET)
+8-11 SLID[23:20]
+12-15 DLID[23:20]
+16-31 PKEY
+32-47 Entropy
+48-63 Reserved
+
+Quad Word 2:
+0-15 Reserved
+16-31 L4 header
+32-63 Ethernet Packet
+
+Quad Words 3 to N-1:
+0-63 Ethernet packet (pad extended)
+
+Quad Word N (last):
+0-23 Ethernet packet (pad extended)
+24-55 ICRC
+56-61 Tail
+62-63 LT (=01, Link Transfer Tail Flit)
+
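To make the Quad Word 0 layout above concrete, here is a minimal decoding
sketch; the struct and function names are illustrative only and not the
driver's actual code, with the masks derived from the bit table above:

	/* Hypothetical sketch: decode Quad Word 0 of the OPA 16B header. */
	struct opa_16b_qw0 {
		u32 slid_low;	/* SLID lower 20 bits; SLID[23:20] is in QW 1 */
		u32 length;	/* packet length in quad words */
		u32 dlid_low;	/* DLID lower 20 bits; DLID[23:20] is in QW 1 */
		u8  sc;		/* service class */
		u8  rc;		/* routing control */
		bool becn;
		bool fecn;
	};

	static void opa_16b_decode_qw0(u64 qw0, struct opa_16b_qw0 *hdr)
	{
		hdr->slid_low = qw0 & 0xFFFFF;		/* bits 0-19  */
		hdr->length   = (qw0 >> 20) & 0x7FF;	/* bits 20-30 */
		hdr->becn     = (qw0 >> 31) & 0x1;	/* bit  31    */
		hdr->dlid_low = (qw0 >> 32) & 0xFFFFF;	/* bits 32-51 */
		hdr->sc       = (qw0 >> 52) & 0x1F;	/* bits 52-56 */
		hdr->rc       = (qw0 >> 57) & 0x7;	/* bits 57-59 */
		hdr->fecn     = (qw0 >> 60) & 0x1;	/* bit  60    */
	}

The full 24-bit SLID and DLID are formed by combining these lower 20 bits
with the SLID[23:20] and DLID[23:20] fields carried in Quad Word 1.
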
+The Ethernet packet is padded on the transmit side to ensure that the
+VNIC OPA packet is quad word aligned. The 'Tail' field contains the
+number of bytes padded. On the receive side the 'Tail' field is read
+and the padding is removed (along with the ICRC, Tail and OPA header)
+before passing the packet up the network stack.
+
+The L4 header field contains the id of the virtual Ethernet switch to
+which the VNIC port belongs. On the receive side, this field is used to
+de-multiplex the received VNIC packets to the different VNIC ports.
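A hedged sketch of the receive-side handling the two paragraphs above
describe (every helper name and constant here is hypothetical, not the
driver's real API):

	/* Sketch: strip the OPA header, the ICRC/Tail trailer and the pad
	 * bytes indicated by 'Tail', then demultiplex on the virtual
	 * Ethernet switch id carried in the L4 header.
	 */
	static void opa_vnic_rx_sketch(struct sk_buff *skb)
	{
		u8 pad = opa_get_tail_bytes(skb);	/* 'Tail' field, last QW */
		u16 vesw_id = opa_get_l4_hdr(skb);	/* L4 header, QW 2 */

		skb_pull(skb, OPA_VNIC_HDR_LEN);	/* drop OPA header */
		skb_trim(skb, skb->len - OPA_VNIC_ICRC_TAIL_LEN - pad);

		/* hand the bare Ethernet frame to the VNIC port for vesw_id */
		opa_vnic_deliver(vesw_id, skb);
	}
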
+
+Driver Design
+==============
+The Intel OPA VNIC software design is presented in the diagram below.
+The OPA VNIC functionality has a HW-dependent component and a
+HW-independent component.
+
+Support has been added to the IB device layer to allocate and free the
+RDMA netdev devices. The RDMA netdev supports interfacing with the
+network stack, thus creating standard network interfaces. OPA_VNIC is
+an RDMA netdev device type.
+
+The HW-dependent VNIC functionality is part of the HFI1 driver. It
+implements the verbs to allocate and free the OPA_VNIC RDMA netdev.
+It involves HW resource allocation/management for VNIC functionality.
+It interfaces with the network stack and implements the required
+net_device_ops functions. It expects Omni-Path encapsulated Ethernet
+packets in the transmit path and provides HW access to them. It strips
+the Omni-Path header from the received packets before passing them up
+the network stack. It also implements the RDMA netdev control operations.
+
+The OPA VNIC module implements the HW-independent VNIC functionality.
+It consists of two parts. The VNIC Ethernet Management Agent (VEMA)
+registers itself with IB core as an IB client and interfaces with the
+IB MAD stack. It exchanges the management information with the Ethernet
+Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees
+the OPA_VNIC RDMA netdev devices. It overrides the net_device_ops
+functions set by the HW-dependent VNIC driver where required to
+accommodate any control operation. It also handles the encapsulation
+of Ethernet packets with an Omni-Path header in the transmit path. For
+each VNIC interface, the information required for encapsulation is
+configured by the EM via the VEMA MAD interface. It also passes any
+control information to the HW-dependent driver by invoking the RDMA
+netdev control operations.
+
+ +-------------------+ +----------------------+
+ | | | Linux |
+ | IB MAD | | Network |
+ | | | Stack |
+ +-------------------+ +----------------------+
+ | | |
+ | | |
+ +----------------------------+ |
+ | | |
+ | OPA VNIC Module | |
+ | (OPA VNIC RDMA Netdev | |
+ | & EMA functions) | |
+ | | |
+ +----------------------------+ |
+ | |
+ | |
+ +------------------+ |
+ | IB core | |
+ +------------------+ |
+ | |
+ | |
+ +--------------------------------------------+
+ | |
+ | HFI1 Driver with VNIC support |
+ | |
+ +--------------------------------------------+
diff --git a/MAINTAINERS b/MAINTAINERS
index 7a9fd64fec3b..ff8da543faa2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5899,6 +5899,13 @@ F: drivers/block/cciss*
F: include/linux/cciss_ioctl.h
F: include/uapi/linux/cciss_ioctl.h
+OPA-VNIC DRIVER
+M: Dennis Dalessandro <dennis.dalessandro@intel.com>
+M: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
+L: linux-rdma@vger.kernel.org
+S: Supported
+F: drivers/infiniband/ulp/opa_vnic
+
HFI1 DRIVER
M: Mike Marciniszyn <mike.marciniszyn@intel.com>
M: Dennis Dalessandro <dennis.dalessandro@intel.com>
@@ -6507,6 +6514,7 @@ W: http://www.openfabrics.org/
Q: http://patchwork.kernel.org/project/linux-rdma/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git
S: Supported
+F: Documentation/devicetree/bindings/infiniband/
F: Documentation/infiniband/
F: drivers/infiniband/
F: include/uapi/linux/if_infiniband.h
@@ -11501,11 +11509,11 @@ S: Supported
F: drivers/net/ethernet/emulex/benet/
EMULEX ONECONNECT ROCE DRIVER
-M: Selvin Xavier <selvin.xavier@avagotech.com>
-M: Devesh Sharma <devesh.sharma@avagotech.com>
+M: Selvin Xavier <selvin.xavier@broadcom.com>
+M: Devesh Sharma <devesh.sharma@broadcom.com>
L: linux-rdma@vger.kernel.org
-W: http://www.emulex.com
-S: Supported
+W: http://www.broadcom.com
+S: Odd Fixes
F: drivers/infiniband/hw/ocrdma/
F: include/uapi/rdma/ocrdma-abi.h
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66f86027ed47..234fe01904e7 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
+source "drivers/infiniband/ulp/opa_vnic/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index e426ac877d19..6ebd9ad95010 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -29,4 +29,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
+ rdma_core.o uverbs_std_types.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 329d08c884f6..523d24320100 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -444,8 +444,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(addr->net, NULL, &fl6);
- if ((ret = dst->error))
+ ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
+ if (ret < 0)
goto put;
rt = (struct rt6_info *)dst;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index acd10d666f1c..34dc81d30692 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1755,6 +1755,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
+ if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ (id_priv->id.qp_type != IB_QPT_UD))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -3941,63 +3944,10 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
}
}
-static void cma_query_sa_classport_info_cb(int status,
- struct ib_class_port_info *rec,
- void *context)
-{
- struct class_port_info_context *cb_ctx = context;
-
- WARN_ON(!context);
-
- if (status || !rec) {
- pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
- cb_ctx->device->name, cb_ctx->port_num, status);
- goto out;
- }
-
- memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
-
-out:
- complete(&cb_ctx->done);
-}
-
-static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
- struct ib_class_port_info *class_port_info)
-{
- struct class_port_info_context *cb_ctx;
- int ret;
-
- cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
- if (!cb_ctx)
- return -ENOMEM;
-
- cb_ctx->device = device;
- cb_ctx->class_port_info = class_port_info;
- cb_ctx->port_num = port_num;
- init_completion(&cb_ctx->done);
-
- ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
- CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
- GFP_KERNEL, cma_query_sa_classport_info_cb,
- cb_ctx, &cb_ctx->sa_query);
- if (ret < 0) {
- pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
- device->name, port_num, ret);
- goto out;
- }
-
- wait_for_completion(&cb_ctx->done);
-
-out:
- kfree(cb_ctx);
- return ret;
-}
-
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
- struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
@@ -4018,21 +3968,14 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
- ret = cma_query_sa_classport_info(id_priv->id.device,
- id_priv->id.port_num,
- &class_port_info);
-
- if (ret)
- return ret;
-
- if (!(ib_get_cpi_capmask2(&class_port_info) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
- id_priv->id.device->name, id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
+ if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
+ (!ib_sa_sendonly_fullmem_support(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num))) {
+ pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
+ "RDMA CM: SM doesn't support Send Only Full Member option\n",
+ id_priv->id.device->name, id_priv->id.port_num);
+ return -EOPNOTSUPP;
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7c9e34d679d3..81d447da0048 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -172,8 +172,16 @@ static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- ib_cache_release_one(dev);
- kfree(dev->port_immutable);
+ WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
+ if (dev->reg_state == IB_DEV_UNREGISTERED) {
+ /*
+ * In the IB_DEV_UNINITIALIZED state, the cache and port table
+ * have not even been created. Free the cache and port table
+ * only when the device reaches the UNREGISTERED state.
+ */
+ ib_cache_release_one(dev);
+ kfree(dev->port_immutable);
+ }
kfree(dev);
}
@@ -380,32 +388,27 @@ int ib_register_device(struct ib_device *device,
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
- goto out;
+ goto port_cleanup;
}
ret = ib_device_register_rdmacg(device);
if (ret) {
pr_warn("Couldn't register device with rdma cgroup\n");
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
- ib_device_unregister_rdmacg(device);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
- ib_device_unregister_rdmacg(device);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
device->reg_state = IB_DEV_REGISTERED;
@@ -417,6 +420,14 @@ int ib_register_device(struct ib_device *device,
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
+ mutex_unlock(&device_mutex);
+ return 0;
+
+cache_cleanup:
+ ib_cache_cleanup_one(device);
+ ib_cache_release_one(device);
+port_cleanup:
+ kfree(device->port_immutable);
out:
mutex_unlock(&device_mutex);
return ret;
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdfad5f26212..84d2615b5d4b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -96,7 +96,8 @@ struct ib_fmr_pool {
void * arg);
void *flush_arg;
- struct task_struct *thread;
+ struct kthread_worker *worker;
+ struct kthread_work work;
atomic_t req_ser;
atomic_t flush_ser;
@@ -174,29 +175,19 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
}
-static int ib_fmr_cleanup_thread(void *pool_ptr)
+static void ib_fmr_cleanup_func(struct kthread_work *work)
{
- struct ib_fmr_pool *pool = pool_ptr;
+ struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
- do {
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
- ib_fmr_batch_release(pool);
-
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
- }
+ ib_fmr_batch_release(pool);
+ atomic_inc(&pool->flush_ser);
+ wake_up_interruptible(&pool->force_wait);
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (!kthread_should_stop());
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
- return 0;
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
+ kthread_queue_work(pool->worker, &pool->work);
}
/**
@@ -265,15 +256,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
- pool->thread = kthread_run(ib_fmr_cleanup_thread,
- pool,
- "ib_fmr(%s)",
- device->name);
- if (IS_ERR(pool->thread)) {
- pr_warn(PFX "couldn't start cleanup thread\n");
- ret = PTR_ERR(pool->thread);
+ pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
+ if (IS_ERR(pool->worker)) {
+ pr_warn(PFX "couldn't start cleanup kthread worker\n");
+ ret = PTR_ERR(pool->worker);
goto out_free_pool;
}
+ kthread_init_work(&pool->work, ib_fmr_cleanup_func);
{
struct ib_pool_fmr *fmr;
@@ -338,7 +327,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
LIST_HEAD(fmr_list);
int i;
- kthread_stop(pool->thread);
+ kthread_destroy_worker(pool->worker);
ib_fmr_batch_release(pool);
i = 0;
@@ -388,7 +377,7 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
serial = atomic_inc_return(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
if (wait_event_interruptible(pool->force_wait,
atomic_read(&pool->flush_ser) - serial >= 0))
@@ -502,7 +491,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
atomic_inc(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
}
}
}
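The conversion above replaces fmr_pool's open-coded kthread loop with the
kthread_worker API. A minimal self-contained sketch of that pattern (the
flush_demo naming is illustrative, not from fmr_pool):

	#include <linux/kthread.h>

	struct flush_demo {
		struct kthread_worker *worker;
		struct kthread_work work;
	};

	static void flush_demo_func(struct kthread_work *work)
	{
		struct flush_demo *d = container_of(work, struct flush_demo, work);

		/* one batch of deferred work goes here; the function may
		 * re-queue itself with kthread_queue_work() if more work
		 * remains, as ib_fmr_cleanup_func now does */
		(void)d;
	}

	static int flush_demo_start(struct flush_demo *d)
	{
		d->worker = kthread_create_worker(0, "flush_demo");
		if (IS_ERR(d->worker))
			return PTR_ERR(d->worker);
		kthread_init_work(&d->work, flush_demo_func);
		kthread_queue_work(d->worker, &d->work);	/* kick it once */
		return 0;
	}

	static void flush_demo_stop(struct flush_demo *d)
	{
		/* flushes queued work, then stops and frees the worker */
		kthread_destroy_worker(d->worker);
	}

Re-queueing from inside the work function replaces the old thread's manual
sleep/wake state machine, which is why the serial-number comparison moves
to the tail of ib_fmr_cleanup_func.
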
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 57f231f1c721..8a2ceb4ddaa2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -605,7 +605,7 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_snoop_private *mad_snoop_priv;
@@ -622,7 +622,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
agent);
unregister_mad_snoop(mad_snoop_priv);
}
- return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
new file mode 100644
index 000000000000..41c31a2bf093
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <linux/rcupdate.h>
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+ kref_get(&uobject->ref);
+}
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+ struct ib_uobject *uobj =
+ container_of(ref, struct ib_uobject, ref);
+
+ if (uobj->type->type_class->needs_kfree_rcu)
+ kfree_rcu(uobj, rcu);
+ else
+ kfree(uobj);
+}
+
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+ kref_put(&uobject->ref, uverbs_uobject_free);
+}
+
+static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+{
+ /*
+ * When shared access is required, we use a positive counter. Each
+ * shared access request checks that the value != -1 and increments it.
+ * Exclusive access is required for operations like write or destroy.
+ * In exclusive access mode, we check that the counter is zero (nobody
+ * claimed this object) and we set it to -1. Releasing a shared access
+ * lock is done simply by decreasing the counter. As for exclusive
+ * access locks, since only a single one of them is allowed
+ * concurrently, setting the counter to zero is enough for releasing
+ * this lock.
+ */
+ if (!exclusive)
+ return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ -EBUSY : 0;
+
+ /* lock is either WRITE or DESTROY - should be exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+}
+
+static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
+ const struct uverbs_obj_type *type)
+{
+ struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+
+ if (!uobj)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * user_handle should be filled by the handler;
+ * the object is added to the list in the commit stage.
+ */
+ uobj->context = context;
+ uobj->type = type;
+ atomic_set(&uobj->usecnt, 0);
+ kref_init(&uobj->ref);
+
+ return uobj;
+}
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+ int ret;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&uobj->context->ufile->idr_lock);
+
+ /*
+ * We start by allocating an idr entry that points to NULL. This
+ * represents an object which isn't initialized yet. We'll replace
+ * it later on with the real object once we commit.
+ */
+ ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+ min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
+ if (ret >= 0)
+ uobj->id = ret;
+
+ spin_unlock(&uobj->context->ufile->idr_lock);
+ idr_preload_end();
+
+ return ret < 0 ? ret : 0;
+}
+
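idr_add_uobj relies on the allocate-NULL-then-replace idiom: reserve an id
that maps to NULL first, and publish the real pointer only at commit time
via idr_replace(). A standalone sketch under assumed names (my_idr and
my_lock are illustrative):

	static int reserve_id(struct idr *my_idr, spinlock_t *my_lock)
	{
		int id;

		idr_preload(GFP_KERNEL);	/* preallocate outside the lock */
		spin_lock(my_lock);
		id = idr_alloc(my_idr, NULL, 0, 0, GFP_NOWAIT);
		spin_unlock(my_lock);
		idr_preload_end();
		return id;			/* negative errno on failure */
	}

	static void publish_id(struct idr *my_idr, spinlock_t *my_lock,
			       int id, void *obj)
	{
		spin_lock(my_lock);
		/* cannot fail: the id already exists and maps to NULL */
		WARN_ON(idr_replace(my_idr, obj, id) != NULL);
		spin_unlock(my_lock);
	}

A lookup that finds NULL therefore sees an object that was reserved but
not yet committed.
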
+/*
+ * This only removes the object from the idr; uverbs_uobject_put() is
+ * still required.
+ */
+static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
+{
+ spin_lock(&uobj->context->ufile->idr_lock);
+ idr_remove(&uobj->context->ufile->idr, uobj->id);
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+
+ rcu_read_lock();
+ /* object won't be released as we're protected by rcu */
+ uobj = idr_find(&ucontext->ufile->idr, id);
+ if (!uobj) {
+ uobj = ERR_PTR(-ENOENT);
+ goto free;
+ }
+
+ uverbs_uobject_get(uobj);
+free:
+ rcu_read_unlock();
+ return uobj;
+}
+
+static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct file *f;
+ struct ib_uobject *uobject;
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+
+ if (exclusive)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ f = fget(id);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running uverbs_close_fd,
+ * and the caller is expected to ensure that uverbs_close_fd is never
+ * done while a call to lookup is possible.
+ */
+ if (f->f_op != fd_type->fops) {
+ fput(f);
+ return ERR_PTR(-EBADF);
+ }
+
+ uverbs_uobject_get(uobject);
+ return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ if (uobj->type != type) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ret = uverbs_try_lock_object(uobj, exclusive);
+ if (ret) {
+ WARN(ucontext->cleanup_reason,
+ "ib_uverbs: Trying to lookup_get while cleanup context\n");
+ goto free;
+ }
+
+ return uobj;
+free:
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ int ret;
+ struct ib_uobject *uobj;
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = idr_add_uobj(uobj);
+ if (ret)
+ goto uobj_put;
+
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto idr_remove;
+
+ return uobj;
+
+idr_remove:
+ uverbs_idr_remove_uobj(uobj);
+uobj_put:
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+ int new_fd;
+ struct ib_uobject *uobj;
+ struct ib_uobject_file *uobj_file;
+ struct file *filp;
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0)
+ return ERR_PTR(new_fd);
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj)) {
+ put_unused_fd(new_fd);
+ return uobj;
+ }
+
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ filp = anon_inode_getfile(fd_type->name,
+ fd_type->fops,
+ uobj_file,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ put_unused_fd(new_fd);
+ uverbs_uobject_put(uobj);
+ return (void *)filp;
+ }
+
+ uobj_file->uobj.id = new_fd;
+ uobj_file->uobj.object = filp;
+ uobj_file->ufile = ucontext->ufile;
+ INIT_LIST_HEAD(&uobj->list);
+ kref_get(&uobj_file->ufile->ref);
+
+ return uobj;
+}
+
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ return type->type_class->alloc_begin(type, ucontext);
+}
+
+static void uverbs_uobject_add(struct ib_uobject *uobject)
+{
+ mutex_lock(&uobject->context->uobjects_lock);
+ list_add(&uobject->list, &uobject->context->uobjects);
+ mutex_unlock(&uobject->context->uobjects_lock);
+}
+
+static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->type, struct uverbs_obj_idr_type,
+ type);
+ int ret = idr_type->destroy_object(uobj, why);
+
+ /*
+ * We can only fail gracefully if the user requested to destroy the
+ * object. In all other cases, just remove whatever we can.
+ */
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_idr_remove_uobj(uobj);
+
+ return ret;
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ struct file *filp = uobj->object;
+ int id = uobj_file->uobj.id;
+
+ /* Unsuccessful NEW */
+ fput(filp);
+ put_unused_fd(id);
+}
+
+static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(uobj->type, struct uverbs_obj_fd_type, type);
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ int ret = fd_type->context_closed(uobj_file, why);
+
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ if (why == RDMA_REMOVE_DURING_CLEANUP) {
+ alloc_abort_fd_uobject(uobj);
+ return ret;
+ }
+
+ uobj_file->uobj.context = NULL;
+ return ret;
+}
+
+static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+{
+#ifdef CONFIG_LOCKDEP
+ if (exclusive)
+ WARN_ON(atomic_read(&uobj->usecnt) > 0);
+ else
+ WARN_ON(atomic_read(&uobj->usecnt) == -1);
+#endif
+}
+
+static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ ret = uobj->type->type_class->remove_commit(uobj, why);
+ if (ret && why == RDMA_REMOVE_DESTROY) {
+ /* We couldn't remove the object, so just unlock the uobject */
+ atomic_set(&uobj->usecnt, 0);
+ uobj->type->type_class->lookup_put(uobj, true);
+ } else {
+ mutex_lock(&ucontext->uobjects_lock);
+ list_del(&uobj->list);
+ mutex_unlock(&ucontext->uobjects_lock);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(uobj);
+ }
+
+ return ret;
+}
+
+/* This is called only for user requested DESTROY reasons */
+int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ /* put the ref count we took at lookup_get */
+ uverbs_uobject_put(uobj);
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
+ return 0;
+ }
+ lockdep_check(uobj, true);
+ ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
+
+ up_read(&ucontext->cleanup_rwsem);
+ return ret;
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_uobject_add(uobj);
+ spin_lock(&uobj->context->ufile->idr_lock);
+ /*
+ * We already allocated this IDR with a NULL object, so
+ * this shouldn't fail.
+ */
+ WARN_ON(idr_replace(&uobj->context->ufile->idr,
+ uobj, uobj->id));
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+
+ uverbs_uobject_add(&uobj_file->uobj);
+ fd_install(uobj_file->uobj.id, uobj->object);
+ /* This shouldn't be used anymore. Use the file object instead */
+ uobj_file->uobj.id = 0;
+ /* Get another reference as we export this to the fops */
+ uverbs_uobject_get(&uobj_file->uobj);
+}
+
+int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+{
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
+ int ret;
+
+ WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
+ ret = uobj->type->type_class->remove_commit(uobj,
+ RDMA_REMOVE_DURING_CLEANUP);
+ if (ret)
+ pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
+ uobj->id);
+ return ret;
+ }
+
+ uobj->type->type_class->alloc_commit(uobj);
+ up_read(&uobj->context->cleanup_rwsem);
+
+ return 0;
+}
+
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_idr_remove_uobj(uobj);
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_uobject_put(uobj);
+}
+
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
+{
+ uobj->type->type_class->alloc_abort(uobj);
+}
+
+static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ struct file *filp = uobj->object;
+
+ WARN_ON(exclusive);
+ /* This indirectly calls uverbs_close_fd and frees the object */
+ fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ lockdep_check(uobj, exclusive);
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ /*
+ * In order to unlock an object, either decrease its usecnt for
+ * read access or zero it in case of exclusive access. See
+ * uverbs_try_lock_object for locking schema information.
+ */
+ if (!exclusive)
+ atomic_dec(&uobj->usecnt);
+ else
+ atomic_set(&uobj->usecnt, 0);
+
+ uverbs_uobject_put(uobj);
+}
+
+const struct uverbs_obj_type_class uverbs_idr_class = {
+ .alloc_begin = alloc_begin_idr_uobject,
+ .lookup_get = lookup_get_idr_uobject,
+ .alloc_commit = alloc_commit_idr_uobject,
+ .alloc_abort = alloc_abort_idr_uobject,
+ .lookup_put = lookup_put_idr_uobject,
+ .remove_commit = remove_commit_idr_uobject,
+ /*
+ * When we destroy an object, we first just lock it for WRITE and
+ * actually DESTROY it in the finalize stage. So, the problematic
+ * scenario is when we just started the finalize stage of the
+ * destruction (nothing was executed yet). Now, the other thread
+ * fetched the object for READ access, but it didn't lock it yet.
+ * The DESTROY thread continues and starts destroying the object.
+ * When the other thread continues, without RCU it would
+ * access freed memory. However, the rcu_read_lock delays the free
+ * until the rcu_read_lock of the READ operation quits. Since the
+ * exclusive lock of the object is still taken by the DESTROY flow, the
+ * READ operation will get -EBUSY and it'll just bail out.
+ */
+ .needs_kfree_rcu = true,
+};
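Taken together, alloc_begin/alloc_commit/alloc_abort and
lookup_get/lookup_put define the handler-side object lifecycle. A hedged
sketch of a typical create handler driving them through the rdma_*
wrappers (my_obj_type and setup_hw_object() are placeholders):

	static int create_handler(struct ib_ucontext *ucontext)
	{
		struct ib_uobject *uobj;
		int ret;

		uobj = rdma_alloc_begin_uobject(&my_obj_type, ucontext);
		if (IS_ERR(uobj))
			return PTR_ERR(uobj);

		ret = setup_hw_object(uobj);		/* driver-specific work */
		if (ret) {
			rdma_alloc_abort_uobject(uobj);	/* undo the reservation */
			return ret;
		}

		/* publish: from here the id is visible to rdma_lookup_get_uobject */
		return rdma_alloc_commit_uobject(uobj);
	}
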
+
+static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+{
+ struct ib_ucontext *ucontext;
+ struct ib_uverbs_file *ufile = uobj_file->ufile;
+ int ret;
+
+ mutex_lock(&uobj_file->ufile->cleanup_mutex);
+
+ /* uobject was either already cleaned up or is cleaned up right now anyway */
+ if (!uobj_file->uobj.context ||
+ !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
+ goto unlock;
+
+ ucontext = uobj_file->uobj.context;
+ ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
+ up_read(&ucontext->cleanup_rwsem);
+ if (ret)
+ pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
+unlock:
+ mutex_unlock(&ufile->cleanup_mutex);
+}
+
+void uverbs_close_fd(struct file *f)
+{
+ struct ib_uobject_file *uobj_file = f->private_data;
+ struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+
+ _uverbs_close_fd(uobj_file);
+ uverbs_uobject_put(&uobj_file->uobj);
+ kref_put(uverbs_file_ref, ib_uverbs_release_file);
+}
+
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+{
+ enum rdma_remove_reason reason = device_removed ?
+ RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
+ unsigned int cur_order = 0;
+
+ ucontext->cleanup_reason = reason;
+ /*
+ * Waits for all remove_commit and alloc_commit to finish. Logically, we
+ * want to hold this forever as the context is going to be destroyed,
+ * but we'll release it since it causes a "held lock freed" BUG message.
+ */
+ down_write(&ucontext->cleanup_rwsem);
+
+ while (!list_empty(&ucontext->uobjects)) {
+ struct ib_uobject *obj, *next_obj;
+ unsigned int next_order = UINT_MAX;
+
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing an FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock. We take and release
+ * the lock once per destroy-order traversal to give other threads
+ * (which might still use the FDs) a chance to run.
+ */
+ mutex_lock(&ucontext->uobjects_lock);
+ list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
+ list) {
+ if (obj->type->destroy_order == cur_order) {
+ int ret;
+
+ /*
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
+ */
+ WARN_ON(uverbs_try_lock_object(obj, true));
+ ret = obj->type->type_class->remove_commit(obj,
+ reason);
+ list_del(&obj->list);
+ if (ret)
+ pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
+ obj->id, cur_order);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(obj);
+ } else {
+ next_order = min(next_order,
+ obj->type->destroy_order);
+ }
+ }
+ mutex_unlock(&ucontext->uobjects_lock);
+ cur_order = next_order;
+ }
+ up_write(&ucontext->cleanup_rwsem);
+}
+
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+{
+ ucontext->cleanup_reason = 0;
+ mutex_init(&ucontext->uobjects_lock);
+ INIT_LIST_HEAD(&ucontext->uobjects);
+ init_rwsem(&ucontext->cleanup_rwsem);
+}
+
+const struct uverbs_obj_type_class uverbs_fd_class = {
+ .alloc_begin = alloc_begin_fd_uobject,
+ .lookup_get = lookup_get_fd_uobject,
+ .alloc_commit = alloc_commit_fd_uobject,
+ .alloc_abort = alloc_abort_fd_uobject,
+ .lookup_put = lookup_put_fd_uobject,
+ .remove_commit = remove_commit_fd_uobject,
+ .needs_kfree_rcu = false,
+};
+
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
new file mode 100644
index 000000000000..1b82e7ff7fe8
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_CORE_H
+#define RDMA_CORE_H
+
+#include <linux/idr.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mutex.h>
+
+/*
+ * These functions initialize the context and clean up its uobjects.
+ * The context has a list of objects which is protected by a mutex
+ * on the context. initialize_ucontext should be called when we create
+ * a context.
+ * cleanup_ucontext removes all uobjects from the context and puts them.
+ */
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless of whether this uobject is still alive in the
+ * context's objects repository. Objects are put via uverbs_uobject_put.
+ */
+void uverbs_uobject_get(struct ib_uobject *uobject);
+
+/*
+ * In order to indicate we no longer need this uobject, uverbs_uobject_put
+ * is called. When the reference count drops to zero, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject);
+
+/* Indicate this fd is no longer used by this consumer, but its memory isn't
+ * necessarily released yet. When the last reference is put, we release the
+ * memory. After this call is executed, calling uverbs_uobject_get isn't
+ * allowed.
+ * This must be called from the release file_operations of the file!
+ */
+void uverbs_close_fd(struct file *f);
+
+#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index ceae153997d0..b57bdc257fcf 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -56,6 +56,8 @@
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
+#define IB_SA_CPI_MAX_RETRY_CNT 3
+#define IB_SA_CPI_RETRY_WAIT 1000 /* msecs */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
struct ib_sa_sm_ah {
@@ -65,9 +67,23 @@ struct ib_sa_sm_ah {
u8 src_path_mask;
};
+enum rdma_class_port_info_type {
+ RDMA_CLASS_PORT_INFO_IB,
+ RDMA_CLASS_PORT_INFO_OPA
+};
+
+struct rdma_class_port_info {
+ enum rdma_class_port_info_type type;
+ union {
+ struct ib_class_port_info ib;
+ struct opa_class_port_info opa;
+ };
+};
+
struct ib_sa_classport_cache {
bool valid;
- struct ib_class_port_info data;
+ int retry_cnt;
+ struct rdma_class_port_info data;
};
struct ib_sa_port {
@@ -75,6 +91,7 @@ struct ib_sa_port {
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
struct ib_sa_classport_cache classport_info;
+ struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
u8 port_num;
@@ -103,6 +120,7 @@ struct ib_sa_query {
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
+#define IB_SA_QUERY_OPA 0x00000004
struct ib_sa_service_query {
void (*callback)(int, struct ib_sa_service_rec *, void *);
@@ -123,7 +141,7 @@ struct ib_sa_guidinfo_query {
};
struct ib_sa_classport_info_query {
- void (*callback)(int, struct ib_class_port_info *, void *);
+ void (*callback)(void *);
void *context;
struct ib_sa_query sa_query;
};
@@ -406,7 +424,7 @@ static const struct ib_field service_rec_table[] = {
.struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
.field_name = "ib_class_port_info:" #field
-static const struct ib_field classport_info_rec_table[] = {
+static const struct ib_field ib_classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
@@ -477,6 +495,88 @@ static const struct ib_field classport_info_rec_table[] = {
.size_bits = 32 },
};
+#define OPA_CLASSPORTINFO_REC_FIELD(field) \
+ .struct_offset_bytes =\
+ offsetof(struct opa_class_port_info, field), \
+ .struct_size_bytes = \
+ sizeof((struct opa_class_port_info *)0)->field, \
+ .field_name = "opa_class_port_info:" #field
+
+static const struct ib_field opa_classport_info_rec_table[] = {
+ { OPA_CLASSPORTINFO_REC_FIELD(base_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(class_version),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
+ .offset_words = 17,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
+ .offset_words = 18,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
+ .offset_words = 18,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
+ .offset_words = 19,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 19,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
@@ -931,92 +1031,6 @@ static void free_sm_ah(struct kref *kref)
kfree(sm_ah);
}
-static void update_sm_ah(struct work_struct *work)
-{
- struct ib_sa_port *port =
- container_of(work, struct ib_sa_port, update_task);
- struct ib_sa_sm_ah *new_ah;
- struct ib_port_attr port_attr;
- struct ib_ah_attr ah_attr;
-
- if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
- pr_warn("Couldn't query port\n");
- return;
- }
-
- new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
- if (!new_ah) {
- return;
- }
-
- kref_init(&new_ah->ref);
- new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
-
- new_ah->pkey_index = 0;
- if (ib_find_pkey(port->agent->device, port->port_num,
- IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
- pr_err("Couldn't find index for default PKey\n");
-
- memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = port_attr.sm_lid;
- ah_attr.sl = port_attr.sm_sl;
- ah_attr.port_num = port->port_num;
- if (port_attr.grh_required) {
- ah_attr.ah_flags = IB_AH_GRH;
- ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
- ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
- }
-
- new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
- if (IS_ERR(new_ah->ah)) {
- pr_warn("Couldn't create new SM AH\n");
- kfree(new_ah);
- return;
- }
-
- spin_lock_irq(&port->ah_lock);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = new_ah;
- spin_unlock_irq(&port->ah_lock);
-
-}
-
-static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
-{
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER) {
- unsigned long flags;
- struct ib_sa_device *sa_dev =
- container_of(handler, typeof(*sa_dev), event_handler);
- struct ib_sa_port *port =
- &sa_dev->port[event->element.port_num - sa_dev->start_port];
-
- if (!rdma_cap_ib_sa(handler->device, port->port_num))
- return;
-
- spin_lock_irqsave(&port->ah_lock, flags);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = NULL;
- spin_unlock_irqrestore(&port->ah_lock, flags);
-
- if (event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_LID_CHANGE) {
- spin_lock_irqsave(&port->classport_lock, flags);
- port->classport_info.valid = false;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- }
- queue_work(ib_wq, &sa_dev->port[event->element.port_num -
- sa_dev->start_port].update_task);
- }
-}
-
void ib_sa_register_client(struct ib_sa_client *client)
{
atomic_set(&client->users, 1);
@@ -1203,7 +1217,9 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask,
- IB_MGMT_BASE_VERSION);
+ ((query->flags & IB_SA_QUERY_OPA) ?
+ OPA_MGMT_BASE_VERSION :
+ IB_MGMT_BASE_VERSION));
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
@@ -1220,16 +1236,21 @@ static void free_mad(struct ib_sa_query *query)
kref_put(&query->sm_ah->ref, free_sm_ah);
}
-static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
+ struct ib_sa_mad *mad = query->mad_buf->mad;
unsigned long flags;
memset(mad, 0, sizeof *mad);
- mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ if (query->flags & IB_SA_QUERY_OPA) {
+ mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
+ } else {
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ }
mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
-
spin_lock_irqsave(&tid_lock, flags);
mad->mad_hdr.tid =
cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
@@ -1383,7 +1404,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
query->sa_query.release = ib_sa_path_rec_release;
@@ -1508,7 +1529,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
query->sa_query.release = ib_sa_service_rec_release;
@@ -1600,7 +1621,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
query->sa_query.release = ib_sa_mcmember_rec_release;
@@ -1697,7 +1718,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
@@ -1728,7 +1749,42 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-/* Support get SA ClassPortInfo */
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ bool ret = false;
+ unsigned long flags;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if ((port->classport_info.valid) &&
+ (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
+ ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
+ & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
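
The helper above gates the send-only full-member multicast join on the cached IB
ClassPortInfo CapabilityMask2. A minimal userspace sketch of the intended
consumer-side decision follows; the JoinState bit values are the usual
MCMemberRecord ones, while the capability-bit value and all other names here are
illustrative assumptions, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

/* MCMemberRecord JoinState bits, plus the send-only full-member
 * extension that the new helper advertises. */
#define JOIN_FULL_MEMBER           0x1
#define JOIN_SEND_ONLY_NON_MEMBER  0x4
#define JOIN_SEND_ONLY_FULL_MEMBER 0x8

/* Stand-in for ib_sa_sendonly_fullmem_support(); assumed behavior:
 * true only when the cached ClassPortInfo is valid and the given
 * send-only bit is set in CapabilityMask2. */
static bool sendonly_fullmem_support(bool cpi_valid, unsigned int capmask2,
				     unsigned int sendonly_bit)
{
	return cpi_valid && (capmask2 & sendonly_bit);
}

int main(void)
{
	const unsigned int sendonly_bit = 0x1000;	/* assumed bit position */
	bool sendonly = true;
	int join_state;

	if (sendonly && sendonly_fullmem_support(true, 0x1000, sendonly_bit))
		join_state = JOIN_SEND_ONLY_FULL_MEMBER;
	else if (sendonly)
		join_state = JOIN_SEND_ONLY_NON_MEMBER;
	else
		join_state = JOIN_FULL_MEMBER;

	printf("join_state = 0x%x\n", join_state);
	return 0;
}
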
+
+struct ib_classport_info_context {
+ struct completion done;
+ struct ib_sa_query *sa_query;
+};
+
+static void ib_classportinfo_cb(void *context)
+{
+ struct ib_classport_info_context *cb_ctx = context;
+
+ complete(&cb_ctx->done);
+}
+
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1736,91 +1792,91 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+ struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
if (mad) {
- struct ib_class_port_info rec;
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ struct opa_class_port_info rec;
- ib_unpack(classport_info_rec_table,
- ARRAY_SIZE(classport_info_rec_table),
- mad->data, &rec);
+ ib_unpack(opa_classport_info_rec_table,
+ ARRAY_SIZE(opa_classport_info_rec_table),
+ mad->data, &rec);
- spin_lock_irqsave(&sa_query->port->classport_lock, flags);
- if (!status && !sa_query->port->classport_info.valid) {
- memcpy(&sa_query->port->classport_info.data, &rec,
- sizeof(sa_query->port->classport_info.data));
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.opa, &rec,
+ sizeof(info->data.opa));
- sa_query->port->classport_info.valid = true;
- }
- spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_OPA;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
- query->callback(status, &rec, query->context);
- } else {
- query->callback(status, NULL, query->context);
+ } else {
+ struct ib_class_port_info rec;
+
+ ib_unpack(ib_classport_info_rec_table,
+ ARRAY_SIZE(ib_classport_info_rec_table),
+ mad->data, &rec);
+
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.ib, &rec,
+ sizeof(info->data.ib));
+
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_IB;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
+ }
}
+ query->callback(query->context);
}
-static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_classport_info_query,
sa_query));
}
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
+ int timeout_ms,
+ void (*callback)(void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
- struct ib_sa_classport_info_query *query;
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
struct ib_mad_agent *agent;
+ struct ib_sa_classport_info_query *query;
struct ib_sa_mad *mad;
- struct ib_class_port_info cached_class_port_info;
+ gfp_t gfp_mask = GFP_KERNEL;
int ret;
- unsigned long flags;
-
- if (!sa_dev)
- return -ENODEV;
- port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- /* Use cached ClassPortInfo attribute if valid instead of sending mad */
- spin_lock_irqsave(&port->classport_lock, flags);
- if (port->classport_info.valid && callback) {
- memcpy(&cached_class_port_info, &port->classport_info.data,
- sizeof(cached_class_port_info));
- spin_unlock_irqrestore(&port->classport_lock, flags);
- callback(0, &cached_class_port_info, context);
- return 0;
- }
- spin_unlock_irqrestore(&port->classport_lock, flags);
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
+ query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
+ port->port_num) ?
+ IB_SA_QUERY_OPA : 0;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err_free;
- ib_sa_client_get(client);
- query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
+ query->callback = callback;
+ query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
-
- query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
+ init_mad(&query->sa_query, agent);
- query->sa_query.release = ib_sa_portclass_info_rec_release;
- /* support GET only */
+ query->sa_query.callback = ib_sa_classport_info_rec_callback;
+ query->sa_query.release = ib_sa_classport_info_rec_release;
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
mad->sa_hdr.comp_mask = 0;
@@ -1828,20 +1884,71 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err_free_mad;
return ret;
-err2:
+err_free_mad:
*sa_query = NULL;
- ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-err1:
+err_free:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
+static void update_ib_cpi(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, ib_cpi_work.work);
+ struct ib_classport_info_context *cb_context;
+ unsigned long flags;
+ int ret;
+
+	/* Nothing to do here if the classport info is already valid. */
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (port->classport_info.valid) {
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+ cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
+ if (!cb_context)
+ goto err_nomem;
+
+ init_completion(&cb_context->done);
+
+ ret = ib_sa_classport_info_rec_query(port, 3000,
+ ib_classportinfo_cb, cb_context,
+ &cb_context->sa_query);
+ if (ret < 0)
+ goto free_cb_err;
+ wait_for_completion(&cb_context->done);
+free_cb_err:
+ kfree(cb_context);
+ spin_lock_irqsave(&port->classport_lock, flags);
+
+	/* If the classport info is still not valid, the query must have
+	 * failed for some reason. Retry issuing the query.
+	 */
+ if (!port->classport_info.valid) {
+ port->classport_info.retry_cnt++;
+ if (port->classport_info.retry_cnt <=
+ IB_SA_CPI_MAX_RETRY_CNT) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
+ }
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+err_nomem:
+ return;
+}
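
update_ib_cpi() turns the asynchronous SA query into a synchronous one by
blocking on a completion that ib_classportinfo_cb() signals. A minimal
pthread-based userspace analog of that pattern; every name below is an
illustrative stand-in, not the kernel API.

#include <pthread.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void init_completion(struct completion *c)
{
	pthread_mutex_init(&c->lock, NULL);
	pthread_cond_init(&c->cond, NULL);
	c->done = 0;
}

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

/* Plays the role of ib_classportinfo_cb(): the async query callback
 * simply signals the waiter. */
static void *query_callback(void *ctx)
{
	complete(ctx);
	return NULL;
}

int main(void)
{
	struct completion done;
	pthread_t cb;

	init_completion(&done);
	pthread_create(&cb, NULL, query_callback, &done); /* "send_mad()" */
	wait_for_completion(&done);	/* update_ib_cpi() blocks here */
	pthread_join(cb, NULL);
	printf("classport info query completed\n");
	return 0;
}

If the completion never fires because the query failed, the kernel side falls
back to the delayed-work retry loop shown above rather than blocking forever.
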
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1870,7 +1977,8 @@ static void send_handler(struct ib_mad_agent *agent,
spin_unlock_irqrestore(&idr_lock, flags);
free_mad(query);
- ib_sa_client_put(query->client);
+ if (query->client)
+ ib_sa_client_put(query->client);
query->release(query);
}
@@ -1897,6 +2005,99 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
+static void update_sm_ah(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, update_task);
+ struct ib_sa_sm_ah *new_ah;
+ struct ib_port_attr port_attr;
+ struct ib_ah_attr ah_attr;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ pr_warn("Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
+ if (!new_ah)
+ return;
+
+ kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
+
+ new_ah->pkey_index = 0;
+ if (ib_find_pkey(port->agent->device, port->port_num,
+ IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
+ pr_err("Couldn't find index for default PKey\n");
+
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.dlid = port_attr.sm_lid;
+ ah_attr.sl = port_attr.sm_sl;
+ ah_attr.port_num = port->port_num;
+ if (port_attr.grh_required) {
+ ah_attr.ah_flags = IB_AH_GRH;
+ ah_attr.grh.dgid.global.subnet_prefix =
+ cpu_to_be64(port_attr.subnet_prefix);
+ ah_attr.grh.dgid.global.interface_id =
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+ }
+
+ new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(new_ah->ah)) {
+ pr_warn("Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER) {
+ unsigned long flags;
+ struct ib_sa_device *sa_dev =
+ container_of(handler, typeof(*sa_dev), event_handler);
+ u8 port_num = event->element.port_num - sa_dev->start_port;
+ struct ib_sa_port *port = &sa_dev->port[port_num];
+
+ if (!rdma_cap_ib_sa(handler->device, port->port_num))
+ return;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = NULL;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ if (event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PORT_ACTIVE) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ port->classport_info.valid = false;
+ port->classport_info.retry_cnt = 0;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ queue_delayed_work(ib_wq,
+ &port->ib_cpi_work, delay);
+ }
+ queue_work(ib_wq, &sa_dev->port[port_num].update_task);
+ }
+}
+
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
@@ -1934,6 +2135,8 @@ static void ib_sa_add_one(struct ib_device *device)
goto err;
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
+ INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
+ update_ib_cpi);
count++;
}
@@ -1980,11 +2183,11 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
return;
ib_unregister_event_handler(&sa_dev->event_handler);
-
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_cap_ib_sa(device, i + 1)) {
+ cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
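
The AH handed out by update_sm_ah() is reference-counted with a kref throughout
this file: the port holds one reference in port->sm_ah, and each outstanding
query takes its own before use. A compressed, single-threaded userspace sketch
of that ownership flow, with a plain counter standing in for the atomic kref:

#include <stdio.h>
#include <stdlib.h>

struct sm_ah {
	int ref;
};

static struct sm_ah *sm_ah_new(void)
{
	struct sm_ah *ah = calloc(1, sizeof(*ah));

	if (ah)
		ah->ref = 1;			/* kref_init() */
	return ah;
}

static void sm_ah_put(struct sm_ah *ah)
{
	if (--ah->ref == 0)			/* kref_put(..., free_sm_ah) */
		free(ah);
}

int main(void)
{
	struct sm_ah *old = sm_ah_new();	/* current port->sm_ah */
	struct sm_ah *new = sm_ah_new();	/* built by update_sm_ah() */

	if (!old || !new)
		return 1;

	old->ref++;		/* an in-flight query still holds old */
	sm_ah_put(old);		/* port swaps in new, drops its old ref */
	sm_ah_put(old);		/* query completes: last ref, old is freed */
	sm_ah_put(new);		/* teardown */
	return 0;
}
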
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index daadf3130c9f..7ebe1ef23652 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -253,6 +253,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " EDR";
rate = 250;
break;
+ case IB_SPEED_HDR:
+ speed = " HDR";
+ rate = 500;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
rate = 25;
@@ -1301,7 +1305,7 @@ err_put:
free_port_list_attributes(device);
err_unregister:
- device_unregister(class_dev);
+ device_del(class_dev);
err:
return ret;
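
The new IB_SPEED_HDR case extends the rate table, where per-lane rates are kept
in units of 0.1 Gb/sec, so the entry of 500 prints as 50 Gb/sec per lane. A
standalone sketch of the resulting sysfs arithmetic; the 1X/4X/8X/12X width
multipliers are assumed from the usual IB link widths.

#include <stdio.h>

int main(void)
{
	const int hdr_rate = 500;		/* 0.1 Gb/sec units, from the diff */
	const int widths[] = { 1, 4, 8, 12 };	/* assumed link widths */
	size_t i;

	for (i = 0; i < sizeof(widths) / sizeof(widths[0]); i++) {
		int rate = hdr_rate * widths[i];

		/* mirrors rate_show()'s "%d%s Gb/sec" formatting */
		printf("%dX HDR: %d%s Gb/sec\n", widths[i],
		       rate / 10, rate % 10 ? ".5" : "");
	}
	return 0;
}
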
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 27f155d2df8d..3dbf811d3c51 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -115,11 +115,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
@@ -133,7 +133,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (access & IB_ACCESS_ON_DEMAND) {
put_pid(umem->pid);
- ret = ib_umem_odp_get(context, umem);
+ ret = ib_umem_odp_get(context, umem, access);
if (ret) {
kfree(umem);
return ERR_PTR(ret);
@@ -315,7 +315,6 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- int shift;
int i;
int n;
struct scatterlist *sg;
@@ -323,11 +322,9 @@ int ib_umem_page_count(struct ib_umem *umem)
if (umem->odp_data)
return ib_umem_num_pages(umem);
- shift = ilog2(umem->page_size);
-
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> shift;
+ n += sg_dma_len(sg) >> umem->page_shift;
return n;
}
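
Storing page_shift instead of page_size removes the ilog2() at each call site:
a byte count converts to a page count with a plain right shift. A tiny
standalone illustration with arbitrary scatterlist lengths:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const unsigned int page_shift = 12;	/* PAGE_SHIFT, 4 KiB pages */
	const uint64_t sg_dma_len[] = { 4096, 65536, 2097152 };
	uint64_t n = 0;
	size_t i;

	for (i = 0; i < sizeof(sg_dma_len) / sizeof(sg_dma_len[0]); i++)
		n += sg_dma_len[i] >> page_shift;	/* was len / page_size */

	printf("pages mapped: %llu\n", (unsigned long long)n);	/* 529 */
	return 0;
}
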
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index cb2742b548bb..0780b1afefa9 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -254,11 +255,11 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->writable = 1;
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->writable = 1;
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!odp_data) {
@@ -306,7 +307,8 @@ out_umem:
}
EXPORT_SYMBOL(ib_alloc_odp_umem);
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
+ int access)
{
int ret_val;
struct pid *our_pid;
@@ -315,6 +317,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
if (!mm)
return -EINVAL;
+ if (access & IB_ACCESS_HUGETLB) {
+ struct vm_area_struct *vma;
+ struct hstate *h;
+
+ vma = find_vma(mm, ib_umem_start(umem));
+ if (!vma || !is_vm_hugetlb_page(vma))
+ return -EINVAL;
+ h = hstate_vma(vma);
+ umem->page_shift = huge_page_shift(h);
+ umem->hugetlb = 1;
+ } else {
+ umem->hugetlb = 0;
+ }
+
/* Prevent creating ODP MRs in child processes */
rcu_read_lock();
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
@@ -325,7 +341,6 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
goto out_mm;
}
- umem->hugetlb = 0;
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
if (!umem->odp_data) {
ret_val = -ENOMEM;
@@ -504,7 +519,6 @@ out:
static int ib_umem_odp_map_dma_single_page(
struct ib_umem *umem,
int page_index,
- u64 base_virt_addr,
struct page *page,
u64 access_mask,
unsigned long current_seq)
@@ -527,7 +541,7 @@ static int ib_umem_odp_map_dma_single_page(
if (!(umem->odp_data->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
- 0, PAGE_SIZE,
+ 0, BIT(umem->page_shift),
DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(dev, dma_addr)) {
ret = -EFAULT;
@@ -555,8 +569,9 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
umem,
- base_virt_addr + (page_index * PAGE_SIZE),
- base_virt_addr + ((page_index+1)*PAGE_SIZE),
+			ib_umem_start(umem) + (page_index << umem->page_shift),
+			ib_umem_start(umem) + ((page_index + 1) <<
+					       umem->page_shift),
NULL);
ret = -EAGAIN;
}
@@ -595,10 +610,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
struct page **local_page_list = NULL;
- u64 off;
- int j, k, ret = 0, start_idx, npages = 0;
- u64 base_virt_addr;
+ u64 page_mask, off;
+ int j, k, ret = 0, start_idx, npages = 0, page_shift;
unsigned int flags = 0;
+ phys_addr_t p = 0;
if (access_mask == 0)
return -EINVAL;
@@ -611,9 +626,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (!local_page_list)
return -ENOMEM;
- off = user_virt & (~PAGE_MASK);
- user_virt = user_virt & PAGE_MASK;
- base_virt_addr = user_virt;
+ page_shift = umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ off = user_virt & (~page_mask);
+ user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
@@ -631,13 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (access_mask & ODP_WRITE_ALLOWED_BIT)
flags |= FOLL_WRITE;
- start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+ start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
k = start_idx;
while (bcnt > 0) {
- const size_t gup_num_pages =
- min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ const size_t gup_num_pages = min_t(size_t,
+ (bcnt + BIT(page_shift) - 1) >> page_shift,
+ PAGE_SIZE / sizeof(struct page *));
down_read(&owning_mm->mmap_sem);
/*
@@ -656,14 +672,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- user_virt += npages << PAGE_SHIFT;
mutex_lock(&umem->odp_data->umem_mutex);
- for (j = 0; j < npages; ++j) {
+ for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
+ if (user_virt & ~page_mask) {
+ p += PAGE_SIZE;
+ if (page_to_phys(local_page_list[j]) != p) {
+ ret = -EFAULT;
+ break;
+ }
+ put_page(local_page_list[j]);
+ continue;
+ }
+
ret = ib_umem_odp_map_dma_single_page(
- umem, k, base_virt_addr, local_page_list[j],
- access_mask, current_seq);
+ umem, k, local_page_list[j],
+ access_mask, current_seq);
if (ret < 0)
break;
+
+ p = page_to_phys(local_page_list[j]);
k++;
}
mutex_unlock(&umem->odp_data->umem_mutex);
@@ -707,8 +734,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem->odp_data->umem_mutex);
- for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
if (umem->odp_data->page_list[idx]) {
struct page *page = umem->odp_data->page_list[idx];
dma_addr_t dma = umem->odp_data->dma_list[idx];
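
With huge-page ODP, the alignment math in ib_umem_odp_map_dma_pages() derives
its mask from umem->page_shift rather than the fixed PAGE_MASK, and the
get_user_pages batch size becomes a ceiling division by the (possibly huge)
page size. A standalone sketch of both computations, assuming a 2 MiB hugetlb
backing; the addresses and sizes are arbitrary.

#include <stdio.h>
#include <stdint.h>

#define BIT(n) (1ULL << (n))

int main(void)
{
	const unsigned int page_shift = 21;		/* 2 MiB huge page */
	const uint64_t page_mask = ~(BIT(page_shift) - 1);
	uint64_t user_virt = 0x7f43a1234567ULL;
	uint64_t bcnt = 5 << 20;			/* 5 MiB request */

	uint64_t off = user_virt & ~page_mask;		/* offset in first page */
	user_virt &= page_mask;				/* aligned start */
	bcnt += off;			/* charge for the first page offset */

	/* gup_num_pages-style ceiling division by the page size */
	uint64_t npages = (bcnt + BIT(page_shift) - 1) >> page_shift;

	printf("start=0x%llx off=0x%llx pages=%llu\n",
	       (unsigned long long)user_virt, (unsigned long long)off,
	       (unsigned long long)npages);
	return 0;
}
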
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index e1bedf0bac04..64d494a64daf 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -76,12 +76,13 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
- * struct ib_uverbs_event_file: One reference is held by the VFS and
- * released when the file is closed. For asynchronous event files,
- * another reference is held by the corresponding main context file
- * and released when that file is closed. For completion event files,
- * a reference is taken when a CQ is created that uses the file, and
- * released when the CQ is destroyed.
+ * struct ib_uverbs_event_queue: Base structure for
+ * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
+ * One reference is held by the VFS and released when the file is closed.
+ * For asynchronous event files, another reference is held by the corresponding
+ * main context file and released when that file is closed. For completion
+ * event files, a reference is taken when a CQ is created that uses the file,
+ * and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
@@ -101,18 +102,26 @@ struct ib_uverbs_device {
struct list_head uverbs_events_file_list;
};
-struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
+};
+
+struct ib_uverbs_async_event_file {
+ struct ib_uverbs_event_queue ev_queue;
+ struct ib_uverbs_file *uverbs_file;
+ struct kref ref;
struct list_head list;
};
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject_file uobj_file;
+ struct ib_uverbs_event_queue ev_queue;
+};
+
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
@@ -120,9 +129,13 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
+
+ struct idr idr;
+ /* spinlock protects write access to idr */
+ spinlock_t idr_lock;
};
struct ib_uverbs_event {
@@ -159,6 +172,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
+ /* lock for mcast list */
+ struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
@@ -176,32 +191,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
-extern spinlock_t ib_uverbs_idr_lock;
-extern struct idr ib_uverbs_pd_idr;
-extern struct idr ib_uverbs_mr_idr;
-extern struct idr ib_uverbs_mw_idr;
-extern struct idr ib_uverbs_ah_idr;
-extern struct idr ib_uverbs_cq_idr;
-extern struct idr ib_uverbs_qp_idr;
-extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrcd_idr;
-extern struct idr ib_uverbs_rule_idr;
-extern struct idr ib_uverbs_wq_idr;
-extern struct idr ib_uverbs_rwq_ind_tbl_idr;
-
-void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
-
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
+extern const struct file_operations uverbs_event_fops;
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
@@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj);
struct ib_uverbs_flow_spec {
union {
@@ -229,6 +233,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
struct ib_uverbs_flow_spec_action_tag flow_tag;
+ struct ib_uverbs_flow_spec_action_drop drop;
};
};
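
Splitting ib_uverbs_event_file into an async event file and a completion event
file means a completion channel is now reached from its generic uobject by
walking two embeddings, exactly as ib_uverbs_lookup_comp_file() does in
uverbs_cmd.c below. A self-contained userspace model of that container_of()
chain; the struct names are abbreviated stand-ins, not the kernel types.

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct uobject { int id; };
struct uobject_file { struct uobject uobj; };
struct completion_event_file {
	struct uobject_file uobj_file;
	int ev_queue;
};

int main(void)
{
	struct completion_event_file f = {
		.uobj_file.uobj.id = 7,
		.ev_queue = 42,
	};
	/* what the fd lookup hands back: only the generic uobject */
	struct uobject *uobj = &f.uobj_file.uobj;

	/* first hop: uobject -> uobject_file */
	struct uobject_file *uf = container_of(uobj, struct uobject_file, uobj);
	/* second hop: uobject_file -> completion event file */
	struct completion_event_file *cf =
		container_of(uf, struct completion_event_file, uobj_file);

	printf("ev_queue=%d\n", cf->ev_queue);
	return 0;
}
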
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 7b7a76e1279a..cb3c426c0dad 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,270 +40,29 @@
#include <linux/uaccess.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+
#include "uverbs.h"
#include "core_priv.h"
-struct uverbs_lock_class {
- struct lock_class_key key;
- char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
-/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr write operations. When an object is
- * looked up, a reference must be taken on the object's kref before
- * dropping this lock. For read operations, the rcu_read_lock()
- * and rcu_write_lock() but similarly the kref reference is grabbed
- * before the rcu_read_unlock().
- *
- * - Each object also has an rwsem. This rwsem must be held for
- * reading while an operation that uses the object is performed.
- * For example, while registering an MR, the associated PD's
- * uobject.mutex must be held for reading. The rwsem must be held
- * for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- * set, then lookups of the object will fail even if it is found in
- * the idr. This handles a reader that blocks and does not acquire
- * the rwsem until after the object is destroyed. The destroy
- * operation will set the live flag to 0 and then drop the rwsem;
- * this will allow the reader to acquire the rwsem, see that the
- * live flag is 0, and then drop the rwsem and its reference to
- * object. The underlying storage will not be freed until the last
- * reference to the object is dropped.
- */
-
-static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct uverbs_lock_class *c)
-{
- uobj->user_handle = user_handle;
- uobj->context = context;
- kref_init(&uobj->ref);
- init_rwsem(&uobj->mutex);
- lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
- uobj->live = 0;
-}
-
-static void release_uobj(struct kref *kref)
-{
- kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
-}
-
-static void put_uobj(struct ib_uobject *uobj)
-{
- kref_put(&uobj->ref, release_uobj);
-}
-
-static void put_uobj_read(struct ib_uobject *uobj)
-{
- up_read(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static void put_uobj_write(struct ib_uobject *uobj)
-{
- up_write(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- int ret;
-
- idr_preload(GFP_KERNEL);
- spin_lock(&ib_uverbs_idr_lock);
-
- ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
- if (ret >= 0)
- uobj->id = ret;
-
- spin_unlock(&ib_uverbs_idr_lock);
- idr_preload_end();
-
- return ret < 0 ? ret : 0;
-}
-
-void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- spin_lock(&ib_uverbs_idr_lock);
- idr_remove(idr, uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
-}
-
-static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- rcu_read_lock();
- uobj = idr_find(idr, id);
- if (uobj) {
- if (uobj->context == context)
- kref_get(&uobj->ref);
- else
- uobj = NULL;
- }
- rcu_read_unlock();
-
- return uobj;
-}
-
-static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
- struct ib_ucontext *context, int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- if (nested)
- down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
- else
- down_read(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_read(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- down_write(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_write(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
- int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_read_uobj(idr, id, context, nested);
- return uobj ? uobj->object : NULL;
-}
-
-static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
+static struct ib_uverbs_completion_event_file *
+ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
-}
+ struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ fd, context);
+ struct ib_uobject_file *uobj_file;
-static void put_pd_read(struct ib_pd *pd)
-{
- put_uobj_read(pd->uobject);
-}
+ if (IS_ERR(uobj))
+ return (void *)uobj;
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
-{
- return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
-}
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
-static void put_cq_read(struct ib_cq *cq)
-{
- put_uobj_read(cq->uobject);
-}
-
-static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
-}
-
-static void put_ah_read(struct ib_ah *ah)
-{
- put_uobj_read(ah->uobject);
-}
-
-static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
-}
-
-static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
-}
-
-static void put_wq_read(struct ib_wq *wq)
-{
- put_uobj_read(wq->uobject);
-}
-
-static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
- struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
-}
-
-static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
-{
- put_uobj_read(ind_table->uobject);
-}
-
-static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
- return uobj ? uobj->object : NULL;
-}
-
-static void put_qp_read(struct ib_qp *qp)
-{
- put_uobj_read(qp->uobject);
-}
-
-static void put_qp_write(struct ib_qp *qp)
-{
- put_uobj_write(qp->uobject);
-}
-
-static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
-}
-
-static void put_srq_read(struct ib_srq *srq)
-{
- put_uobj_read(srq->uobject);
-}
-
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
- struct ib_uobject **uobj)
-{
- *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
- return *uobj ? (*uobj)->object : NULL;
-}
-
-static void put_xrcd_read(struct ib_uobject *uobj)
-{
- put_uobj_read(uobj);
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ return container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -348,17 +107,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->device = ib_dev;
ucontext->cg_obj = cg_obj;
- INIT_LIST_HEAD(&ucontext->pd_list);
- INIT_LIST_HEAD(&ucontext->mr_list);
- INIT_LIST_HEAD(&ucontext->mw_list);
- INIT_LIST_HEAD(&ucontext->cq_list);
- INIT_LIST_HEAD(&ucontext->qp_list);
- INIT_LIST_HEAD(&ucontext->srq_list);
- INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->wq_list);
- INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
- INIT_LIST_HEAD(&ucontext->xrcd_list);
- INIT_LIST_HEAD(&ucontext->rule_list);
+ /* ufile is required when some objects are released */
+ ucontext->ufile = file;
+ uverbs_initialize_ucontext(ucontext);
+
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
@@ -382,7 +134,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_free;
resp.async_fd = ret;
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
+ filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_fd;
@@ -565,19 +317,9 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret) {
- kfree(uobj);
- return ret;
- }
-
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
if (IS_ERR(pd)) {
@@ -591,10 +333,6 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
- ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
@@ -604,25 +342,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
goto err_copy;
}
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
-err_idr:
ib_dealloc_pd(pd);
err:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -633,45 +361,19 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
{
struct ib_uverbs_dealloc_pd cmd;
struct ib_uobject *uobj;
- struct ib_pd *pd;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- pd = uobj->object;
-
- if (atomic_read(&pd->usecnt)) {
- ret = -EBUSY;
- goto err_put;
- }
-
- ret = pd->device->dealloc_pd(uobj->object);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
- if (ret)
- goto err_put;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+ uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- uobj->live = 0;
- put_uobj_write(uobj);
+ ret = uobj_remove_commit(uobj);
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
-
-err_put:
- put_uobj_write(uobj);
- return ret;
+ return ret ?: in_len;
}
struct xrcd_table_entry {
@@ -808,16 +510,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj) {
- ret = -ENOMEM;
+ obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ file->ucontext);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
-
- down_write(&obj->uobject.mutex);
-
if (!xrcd) {
xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
if (IS_ERR(xrcd)) {
@@ -835,10 +534,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.xrcd_handle = obj->uobject.id;
@@ -847,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
/* create new inode/xrcd table entry */
ret = xrcd_table_insert(file->device, inode, xrcd);
if (ret)
- goto err_insert_xrcd;
+ goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
@@ -861,12 +556,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (f.file)
fdput(f);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
mutex_unlock(&file->device->xrcd_tree_mutex);
return in_len;
@@ -878,14 +568,11 @@ err_copy:
atomic_dec(&xrcd->usecnt);
}
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
-
-err_idr:
+err_dealloc_xrcd:
ib_dealloc_xrcd(xrcd);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
err_tree_mutex_unlock:
if (f.file)
@@ -903,75 +590,41 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
{
struct ib_uverbs_close_xrcd cmd;
struct ib_uobject *uobj;
- struct ib_xrcd *xrcd = NULL;
- struct inode *inode = NULL;
- struct ib_uxrcd_object *obj;
- int live;
int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- mutex_lock(&file->device->xrcd_tree_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj) {
- ret = -EINVAL;
- goto out;
+ uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj)) {
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return PTR_ERR(uobj);
}
- xrcd = uobj->object;
- inode = xrcd->inode;
- obj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (atomic_read(&obj->refcnt)) {
- put_uobj_write(uobj);
- ret = -EBUSY;
- goto out;
- }
-
- if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
- ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
- }
-
- live = uobj->live;
- if (inode && ret)
- atomic_inc(&xrcd->usecnt);
-
- put_uobj_write(uobj);
-
- if (ret)
- goto out;
-
- if (inode && !live)
- xrcd_table_delete(file->device, inode);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- ret = in_len;
-
-out:
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return ret;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
- struct ib_xrcd *xrcd)
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+ struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why)
{
struct inode *inode;
+ int ret;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
- return;
+ return 0;
- ib_dealloc_xrcd(xrcd);
+ ret = ib_dealloc_xrcd(xrcd);
- if (inode)
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ atomic_inc(&xrcd->usecnt);
+ else if (inode)
xrcd_table_delete(dev, inode);
+
+ return ret;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
@@ -1004,14 +657,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1025,10 +675,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_put;
}
}
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
@@ -1043,9 +689,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
@@ -1058,32 +701,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(pd);
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
ib_dereg_mr(mr);
err_put:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1119,11 +750,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
- file->ucontext);
-
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
mr = uobj->object;
@@ -1134,7 +764,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -1167,11 +797,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
put_uobjs:
-
- put_uobj_write(mr->uobject);
+ uobj_put_write(uobj);
return ret;
}
@@ -1182,39 +811,20 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mr = uobj->object;
-
- ret = ib_dereg_mr(mr);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+ ret = uobj_remove_commit(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return ret ?: in_len;
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -1236,14 +846,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1254,11 +861,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
-
mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
@@ -1271,9 +873,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mw;
- ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
- if (ret)
- goto err_unalloc;
memset(&resp, 0, sizeof(resp));
resp.rkey = mw->rkey;
@@ -1285,32 +884,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mw_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
-err_unalloc:
uverbs_dealloc_mw(mw);
-
err_put:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1320,39 +904,19 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_mw *mw;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mw = uobj->object;
-
- ret = uverbs_dealloc_mw(mw);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
+ uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -1362,8 +926,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
- int ret;
+ struct ib_uobject *uobj;
+ struct ib_uverbs_completion_event_file *ev_file;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -1371,25 +935,23 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- return ret;
- resp.fd = ret;
+ uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
- }
+ resp.fd = uobj->id;
+
+ ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
+ uobj_alloc_abort(uobj);
return -EFAULT;
}
- fd_install(resp.fd, filp);
+ uobj_alloc_commit(uobj);
return in_len;
}
@@ -1407,7 +969,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
void *context)
{
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
struct ib_uverbs_ex_create_cq_resp resp;
@@ -1416,21 +978,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return ERR_PTR(-ENOMEM);
-
- init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
- down_write(&obj->uobject.mutex);
+ obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return obj;
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
- if (!ev_file) {
- ret = -EINVAL;
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
+ file->ucontext);
+ if (IS_ERR(ev_file)) {
+ ret = PTR_ERR(ev_file);
goto err;
}
}
+ obj->uobject.user_handle = cmd->user_handle;
obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
@@ -1443,13 +1005,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
- ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
-
- cq = ib_dev->create_cq(ib_dev, &attr,
- file->ucontext, uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1459,14 +1015,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file;
+ cq->cq_context = &ev_file->ev_queue;
atomic_set(&cq->usecnt, 0);
obj->uobject.object = cq;
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
- if (ret)
- goto err_free;
-
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
@@ -1478,32 +1030,19 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (ret)
goto err_cb;
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
return obj;
err_cb:
- idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
-
-err_free:
ib_destroy_cq(cq);
err_file:
- ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
if (ev_file)
ib_uverbs_release_ucq(file, ev_file, obj);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
return ERR_PTR(ret);
}
@@ -1626,7 +1165,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1641,7 +1180,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
ret = -EFAULT;
out:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret ? ret : in_len;
}
@@ -1688,7 +1227,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1720,7 +1259,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
ret = in_len;
out_put:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret;
}
@@ -1735,14 +1274,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return in_len;
}
@@ -1757,44 +1296,38 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_cq *cq;
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file;
+ struct ib_uverbs_event_queue *ev_queue;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+	/*
+	 * Make sure we don't free the memory in remove_commit as we still
+	 * need the uobject memory to create the response.
+	 */
+ uverbs_uobject_get(uobj);
cq = uobj->object;
- ev_file = cq->cq_context;
+ ev_queue = cq->cq_context;
obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
- ret = ib_destroy_cq(cq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
+ }
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_ucq(file, ev_file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- put_uobj(uobj);
-
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
@@ -1816,7 +1349,7 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_device *device;
struct ib_pd *pd = NULL;
struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT);
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
@@ -1830,18 +1363,20 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ obj->uxrcd = NULL;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
+ mutex_init(&obj->mcast_lock);
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
- &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
- file->ucontext);
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ cmd->rwq_ind_tbl_handle,
+ file->ucontext);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1865,8 +1400,15 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
- &xrcd_uobj);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ file->ucontext);
+
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
goto err_put;
@@ -1878,8 +1420,8 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = idr_read_srq(cmd->srq_handle,
- file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
@@ -1888,8 +1430,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
+ rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ file->ucontext);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1899,10 +1441,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1954,11 +1497,6 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_put;
}
- ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_put;
-
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
@@ -1966,7 +1504,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_create;
+ goto err_put;
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1994,9 +1532,6 @@ static int create_qp(struct ib_uverbs_file *file,
qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.base.qpn = qp->qp_num;
@@ -2018,54 +1553,41 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(ind_tbl);
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_cb:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_qp(qp);
-err_create:
- ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
- RDMACG_RESOURCE_HCA_OBJECT);
-
err_put:
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
+ if (!IS_ERR(xrcd_uobj))
+ uobj_put_read(xrcd_uobj);
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
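create_qp() (and every other create path converted in this patch) now follows a two-phase object lifecycle: uobj_alloc() reserves a handle that no other thread can look up, uobj_alloc_commit() publishes it once the object is fully initialized, and uobj_alloc_abort() releases the reservation on any error path. A rough userspace analogue of that reserve/publish protocol (all names here are illustrative):

/* Sketch only: two-phase handle publication behind a lock. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define TABLE_SIZE 16

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct { void *obj; bool live; } table[TABLE_SIZE];

static int handle_alloc(void)           /* like uobj_alloc(): reserved, not live */
{
        pthread_mutex_lock(&table_lock);
        for (int i = 0; i < TABLE_SIZE; i++)
                if (!table[i].obj) {
                        table[i].obj = &table[i];       /* placeholder reservation */
                        table[i].live = false;          /* lookups cannot find it */
                        pthread_mutex_unlock(&table_lock);
                        return i;
                }
        pthread_mutex_unlock(&table_lock);
        return -1;
}

static void handle_commit(int h, void *obj)     /* like uobj_alloc_commit() */
{
        pthread_mutex_lock(&table_lock);
        table[h].obj = obj;
        table[h].live = true;                   /* now visible to lookups */
        pthread_mutex_unlock(&table_lock);
}

static void handle_abort(int h)                 /* like uobj_alloc_abort() */
{
        pthread_mutex_lock(&table_lock);
        table[h].obj = NULL;
        pthread_mutex_unlock(&table_lock);
}

int main(void)
{
        int dummy = 42;
        int h = handle_alloc();

        if (h < 0)
                return 1;
        handle_commit(h, &dummy);       /* or handle_abort(h) on an error path */
        printf("handle %d live=%d\n", h, table[h].live);
        return 0;
}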
@@ -2201,17 +1723,22 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
- goto err_put;
+ goto err_xrcd;
}
attr.event_handler = ib_uverbs_qp_event_handler;
@@ -2226,15 +1753,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
qp = ib_open_qp(xrcd, &attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_xrcd;
}
- qp->uobject = &obj->uevent.uobject;
-
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd.user_handle;
memset(&resp, 0, sizeof resp);
resp.qpn = qp->qp_num;
@@ -2243,32 +1766,25 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_remove;
+ goto err_destroy;
}
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ qp->uobject = &obj->uevent.uobject;
+ uobj_put_read(xrcd_uobj);
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return in_len;
-err_remove:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
err_destroy:
ib_destroy_qp(qp);
-
+err_xrcd:
+ uobj_put_read(xrcd_uobj);
err_put:
- put_xrcd_read(xrcd_uobj);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2294,7 +1810,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2302,7 +1818,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
if (ret)
goto out;
@@ -2398,7 +1914,7 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2470,7 +1986,7 @@ static int modify_qp(struct ib_uverbs_file *file,
}
release_qp:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
out:
kfree(attr);
@@ -2557,42 +2073,27 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
qp = uobj->object;
obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- if (!list_empty(&obj->mcast_list)) {
- put_uobj_write(uobj);
- return -EBUSY;
- }
-
- ret = ib_destroy_qp(qp);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- if (obj->uxrcd)
- atomic_dec(&obj->uxrcd->refcnt);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
+ }
resp.events_reported = obj->uevent.events_reported;
-
- put_uobj(uobj);
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2603,9 +2104,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge))
+ return NULL;
+
return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
-};
+}
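The guard added to alloc_wr() (and the matching check in ib_uverbs_unmarshall_recv() below) rejects user-controlled num_sge values that would wrap the size passed to kmalloc(). A standalone illustration of the bound, assuming the kernel's 16-byte struct ib_sge:

/* Sketch only: the overflow bound, not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))
#define SGE_SIZE        16u     /* sizeof(struct ib_sge) on the kernel side */

static int wr_size_overflows(size_t wr_size, uint32_t num_sge)
{
        /* The allocation is ALIGN_UP(wr_size) + num_sge * SGE_SIZE; refuse
         * any num_sge for which that sum would exceed U32_MAX. */
        return num_sge >= (UINT32_MAX - ALIGN_UP(wr_size, SGE_SIZE)) / SGE_SIZE;
}

int main(void)
{
        printf("%d\n", wr_size_overflows(96, 4));           /* 0: fits easily */
        printf("%d\n", wr_size_overflows(96, 0x10000000));  /* 1: would wrap */
        return 0;
}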
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
@@ -2636,7 +2141,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2672,7 +2177,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ file->ucontext);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2779,11 +2285,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
ret = -EFAULT;
out_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
- put_ah_read(ud_wr(wr)->ah);
+ uobj_put_obj_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2832,6 +2338,13 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
goto err;
}
+ if (user_wr->num_sge >=
+ (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
@@ -2900,21 +2413,21 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
resp.bad_wr = 0;
ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
- put_qp_read(qp);
-
- if (ret)
+ uobj_put_obj_read(qp);
+ if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2950,14 +2463,14 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
resp.bad_wr = 0;
ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
for (next = wr; next; next = next->next) {
@@ -3004,14 +2517,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
(unsigned long)cmd.response + sizeof(resp),
in_len - sizeof(cmd), out_len - sizeof(resp));
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -3030,28 +2540,20 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
-
ah = pd->device->create_ah(pd, &attr, &udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
- goto err_create;
+ goto err_put;
}
ah->device = pd->device;
ah->pd = pd;
atomic_inc(&pd->usecnt);
ah->uobject = uobj;
+ uobj->user_handle = cmd.user_handle;
uobj->object = ah;
- ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
- if (ret)
- goto err_destroy;
-
resp.ah_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -3060,32 +2562,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->ah_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
-err_destroy:
ib_destroy_ah(ah);
-err_create:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
- put_pd_read(pd);
+err_put:
+ uobj_put_obj_read(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -3094,38 +2583,19 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_ah *ah;
struct ib_uobject *uobj;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- ah = uobj->object;
-
- ret = ib_destroy_ah(ah);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+ uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -3142,12 +2612,13 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
@@ -3171,7 +2642,8 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_write(qp);
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
@@ -3186,31 +2658,37 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
+ bool found = false;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
- ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
- if (ret)
- goto out_put;
-
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
list_del(&mcast->list);
kfree(mcast);
+ found = true;
break;
}
-out_put:
- put_qp_write(qp);
+ if (!found) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid);
+out_put:
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
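Since the QP handle is now taken with a shared read lock rather than idr_write_qp(), attach and detach serialize on the new per-object mcast_lock, and detach only reaches the driver after the entry has been found and removed from the tracking list. A small pthreads sketch of that lookup-under-lock ordering (illustrative, not the kernel API):

/* Sketch only: detach fails with -EINVAL before touching the driver. */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_GROUPS 8

static pthread_mutex_t mcast_lock = PTHREAD_MUTEX_INITIALIZER;
static int mcast_list[MAX_GROUPS];      /* attached group ids; 0 = empty slot */

static int detach_mcast(int gid)
{
        bool found = false;
        int ret = 0;

        pthread_mutex_lock(&mcast_lock);
        for (int i = 0; i < MAX_GROUPS; i++)
                if (mcast_list[i] == gid) {
                        mcast_list[i] = 0;      /* drop the tracking entry */
                        found = true;
                        break;
                }
        if (!found)
                ret = -EINVAL;          /* never reaches the driver */
        /* else: the driver detach would run here, still under the lock */
        pthread_mutex_unlock(&mcast_lock);
        return ret;
}

int main(void)
{
        mcast_list[0] = 7;
        printf("%d\n", detach_mcast(7));        /* 0 */
        printf("%d\n", detach_mcast(7));        /* -22 (EINVAL): already gone */
        return 0;
}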
@@ -3227,6 +2705,13 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (kern_spec->drop.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_drop))
+ return -EINVAL;
+
+ ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
+ break;
default:
return -EINVAL;
}
@@ -3402,20 +2887,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
- &wq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3450,9 +2933,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
atomic_inc(&cq->usecnt);
wq->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = wq;
- err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
- if (err)
- goto destroy_wq;
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
@@ -3465,27 +2945,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_pd_read(pd);
- put_cq_read(cq);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_put_obj_read(cq);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
ib_destroy_wq(wq);
err_put_cq:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
err_put_pd:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_uobj:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return err;
}
@@ -3526,36 +2998,26 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
wq = uobj->object;
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- ret = ib_destroy_wq(wq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ib_uverbs_release_uevent(file, &obj->uevent);
+ ret = uobj_remove_commit(uobj);
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
-
- ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
+ uverbs_uobject_put(uobj);
if (ret)
return ret;
- return 0;
+ return ib_copy_to_udata(ucore, &resp, resp.response_length);
}
int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
@@ -3588,7 +3050,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
@@ -3599,7 +3061,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags_mask = cmd.flags_mask;
}
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
- put_wq_read(wq);
+ uobj_put_obj_read(wq);
return ret;
}
@@ -3677,7 +3139,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ file->ucontext);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3686,14 +3149,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto put_wqs;
}
- init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
- down_write(&uobj->mutex);
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
@@ -3713,10 +3174,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (i = 0; i < num_wq_handles; i++)
atomic_inc(&wqs[i]->usecnt);
- err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- if (err)
- goto destroy_ind_tbl;
-
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = required_resp_len;
@@ -3729,26 +3186,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
+ uobj_put_obj_read(wqs[j]);
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-destroy_ind_tbl:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
put_wqs:
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ uobj_put_obj_read(wqs[j]);
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3761,10 +3210,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_uobject *uobj;
int ret;
- struct ib_wq **ind_tbl;
size_t required_cmd_sz;
required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
@@ -3784,31 +3231,12 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- rwq_ind_tbl = uobj->object;
- ind_tbl = rwq_ind_tbl->ind_tbl;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- put_uobj(uobj);
- kfree(ind_tbl);
- return ret;
+ return uobj_remove_commit(uobj);
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
@@ -3882,15 +3310,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto err_free_attr;
}
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
- down_write(&uobj->mutex);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3931,24 +3357,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
-
- err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (err)
- goto err_free;
-
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
- goto err_create;
+ goto err_free;
}
flow_id->uobject = uobj;
uobj->object = flow_id;
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
- if (err)
- goto destroy_flow;
-
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3957,30 +3373,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(qp);
+ uobj_alloc_commit(uobj);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-destroy_flow:
ib_destroy_flow(flow_id);
-err_create:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
err_free:
kfree(flow_attr);
err_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
err_free_attr:
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
@@ -3993,7 +3399,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_flow *flow_id;
struct ib_uobject *uobj;
int ret;
@@ -4007,29 +3412,12 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- flow_id = uobj->object;
-
- ret = ib_destroy_flow(flow_id);
- if (!ret) {
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- uobj->live = 0;
- }
-
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
+ uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ ret = uobj_remove_commit(uobj);
return ret;
}
@@ -4046,31 +3434,37 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
if (cmd->srq_type == IB_SRQT_XRC) {
- attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!attr.ext.xrc.xrcd) {
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
}
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+ attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ file->ucontext);
if (!attr.ext.xrc.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -4086,11 +3480,6 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_put_cq;
-
srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
@@ -4115,9 +3504,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
atomic_set(&srq->usecnt, 0);
obj->uevent.uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uevent.uobject.id;
@@ -4133,44 +3520,32 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (cmd->srq_type == IB_SRQT_XRC) {
- put_uobj_read(xrcd_uobj);
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_read(xrcd_uobj);
+ uobj_put_obj_read(attr.ext.xrc.cq);
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_srq(srq);
err_put:
- ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_put_cq:
if (cmd->srq_type == IB_SRQT_XRC)
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_obj_read(attr.ext.xrc.cq);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
atomic_dec(&obj->uxrcd->refcnt);
- put_uobj_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
err:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -4255,7 +3630,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -4264,7 +3639,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
return ret ? ret : in_len;
}
@@ -4286,13 +3661,13 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
ret = ib_query_srq(srq, &attr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
return ret;
@@ -4321,53 +3696,39 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
- struct ib_usrq_object *us;
enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
srq_type = srq->srq_type;
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ret = ib_destroy_srq(srq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- if (srq_type == IB_SRQT_XRC) {
- us = container_of(obj, struct ib_usrq_object, uevent);
- atomic_dec(&us->uxrcd->refcnt);
}
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.events_reported = obj->events_reported;
+ uverbs_uobject_put(uobj);
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ return -EFAULT;
- put_uobj(uobj);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
-
- return ret ? ret : in_len;
+ return in_len;
}
int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 35c788a32e26..3a9883d1257e 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -52,6 +52,7 @@
#include "uverbs.h"
#include "core_priv.h"
+#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -67,19 +68,6 @@ enum {
static struct class *uverbs_class;
-DEFINE_SPINLOCK(ib_uverbs_idr_lock);
-DEFINE_IDR(ib_uverbs_pd_idr);
-DEFINE_IDR(ib_uverbs_mr_idr);
-DEFINE_IDR(ib_uverbs_mw_idr);
-DEFINE_IDR(ib_uverbs_ah_idr);
-DEFINE_IDR(ib_uverbs_cq_idr);
-DEFINE_IDR(ib_uverbs_qp_idr);
-DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrcd_idr);
-DEFINE_IDR(ib_uverbs_rule_idr);
-DEFINE_IDR(ib_uverbs_wq_idr);
-DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
-
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -168,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
-static void ib_uverbs_release_event_file(struct kref *ref)
+static void ib_uverbs_release_async_event_file(struct kref *ref)
{
- struct ib_uverbs_event_file *file =
- container_of(ref, struct ib_uverbs_event_file, ref);
+ struct ib_uverbs_async_event_file *file =
+ container_of(ref, struct ib_uverbs_async_event_file, ref);
kfree(file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
- spin_lock_irq(&ev_file->lock);
+ spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&ev_file->lock);
+ spin_unlock_irq(&ev_file->ev_queue.lock);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ uverbs_uobject_put(&ev_file->uobj_file.uobj);
}
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@@ -206,16 +194,16 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
{
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
-static void ib_uverbs_detach_umcast(struct ib_qp *qp,
- struct ib_uqp_object *uobj)
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@@ -227,138 +215,11 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context)
+ struct ib_ucontext *context,
+ bool device_removed)
{
- struct ib_uobject *uobj, *tmp;
-
context->closing = 1;
-
- list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
- struct ib_ah *ah = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
- ib_destroy_ah(ah);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- /* Remove MWs before QPs, in order to support type 2A MWs. */
- list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
- struct ib_mw *mw = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- uverbs_dealloc_mw(mw);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
- struct ib_flow *flow_id = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
- ib_destroy_flow(flow_id);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
- struct ib_qp *qp = uobj->object;
- struct ib_uqp_object *uqp =
- container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp == qp->real_qp)
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_uevent(file, &uqp->uevent);
- kfree(uqp);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
- struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
- struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
- kfree(ind_tbl);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
- struct ib_wq *wq = uobj->object;
- struct ib_uwq_object *uwq =
- container_of(uobj, struct ib_uwq_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
- ib_destroy_wq(wq);
- ib_uverbs_release_uevent(file, &uwq->uevent);
- kfree(uwq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
- struct ib_cq *cq = uobj->object;
- struct ib_uverbs_event_file *ev_file = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobj, struct ib_ucq_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_ucq(file, ev_file, ucq);
- kfree(ucq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
- struct ib_mr *mr = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
- struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrcd_object *uxrcd =
- container_of(uobj, struct ib_uxrcd_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device, xrcd);
- kfree(uxrcd);
- }
- mutex_unlock(&file->device->xrcd_tree_mutex);
-
- list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
- struct ib_pd *pd = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
- ib_dealloc_pd(pd);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
+ uverbs_cleanup_ucontext(context, device_removed);
put_pid(context->tgid);
ib_rdmacg_uncharge(&context->cg_obj, context->device,
@@ -372,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
@@ -392,58 +253,54 @@ static void ib_uverbs_release_file(struct kref *ref)
kfree(file);
}
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ struct ib_uverbs_file *uverbs_file,
+ struct file *filp, char __user *buf,
+ size_t count, loff_t *pos,
+ size_t eventsz)
{
- struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
int ret = 0;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
- while (list_empty(&file->event_list)) {
- spin_unlock_irq(&file->lock);
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
- if (wait_event_interruptible(file->poll_wait,
- (!list_empty(&file->event_list) ||
+ if (wait_event_interruptible(ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
/* The barriers built into wait_event_interruptible()
* and wake_up() guarantee this will see the null set
* without using RCU
*/
- !file->uverbs_file->device->ib_dev)))
+ !uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
/* If the device was disassociated and no event exists, set an error */
- if (list_empty(&file->event_list) &&
- !file->uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) &&
+ !uverbs_file->device->ib_dev)
return -EIO;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
}
- event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
-
- if (file->is_async)
- eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
- eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
- list_del(file->event_list.next);
+ list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@@ -457,87 +314,158 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_async_event_desc));
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue,
+ comp_ev_file->uobj_file.ufile, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_comp_event_desc));
+}
+
+static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp,
struct poll_table_struct *wait)
{
unsigned int pollflags = 0;
- struct ib_uverbs_event_file *file = filp->private_data;
- poll_wait(filp, &file->poll_wait, wait);
+ poll_wait(filp, &ev_queue->poll_wait, wait);
- spin_lock_irq(&file->lock);
- if (!list_empty(&file->event_list))
+ spin_lock_irq(&ev_queue->lock);
+ if (!list_empty(&ev_queue->event_list))
pollflags = POLLIN | POLLRDNORM;
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static unsigned int ib_uverbs_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ return ib_uverbs_event_poll(filp->private_data, filp, wait);
+}
+
+static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
- return fasync_helper(fd, filp, on, &file->async_queue);
+ return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+}
+
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
+}
+
+static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+ struct ib_uverbs_file *uverbs_file = file->uverbs_file;
struct ib_uverbs_event *entry, *tmp;
int closed_already = 0;
- mutex_lock(&file->uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->lock);
- closed_already = file->is_closed;
- file->is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ mutex_lock(&uverbs_file->device->lists_mutex);
+ spin_lock_irq(&file->ev_queue.lock);
+ closed_already = file->ev_queue.is_closed;
+ file->ev_queue.is_closed = 1;
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&file->ev_queue.lock);
if (!closed_already) {
list_del(&file->list);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->
- event_handler);
+ ib_unregister_event_handler(&uverbs_file->event_handler);
+ }
+ mutex_unlock(&uverbs_file->device->lists_mutex);
+
+ kref_put(&uverbs_file->ref, ib_uverbs_release_file);
+ kref_put(&file->ref, ib_uverbs_release_async_event_file);
+
+ return 0;
+}
+
+static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_completion_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
}
- mutex_unlock(&file->uverbs_file->device->lists_mutex);
+ spin_unlock_irq(&file->ev_queue.lock);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ uverbs_close_fd(filp);
return 0;
}
-static const struct file_operations uverbs_event_fops = {
+const struct file_operations uverbs_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_comp_event_read,
+ .poll = ib_uverbs_comp_event_poll,
+ .release = ib_uverbs_comp_event_close,
+ .fasync = ib_uverbs_comp_event_fasync,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
- .poll = ib_uverbs_event_poll,
- .release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync,
+ .read = ib_uverbs_async_event_read,
+ .poll = ib_uverbs_async_event_poll,
+ .release = ib_uverbs_async_event_close,
+ .fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_event_file *file = cq_context;
+ struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
- if (!file)
+ if (!ev_queue)
return;
- spin_lock_irqsave(&file->lock, flags);
- if (file->is_closed) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ if (ev_queue->is_closed) {
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
@@ -546,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
- list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
- wake_up_interruptible(&file->poll_wait);
- kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&ev_queue->poll_wait);
+ kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -562,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->lock, flags);
- if (file->async_file->is_closed) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
+ if (file->async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
@@ -579,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
entry->desc.async.reserved = 0;
entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->event_list);
+ list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->poll_wait);
- kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
+ kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@@ -603,7 +531,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
struct ib_uevent_object *uobj;
/* for XRC target qp's, check that qp is live */
- if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ if (!event->element.qp->uobject)
return;
uobj = container_of(event->element.qp->uobject,
@@ -648,15 +576,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
file->async_file = NULL;
}
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
- struct ib_uverbs_event_file *ev_file;
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ ev_queue->is_closed = 0;
+ ev_queue->async_queue = NULL;
+}
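ib_uverbs_event_read() and ib_uverbs_event_poll() above now operate on this shared ib_uverbs_event_queue, so the async and completion files differ only in event size and lifetime handling. A compact pthreads model of the queue's blocking-read behaviour, with a condition variable standing in for poll_wait (illustrative only):

/* Sketch only: producer posts an event, reader blocks until one exists. */
#include <pthread.h>
#include <stdio.h>

struct event_queue {
        pthread_mutex_t lock;
        pthread_cond_t  poll_wait;
        int             pending;        /* stands in for the event list */
};

static void post_event(struct event_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->pending++;
        pthread_cond_signal(&q->poll_wait);     /* wake_up_interruptible() */
        pthread_mutex_unlock(&q->lock);
}

static int read_event(struct event_queue *q)
{
        pthread_mutex_lock(&q->lock);
        while (!q->pending)                     /* wait_event_interruptible() */
                pthread_cond_wait(&q->poll_wait, &q->lock);
        q->pending--;                           /* list_del() of the head event */
        pthread_mutex_unlock(&q->lock);
        return 0;
}

int main(void)
{
        struct event_queue q = {
                PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 };

        post_event(&q);
        read_event(&q);
        printf("pending = %d\n", q.pending);    /* 0 */
        return 0;
}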
+
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev)
+{
+ struct ib_uverbs_async_event_file *ev_file;
struct file *filp;
int ret;
@@ -664,16 +600,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
if (!ev_file)
return ERR_PTR(-ENOMEM);
- kref_init(&ev_file->ref);
- spin_lock_init(&ev_file->lock);
- INIT_LIST_HEAD(&ev_file->event_list);
- init_waitqueue_head(&ev_file->poll_wait);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
ev_file->uverbs_file = uverbs_file;
kref_get(&ev_file->uverbs_file->ref);
- ev_file->async_queue = NULL;
- ev_file->is_closed = 0;
-
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+ kref_init(&ev_file->ref);
+ filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
ev_file, O_RDONLY);
if (IS_ERR(filp))
goto err_put_refs;
@@ -683,64 +614,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
&uverbs_file->device->uverbs_events_file_list);
mutex_unlock(&uverbs_file->device->lists_mutex);
- if (is_async) {
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
-
- /* At that point async file stuff was fully set */
- ev_file->is_async = 1;
- }
+ WARN_ON(uverbs_file->async_file);
+ uverbs_file->async_file = ev_file;
+ kref_get(&uverbs_file->async_file->ref);
+ INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
+ ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&uverbs_file->event_handler);
+ if (ret)
+ goto err_put_file;
+
+ /* At this point the async file setup is complete */
return filp;
err_put_file:
fput(filp);
- kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&uverbs_file->async_file->ref,
+ ib_uverbs_release_async_event_file);
uverbs_file->async_file = NULL;
return ERR_PTR(ret);
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
return filp;
}
-/*
- * Look up a completion event file by FD. If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
-{
- struct ib_uverbs_event_file *ev_file = NULL;
- struct fd f = fdget(fd);
-
- if (!f.file)
- return NULL;
-
- if (f.file->f_op != &uverbs_event_fops)
- goto out;
-
- ev_file = f.file->private_data;
- if (ev_file->is_async) {
- ev_file = NULL;
- goto out;
- }
-
- kref_get(&ev_file->ref);
-
-out:
- fdput(f);
- return ev_file;
-}
-
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
u64 mask;
@@ -986,6 +886,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
+ spin_lock_init(&file->idr_lock);
+ idr_init(&file->idr);
file->ucontext = NULL;
file->async_file = NULL;
kref_init(&file->ref);
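With the global ib_uverbs_*_idr tables gone, every open file owns a private idr protected by its own idr_lock, so a handle from one process can never name another process's object, and teardown reduces to the idr_destroy() in ib_uverbs_close() below. A toy analogue of a per-file handle space (illustrative names):

/* Sketch only: handles are meaningful only within their own file. */
#include <stdio.h>

#define HANDLES_PER_FILE 4

struct ufile {
        void *idr[HANDLES_PER_FILE];    /* private handle -> object map */
};

static int file_handle_add(struct ufile *f, void *obj)
{
        for (int i = 0; i < HANDLES_PER_FILE; i++)
                if (!f->idr[i]) {
                        f->idr[i] = obj;
                        return i;
                }
        return -1;
}

static void *file_handle_get(struct ufile *f, int h)
{
        return (h >= 0 && h < HANDLES_PER_FILE) ? f->idr[h] : NULL;
}

int main(void)
{
        struct ufile a = {{0}}, b = {{0}};
        int obj = 1;
        int h = file_handle_add(&a, &obj);

        /* The same numeric handle is meaningless in another file's space. */
        printf("a: %p  b: %p\n", file_handle_get(&a, h), file_handle_get(&b, h));
        return 0;
}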
@@ -1019,10 +921,11 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
+ idr_destroy(&file->idr);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
@@ -1032,7 +935,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref,
+ ib_uverbs_release_async_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
@@ -1231,7 +1135,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_event_file *event_file;
+ struct ib_uverbs_async_event_file *event_file;
struct ib_event event;
/* Pending running commands to terminate */
@@ -1268,7 +1172,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_dev->disassociate_ucontext(ucontext);
- ib_uverbs_cleanup_ucontext(file, ucontext);
+ mutex_lock(&file->cleanup_mutex);
+ ib_uverbs_cleanup_ucontext(file, ucontext, true);
+ mutex_unlock(&file->cleanup_mutex);
}
mutex_lock(&uverbs_dev->lists_mutex);
@@ -1278,21 +1184,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
event_file = list_first_entry(&uverbs_dev->
uverbs_events_file_list,
- struct ib_uverbs_event_file,
+ struct ib_uverbs_async_event_file,
list);
- spin_lock_irq(&event_file->lock);
- event_file->is_closed = 1;
- spin_unlock_irq(&event_file->lock);
+ spin_lock_irq(&event_file->ev_queue.lock);
+ event_file->ev_queue.is_closed = 1;
+ spin_unlock_irq(&event_file->ev_queue.lock);
list_del(&event_file->list);
- if (event_file->is_async) {
- ib_unregister_event_handler(&event_file->uverbs_file->
- event_handler);
- event_file->uverbs_file->event_handler.device = NULL;
- }
+ ib_unregister_event_handler(
+ &event_file->uverbs_file->event_handler);
+ event_file->uverbs_file->event_handler.device =
+ NULL;
- wake_up_interruptible(&event_file->poll_wait);
- kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&event_file->ev_queue.poll_wait);
+ kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
@@ -1396,13 +1301,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
- idr_destroy(&ib_uverbs_pd_idr);
- idr_destroy(&ib_uverbs_mr_idr);
- idr_destroy(&ib_uverbs_mw_idr);
- idr_destroy(&ib_uverbs_ah_idr);
- idr_destroy(&ib_uverbs_cq_idr);
- idr_destroy(&ib_uverbs_qp_idr);
- idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
new file mode 100644
index 000000000000..ad3caad40945
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/bug.h>
+#include <linux/file.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
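+/*
+ * Standard uverbs object types: each entry below pairs an allocation
+ * descriptor with a destroy_object or context_closed callback so that
+ * rdma_core can tear objects down uniformly, both on explicit destroy
+ * commands and on context cleanup or device hot-unplug.
+ */
+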
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_destroy_ah((struct ib_ah *)uobject->object);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_destroy_flow((struct ib_flow *)uobject->object);
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
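+	/*
+	 * An explicit destroy must fail with -EBUSY while multicast groups
+	 * are still attached; on context cleanup they are detached here
+	 * instead (for the real QP only).
+	 */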
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp(qp);
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent);
+ return ret;
+}
+
+static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+ int ret;
+
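+	/*
+	 * The table of WQ pointers is freed unless an explicit destroy
+	 * failed, in which case the object remains usable.
+	 */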
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ kfree(ind_tbl);
+ return ret;
+}
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq(wq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+ return ret;
+}
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq(srq);
+
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
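+	/* An XRC SRQ holds a reference on its XRC domain; drop it now. */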
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uevent, struct ib_usrq_object, uevent);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uobject->context->ufile, uevent);
+ return ret;
+}
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
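+	/*
+	 * cq_context points at the completion event queue, if one was
+	 * attached; recover its containing file so queued completion
+	 * events can be released along with the ucq object.
+	 */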
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int uverbs_free_xrcd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_xrcd *xrcd = uobject->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobject, struct ib_uxrcd_object, uobject);
+ int ret;
+
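+	/*
+	 * xrcd_tree_mutex serializes against xrcd lookup/open; an XRCD
+	 * still referenced by QPs or SRQs cannot be destroyed explicitly.
+	 */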
+ mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
+ ret = -EBUSY;
+ else
+ ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
+ xrcd, why);
+ mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+static int uverbs_free_pd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_pd *pd = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+	ib_dealloc_pd(pd);
+ return 0;
+}
+
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *comp_event_file =
+ container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
+ struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+
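+	/*
+	 * Mark the queue closed so readers and pollers see EOF; on driver
+	 * removal also wake any sleepers and signal async waiters.
+	 */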
+ spin_lock_irq(&event_queue->lock);
+ event_queue->is_closed = 1;
+ spin_unlock_irq(&event_queue->lock);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE) {
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+ }
+ return 0;
+};
+
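+/*
+ * The integer passed to UVERBS_TYPE_ALLOC_IDR[_SZ] is the destruction
+ * order used on context cleanup: lower orders are released first, so
+ * MWs (0) go before MRs (1), which go before PDs (2).
+ */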
+const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
+ .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
+ .context_closed = uverbs_hot_unplug_completion_event_file,
+ .fops = &uverbs_event_fops,
+ .name = "[infinibandevent]",
+ .flags = O_RDONLY,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
+ .destroy_object = uverbs_free_cq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
+ .destroy_object = uverbs_free_qp,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_mw,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
+ /* 1 is used in order to free the MR after all the MWs */
+ .type = UVERBS_TYPE_ALLOC_IDR(1),
+ .destroy_object = uverbs_free_mr,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
+ .destroy_object = uverbs_free_srq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_ah,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_flow,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
+ .destroy_object = uverbs_free_wq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_rwq_ind_tbl,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
+ .destroy_object = uverbs_free_xrcd,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
+ /* 2 is used in order to free the PD after MRs */
+ .type = UVERBS_TYPE_ALLOC_IDR(2),
+ .destroy_object = uverbs_free_pd,
+};
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 85ed5051fdfd..207e5c2457cc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1519,7 +1519,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
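+	/*
+	 * A valid multicast LID is at or above IB_MULTICAST_LID_BASE and
+	 * is not the permissive LID.
+	 */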
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@@ -1535,7 +1537,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
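+	/* Mirror the attach-side validation of the multicast LID. */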
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 33af2e3de399..a4e8e0b075d2 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -692,9 +692,9 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
kfree(rdev->qp1_sqp);
}
- if (qp->rumem && !IS_ERR(qp->rumem))
+ if (!IS_ERR_OR_NULL(qp->rumem))
ib_umem_release(qp->rumem);
- if (qp->sumem && !IS_ERR(qp->sumem))
+ if (!IS_ERR_OR_NULL(qp->sumem))
ib_umem_release(qp->sumem);
mutex_lock(&rdev->qp_lock);
@@ -2116,7 +2116,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
dev_err(rdev_to_dev(rdev), "Failed to destroy HW CQ");
return rc;
}
- if (cq->umem && !IS_ERR(cq->umem))
+ if (!IS_ERR_OR_NULL(cq->umem))
ib_umem_release(cq->umem);
if (cq) {
@@ -2818,7 +2818,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
{
struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
struct bnxt_re_dev *rdev = mr->rdev;
- int rc = 0;
+ int rc;
if (mr->npages && mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
@@ -2829,7 +2829,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
}
rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (!IS_ERR(mr->ib_umem) && mr->ib_umem)
+ if (!IS_ERR_OR_NULL(mr->ib_umem))
ib_umem_release(mr->ib_umem);
kfree(mr);
@@ -3016,7 +3016,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct bnxt_re_mr *mr;
struct ib_umem *umem;
u64 *pbl_tbl, *pbl_tbl_orig;
- int i, umem_pgs, pages, page_shift, rc;
+ int i, umem_pgs, pages, rc;
struct scatterlist *sg;
int entry;
@@ -3062,22 +3062,22 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
}
pbl_tbl_orig = pbl_tbl;
- page_shift = ilog2(umem->page_size);
if (umem->hugetlb) {
dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!");
rc = -EFAULT;
goto fail;
}
- if (umem->page_size != PAGE_SIZE) {
- dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
+
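+	/*
+	 * The umem now records its page shift directly; deriving it from
+	 * the old page_size field is no longer needed.
+	 */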
+ if (umem->page_shift != PAGE_SHIFT) {
+ dev_err(rdev_to_dev(rdev), "umem page shift unsupported!");
rc = -EFAULT;
goto fail;
}
/* Map umem buf ptrs to the PBL */
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- pages = sg_dma_len(sg) >> page_shift;
+ pages = sg_dma_len(sg) >> umem->page_shift;
for (i = 0; i < pages; i++, pbl_tbl++)
- *pbl_tbl = sg_dma_address(sg) + (i << page_shift);
+ *pbl_tbl = sg_dma_address(sg) + (i << umem->page_shift);
}
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig,
umem_pgs, false);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 445e89e5e7cf..97dbe728520a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -51,17 +51,18 @@ void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
m->mem_id = MEM_PMRX;
m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
m->len = size;
- PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("TPT %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -87,18 +88,19 @@ void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
m->mem_id = MEM_PMRX;
m->addr = pbl_addr;
m->len = size;
- PDBG("%s PBL addr 0x%x len %d depth %d\n",
- __func__, m->addr, m->len, npages);
+ pr_debug("%s PBL addr 0x%x len %d depth %d\n",
+ __func__, m->addr, m->len, npages);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("PBL %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -114,8 +116,8 @@ void cxio_dump_wqe(union t3_wr *wqe)
if (size == 0)
size = 8;
while (size > 0) {
- PDBG("WQE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
+ pr_debug("WQE %p: %016llx\n",
+ data, (unsigned long long)be64_to_cpu(*data));
size--;
data++;
}
@@ -127,8 +129,8 @@ void cxio_dump_wce(struct t3_cqe *wce)
int size = sizeof(*wce);
while (size > 0) {
- PDBG("WCE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
+ pr_debug("WCE %p: %016llx\n",
+ data, (unsigned long long)be64_to_cpu(*data));
size -= 8;
data++;
}
@@ -148,17 +150,18 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
m->mem_id = MEM_PMRX;
m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
m->len = size;
- PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("RQT %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -180,10 +183,10 @@ void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
m->mem_id = MEM_CM;
m->addr = hwtid * size;
m->len = size;
- PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s TCB %d len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index ada2e5009c86..558d6a03375d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -110,8 +110,7 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
udelay(1);
if (i++ > 1000000) {
- printk(KERN_ERR "%s: stalled rnic\n",
- rdev_p->dev_name);
+ pr_err("%s: stalled rnic\n", rdev_p->dev_name);
return -EIO;
}
}
@@ -140,7 +139,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
struct t3_modify_qp_wr *wqe;
struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
+ pr_debug("%s alloc_skb failed\n", __func__);
return -ENOMEM;
}
wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
@@ -230,7 +229,7 @@ static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
return qpid;
}
@@ -242,7 +241,7 @@ static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
entry->qpid = qpid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->qpids);
@@ -306,8 +305,8 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
(wq->qpid << rdev_p->qpshift);
wq->rdev = rdev_p;
- PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
- wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
+ pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n",
+ __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb);
return 0;
err4:
kfree(wq->sq);
@@ -351,8 +350,8 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
{
struct t3_cqe cqe;
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
+ pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
V_CQE_OPCODE(T3_SEND) |
@@ -370,11 +369,11 @@ int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
u32 ptr;
int flushed = 0;
- PDBG("%s wq %p cq %p\n", __func__, wq, cq);
+ pr_debug("%s wq %p cq %p\n", __func__, wq, cq);
/* flush RQ */
- PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
- wq->rq_rptr, wq->rq_wptr, count);
+ pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
+ wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
@@ -388,8 +387,8 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
{
struct t3_cqe cqe;
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
+ pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
V_CQE_OPCODE(sqp->opcode) |
@@ -429,11 +428,11 @@ void cxio_flush_hw_cq(struct t3_cq *cq)
{
struct t3_cqe *cqe, *swcqe;
- PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
+ pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
cqe = cxio_next_hw_cqe(cq);
while (cqe) {
- PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
- __func__, cq->rptr, cq->sw_wptr);
+ pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
+ __func__, cq->rptr, cq->sw_wptr);
swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
*swcqe = *cqe;
swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
@@ -476,7 +475,7 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
(*count)++;
ptr++;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
@@ -485,7 +484,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
u32 ptr;
*count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
+ pr_debug("%s count zero %d\n", __func__, *count);
ptr = cq->sw_rptr;
while (!Q_EMPTY(ptr, cq->sw_wptr)) {
cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
@@ -494,7 +493,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
(*count)++;
ptr++;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
@@ -521,12 +520,12 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
+ pr_debug("%s alloc_skb failed\n", __func__);
return -ENOMEM;
}
err = cxio_hal_init_ctrl_cq(rdev_p);
if (err) {
- PDBG("%s err %d initializing ctrl_cq\n", __func__, err);
+ pr_debug("%s err %d initializing ctrl_cq\n", __func__, err);
goto err;
}
rdev_p->ctrl_qp.workq = dma_alloc_coherent(
@@ -536,7 +535,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
&(rdev_p->ctrl_qp.dma_addr),
GFP_KERNEL);
if (!rdev_p->ctrl_qp.workq) {
- PDBG("%s dma_alloc_coherent failed\n", __func__);
+ pr_debug("%s dma_alloc_coherent failed\n", __func__);
err = -ENOMEM;
goto err;
}
@@ -571,9 +570,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
wqe->sge_cmd = cpu_to_be64(sge_cmd);
wqe->ctx1 = cpu_to_be64(ctx1);
wqe->ctx0 = cpu_to_be64(ctx0);
- PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
- (unsigned long long) rdev_p->ctrl_qp.dma_addr,
- rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
+ pr_debug("CtrlQP dma_addr 0x%llx workq %p size %d\n",
+ (unsigned long long)rdev_p->ctrl_qp.dma_addr,
+ rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
skb->priority = CPL_PRIORITY_CONTROL;
return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
err:
@@ -605,26 +604,26 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
u64 utx_cmd;
addr &= 0x7FFFFFF;
nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
- PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
- __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
- nr_wqe, data, addr);
+ pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
+ __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+ nr_wqe, data, addr);
utx_len = 3; /* in 32B unit */
for (i = 0; i < nr_wqe; i++) {
if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
T3_CTRL_QP_SIZE_LOG2)) {
- PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
- "wait for more space i %d\n", __func__,
- rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
+ pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n",
+ __func__,
+ rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
!Q_FULL(rdev_p->ctrl_qp.rptr,
rdev_p->ctrl_qp.wptr,
T3_CTRL_QP_SIZE_LOG2))) {
- PDBG("%s ctrl_qp workq interrupted\n",
- __func__);
+ pr_debug("%s ctrl_qp workq interrupted\n",
+ __func__);
return -ERESTARTSYS;
}
- PDBG("%s ctrl_qp wakeup, continue posting work request "
- "i %d\n", __func__, i);
+ pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n",
+ __func__, i);
}
wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
(1 << T3_CTRL_QP_SIZE_LOG2)));
@@ -645,7 +644,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
if ((i != 0) &&
(i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
flag = T3_COMPLETION_FLAG;
- PDBG("%s force completion at i %d\n", __func__, i);
+ pr_debug("%s force completion at i %d\n", __func__, i);
}
/* build the utx mem command */
@@ -717,8 +716,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
return -ENOMEM;
*stag = (stag_idx << 8) | ((*stag) & 0xFF);
}
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
+ pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
mutex_lock(&rdev_p->ctrl_qp.lock);
@@ -767,9 +766,9 @@ int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
u32 wptr;
int err;
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
- pbl_size);
+ pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+ pbl_size);
mutex_lock(&rdev_p->ctrl_qp.lock);
err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
@@ -837,7 +836,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
if (!skb)
return -ENOMEM;
- PDBG("%s rdev_p %p\n", __func__, rdev_p);
+ pr_debug("%s rdev_p %p\n", __func__, rdev_p);
wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
@@ -880,22 +879,20 @@ static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
static int cnt;
struct cxio_rdev *rdev_p = NULL;
struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
- " se %0x notify %0x cqbranch %0x creditth %0x\n",
- cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
- RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
- RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
- RSPQ_CREDIT_THRESH(rsp_msg));
- PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
- "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n",
+ cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+ RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
+ RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
+ RSPQ_CREDIT_THRESH(rsp_msg));
+ pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
if (!rdev_p) {
- PDBG("%s called by t3cdev %p with null ulp\n", __func__,
- t3cdev_p);
+ pr_debug("%s called by t3cdev %p with null ulp\n", __func__,
+ t3cdev_p);
return 0;
}
if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
@@ -934,13 +931,13 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
T3_MAX_DEV_NAME_LEN);
} else {
- PDBG("%s t3cdev_p or dev_name must be set\n", __func__);
+ pr_debug("%s t3cdev_p or dev_name must be set\n", __func__);
return -EINVAL;
}
list_add_tail(&rdev_p->entry, &rdev_list);
- PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
+ pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
if (!rdev_p->t3cdev_p)
rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
@@ -949,13 +946,12 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
&(rdev_p->fw_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
- printk(KERN_ERR MOD "fatal firmware version mismatch: "
- "need version %u but adapter has version %u\n",
+ pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n",
CXIO_FW_MAJ,
G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
err = -EINVAL;
@@ -965,15 +961,15 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
&(rdev_p->rnic_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
&(rdev_p->port_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
@@ -988,42 +984,39 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
PAGE_SHIFT));
rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
- PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
- "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
- __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
- rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
- rdev_p->rnic_info.pbl_base,
- rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
- rdev_p->rnic_info.rqt_top);
- PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
- "qpnr %d qpmask 0x%x\n",
- rdev_p->rnic_info.udbell_len,
- rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
- rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
+ pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
+ __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+ rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
+ rdev_p->rnic_info.pbl_base,
+ rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+ rdev_p->rnic_info.rqt_top);
+ pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n",
+ rdev_p->rnic_info.udbell_len,
+ rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
+ rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
err = cxio_hal_init_ctrl_qp(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
- __func__, err);
+ pr_err("%s error %d initializing ctrl_qp\n", __func__, err);
goto err1;
}
err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
T3_MAX_NUM_PD);
if (err) {
- printk(KERN_ERR "%s error %d initializing hal resources.\n",
+ pr_err("%s error %d initializing hal resources\n",
__func__, err);
goto err2;
}
err = cxio_hal_pblpool_create(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
+ pr_err("%s error %d initializing pbl mem pool\n",
__func__, err);
goto err3;
}
err = cxio_hal_rqtpool_create(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
+ pr_err("%s error %d initializing rqt mem pool\n",
__func__, err);
goto err4;
}
@@ -1086,9 +1079,9 @@ static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
/*
* Insert this completed cqe into the swcq.
*/
- PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
- __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
- Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
+ pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
+ __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
+ Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
= sqp->cqe;
@@ -1154,12 +1147,11 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
*credit = 0;
hw_cqe = cxio_next_cqe(cq);
- PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
- CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
- CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
- CQE_WRID_LOW(*hw_cqe));
+ pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+ CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
+ CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
+ CQE_WRID_LOW(*hw_cqe));
/*
* skip cqe's not affiliated with a QP.
@@ -1278,9 +1270,10 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
struct t3_swsq *sqp;
- PDBG("%s out of order completion going in swsq at idx %ld\n",
- __func__,
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
+ pr_debug("%s out of order completion going in swsq at idx %ld\n",
+ __func__,
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe),
+ wq->sq_size_log2));
sqp = wq->sq +
Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
sqp->cqe = *hw_cqe;
@@ -1298,13 +1291,13 @@ proc_cqe:
*/
if (SQ_TYPE(*hw_cqe)) {
wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
- PDBG("%s completing sq idx %ld\n", __func__,
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
+ pr_debug("%s completing sq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
*cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
wq->sq_rptr++;
} else {
- PDBG("%s completing rq idx %ld\n", __func__,
- Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ pr_debug("%s completing rq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
*cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
cxio_hal_pblpool_free(wq->rdev,
@@ -1322,12 +1315,12 @@ flush_wq:
skip_cqe:
if (SW_CQE(*hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->sw_rptr);
+ pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->sw_rptr);
++cq->sw_rptr;
} else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->rptr);
+ pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->rptr);
++cq->rptr;
/*
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 78fbe9ffe7f0..7e70c5492262 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -196,8 +196,11 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
u8 *cqe_flushed, u64 *cookie, u32 *credit);
int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
-#define MOD "iw_cxgb3: "
-#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#ifdef DEBUG
void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index c40088ecf9f3..c6e7bc4420b6 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -209,13 +209,13 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
{
u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
&rscp->qpid_fifo_lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
return qpid;
}
void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
{
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
}
@@ -257,13 +257,13 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
{
unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
return (u32)addr;
}
void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
}
@@ -282,17 +282,18 @@ int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
pbl_chunk);
if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
- __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
+ pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n",
+ __func__, pbl_start,
+ rdev_p->rnic_info.pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
@@ -315,13 +316,13 @@ void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
{
unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
return (u32)addr;
}
void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b3e11329801d..47b2ce2ef203 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -105,7 +105,7 @@ static void iwch_db_drop_task(struct work_struct *work)
static void rnic_init(struct iwch_dev *rnicp)
{
- PDBG("%s iwch_dev %p\n", __func__, rnicp);
+ pr_debug("%s iwch_dev %p\n", __func__, rnicp);
idr_init(&rnicp->cqidr);
idr_init(&rnicp->qpidr);
idr_init(&rnicp->mmidr);
@@ -145,12 +145,11 @@ static void open_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *rnicp;
- PDBG("%s t3cdev %p\n", __func__, tdev);
- printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
- DRV_VERSION);
+ pr_debug("%s t3cdev %p\n", __func__, tdev);
+ pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
if (!rnicp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ pr_err("Cannot allocate ib device\n");
return;
}
rnicp->rdev.ulp = rnicp;
@@ -160,7 +159,7 @@ static void open_rnic_dev(struct t3cdev *tdev)
if (cxio_rdev_open(&rnicp->rdev)) {
mutex_unlock(&dev_mutex);
- printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
+ pr_err("Unable to open CXIO rdev\n");
ib_dealloc_device(&rnicp->ibdev);
return;
}
@@ -171,18 +170,18 @@ static void open_rnic_dev(struct t3cdev *tdev)
mutex_unlock(&dev_mutex);
if (iwch_register_device(rnicp)) {
- printk(KERN_ERR MOD "Unable to register device\n");
+ pr_err("Unable to register device\n");
close_rnic_dev(tdev);
}
- printk(KERN_INFO MOD "Initialized device %s\n",
- pci_name(rnicp->rdev.rnic_info.pdev));
+ pr_info("Initialized device %s\n",
+ pci_name(rnicp->rdev.rnic_info.pdev));
return;
}
static void close_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *dev, *tmp;
- PDBG("%s t3cdev %p\n", __func__, tdev);
+ pr_debug("%s t3cdev %p\n", __func__, tdev);
mutex_lock(&dev_mutex);
list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
if (dev->rdev.t3cdev_p == tdev) {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 65ee64400deb..b61630eba912 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -112,9 +112,9 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status);
static void start_ep_timer(struct iwch_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (timer_pending(&ep->timer)) {
- PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
+ pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
del_timer_sync(&ep->timer);
} else
get_ep(&ep->com);
@@ -126,7 +126,7 @@ static void start_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (!timer_pending(&ep->timer)) {
WARN(1, "%s timer stopped when its not running! ep %p state %u\n",
__func__, ep, ep->com.state);
@@ -227,13 +227,13 @@ int iwch_resume_tid(struct iwch_ep *ep)
static void set_emss(struct iwch_ep *ep, u16 opt)
{
- PDBG("%s ep %p opt %u\n", __func__, ep, opt);
+ pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
if (G_TCPOPT_TSTAMP(opt))
ep->emss -= 12;
if (ep->emss < 128)
ep->emss = 128;
- PDBG("emss=%d\n", ep->emss);
+ pr_debug("emss=%d\n", ep->emss);
}
static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
@@ -257,7 +257,7 @@ static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
unsigned long flags;
spin_lock_irqsave(&epc->lock, flags);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
__state_set(epc, new);
spin_unlock_irqrestore(&epc->lock, flags);
return;
@@ -273,7 +273,7 @@ static void *alloc_ep(int size, gfp_t gfp)
spin_lock_init(&epc->lock);
init_waitqueue_head(&epc->waitq);
}
- PDBG("%s alloc ep %p\n", __func__, epc);
+ pr_debug("%s alloc ep %p\n", __func__, epc);
return epc;
}
@@ -282,7 +282,8 @@ void __free_ep(struct kref *kref)
struct iwch_ep *ep;
ep = container_of(container_of(kref, struct iwch_ep_common, kref),
struct iwch_ep, com);
- PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+ pr_debug("%s ep %p state %s\n",
+ __func__, ep, states[state_read(&ep->com)]);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
dst_release(ep->dst);
@@ -293,7 +294,7 @@ void __free_ep(struct kref *kref)
static void release_ep_resources(struct iwch_ep *ep)
{
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
set_bit(RELEASE_RESOURCES, &ep->com.flags);
put_ep(&ep->com);
}
@@ -358,7 +359,7 @@ static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
{
- PDBG("%s t3cdev %p\n", __func__, dev);
+ pr_debug("%s t3cdev %p\n", __func__, dev);
kfree_skb(skb);
}
@@ -367,7 +368,7 @@ static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
*/
static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
- printk(KERN_ERR MOD "ARP failure during connect\n");
+ pr_err("ARP failure during connect\n");
kfree_skb(skb);
}
@@ -379,7 +380,7 @@ static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
struct cpl_abort_req *req = cplhdr(skb);
- PDBG("%s t3cdev %p\n", __func__, dev);
+ pr_debug("%s t3cdev %p\n", __func__, dev);
req->cmd = CPL_ABORT_NO_RST;
iwch_cxgb3_ofld_send(dev, skb);
}
@@ -389,10 +390,10 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
struct cpl_close_con_req *req;
struct sk_buff *skb;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), gfp);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
@@ -408,11 +409,10 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
struct cpl_abort_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(skb, sizeof(*req), gfp);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
@@ -434,12 +434,11 @@ static int send_connect(struct iwch_ep *ep)
unsigned int mtu_idx;
int wscale;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
@@ -478,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
struct mpa_message *mpa;
int len;
- PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
+ pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
BUG_ON(skb_cloned(skb));
@@ -538,13 +537,13 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
struct mpa_message *mpa;
struct sk_buff *skb;
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+ pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
mpalen = sizeof(*mpa) + plen;
skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
skb_reserve(skb, sizeof(*req));
@@ -587,13 +586,13 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
int len;
struct sk_buff *skb;
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+ pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
mpalen = sizeof(*mpa) + plen;
skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
@@ -636,7 +635,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_act_establish *req = cplhdr(skb);
unsigned int tid = GET_TID(req);
- PDBG("%s ep %p tid %d\n", __func__, ep, tid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, tid);
dst_confirm(ep->dst);
@@ -660,7 +659,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
- PDBG("%s ep %p\n", __FILE__, ep);
+ pr_debug("%s ep %p\n", __FILE__, ep);
state_set(&ep->com, ABORTING);
send_abort(ep, skb, gfp);
}
@@ -669,12 +668,12 @@ static void close_complete_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
ep->com.cm_id->rem_ref(ep->com.cm_id);
ep->com.cm_id = NULL;
@@ -686,12 +685,12 @@ static void peer_close_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_DISCONNECT;
if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
}
@@ -700,13 +699,13 @@ static void peer_abort_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = -ECONNRESET;
if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
- ep->com.cm_id, ep->hwtid);
+ pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
+ ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
ep->com.cm_id->rem_ref(ep->com.cm_id);
ep->com.cm_id = NULL;
@@ -718,7 +717,7 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p status %d\n", __func__, ep, status);
+ pr_debug("%s ep %p status %d\n", __func__, ep, status);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
@@ -732,8 +731,8 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
}
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d status %d\n", __func__, ep,
- ep->hwtid, status);
+ pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
+ ep->hwtid, status);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
if (status < 0) {
@@ -747,7 +746,7 @@ static void connect_request_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
memcpy(&event.local_addr, &ep->com.local_addr,
@@ -776,7 +775,7 @@ static void established_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
/*
@@ -785,7 +784,7 @@ static void established_upcall(struct iwch_ep *ep)
*/
event.ird = event.ord = 8;
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
}
@@ -795,10 +794,10 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits)
struct cpl_rx_data_ack *req;
struct sk_buff *skb;
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+ pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ pr_err("update_rx_credits - cannot alloc skb!\n");
return 0;
}
@@ -819,7 +818,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
enum iwch_qp_attr_mask mask;
int err;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
/*
* Stop mpa timer. If it expired, then the state has
@@ -906,10 +905,10 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
attrs.mpa_attr = ep->mpa_attr;
attrs.max_ird = ep->ird;
@@ -944,7 +943,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
struct mpa_message *mpa;
u16 plen;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
/*
* Stop mpa timer. If it expired, then the state has
@@ -964,7 +963,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
return;
}
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
/*
* Copy the new data into our accumulation buffer.
@@ -979,7 +978,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
*/
if (ep->mpa_pkt_len < sizeof(*mpa))
return;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
@@ -1029,10 +1028,10 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
state_set(&ep->com, MPA_REQ_RCVD);
@@ -1047,7 +1046,7 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_rx_data *hdr = cplhdr(skb);
unsigned int dlen = ntohs(hdr->len);
- PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
+ pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
@@ -1065,8 +1064,7 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
case MPA_REP_SENT:
break;
default:
- printk(KERN_ERR MOD "%s Unexpected streaming data."
- " ep %p state %d tid %d\n",
+ pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
__func__, ep, state_read(&ep->com), ep->hwtid);
/*
@@ -1095,11 +1093,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int post_zb = 0;
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+ pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
if (credits == 0) {
- PDBG("%s 0 credit ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
+ pr_debug("%s 0 credit ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
return CPL_RET_BUF_DONE;
}
@@ -1107,24 +1105,24 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
BUG_ON(credits != 1);
dst_confirm(ep->dst);
if (!ep->mpa_skb) {
- PDBG("%s rdma_init wr_ack ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s rdma_init wr_ack ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (ep->mpa_attr.initiator) {
- PDBG("%s initiator ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s initiator ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (peer2peer && ep->com.state == FPDU_MODE)
post_zb = 1;
} else {
- PDBG("%s responder ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s responder ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (ep->com.state == MPA_REQ_RCVD) {
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
}
}
} else {
- PDBG("%s lsm ack ep %p state %u freeing skb\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s lsm ack ep %p state %u freeing skb\n",
+ __func__, ep, ep->com.state);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
}
@@ -1140,7 +1138,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(!ep);
/*
@@ -1159,8 +1157,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
release = 1;
break;
default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
+ pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
break;
}
spin_unlock_irqrestore(&ep->com.lock, flags);
@@ -1184,8 +1181,8 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_act_open_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
- status2errno(rpl->status));
+ pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
+ status2errno(rpl->status));
connect_reply_upcall(ep, status2errno(rpl->status));
state_set(&ep->com, DEAD);
if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
@@ -1202,10 +1199,10 @@ static int listen_start(struct iwch_listen_ep *ep)
struct sk_buff *skb;
struct cpl_pass_open_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
+ pr_err("t3c_listen_start failed to alloc skb!\n");
return -ENOMEM;
}
@@ -1230,8 +1227,8 @@ static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_listen_ep *ep = ctx;
struct cpl_pass_open_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
+ pr_debug("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
ep->com.rpl_err = status2errno(rpl->status);
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
@@ -1244,10 +1241,10 @@ static int listen_stop(struct iwch_listen_ep *ep)
struct sk_buff *skb;
struct cpl_close_listserv_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
@@ -1264,7 +1261,7 @@ static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
struct iwch_listen_ep *ep = ctx;
struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.rpl_err = status2errno(rpl->status);
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
@@ -1278,7 +1275,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
u32 opt0h, opt0l, opt2;
int wscale;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(*rpl));
skb_get(skb);
@@ -1312,8 +1309,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
struct sk_buff *skb)
{
- PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
- peer_ip);
+ pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
+ peer_ip);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
skb_get(skb);
@@ -1347,11 +1344,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct rtable *rt;
struct iff_mac tim;
- PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
+ pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
if (state_read(&parent_ep->com) != LISTEN) {
- printk(KERN_ERR "%s - listening ep not in LISTEN\n",
- __func__);
+ pr_err("%s - listening ep not in LISTEN\n", __func__);
goto reject;
}
@@ -1361,8 +1357,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
tim.mac_addr = req->dst_mac;
tim.vlan_tag = ntohs(req->vlan_tag);
if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- printk(KERN_ERR "%s bad dst mac %pM\n",
- __func__, req->dst_mac);
+ pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
goto reject;
}
@@ -1373,22 +1368,19 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
req->local_port,
req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
if (!rt) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
dst = &rt->dst;
l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
if (!l2t) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
+ pr_err("%s - failed to allocate l2t entry!\n", __func__);
dst_release(dst);
goto reject;
}
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
+ pr_err("%s - failed to allocate ep entry!\n", __func__);
l2t_release(tdev, l2t);
dst_release(dst);
goto reject;
@@ -1423,7 +1415,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_pass_establish *req = cplhdr(skb);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->snd_seq = ntohl(req->snd_isn);
ep->rcv_seq = ntohl(req->rcv_isn);
@@ -1444,7 +1436,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
int disconnect = 1;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
dst_confirm(ep->dst);
spin_lock_irqsave(&ep->com.lock, flags);
@@ -1467,14 +1459,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
__state_set(&ep->com, CLOSING);
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case FPDU_MODE:
@@ -1539,8 +1531,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
if (is_neg_adv_abort(req->status)) {
- PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
- ep->hwtid);
+ pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
+ ep->hwtid);
t3_l2t_send_event(ep->com.tdev, ep->l2t);
return CPL_RET_BUF_DONE;
}
@@ -1554,7 +1546,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
}
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+ pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
switch (ep->com.state) {
case CONNECTING:
break;
@@ -1568,7 +1560,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
case MPA_REP_SENT:
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MPA_REQ_RCVD:
@@ -1581,7 +1573,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
*/
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MORIBUND:
@@ -1595,16 +1587,14 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
+ pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep);
break;
case ABORTING:
break;
case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
spin_unlock_irqrestore(&ep->com.lock, flags);
return CPL_RET_BUF_DONE;
default:
@@ -1620,8 +1610,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
if (!rpl_skb) {
- printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
- __func__);
+ pr_err("%s - cannot allocate skb!\n", __func__);
release = 1;
goto out;
}
@@ -1645,7 +1634,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
@@ -1699,9 +1688,9 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
if (state_read(&ep->com) != FPDU_MODE)
return CPL_RET_BUF_DONE;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb_pull(skb, sizeof(struct cpl_rdma_terminate));
- PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
+ pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
skb->len);
ep->com.qp->attr.terminate_msg_len = skb->len;
@@ -1714,12 +1703,12 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_rdma_ec_status *rep = cplhdr(skb);
struct iwch_ep *ep = ctx;
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
- rep->status);
+ pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
+ rep->status);
if (rep->status) {
struct iwch_qp_attributes attrs;
- printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
+ pr_err("%s BAD CLOSE - Aborting tid %u\n",
__func__, ep->hwtid);
stop_ep_timer(ep);
attrs.next_state = IWCH_QP_STATE_ERROR;
@@ -1739,8 +1728,8 @@ static void ep_timeout(unsigned long arg)
int abort = 1;
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
switch (ep->com.state) {
case MPA_REQ_SENT:
__state_set(&ep->com, ABORTING);
@@ -1774,7 +1763,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
int err;
struct iwch_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (state_read(&ep->com) == DEAD) {
put_ep(&ep->com);
@@ -1800,7 +1789,7 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_dev *h = to_iwch_dev(cm_id->device);
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (state_read(&ep->com) == DEAD) {
err = -ECONNRESET;
goto err;
@@ -1826,7 +1815,7 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (peer2peer && ep->ird == 0)
ep->ird = 1;
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
@@ -1907,7 +1896,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto out;
}
@@ -1928,15 +1917,15 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.cm_id = cm_id;
ep->com.qp = get_qhp(h, conn_param->qpn);
BUG_ON(!ep->com.qp);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
+ pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
/*
* Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -1946,7 +1935,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
raddr->sin_addr.s_addr, laddr->sin_port,
raddr->sin_port, IPTOS_LOWDELAY);
if (!rt) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -1954,7 +1943,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
&raddr->sin_addr.s_addr);
if (!ep->l2t) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
err = -ENOMEM;
goto fail4;
}
@@ -1999,11 +1988,11 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto fail1;
}
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.tdev = h->rdev.t3cdev_p;
cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
@@ -2016,7 +2005,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
*/
ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -2048,7 +2037,7 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id)
int err;
struct iwch_listen_ep *ep = to_listen_ep(cm_id);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
might_sleep();
state_set(&ep->com, DEAD);
@@ -2077,8 +2066,8 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
+ pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
tdev = (struct t3cdev *)ep->com.tdev;
rdev = (struct cxio_rdev *)tdev->ulp;
@@ -2115,8 +2104,8 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
@@ -2145,8 +2134,8 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
if (ep->dst != old)
return 0;
- PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
- l2t);
+ pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
+ l2t);
dst_hold(new);
l2t_release(ep->com.tdev, ep->l2t);
ep->l2t = l2t;
@@ -2225,8 +2214,8 @@ static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
+ pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
}
return CPL_RET_BUF_DONE;
}
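
Throughout the hunks above, messages lose their explicit KERN_* level and MOD prefix: the pr_*() helpers supply the level, and a single per-file pr_fmt() define (conventionally added near the top of the file, outside these hunks) supplies the prefix. A minimal userspace-compilable sketch of that idiom — the fprintf stand-ins and the "iw_cxgb3: " prefix are illustrative assumptions; the real macros live in include/linux/printk.h:

#include <stdio.h>

/* In the kernel, pr_debug()/pr_err() expand their format through pr_fmt(),
 * so one define prefixes every message emitted from the file. */
#define pr_fmt(fmt) "iw_cxgb3: " fmt	/* illustrative prefix, stands in for MOD */
#define pr_err(fmt, ...)   fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_debug(fmt, ...) fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	/* Prints "iw_cxgb3: main - cannot alloc skb" without the call site
	 * repeating the module prefix. */
	pr_err("%s - cannot alloc skb\n", __func__);
	return 0;
}
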
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index e66e75921797..cc7fe644d260 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -53,17 +53,17 @@
#define MPA_MARKERS 0x80
#define MPA_FLAGS_MASK 0xE0
-#define put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- WARN_ON(kref_read(&((ep)->kref)) < 1); \
- kref_put(&((ep)->kref), __free_ep); \
+#define put_ep(ep) { \
+ pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
+ __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
+ kref_put(&((ep)->kref), __free_ep); \
}
-#define get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- kref_get(&((ep)->kref)); \
+#define get_ep(ep) { \
+ pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
+ __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
+ kref_get(&((ep)->kref)); \
}
struct mpa_message {
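
The put_ep()/get_ep() macros reworked above are thin wrappers around the kernel's kref reference-counting API. A compact sketch of that pattern (the example_* names are hypothetical; kref_get/kref_put/kref_read are the real <linux/kref.h> interfaces the macros call):

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/printk.h>

struct example_ep {
	struct kref kref;
	/* ... endpoint state ... */
};

/* Release callback: invoked by kref_put() when the count reaches zero. */
static void example_free_ep(struct kref *kref)
{
	struct example_ep *ep = container_of(kref, struct example_ep, kref);

	kfree(ep);
}

static void example_put_ep(struct example_ep *ep)
{
	/* kref_read() is what the converted macros report as "refcnt" */
	pr_debug("put_ep ep %p refcnt %d\n", ep, kref_read(&ep->kref));
	kref_put(&ep->kref, example_free_ep);
}
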
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 97fbfd2c298e..dd5348e48806 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -67,8 +67,8 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
&credit);
if (t3a_device(chp->rhp) && credit) {
- PDBG("%s updating %d cq credits on id %d\n", __func__,
- credit, chp->cq.cqid);
+ pr_debug("%s updating %d cq credits on id %d\n", __func__,
+ credit, chp->cq.cqid);
cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
}
@@ -83,11 +83,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->vendor_err = CQE_STATUS(cqe);
wc->wc_flags = 0;
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__,
- CQE_QPID(cqe), CQE_TYPE(cqe),
- CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
- CQE_WRID_LOW(cqe), (unsigned long long) cookie);
+ pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
+ __func__,
+ CQE_QPID(cqe), CQE_TYPE(cqe),
+ CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
+ CQE_WRID_LOW(cqe), (unsigned long long)cookie);
if (CQE_TYPE(cqe) == 0) {
if (!CQE_STATUS(cqe))
@@ -122,8 +122,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->opcode = IB_WC_REG_MR;
break;
default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(cqe), CQE_QPID(cqe));
ret = -EINVAL;
goto out;
@@ -177,8 +176,8 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->status = IB_WC_WR_FLUSH_ERR;
break;
default:
- printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
- "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
+ pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ CQE_STATUS(cqe), CQE_QPID(cqe));
ret = -EINVAL;
}
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index abcc9e76962b..4a0c82a8fb60 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -52,7 +52,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
if (!qhp) {
- printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
+ pr_err("%s unaffiliated error 0x%x qpid 0x%x\n",
__func__, CQE_STATUS(rsp_msg->cqe),
CQE_QPID(rsp_msg->cqe));
spin_unlock(&rnicp->lock);
@@ -61,15 +61,16 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
(qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
- PDBG("%s AE received after RTS - "
- "qp state %d qpid 0x%x status 0x%x\n", __func__,
- qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+ pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n",
+ __func__,
+ qhp->attr.state, qhp->wq.qpid,
+ CQE_STATUS(rsp_msg->cqe));
spin_unlock(&rnicp->lock);
return;
}
- printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
- "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
+ pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ __func__,
CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
@@ -117,8 +118,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
chp = get_chp(rnicp, cqid);
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
if (!chp || !qhp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
+ pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
cqid, CQE_QPID(rsp_msg->cqe),
CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
@@ -137,12 +137,12 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
(CQE_STATUS(rsp_msg->cqe) == 0)) {
if (SQ_TYPE(rsp_msg->cqe)) {
- PDBG("%s QPID 0x%x ep %p disconnecting\n",
- __func__, qhp->wq.qpid, qhp->ep);
+ pr_debug("%s QPID 0x%x ep %p disconnecting\n",
+ __func__, qhp->wq.qpid, qhp->ep);
iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
} else {
- PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__,
- qhp->wq.qpid);
+ pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__,
+ qhp->wq.qpid);
post_qp_event(rnicp, chp, rsp_msg,
IB_EVENT_QP_REQ_ERR, 0);
iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
@@ -218,7 +218,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
break;
default:
- printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
+ pr_err("Unknown T3 status 0x%x QPID 0x%x\n",
CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 1d04c872c9d5..12886b1b4b10 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -48,7 +48,7 @@ static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 86ecd3ea6a4b..790d7c79fe3e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -103,7 +103,7 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context)
struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
struct iwch_mm_entry *mm, *tmp;
- PDBG("%s context %p\n", __func__, context);
+ pr_debug("%s context %p\n", __func__, context);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
@@ -117,7 +117,7 @@ static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
struct iwch_ucontext *context;
struct iwch_dev *rhp = to_iwch_dev(ibdev);
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -131,7 +131,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq)
{
struct iwch_cq *chp;
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ pr_debug("%s ib_cq %p\n", __func__, ib_cq);
chp = to_iwch_cq(ib_cq);
remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
@@ -157,7 +157,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
static int warned;
size_t resplen;
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -227,8 +227,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
mm->addr = virt_to_phys(chp->cq.queue);
if (udata->outlen < sizeof uresp) {
if (!warned++)
- printk(KERN_WARNING MOD "Warning - "
- "downlevel libcxgb3 (non-fatal).\n");
+ pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
sizeof(struct t3_cqe));
resplen = sizeof(struct iwch_create_cq_resp_v0);
@@ -246,9 +245,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
}
insert_mmap(ucontext, mm);
}
- PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
- chp->cq.cqid, chp, (1 << chp->cq.size_log2),
- (unsigned long long) chp->cq.dma_addr);
+ pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
+ chp->cq.cqid, chp, (1 << chp->cq.size_log2),
+ (unsigned long long)chp->cq.dma_addr);
return &chp->ibcq;
}
@@ -259,7 +258,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
struct t3_cq oldcq, newcq;
int ret;
- PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
+ pr_debug("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
/* We don't downsize... */
if (cqe <= cq->cqe)
@@ -306,8 +305,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
oldcq.cqid = newcq.cqid;
ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
if (ret) {
- printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
- __func__, ret);
+ pr_err("%s - cxio_destroy_cq failed %d\n", __func__, ret);
}
/* add user hooks here */
@@ -342,12 +340,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
chp->cq.rptr = rptr;
} else
spin_lock_irqsave(&chp->lock, flag);
- PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr);
+ pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr);
err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
spin_unlock_irqrestore(&chp->lock, flag);
if (err < 0)
- printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
- chp->cq.cqid);
+ pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid);
if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
err = 0;
return err;
@@ -363,8 +360,8 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct iwch_ucontext *ucontext;
u64 addr;
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
+ pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
if (vma->vm_start & (PAGE_SIZE-1)) {
return -EINVAL;
@@ -416,7 +413,7 @@ static int iwch_deallocate_pd(struct ib_pd *pd)
php = to_iwch_pd(pd);
rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
kfree(php);
return 0;
@@ -430,7 +427,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
u32 pdid;
struct iwch_dev *rhp;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
rhp = (struct iwch_dev *) ibdev;
pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
if (!pdid)
@@ -448,7 +445,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
return ERR_PTR(-EFAULT);
}
}
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
}
@@ -458,7 +455,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
struct iwch_mr *mhp;
u32 mmid;
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ pr_debug("%s ib_mr %p\n", __func__, ib_mr);
mhp = to_iwch_mr(ib_mr);
kfree(mhp->pages);
@@ -472,7 +469,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
kfree(mhp);
return 0;
}
@@ -487,13 +484,13 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
__be64 *page_list;
int shift = 26, npages, ret, i;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
/*
* T3 only supports 32 bits of size.
*/
if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+ pr_warn_once("Cannot support dma_mrs on this platform\n");
return ERR_PTR(-ENOTSUPP);
}
@@ -518,8 +515,8 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
for (i = 0; i < npages; i++)
page_list[i] = cpu_to_be64((u64)i << shift);
- PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, mask, shift, total_size, npages);
+ pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, mask, shift, total_size, npages);
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
@@ -567,7 +564,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
struct scatterlist *sg;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
rhp = php->rhp;
@@ -584,7 +581,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
- shift = ffs(mhp->umem->page_size) - 1;
+ shift = mhp->umem->page_shift;
n = mhp->umem->nmap;
@@ -604,7 +601,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
+ (k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
@@ -637,8 +634,8 @@ pbl_done:
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
rhp->rdev.rnic_info.pbl_base) >> 3;
- PDBG("%s user resp pbl_addr 0x%x\n", __func__,
- uresp.pbl_addr);
+ pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
+ uresp.pbl_addr);
if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
iwch_dereg_mr(&mhp->ibmr);
@@ -692,7 +689,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
kfree(mhp);
return ERR_PTR(-ENOMEM);
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
}
@@ -707,7 +704,7 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
mmid = (mw->rkey) >> 8;
cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
remove_handle(rhp, &rhp->mmidr, mmid);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
kfree(mhp);
return 0;
}
@@ -757,7 +754,7 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
goto err3;
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
err3:
cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
@@ -818,8 +815,8 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp)
cxio_destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
- PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
- ib_qp, qhp->wq.qpid, qhp);
+ pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
+ ib_qp, qhp->wq.qpid, qhp);
kfree(qhp);
return 0;
}
@@ -837,7 +834,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
int wqsize, sqsize, rqsize;
struct iwch_ucontext *ucontext;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (attrs->qp_type != IB_QPT_RC)
return ERR_PTR(-EINVAL);
php = to_iwch_pd(pd);
@@ -878,8 +875,8 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
wqsize = roundup_pow_of_two(rqsize +
roundup_pow_of_two(attrs->cap.max_send_wr * 2));
- PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
- wqsize, sqsize, rqsize);
+ pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__,
+ wqsize, sqsize, rqsize);
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
return ERR_PTR(-ENOMEM);
@@ -974,11 +971,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
}
qhp->ibqp.qp_num = qhp->wq.qpid;
init_timer(&(qhp->timer));
- PDBG("%s sq_num_entries %d, rq_num_entries %d "
- "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
- __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
- qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
- 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
+ pr_debug("%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
+ __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.qpid, qhp, (unsigned long long)qhp->wq.dma_addr,
+ 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
return &qhp->ibqp;
}
@@ -990,7 +986,7 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum iwch_qp_attr_mask mask = 0;
struct iwch_qp_attributes attrs;
- PDBG("%s ib_qp %p\n", __func__, ibqp);
+ pr_debug("%s ib_qp %p\n", __func__, ibqp);
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -1023,20 +1019,20 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void iwch_qp_add_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
atomic_inc(&(to_iwch_qp(qp)->refcnt));
}
void iwch_qp_rem_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
wake_up(&(to_iwch_qp(qp)->wait));
}
static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
}
@@ -1044,7 +1040,7 @@ static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
static int iwch_query_pkey(struct ib_device *ibdev,
u8 port, u16 index, u16 * pkey)
{
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
*pkey = 0;
return 0;
}
@@ -1054,8 +1050,8 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
{
struct iwch_dev *dev;
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
+ pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
dev = to_iwch_dev(ibdev);
BUG_ON(port == 0 || port > 2);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
@@ -1090,7 +1086,7 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
struct iwch_dev *dev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
@@ -1128,7 +1124,7 @@ static int iwch_query_port(struct ib_device *ibdev,
struct net_device *netdev;
struct in_device *inetdev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_iwch_dev(ibdev);
netdev = dev->rdev.port_info.lldevs[port-1];
@@ -1171,7 +1167,7 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
@@ -1183,7 +1179,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
@@ -1193,7 +1189,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
}
@@ -1278,7 +1274,7 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
if (port != 0 || !stats)
return -ENOSYS;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_iwch_dev(ibdev);
ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
if (ret)
@@ -1348,7 +1344,7 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- PDBG("%s dev 0x%p\n", __func__, iwch_dev);
+ pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
snprintf(str, str_len, "%s", info.fw_version);
}
@@ -1358,7 +1354,7 @@ int iwch_register_device(struct iwch_dev *dev)
int ret;
int i;
- PDBG("%s iwch_dev %p\n", __func__, dev);
+ pr_debug("%s iwch_dev %p\n", __func__, dev);
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
@@ -1469,7 +1465,7 @@ void iwch_unregister_device(struct iwch_dev *dev)
{
int i;
- PDBG("%s iwch_dev %p\n", __func__, dev);
+ pr_debug("%s iwch_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 252c464a09f6..9e216edec4c0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -217,8 +217,9 @@ static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
if (mm->key == key && mm->len == len) {
list_del_init(&mm->entry);
spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, key,
+ (unsigned long long)mm->addr, mm->len);
return mm;
}
}
@@ -230,8 +231,8 @@ static inline void insert_mmap(struct iwch_ucontext *ucontext,
struct iwch_mm_entry *mm)
{
spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, mm->key, (unsigned long long)mm->addr, mm->len);
list_add_tail(&mm->entry, &ucontext->mmaps);
spin_unlock(&ucontext->mmap_lock);
}
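
The insert_mmap()/remove_mmap() helpers converted above implement a simple spinlock-protected lookup list; restated as a self-contained sketch (example_* names are hypothetical, the list and spinlock calls are the standard kernel APIs):

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct example_mm_entry {
	struct list_head entry;
	u32 key;
	unsigned int len;
	u64 addr;
};

/* Unlink the matching entry before dropping the lock, so two racing
 * callers can never both return the same mapping. */
static struct example_mm_entry *example_remove_mmap(struct list_head *mmaps,
						    spinlock_t *lock,
						    u32 key, unsigned int len)
{
	struct example_mm_entry *mm, *tmp;

	spin_lock(lock);
	list_for_each_entry_safe(mm, tmp, mmaps, entry) {
		if (mm->key == key && mm->len == len) {
			list_del_init(&mm->entry);
			spin_unlock(lock);
			return mm;
		}
	}
	spin_unlock(lock);
	return NULL;
}
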
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index a9194db7f9b8..ba6d5d281b03 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -208,30 +208,30 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
if (!mhp) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (!mhp->attr.state) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (mhp->attr.zbva) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (sg_list[i].addr < mhp->attr.va_fbo) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
if (sg_list[i].addr + ((u64) sg_list[i].length) <
sg_list[i].addr) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
if (sg_list[i].addr + ((u64) sg_list[i].length) >
mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
offset = sg_list[i].addr - mhp->attr.va_fbo;
@@ -427,8 +427,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
break;
default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
+ pr_debug("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
err = -EINVAL;
}
if (err)
@@ -444,10 +444,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, t3_wr_flit_cnt,
(wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
- PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
- __func__, (unsigned long long) wr->wr_id, idx,
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
- sqp->opcode);
+ pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
+ __func__, (unsigned long long)wr->wr_id, idx,
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
+ sqp->opcode);
wr = wr->next;
num_wrs--;
qhp->wq.wptr += wr_cnt;
@@ -508,9 +508,9 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
- PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
- "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
- idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+ pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n",
+ __func__, (unsigned long long)wr->wr_id,
+ idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
++(qhp->wq.rq_wptr);
++(qhp->wq.wptr);
wr = wr->next;
@@ -664,10 +664,10 @@ int iwch_post_zb_read(struct iwch_ep *ep)
struct sk_buff *skb;
u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
- PDBG("%s enter\n", __func__);
+ pr_debug("%s enter\n", __func__);
skb = alloc_skb(40, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+ pr_err("%s cannot send zb_read!!\n", __func__);
return -ENOMEM;
}
wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
@@ -696,10 +696,10 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
struct terminate_message *term;
struct sk_buff *skb;
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
skb = alloc_skb(40, GFP_ATOMIC);
if (!skb) {
- printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__);
+ pr_err("%s cannot send TERMINATE!\n", __func__);
return -ENOMEM;
}
wqe = (union t3_wr *)skb_put(skb, 40);
@@ -729,7 +729,7 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
int flushed;
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* take a ref on the qhp since we must release the lock */
atomic_inc(&qhp->refcnt);
spin_unlock(&qhp->lock);
@@ -807,7 +807,7 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp)
count++;
wqe++;
}
- PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+ pr_debug("%s qhp %p count %u\n", __func__, qhp, count);
return count;
}
@@ -854,12 +854,12 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
} else
init_attr.rtr_type = 0;
init_attr.irs = qhp->ep->rcv_seq;
- PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
- "flags 0x%x qpcaps 0x%x\n", __func__,
- init_attr.rq_addr, init_attr.rq_size,
- init_attr.flags, init_attr.qpcaps);
+ pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n",
+ __func__,
+ init_attr.rq_addr, init_attr.rq_size,
+ init_attr.flags, init_attr.qpcaps);
ret = cxio_rdma_init(&rhp->rdev, &init_attr);
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
@@ -877,9 +877,9 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
int free = 0;
struct iwch_ep *ep = NULL;
- PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
- (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+ pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
+ qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
+ (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
spin_lock_irqsave(&qhp->lock, flag);
@@ -1034,16 +1034,15 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
goto err;
break;
default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
+ pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
ret = -EINVAL;
goto err;
break;
}
goto out;
err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.qpid);
+ pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.qpid);
/* disassociate the LLP connection */
qhp->attr.llp_stream_handle = NULL;
@@ -1077,6 +1076,6 @@ out:
if (free)
put_ep(&ep->com);
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
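
With the cxgb3 conversion complete, note what the new pr_debug() call sites buy: under CONFIG_DYNAMIC_DEBUG each one can be toggled individually at runtime, which is why the cxgb4 hunk below can mark its c4iw_debug module parameter "obsolete". The usual enable sequence, as a comment-style aide-memoire (the paths are the standard dynamic-debug control interface):

/* Enable every pr_debug() in the iw_cxgb3 module at runtime:
 *
 *   echo 'module iw_cxgb3 +p' > /sys/kernel/debug/dynamic_debug/control
 *
 * or only those in one source file:
 *
 *   echo 'file iwch_cm.c +p' > /sys/kernel/debug/dynamic_debug/control
 *
 * Without CONFIG_DYNAMIC_DEBUG, pr_debug() compiles to a no-op unless the
 * file defines DEBUG.
 */
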
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 03a1b0e64fc3..b6fe45924c6e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -101,7 +101,7 @@ MODULE_PARM_DESC(enable_tcp_window_scaling,
int c4iw_debug;
module_param(c4iw_debug, int, 0644);
-MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
+MODULE_PARM_DESC(c4iw_debug, "obsolete");
static int peer2peer = 1;
module_param(peer2peer, int, 0644);
@@ -180,7 +180,7 @@ static void ref_qp(struct c4iw_ep *ep)
static void start_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (timer_pending(&ep->timer)) {
pr_err("%s timer already started! ep %p\n",
__func__, ep);
@@ -196,7 +196,7 @@ static void start_ep_timer(struct c4iw_ep *ep)
static int stop_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p stopping\n", __func__, ep);
+ pr_debug("%s ep %p stopping\n", __func__, ep);
del_timer_sync(&ep->timer);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
@@ -212,7 +212,7 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_debug("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
@@ -229,7 +229,7 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_debug("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
@@ -263,10 +263,10 @@ static void set_emss(struct c4iw_ep *ep, u16 opt)
if (ep->emss < 128)
ep->emss = 128;
if (ep->emss & 7)
- PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
- TCPOPT_MSS_G(opt), ep->mss, ep->emss);
- PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
- ep->mss, ep->emss);
+ pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ TCPOPT_MSS_G(opt), ep->mss, ep->emss);
+ pr_debug("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
+ ep->mss, ep->emss);
}
static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
@@ -287,7 +287,7 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{
mutex_lock(&epc->mutex);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
__state_set(epc, new);
mutex_unlock(&epc->mutex);
return;
@@ -322,7 +322,7 @@ static void *alloc_ep(int size, gfp_t gfp)
mutex_init(&epc->mutex);
c4iw_init_wr_wait(&epc->wr_wait);
}
- PDBG("%s alloc ep %p\n", __func__, epc);
+ pr_debug("%s alloc ep %p\n", __func__, epc);
return epc;
}
@@ -384,7 +384,7 @@ void _c4iw_free_ep(struct kref *kref)
struct c4iw_ep *ep;
ep = container_of(kref, struct c4iw_ep, com.kref);
- PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
+ pr_debug("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
if (test_bit(QP_REFERENCED, &ep->com.flags))
deref_qp(ep);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
@@ -467,7 +467,7 @@ static struct net_device *get_real_dev(struct net_device *egress_dev)
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
- pr_err(MOD "ARP failure\n");
+ pr_err("ARP failure\n");
kfree_skb(skb);
}
@@ -528,7 +528,7 @@ static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n",
+ pr_err("ARP failure during accept - tid %u - dropping connection\n",
ep->hwtid);
__state_set(&ep->com, DEAD);
@@ -542,7 +542,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- printk(KERN_ERR MOD "ARP failure during connect\n");
+ pr_err("ARP failure during connect\n");
connect_reply_upcall(ep, -EHOSTUNREACH);
__state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
@@ -567,7 +567,7 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
struct c4iw_rdev *rdev = &ep->com.dev->rdev;
struct cpl_abort_req *req = cplhdr(skb);
- PDBG("%s rdev %p\n", __func__, rdev);
+ pr_debug("%s rdev %p\n", __func__, rdev);
req->cmd = CPL_ABORT_NO_RST;
ret = c4iw_ofld_send(rdev, skb);
if (ret) {
@@ -642,7 +642,7 @@ static int send_halfclose(struct c4iw_ep *ep)
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!skb))
return -ENOMEM;
@@ -657,7 +657,7 @@ static int send_abort(struct c4iw_ep *ep)
u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!req_skb))
return -ENOMEM;
@@ -720,12 +720,11 @@ static int send_connect(struct c4iw_ep *ep)
roundup(sizev4, 16) :
roundup(sizev6, 16);
- PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
+ pr_debug("%s ep %p atid %u\n", __func__, ep, ep->atid);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -822,13 +821,13 @@ static int send_connect(struct c4iw_ep *ep)
t5req->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t5req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t5req->rsvd);
t5req->opt2 = cpu_to_be32(opt2);
} else {
t6req->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t6req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t6req->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t6req->rsvd);
t6req->opt2 = cpu_to_be32(opt2);
}
}
@@ -877,13 +876,13 @@ static int send_connect(struct c4iw_ep *ep)
t5req6->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t5req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t5req6->rsvd);
t5req6->opt2 = cpu_to_be32(opt2);
} else {
t6req6->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t6req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t6req6->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t6req6->rsvd);
t6req6->opt2 = cpu_to_be32(opt2);
}
@@ -907,7 +906,8 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
struct mpa_message *mpa;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
BUG_ON(skb_cloned(skb));
@@ -961,8 +961,8 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
if (mpa_rev_to_use == 2) {
mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
sizeof (struct mpa_v2_conn_params));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ pr_debug("%s initiator ird %u ord %u\n", __func__, ep->ird,
+ ep->ord);
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
@@ -1014,7 +1014,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
@@ -1023,7 +1024,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
@@ -1094,7 +1095,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
@@ -1103,7 +1105,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
@@ -1185,8 +1187,8 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
ep = lookup_atid(t, atid);
- PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
- be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
+ pr_debug("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
+ be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
mutex_lock(&ep->com.mutex);
dst_confirm(ep->dst);
@@ -1229,13 +1231,13 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = status;
if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(CLOSE_UPCALL, &ep->com.history);
@@ -1246,12 +1248,12 @@ static void peer_close_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_DISCONNECT;
if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(DISCONN_UPCALL, &ep->com.history);
}
@@ -1261,13 +1263,13 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = -ECONNRESET;
if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
- ep->com.cm_id, ep->hwtid);
+ pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
+ ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(ABORT_UPCALL, &ep->com.history);
@@ -1278,7 +1280,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
+ pr_debug("%s ep %p tid %u status %d\n",
+ __func__, ep, ep->hwtid, status);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
@@ -1307,8 +1310,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
}
}
- PDBG("%s ep %p tid %u status %d\n", __func__, ep,
- ep->hwtid, status);
+ pr_debug("%s ep %p tid %u status %d\n", __func__, ep,
+ ep->hwtid, status);
set_bit(CONN_RPL_UPCALL, &ep->com.history);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1321,7 +1324,7 @@ static int connect_request_upcall(struct c4iw_ep *ep)
struct iw_cm_event event;
int ret;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
memcpy(&event.local_addr, &ep->com.local_addr,
@@ -1358,13 +1361,13 @@ static void established_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
event.ird = ep->ord;
event.ord = ep->ird;
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(ESTAB_UPCALL, &ep->com.history);
}
@@ -1376,10 +1379,11 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
u32 credit_dack;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("%s ep %p tid %u credits %u\n",
+ __func__, ep, ep->hwtid, credits);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ pr_err("update_rx_credits - cannot alloc skb!\n");
return 0;
}
@@ -1427,7 +1431,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
int err;
int disconnect = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1454,8 +1458,8 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
/* Validate MPA header. */
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
err = -EPROTO;
goto err_stop_timer;
}
@@ -1525,8 +1529,9 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
resp_ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
- PDBG("%s responder ird %u ord %u ep ird %u ord %u\n",
- __func__, resp_ird, resp_ord, ep->ird, ep->ord);
+ pr_debug("%s responder ird %u ord %u ep ird %u ord %u\n",
+ __func__,
+ resp_ird, resp_ord, ep->ird, ep->ord);
/*
* This is a double-check. Ideally, below checks are
@@ -1570,12 +1575,11 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
- "%d\n", __func__, ep->mpa_attr.crc_enabled,
- ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type, p2p_type);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
+ __func__, ep->mpa_attr.crc_enabled,
+ ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type, p2p_type);
/*
* If responder's RTR does not match with that of initiator, assign
@@ -1610,7 +1614,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* supports, generate TERM message
*/
if (rtr_mismatch) {
- printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+ pr_err("%s: RTR mismatch, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1629,8 +1633,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* initiator ORD.
*/
if (insuff_ird) {
- printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
- __func__);
+ pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1669,7 +1672,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1678,7 +1681,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
goto err_stop_timer;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
/*
* Copy the new data into our accumulation buffer.
@@ -1694,15 +1697,15 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len < sizeof(*mpa))
return 0;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
* Validate MPA Header.
*/
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
goto err_stop_timer;
}
@@ -1757,8 +1760,8 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
ep->ord = min_t(u32, ep->ord,
cur_max_read_depth(ep->com.dev));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ pr_debug("%s initiator ird %u ord %u\n",
+ __func__, ep->ird, ep->ord);
if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
if (peer2peer) {
if (ntohs(mpa_v2_params->ord) &
@@ -1775,11 +1778,11 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type);
__state_set(&ep->com, MPA_REQ_RCVD);
@@ -1815,7 +1818,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
+ pr_debug("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
mutex_lock(&ep->com.mutex);
@@ -1866,10 +1869,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep) {
- printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
+ pr_warn("Abort rpl to freed endpoint\n");
return 0;
}
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case ABORTING:
@@ -1878,8 +1881,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
release = 1;
break;
default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
+ pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
break;
}
mutex_unlock(&ep->com.mutex);
@@ -1995,7 +1997,8 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
{
ep->snd_win = snd_win;
ep->rcv_win = rcv_win;
- PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+ pr_debug("%s snd_win %d rcv_win %d\n",
+ __func__, ep->snd_win, ep->rcv_win);
}
#define ACT_OPEN_RETRY_COUNT 2
@@ -2100,7 +2103,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
int iptype;
__u8 *ra;
- PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+ pr_debug("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
init_timer(&ep->timer);
c4iw_init_wr_wait(&ep->com.wr_wait);
@@ -2124,7 +2127,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
*/
ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- pr_err("%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -2151,7 +2154,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
ra = (__u8 *)&raddr6->sin6_addr;
}
if (!ep->dst) {
- pr_err("%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -2159,13 +2162,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
ep->com.dev->rdev.lldi.adapter_type,
ep->com.cm_id->tos);
if (err) {
- pr_err("%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = ep->com.cm_id->tos;
@@ -2215,12 +2218,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
- PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
- status, status2errno(status));
+ pr_debug("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
+ status, status2errno(status));
if (cxgb_is_neg_adv(status)) {
- PDBG("%s Connection problems for atid %u status %u (%s)\n",
- __func__, atid, status, neg_adv_str(status));
+ pr_debug("%s Connection problems for atid %u status %u (%s)\n",
+ __func__, atid, status, neg_adv_str(status));
ep->stats.connect_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
@@ -2315,11 +2318,11 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
if (!ep) {
- PDBG("%s stid %d lookup failure!\n", __func__, stid);
+ pr_debug("%s stid %d lookup failure!\n", __func__, stid);
goto out;
}
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
+ pr_debug("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
out:
@@ -2332,7 +2335,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int stid = GET_TID(rpl);
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
return 0;
@@ -2350,7 +2353,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
int win;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
skb_get(skb);
@@ -2421,7 +2424,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (peer2peer)
isn += 4;
rpl5->iss = cpu_to_be32(isn);
- PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
+ pr_debug("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
}
rpl->opt0 = cpu_to_be64(opt0);
@@ -2434,7 +2437,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
{
- PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
+ pr_debug("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
release_tid(&dev->rdev, hwtid, skb);
@@ -2460,12 +2463,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_debug("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
if (state_read(&parent_ep->com) != LISTEN) {
- PDBG("%s - listening ep not in LISTEN\n", __func__);
+ pr_debug("%s - listening ep not in LISTEN\n", __func__);
goto reject;
}
@@ -2474,18 +2478,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
/* Find output route */
if (iptype == 4) {
- PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
*(__be32 *)local_ip, *(__be32 *)peer_ip,
local_port, peer_port, tos);
} else {
- PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
local_ip, peer_ip, local_port, peer_port,
PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
@@ -2493,15 +2497,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
&parent_ep->com.local_addr)->sin6_scope_id);
}
if (!dst) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
+ pr_err("%s - failed to allocate ep entry!\n", __func__);
dst_release(dst);
goto reject;
}
@@ -2509,8 +2511,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
parent_ep->com.dev->rdev.lldi.adapter_type, tos);
if (err) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
+ pr_err("%s - failed to allocate l2t entry!\n", __func__);
dst_release(dst);
kfree(child_ep);
goto reject;
@@ -2571,8 +2572,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
child_ep->dst = dst;
child_ep->hwtid = hwtid;
- PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
- child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
+ pr_debug("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
+ child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
init_timer(&child_ep->timer);
cxgb4_insert_tid(t, child_ep, hwtid);
@@ -2607,12 +2608,12 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
int ret;
ep = get_ep_from_tid(dev, tid);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
- PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
- ntohs(req->tcp_opt));
+ pr_debug("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
+ ntohs(req->tcp_opt));
set_emss(ep, ntohs(req->tcp_opt));
@@ -2644,7 +2645,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
dst_confirm(ep->dst);
set_bit(PEER_CLOSE, &ep->com.history);
@@ -2666,12 +2667,12 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
* in rdma connection migration (see c4iw_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case FPDU_MODE:
@@ -2735,17 +2736,17 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n",
+ __func__, ep->hwtid, req->status,
+ neg_adv_str(req->status));
ep->stats.abort_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
mutex_unlock(&dev->rdev.stats.lock);
goto deref_ep;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
set_bit(PEER_ABORT, &ep->com.history);
/*
@@ -2777,8 +2778,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
* do some housekeeping so as to re-initiate the
* connection
*/
- PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
- mpa_rev);
+ pr_debug("%s: mpa_rev=%d. Retrying with mpav1\n",
+ __func__, mpa_rev);
ep->retry_with_mpa_v1 = 1;
}
break;
@@ -2797,16 +2798,14 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
+ pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep);
break;
case ABORTING:
break;
case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
mutex_unlock(&ep->com.mutex);
goto deref_ep;
default:
@@ -2870,7 +2869,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
@@ -2918,13 +2917,13 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
BUG_ON(!ep);
if (ep && ep->com.qp) {
- printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
- ep->com.qp->wq.sq.qid);
+ pr_warn("TERM received tid %u qpid %u\n",
+ tid, ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
} else
- printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
+ pr_warn("TERM received tid %u no ep/qp\n", tid);
c4iw_put_ep(&ep->com);
return 0;
@@ -2946,18 +2945,19 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("%s ep %p tid %u credits %u\n",
+ __func__, ep, ep->hwtid, credits);
if (credits == 0) {
- PDBG("%s 0 credit ack ep %p tid %u state %u\n",
- __func__, ep, ep->hwtid, state_read(&ep->com));
+ pr_debug("%s 0 credit ack ep %p tid %u state %u\n",
+ __func__, ep, ep->hwtid, state_read(&ep->com));
goto out;
}
dst_confirm(ep->dst);
if (ep->mpa_skb) {
- PDBG("%s last streaming msg ack ep %p tid %u state %u "
- "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
- state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
+ pr_debug("%s last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
+ __func__, ep, ep->hwtid,
+ state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
mutex_lock(&ep->com.mutex);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
@@ -2975,7 +2975,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
int abort;
struct c4iw_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -3006,7 +3006,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
int abort = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -3059,7 +3059,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ird = 1;
}
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
@@ -3188,7 +3188,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto out;
}
@@ -3215,20 +3215,20 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.dev = dev;
ep->com.qp = get_qhp(dev, conn_param->qpn);
if (!ep->com.qp) {
- PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
+ pr_debug("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
goto fail2;
}
ref_qp(ep);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
+ pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
/*
* Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -3258,9 +3258,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
- __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
- ra, ntohs(raddr->sin_port));
+ pr_debug("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
+ __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
+ ra, ntohs(raddr->sin_port));
ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
laddr->sin_addr.s_addr,
raddr->sin_addr.s_addr,
@@ -3280,10 +3280,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
- __func__, laddr6->sin6_addr.s6_addr,
- ntohs(laddr6->sin6_port),
- raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
+ pr_debug("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
+ __func__, laddr6->sin6_addr.s6_addr,
+ ntohs(laddr6->sin6_port),
+ raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
@@ -3292,7 +3292,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
raddr6->sin6_scope_id);
}
if (!ep->dst) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -3300,13 +3300,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
if (err) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = cm_id->tos;
@@ -3414,12 +3414,12 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto fail1;
}
skb_queue_head_init(&ep->com.ep_skb_list);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
ep->com.dev = dev;
@@ -3439,7 +3439,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->m_local_addr.ss_family, ep);
if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
+ pr_err("%s - cannot alloc stid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -3473,7 +3473,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
int err;
struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
might_sleep();
state_set(&ep->com, DEAD);
@@ -3514,8 +3514,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
+ pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
/*
* Ref the ep here in case we have fatal errors causing the
@@ -3568,8 +3568,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
@@ -3600,8 +3600,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- pr_err(MOD
- "%s - qp <- error failed!\n",
+ pr_err("%s - qp <- error failed!\n",
__func__);
}
fatal = 1;
@@ -3674,7 +3673,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
BUG_ON(!rpl_skb);
if (req->retval) {
- PDBG("%s passive open failure %d\n", __func__, req->retval);
+ pr_debug("%s passive open failure %d\n", __func__, req->retval);
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.pas_ofld_conn_fails++;
mutex_unlock(&dev->rdev.stats.lock);
@@ -3800,6 +3799,8 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
int ret;
req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
+ if (!req_skb)
+ return;
req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
memset(req, 0, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
@@ -3890,7 +3891,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!lep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_debug("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
@@ -3927,9 +3929,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
skb_set_transport_header(skb, (void *)tcph - (void *)rss);
skb_get(skb);
- PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
- ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
- ntohs(tcph->source), iph->tos);
+ pr_debug("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
+ ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
+ ntohs(tcph->source), iph->tos);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
iph->daddr, iph->saddr, tcph->dest,
@@ -4026,8 +4028,8 @@ static void process_timeout(struct c4iw_ep *ep)
int abort = 1;
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
set_bit(TIMEDOUT, &ep->com.history);
switch (ep->com.state) {
case MPA_REQ_SENT:
@@ -4157,8 +4159,8 @@ static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
+ pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
}
kfree_skb(skb);
return 0;
@@ -4170,13 +4172,13 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_wr_wait *wr_waitp;
int ret;
- PDBG("%s type %u\n", __func__, rpl->type);
+ pr_debug("%s type %u\n", __func__, rpl->type);
switch (rpl->type) {
case FW6_TYPE_WR_RPL:
ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
- PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
+ pr_debug("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
if (wr_waitp)
c4iw_wake_up(wr_waitp, ret ? -ret : 0);
kfree_skb(skb);
@@ -4186,8 +4188,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
sched(dev, skb);
break;
default:
- printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
- rpl->type);
+ pr_err("%s unexpected fw6 msg type %u\n",
+ __func__, rpl->type);
kfree_skb(skb);
break;
}
@@ -4203,19 +4205,18 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
/* This EP will be dereferenced in peer_abort() */
if (!ep) {
- printk(KERN_WARNING MOD
- "Abort on non-existent endpoint, tid %d\n", tid);
+ pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
kfree_skb(skb);
return 0;
}
if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n",
+ __func__, ep->hwtid, req->status,
+ neg_adv_str(req->status));
goto out;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
out:
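
For reference, a minimal sketch (not from this patch) of the defensive allocation pattern the send_fw_pass_open_req() hunk above adds: the alloc_skb() result is checked before __skb_put(), since a GFP_KERNEL allocation can fail. demo_send() is a hypothetical helper, not a driver function.

	#include <linux/skbuff.h>

	static void demo_send(unsigned int len)
	{
		struct sk_buff *skb = alloc_skb(len, GFP_KERNEL);

		if (!skb)		/* bail out instead of dereferencing NULL */
			return;
		/* safe: skb is known valid here */
		memset(__skb_put(skb, len), 0, len);
		kfree_skb(skb);		/* sketch only; a real caller would hand it off */
	}
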
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index bec82a600d77..14de5bde1b63 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -146,7 +146,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
ret = c4iw_ofld_send(rdev, skb);
if (ret)
goto err4;
- PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait);
+ pr_debug("%s wait_event wr_wait %p\n", __func__, &wr_wait);
ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
if (ret)
goto err4;
@@ -159,7 +159,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) {
- pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
+ pr_warn("%s: cqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), cq->cqid);
ret = -EINVAL;
goto err4;
@@ -180,8 +180,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(FW_RI_SEND) |
@@ -199,8 +199,8 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
int in_use = wq->rq.in_use - count;
BUG_ON(in_use < 0);
- PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
- wq, cq, wq->rq.in_use, count);
+ pr_debug("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
+ wq, cq, wq->rq.in_use, count);
while (in_use--) {
insert_recv_cqe(wq, cq);
flushed++;
@@ -213,8 +213,8 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(swcqe->opcode) |
@@ -283,8 +283,8 @@ static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
/*
* Insert this completed cqe into the swcq.
*/
- PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
- __func__, cidx, cq->sw_pidx);
+ pr_debug("%s moving cqe into swcq sq idx %u cq idx %u\n",
+ __func__, cidx, cq->sw_pidx);
swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
t4_swcq_produce(cq);
@@ -339,7 +339,7 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
struct t4_swsqe *swsqe;
int ret;
- PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+ pr_debug("%s cqid 0x%x\n", __func__, chp->cq.cqid);
ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
/*
@@ -432,7 +432,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
u32 ptr;
*count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
+ pr_debug("%s count zero %d\n", __func__, *count);
ptr = cq->sw_cidx;
while (ptr != cq->sw_pidx) {
cqe = &cq->sw_queue[ptr];
@@ -442,7 +442,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
if (++ptr == cq->size)
ptr = 0;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
/*
@@ -473,12 +473,11 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (ret)
return ret;
- PDBG("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
- CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
- CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
- CQE_WRID_LOW(hw_cqe));
+ pr_debug("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
+ CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
+ CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
+ CQE_WRID_LOW(hw_cqe));
/*
* skip cqe's not affiliated with a QP.
@@ -606,8 +605,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
struct t4_swsqe *swsqe;
- PDBG("%s out of order completion going in sw_sq at idx %u\n",
- __func__, CQE_WRID_SQ_IDX(hw_cqe));
+ pr_debug("%s out of order completion going in sw_sq at idx %u\n",
+ __func__, CQE_WRID_SQ_IDX(hw_cqe));
swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
swsqe->cqe = *hw_cqe;
swsqe->complete = 1;
@@ -641,13 +640,13 @@ proc_cqe:
BUG_ON(wq->sq.in_use <= 0 || wq->sq.in_use >= wq->sq.size);
wq->sq.cidx = (uint16_t)idx;
- PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
+ pr_debug("%s completing sq idx %u\n", __func__, wq->sq.cidx);
*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
if (c4iw_wr_log)
c4iw_log_wr_stats(wq, hw_cqe);
t4_sq_consume(wq);
} else {
- PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx);
+ pr_debug("%s completing rq idx %u\n", __func__, wq->rq.cidx);
*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
BUG_ON(t4_rq_empty(wq));
if (c4iw_wr_log)
@@ -664,12 +663,12 @@ flush_wq:
skip_cqe:
if (SW_CQE(hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->sw_cidx);
+ pr_debug("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->sw_cidx);
t4_swcq_consume(cq);
} else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->cidx);
+ pr_debug("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->cidx);
t4_hwcq_consume(cq);
}
return ret;
@@ -715,10 +714,12 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->vendor_err = CQE_STATUS(&cqe);
wc->wc_flags = 0;
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe),
- CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe),
- CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), (unsigned long long)cookie);
+ pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
+ __func__, CQE_QPID(&cqe),
+ CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
+ CQE_STATUS(&cqe), CQE_LEN(&cqe),
+ CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
+ (unsigned long long)cookie);
if (CQE_TYPE(&cqe) == 0) {
if (!CQE_STATUS(&cqe))
@@ -766,8 +767,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->opcode = IB_WC_SEND;
break;
default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(&cqe), CQE_QPID(&cqe));
ret = -EINVAL;
goto out;
@@ -822,8 +822,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->status = IB_WC_WR_FLUSH_ERR;
break;
default:
- printk(KERN_ERR MOD
- "Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
CQE_STATUS(&cqe), CQE_QPID(&cqe));
wc->status = IB_WC_FATAL_ERR;
}
@@ -860,7 +859,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ pr_debug("%s ib_cq %p\n", __func__, ib_cq);
chp = to_c4iw_cq(ib_cq);
remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
@@ -892,7 +891,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -998,9 +997,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
mm2->len = PAGE_SIZE;
insert_mmap(ucontext, mm2);
}
- PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
- __func__, chp->cq.cqid, chp, chp->cq.size,
- chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
+ pr_debug("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
+ __func__, chp->cq.cqid, chp, chp->cq.size,
+ chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
return &chp->ibcq;
err6:
kfree(mm2);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 4e4f1a732b01..329fb65e8fb0 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -334,7 +334,7 @@ static int qp_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd = file->private_data;
if (!qpd) {
- printk(KERN_INFO "%s null qpd?\n", __func__);
+ pr_info("%s null qpd?\n", __func__);
return 0;
}
vfree(qpd->buf);
@@ -422,7 +422,7 @@ static int stag_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *stagd = file->private_data;
if (!stagd) {
- printk(KERN_INFO "%s null stagd?\n", __func__);
+ pr_info("%s null stagd?\n", __func__);
return 0;
}
vfree(stagd->buf);
@@ -796,15 +796,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
* cqid and qpid range must match for now.
*/
if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
- pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
+ pr_err("%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
- pr_err(MOD "%s: unsupported qp and cq id ranges "
- "qp start %u size %u cq start %u size %u\n",
+ pr_err("%s: unsupported qp and cq id ranges qp start %u size %u cq start %u size %u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start,
rdev->lldi.vr->cq.size);
@@ -813,23 +812,20 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->qpmask = rdev->lldi.udb_density - 1;
rdev->cqmask = rdev->lldi.ucq_density - 1;
- PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
- "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
- "qp qid start %u size %u cq qid start %u size %u\n",
- __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
- rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
- rdev->lldi.vr->pbl.start,
- rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
- rdev->lldi.vr->rq.size,
- rdev->lldi.vr->qp.start,
- rdev->lldi.vr->qp.size,
- rdev->lldi.vr->cq.start,
- rdev->lldi.vr->cq.size);
- PDBG("udb %pR db_reg %p gts_reg %p "
- "qpmask 0x%x cqmask 0x%x\n",
- &rdev->lldi.pdev->resource[2],
- rdev->lldi.db_reg, rdev->lldi.gts_reg,
- rdev->qpmask, rdev->cqmask);
+ pr_debug("%s dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
+ __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
+ rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
+ rdev->lldi.vr->pbl.start,
+ rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
+ rdev->lldi.vr->rq.size,
+ rdev->lldi.vr->qp.start,
+ rdev->lldi.vr->qp.size,
+ rdev->lldi.vr->cq.start,
+ rdev->lldi.vr->cq.size);
+ pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
+ &rdev->lldi.pdev->resource[2],
+ rdev->lldi.db_reg, rdev->lldi.gts_reg,
+ rdev->qpmask, rdev->cqmask);
if (c4iw_num_stags(rdev) == 0)
return -EINVAL;
@@ -843,22 +839,22 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
if (err) {
- printk(KERN_ERR MOD "error %d initializing resources\n", err);
+ pr_err("error %d initializing resources\n", err);
return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
+ pr_err("error %d initializing pbl pool\n", err);
goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
+ pr_err("error %d initializing rqt pool\n", err);
goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
+ pr_err("error %d initializing ocqp pool\n", err);
goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
@@ -936,7 +932,7 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
static void c4iw_remove(struct uld_ctx *ctx)
{
- PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, ctx->dev);
c4iw_unregister_device(ctx->dev);
c4iw_dealloc(ctx);
}
@@ -954,25 +950,25 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
int ret;
if (!rdma_supported(infop)) {
- printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
- pci_name(infop->pdev));
+ pr_info("%s: RDMA not supported on this device\n",
+ pci_name(infop->pdev));
return ERR_PTR(-ENOSYS);
}
if (!ocqp_supported(infop))
- pr_info("%s: On-Chip Queues not supported on this device.\n",
+ pr_info("%s: On-Chip Queues not supported on this device\n",
pci_name(infop->pdev));
devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
if (!devp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ pr_err("Cannot allocate ib device\n");
return ERR_PTR(-ENOMEM);
}
devp->rdev.lldi = *infop;
/* init various hw-queue params based on lld info */
- PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
- __func__, devp->rdev.lldi.sge_ingpadboundary,
- devp->rdev.lldi.sge_egrstatuspagesize);
+ pr_debug("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
+ __func__, devp->rdev.lldi.sge_ingpadboundary,
+ devp->rdev.lldi.sge_egrstatuspagesize);
devp->rdev.hw_queue.t4_eq_status_entries =
devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
@@ -1000,7 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
pci_resource_len(devp->rdev.lldi.pdev, 2));
if (!devp->rdev.bar2_kva) {
- pr_err(MOD "Unable to ioremap BAR2\n");
+ pr_err("Unable to ioremap BAR2\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
@@ -1012,20 +1008,19 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
devp->rdev.lldi.vr->ocq.size);
if (!devp->rdev.oc_mw_kva) {
- pr_err(MOD "Unable to ioremap onchip mem\n");
+ pr_err("Unable to ioremap onchip mem\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
}
- PDBG(KERN_INFO MOD "ocq memory: "
- "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
- devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
- devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
+ pr_debug("ocq memory: hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
+ devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
+ devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
ret = c4iw_rdev_open(&devp->rdev);
if (ret) {
- printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
+ pr_err("Unable to open CXIO rdev err %d\n", ret);
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(ret);
}
@@ -1071,17 +1066,17 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
}
ctx->lldi = *infop;
- PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
- __func__, pci_name(ctx->lldi.pdev),
- ctx->lldi.nchan, ctx->lldi.nrxq,
- ctx->lldi.ntxq, ctx->lldi.nports);
+ pr_debug("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
+ __func__, pci_name(ctx->lldi.pdev),
+ ctx->lldi.nchan, ctx->lldi.nrxq,
+ ctx->lldi.ntxq, ctx->lldi.nports);
mutex_lock(&dev_mutex);
list_add_tail(&ctx->entry, &uld_ctx_list);
mutex_unlock(&dev_mutex);
for (i = 0; i < ctx->lldi.nrxq; i++)
- PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
+ pr_debug("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
out:
return ctx;
}
@@ -1138,8 +1133,7 @@ static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
goto out;
if (c4iw_handlers[opcode] == NULL) {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
goto out;
}
@@ -1176,13 +1170,11 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (recv_rx_pkt(dev, gl, rsp))
return 0;
- pr_info("%s: unexpected FL contents at %p, " \
- "RSS %#llx, FL %#llx, len %u\n",
- pci_name(ctx->lldi.pdev), gl->va,
- (unsigned long long)be64_to_cpu(*rsp),
- (unsigned long long)be64_to_cpu(
- *(__force __be64 *)gl->va),
- gl->tot_len);
+ pr_info("%s: unexpected FL contents at %p, RSS %#llx, FL %#llx, len %u\n",
+ pci_name(ctx->lldi.pdev), gl->va,
+ be64_to_cpu(*rsp),
+ be64_to_cpu(*(__force __be64 *)gl->va),
+ gl->tot_len);
return 0;
} else {
@@ -1195,8 +1187,7 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (c4iw_handlers[opcode]) {
c4iw_handlers[opcode](dev, skb);
} else {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
}
@@ -1209,17 +1200,16 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
{
struct uld_ctx *ctx = handle;
- PDBG("%s new_state %u\n", __func__, new_state);
+ pr_debug("%s new_state %u\n", __func__, new_state);
switch (new_state) {
case CXGB4_STATE_UP:
- printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
+ pr_info("%s: Up\n", pci_name(ctx->lldi.pdev));
if (!ctx->dev) {
int ret;
ctx->dev = c4iw_alloc(&ctx->lldi);
if (IS_ERR(ctx->dev)) {
- printk(KERN_ERR MOD
- "%s: initialization failed: %ld\n",
+ pr_err("%s: initialization failed: %ld\n",
pci_name(ctx->lldi.pdev),
PTR_ERR(ctx->dev));
ctx->dev = NULL;
@@ -1227,22 +1217,19 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
}
ret = c4iw_register_device(ctx->dev);
if (ret) {
- printk(KERN_ERR MOD
- "%s: RDMA registration failed: %d\n",
+ pr_err("%s: RDMA registration failed: %d\n",
pci_name(ctx->lldi.pdev), ret);
c4iw_dealloc(ctx);
}
}
break;
case CXGB4_STATE_DOWN:
- printk(KERN_INFO MOD "%s: Down\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Down\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
case CXGB4_STATE_START_RECOVERY:
- printk(KERN_INFO MOD "%s: Fatal Error\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
struct ib_event event;
@@ -1255,8 +1242,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
}
break;
case CXGB4_STATE_DETACH:
- printk(KERN_INFO MOD "%s: Detach\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Detach\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
@@ -1406,9 +1392,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_sq_host_wq_pidx(&qp->wq),
t4_sq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing SQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
spin_unlock(&qp->lock);
spin_unlock_irq(&qp->rhp->lock);
@@ -1422,9 +1406,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_rq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing RQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
spin_unlock(&qp->lock);
spin_unlock_irq(&qp->rhp->lock);
@@ -1455,7 +1437,7 @@ static void recover_queues(struct uld_ctx *ctx)
/* flush the SGE contexts */
ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
return;
}
@@ -1513,8 +1495,8 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
default:
- printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
- pci_name(ctx->lldi.pdev), control);
+ pr_warn("%s: unknown control cmd %u\n",
+ pci_name(ctx->lldi.pdev), control);
break;
}
return 0;
@@ -1543,8 +1525,7 @@ static int __init c4iw_init_module(void)
c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
if (!c4iw_debugfs_root)
- printk(KERN_WARNING MOD
- "could not create debugfs entry, continuing\n");
+ pr_warn("could not create debugfs entry, continuing\n");
cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index bdfac2ccb704..8f963df0bffc 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -47,17 +47,16 @@ static void print_tpte(struct c4iw_dev *dev, u32 stag)
"%s cxgb4_read_tpte err %d\n", __func__, ret);
return;
}
- PDBG("stag idx 0x%x valid %d key 0x%x state %d pdid %d "
- "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
- stag & 0xffffff00,
- FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
- FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
- ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
- ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
+ pr_debug("stag idx 0x%x valid %d key 0x%x state %d pdid %d perm 0x%x ps %d len 0x%llx va 0x%llx\n",
+ stag & 0xffffff00,
+ FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
+ FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
+ ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
+ ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
}
static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -71,9 +70,9 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
- PDBG("%016llx %016llx %016llx %016llx\n",
- be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
- be64_to_cpu(p[3]));
+ pr_debug("%016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
+ be64_to_cpu(p[3]));
/*
* Ingress WRITE and READ_RESP errors provide
@@ -124,8 +123,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
spin_lock_irq(&dev->lock);
qhp = get_qhp(dev, CQE_QPID(err_cqe));
if (!qhp) {
- printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
@@ -140,8 +138,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
cqid = qhp->attr.rcq;
chp = get_chp(dev, cqid);
if (!chp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
cqid, CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
@@ -165,7 +162,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
/* Completion Events */
case T4_ERR_SUCCESS:
- printk(KERN_ERR MOD "AE with status 0!\n");
+ pr_err("AE with status 0!\n");
break;
case T4_ERR_STAG:
@@ -207,7 +204,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
break;
default:
- printk(KERN_ERR MOD "Unknown T4 status 0x%x QPID 0x%x\n",
+ pr_err("Unknown T4 status 0x%x QPID 0x%x\n",
CQE_STATUS(err_cqe), qhp->wq.sq.qid);
post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
break;
@@ -237,7 +234,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
if (atomic_dec_and_test(&chp->refcnt))
wake_up(&chp->wait);
} else {
- PDBG("%s unknown cqid 0x%x\n", __func__, qid);
+ pr_debug("%s unknown cqid 0x%x\n", __func__, qid);
spin_unlock_irqrestore(&dev->lock, flag);
}
return 0;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 5846c47c8d55..819a30635d53 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -64,12 +64,11 @@
#define DRV_NAME "iw_cxgb4"
#define MOD DRV_NAME ":"
-extern int c4iw_debug;
-#define PDBG(fmt, args...) \
-do { \
- if (c4iw_debug) \
- printk(MOD fmt, ## args); \
-} while (0)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "t4.h"
@@ -231,15 +230,15 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
ret = wait_for_completion_timeout(&wr_waitp->completion, C4IW_WR_TO);
if (!ret) {
- PDBG("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
- func, pci_name(rdev->lldi.pdev), hwtid, qpid);
+ pr_debug("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
+ func, pci_name(rdev->lldi.pdev), hwtid, qpid);
rdev->flags |= T4_FATAL_ERROR;
wr_waitp->ret = -EIO;
}
out:
if (wr_waitp->ret)
- PDBG("%s: FW reply %d tid %u qpid %u\n",
- pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+ pr_debug("%s: FW reply %d tid %u qpid %u\n",
+ pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
return wr_waitp->ret;
}
@@ -538,8 +537,9 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
if (mm->key == key && mm->len == len) {
list_del_init(&mm->entry);
spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, key,
+ (unsigned long long)mm->addr, mm->len);
return mm;
}
}
@@ -551,8 +551,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext,
struct c4iw_mm_entry *mm)
{
spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, mm->key, (unsigned long long)mm->addr, mm->len);
list_add_tail(&mm->entry, &ucontext->mmaps);
spin_unlock(&ucontext->mmap_lock);
}
@@ -670,17 +670,19 @@ enum c4iw_mmid_state {
#define MPA_V2_RDMA_READ_RTR 0x4000
#define MPA_V2_IRD_ORD_MASK 0x3FFF
-#define c4iw_put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- WARN_ON(kref_read(&((ep)->kref)) < 1); \
- kref_put(&((ep)->kref), _c4iw_free_ep); \
+#define c4iw_put_ep(ep) { \
+ pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
+ __func__, __LINE__, \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
+ kref_put(&((ep)->kref), _c4iw_free_ep); \
}
-#define c4iw_get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- kref_get(&((ep)->kref)); \
+#define c4iw_get_ep(ep) { \
+ pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
+ __func__, __LINE__, \
+ ep, kref_read(&((ep)->kref))); \
+ kref_get(&((ep)->kref)); \
}
void _c4iw_free_ep(struct kref *kref);
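
For reference, a self-contained sketch (not from this patch) of the logging mechanism the iw_cxgb4.h hunk above switches to: pr_fmt() is defined before <linux/printk.h> is pulled in, so every pr_*() call carries the KBUILD_MODNAME prefix that the old MOD macro pasted in by hand, and pr_debug() is gated by DEBUG/CONFIG_DYNAMIC_DEBUG instead of the driver-private c4iw_debug flag. The demo_* names are hypothetical.

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	#include <linux/module.h>
	#include <linux/printk.h>

	static int __init demo_init(void)
	{
		pr_debug("%s loaded\n", __func__);	/* compiled out unless DEBUG or dynamic debug */
		pr_err("%s sample error\n", __func__);	/* emitted with the "demo: " prefix */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		pr_info("%s unloading\n", __func__);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
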
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 410408f886c1..3ee7f43e419a 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -38,9 +38,9 @@
#include "iw_cxgb4.h"
-int use_dsgl = 0;
+int use_dsgl = 1;
module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1) (DEPRECATED)");
#define T4_ULPTX_MIN_IO 32
#define C4IW_MAX_INLINE_SIZE 96
@@ -125,7 +125,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F);
addr &= 0x7FFFFFF;
- PDBG("%s addr 0x%x len %u\n", __func__, addr, len);
+ pr_debug("%s addr 0x%x len %u\n", __func__, addr, len);
num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE);
c4iw_init_wr_wait(&wr_wait);
for (i = 0; i < num_wqe; i++) {
@@ -231,13 +231,11 @@ out:
static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
void *data, struct sk_buff *skb)
{
- if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
+ if (rdev->lldi.ulptx_memwrite_dsgl && use_dsgl) {
if (len > inline_threshold) {
if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
- printk_ratelimited(KERN_WARNING
- "%s: dma map"
- " failure (non fatal)\n",
- pci_name(rdev->lldi.pdev));
+ pr_warn_ratelimited("%s: dma map failure (non fatal)\n",
+ pci_name(rdev->lldi.pdev));
return _c4iw_write_mem_inline(rdev, addr, len,
data, skb);
} else {
@@ -289,8 +287,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_unlock(&rdev->stats.lock);
*stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
}
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
+ pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
/* write TPT entry */
if (reset_tpt_entry)
@@ -331,9 +329,9 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
{
int err;
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev->lldi.vr->pbl.start,
- pbl_size);
+ pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev->lldi.vr->pbl.start,
+ pbl_size);
err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
return err;
@@ -376,7 +374,7 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
@@ -426,7 +424,7 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
int ret;
u32 stag = T4_STAG_UNSET;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_c4iw_pd(pd);
rhp = php->rhp;
@@ -483,7 +481,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct c4iw_pd *php;
struct c4iw_mr *mhp;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (length == ~0ULL)
return ERR_PTR(-EINVAL);
@@ -517,7 +515,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
- shift = ffs(mhp->umem->page_size) - 1;
+ shift = mhp->umem->page_shift;
n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
@@ -536,7 +534,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
+ (k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
@@ -620,7 +618,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
ret = -ENOMEM;
goto dealloc_win;
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
dealloc_win:
@@ -645,7 +643,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
kfree_skb(mhp->dereg_skb);
kfree(mhp);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
return 0;
}
@@ -703,7 +701,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
goto err3;
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
err3:
dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
@@ -748,7 +746,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
struct c4iw_mr *mhp;
u32 mmid;
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ pr_debug("%s ib_mr %p\n", __func__, ib_mr);
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
@@ -766,7 +764,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
kfree(mhp);
return 0;
}
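
Two functional changes hide among the logging conversions in this file: the DSGL path in write_adapter_mem() is now gated on the LLD capability flag rather than an is_t5() check (with use_dsgl defaulting on and marked deprecated), and the umem code switches from page_size arithmetic to page_shift, so `page_size * k` becomes `k << shift`. The fallback logic around the DSGL write, condensed into a hypothetical sketch with demo_* stand-ins for the real helpers:

	#include <linux/errno.h>
	#include <linux/printk.h>
	#include <linux/types.h>

	static int use_dsgl = 1;		/* mirrors the module parameter */
	static int inline_threshold = 128;	/* illustrative value */

	struct demo_dev {
		bool dsgl_capable;		/* mirrors lldi.ulptx_memwrite_dsgl */
	};

	/* stand-in for _c4iw_write_mem_dma(); can fail if DMA mapping fails */
	static int demo_write_dma(struct demo_dev *dev, u32 addr, u32 len, void *data)
	{
		return -EIO;
	}

	/* stand-in for _c4iw_write_mem_inline(); slower but always available */
	static int demo_write_inline(struct demo_dev *dev, u32 addr, u32 len, void *data)
	{
		return 0;
	}

	static int demo_write_mem(struct demo_dev *dev, u32 addr, u32 len, void *data)
	{
		if (dev->dsgl_capable && use_dsgl && len > inline_threshold) {
			if (!demo_write_dma(dev, addr, len, data))
				return 0;
			/* non-fatal: degrade to the inline path */
			pr_warn_ratelimited("dma map failure (non fatal)\n");
		}
		return demo_write_inline(dev, addr, len, data);
	}
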
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index df64417ab6f2..6770a314b086 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -102,7 +102,7 @@ void _c4iw_free_ucontext(struct kref *kref)
ucontext = container_of(kref, struct c4iw_ucontext, kref);
rhp = to_c4iw_dev(ucontext->ibucontext.device);
- PDBG("%s ucontext %p\n", __func__, ucontext);
+ pr_debug("%s ucontext %p\n", __func__, ucontext);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
@@ -113,7 +113,7 @@ static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
- PDBG("%s context %p\n", __func__, context);
+ pr_debug("%s context %p\n", __func__, context);
c4iw_put_ucontext(ucontext);
return 0;
}
@@ -123,12 +123,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
{
struct c4iw_ucontext *context;
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
- static int warned;
struct c4iw_alloc_ucontext_resp uresp;
int ret = 0;
struct c4iw_mm_entry *mm = NULL;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context) {
ret = -ENOMEM;
@@ -141,8 +140,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
- if (!warned++)
- pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
+ pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
} else {
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
@@ -187,8 +185,8 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct c4iw_ucontext *ucontext;
u64 addr;
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
+ pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
if (vma->vm_start & (PAGE_SIZE-1))
return -EINVAL;
@@ -253,7 +251,7 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
php = to_c4iw_pd(pd);
rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid);
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
@@ -270,7 +268,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
u32 pdid;
struct c4iw_dev *rhp;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
@@ -293,14 +291,14 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
}
static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
*pkey = 0;
return 0;
}
@@ -310,8 +308,8 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
{
struct c4iw_dev *dev;
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
+ pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
dev = to_c4iw_dev(ibdev);
BUG_ON(port == 0);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
@@ -325,7 +323,7 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
struct c4iw_dev *dev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
@@ -366,7 +364,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
struct net_device *netdev;
struct in_device *inetdev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_c4iw_dev(ibdev);
netdev = dev->rdev.lldi.ports[port-1];
@@ -408,7 +406,7 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
@@ -421,7 +419,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct ethtool_drvinfo info;
struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
@@ -431,7 +429,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
}
@@ -524,7 +522,7 @@ static void get_dev_fw_str(struct ib_device *dev, char *str,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
snprintf(str, str_len, "%u.%u.%u.%u",
FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
@@ -538,7 +536,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
int ret;
int i;
- PDBG("%s c4iw_dev %p\n", __func__, dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, dev);
BUG_ON(!dev->rdev.lldi.ports[0]);
strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
@@ -648,7 +646,7 @@ void c4iw_unregister_device(struct c4iw_dev *dev)
{
int i;
- PDBG("%s c4iw_dev %p\n", __func__, dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
c4iw_class_attributes[i]);
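
In c4iw_alloc_ucontext() the open-coded `static int warned` guard becomes pr_err_once(). The *_once printk helpers keep a per-call-site flag; a simplified sketch of the idea (the in-tree implementation differs in detail):

	#define demo_pr_err_once(fmt, ...)				\
	do {								\
		static bool __done;					\
									\
		if (!__done) {						\
			__done = true;					\
			printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__);	\
		}							\
	} while (0)

Because the static lives inside the macro expansion, each call site gets its own one-shot flag, which is why the old per-function counter can simply be deleted.
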
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d4fd2f5c8326..8e4154b4253e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -254,11 +254,11 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
ret = -ENOMEM;
goto free_sq;
}
- PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
- __func__, wq->sq.queue,
- (unsigned long long)virt_to_phys(wq->sq.queue),
- wq->rq.queue,
- (unsigned long long)virt_to_phys(wq->rq.queue));
+ pr_debug("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+ __func__, wq->sq.queue,
+ (unsigned long long)virt_to_phys(wq->sq.queue),
+ wq->rq.queue,
+ (unsigned long long)virt_to_phys(wq->rq.queue));
memset(wq->rq.queue, 0, wq->rq.memsize);
dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
@@ -275,7 +275,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
* User mode must have bar2 access.
*/
if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
- pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
+ pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
goto free_dma;
}
@@ -362,9 +362,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
if (ret)
goto free_dma;
- PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
- __func__, wq->sq.qid, wq->rq.qid, wq->db,
- wq->sq.bar2_va, wq->rq.bar2_va);
+ pr_debug("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
+ __func__, wq->sq.qid, wq->rq.qid, wq->db,
+ wq->sq.bar2_va, wq->rq.bar2_va);
return 0;
free_dma:
@@ -725,7 +725,7 @@ static void free_qp_work(struct work_struct *work)
ucontext = qhp->ucontext;
rhp = qhp->rhp;
- PDBG("%s qhp %p ucontext %p\n", __func__, qhp, ucontext);
+ pr_debug("%s qhp %p ucontext %p\n", __func__, qhp, ucontext);
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
@@ -739,19 +739,19 @@ static void queue_qp_free(struct kref *kref)
struct c4iw_qp *qhp;
qhp = container_of(kref, struct c4iw_qp, kref);
- PDBG("%s qhp %p\n", __func__, qhp);
+ pr_debug("%s qhp %p\n", __func__, qhp);
queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
}
void c4iw_qp_add_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
}
@@ -959,8 +959,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
break;
default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
+ pr_debug("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
err = -EINVAL;
}
if (err) {
@@ -981,9 +981,10 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
- PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
- __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
- swsqe->opcode, swsqe->read_len);
+ pr_debug("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
+ __func__,
+ (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
+ swsqe->opcode, swsqe->read_len);
wr = wr->next;
num_wrs--;
t4_sq_produce(&qhp->wq, len16);
@@ -1057,8 +1058,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wqe->recv.r2[1] = 0;
wqe->recv.r2[2] = 0;
wqe->recv.len16 = len16;
- PDBG("%s cookie 0x%llx pidx %u\n", __func__,
- (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
+ pr_debug("%s cookie 0x%llx pidx %u\n",
+ __func__,
+ (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
t4_rq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
wr = wr->next;
@@ -1217,8 +1219,8 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
struct sk_buff *skb;
struct terminate_message *term;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- qhp->ep->hwtid);
+ pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ qhp->ep->hwtid);
skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1254,7 +1256,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
int rq_flushed, sq_flushed;
unsigned long flag;
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&rchp->lock, flag);
@@ -1339,8 +1341,8 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- ep->hwtid);
+ pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ ep->hwtid);
skb = skb_dequeue(&ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1366,13 +1368,13 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid,
qhp->wq.sq.qid, __func__);
out:
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
- PDBG("%s p2p_type = %d\n", __func__, p2p_type);
+ pr_debug("%s p2p_type = %d\n", __func__, p2p_type);
memset(&init->u, 0, sizeof init->u);
switch (p2p_type) {
case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
@@ -1401,8 +1403,8 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp,
- qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
+ pr_debug("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp,
+ qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
if (!skb) {
@@ -1474,7 +1476,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
err1:
free_ird(rhp, qhp->attr.max_ird);
out:
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
@@ -1491,9 +1493,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int free = 0;
struct c4iw_ep *ep = NULL;
- PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
- (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+ pr_debug("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
+ __func__,
+ qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
+ (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
mutex_lock(&qhp->mutex);
@@ -1671,16 +1674,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
goto err;
break;
default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
+ pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
ret = -EINVAL;
goto err;
break;
}
goto out;
err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.sq.qid);
+ pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.sq.qid);
/* disassociate the LLP connection */
qhp->attr.llp_stream_handle = NULL;
@@ -1716,7 +1718,7 @@ out:
*/
if (free)
c4iw_put_ep(&ep->com);
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
@@ -1746,7 +1748,7 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
c4iw_qp_rem_ref(ib_qp);
- PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
+ pr_debug("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
return 0;
}
@@ -1765,7 +1767,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (attrs->qp_type != IB_QPT_RC)
return ERR_PTR(-EINVAL);
@@ -1936,11 +1938,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->ibqp.qp_num = qhp->wq.sq.qid;
init_timer(&(qhp->timer));
INIT_LIST_HEAD(&qhp->db_fc_entry);
- PDBG("%s sq id %u size %u memsize %zu num_entries %u "
- "rq id %u size %u memsize %zu num_entries %u\n", __func__,
- qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
- attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
- qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
+ pr_debug("%s sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
+ __func__,
+ qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
+ attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
+ qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
return &qhp->ibqp;
err8:
kfree(ma_sync_key_mm);
@@ -1970,7 +1972,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum c4iw_qp_attr_mask mask = 0;
struct c4iw_qp_attributes attrs;
- PDBG("%s ib_qp %p\n", __func__, ibqp);
+ pr_debug("%s ib_qp %p\n", __func__, ibqp);
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -2016,7 +2018,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
}
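
Beyond the logging conversion, the qp.c hunks show the QP teardown chain: c4iw_qp_rem_ref() drops a kref whose release, queue_qp_free(), only queues free_qp_work() on the dedicated free_workq. The deferral exists because the last reference can be dropped in atomic context while teardown needs to sleep. A minimal sketch of that pattern, with hypothetical demo_* names:

	#include <linux/kref.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct demo_qp {
		struct kref kref;
		struct work_struct free_work;	/* INIT_WORK() at create time */
	};

	/* process context: sleeping teardown is safe here */
	static void demo_free_work(struct work_struct *work)
	{
		struct demo_qp *qp = container_of(work, struct demo_qp, free_work);

		kfree(qp);
	}

	/* may be called in atomic context, so only schedule the real work */
	static void demo_release(struct kref *kref)
	{
		struct demo_qp *qp = container_of(kref, struct demo_qp, kref);

		schedule_work(&qp->free_work);	/* driver uses its own free_workq */
	}

	static void demo_put_qp(struct demo_qp *qp)
	{
		kref_put(&qp->kref, demo_release);
	}
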
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 67df71a7012e..8ff0cbe5cb16 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -90,7 +90,7 @@ u32 c4iw_get_resource(struct c4iw_id_table *id_table)
void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry)
{
- PDBG("%s entry 0x%x\n", __func__, entry);
+ pr_debug("%s entry 0x%x\n", __func__, entry);
c4iw_id_free(id_table, entry);
}
@@ -141,7 +141,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -157,7 +157,7 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->cqids);
@@ -215,7 +215,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -231,7 +231,7 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->qpids);
@@ -254,7 +254,7 @@ void c4iw_destroy_resource(struct c4iw_resource *rscp)
u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
mutex_lock(&rdev->stats.lock);
if (addr) {
rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
@@ -268,7 +268,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -290,19 +290,17 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev)
while (pbl_start < pbl_top) {
pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk);
if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all PBL chunks (%x/%x)\n",
- pbl_start,
- pbl_top - pbl_start);
+ pr_warn("Failed to add all PBL chunks (%x/%x)\n",
+ pbl_start, pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
@@ -324,9 +322,9 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
if (!addr)
- pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
+ pr_warn_ratelimited("%s: Out of RQT memory\n",
pci_name(rdev->lldi.pdev));
mutex_lock(&rdev->stats.lock);
if (addr) {
@@ -341,7 +339,7 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
mutex_lock(&rdev->stats.lock);
rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -363,18 +361,17 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
while (rqt_start < rqt_top) {
rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk);
if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) {
- PDBG("%s failed to add RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("%s failed to add RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all RQT chunks (%x/%x)\n",
- rqt_start, rqt_top - rqt_start);
+ pr_warn("Failed to add all RQT chunks (%x/%x)\n",
+ rqt_start, rqt_top - rqt_start);
return 0;
}
rqt_chunk >>= 1;
} else {
- PDBG("%s added RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("%s added RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
rqt_start += rqt_chunk;
}
}
@@ -394,7 +391,7 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
if (addr) {
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
@@ -407,7 +404,7 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -429,18 +426,17 @@ int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev)
while (start < top) {
chunk = min(top - start + 1, chunk);
if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) {
- PDBG("%s failed to add OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("%s failed to add OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
if (chunk <= 1024 << MIN_OCQP_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all OCQP chunks (%x/%x)\n",
- start, top - start);
+ pr_warn("Failed to add all OCQP chunks (%x/%x)\n",
+ start, top - start);
return 0;
}
chunk >>= 1;
} else {
- PDBG("%s added OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("%s added OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
start += chunk;
}
}
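
The three pool constructors above (PBL, RQT, OCQP) share one fill strategy: offer gen_pool_add() the largest chunk available, halve the chunk on failure, and settle for a partially filled pool once the chunk falls below a floor. Condensed into a hypothetical helper:

	#include <linux/genalloc.h>
	#include <linux/kernel.h>

	static void demo_fill_pool(struct gen_pool *pool, unsigned long start,
				   unsigned long top, unsigned long chunk,
				   unsigned long floor)
	{
		while (start < top) {
			chunk = min(top - start + 1, chunk);
			if (gen_pool_add(pool, start, chunk, -1)) {
				if (chunk <= floor)
					return;	/* accept a partially filled pool */
				chunk >>= 1;	/* halve and retry this range */
			} else {
				start += chunk;	/* accepted, advance */
			}
		}
	}
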
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 640d22148a3e..e765c00303cd 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -466,14 +466,14 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
wmb();
if (wq->sq.bar2_va) {
if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("%s: WC wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
pio_copy((u64 __iomem *)
(wq->sq.bar2_va + SGE_UDB_WCDOORBELL),
(u64 *)wqe);
} else {
- PDBG("%s: DB wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("%s: DB wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid),
wq->sq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -493,14 +493,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
wmb();
if (wq->rq.bar2_va) {
if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("%s: WC wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
pio_copy((u64 __iomem *)
(wq->rq.bar2_va + SGE_UDB_WCDOORBELL),
(void *)wqe);
} else {
- PDBG("%s: DB wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("%s: DB wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid),
wq->rq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -601,7 +601,8 @@ static inline void t4_swcq_produce(struct t4_cq *cq)
{
cq->sw_in_use++;
if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ pr_debug("%s cxgb4 sw cq overflow cqid %u\n",
+ __func__, cq->cqid);
cq->error = 1;
BUG_ON(1);
}
@@ -656,7 +657,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) {
ret = -EOVERFLOW;
cq->error = 1;
- printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+ pr_err("cq overflow cqid %u\n", cq->cqid);
BUG_ON(1);
} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
@@ -672,7 +673,8 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
{
if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ pr_debug("%s cxgb4 sw cq overflow cqid %u\n",
+ __func__, cq->cqid);
cq->error = 1;
BUG_ON(1);
return NULL;
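
The two ring helpers in t4.h pick between BAR2 paths: a single fresh WQE can be pushed in full through the write-combining doorbell window with pio_copy(), while anything else just writes the producer-index increment to the kernel doorbell. Stripped of the tracing, the send-queue case reduces to the following (same identifiers as the hunk above; the wrapper itself is only a sketch):

	static inline void demo_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
	{
		wmb();	/* commit WQE stores before ringing */
		if (inc == 1 && wq->sq.bar2_qid == 0 && wqe)
			/* fast path: the WQE itself goes through the WC window */
			pio_copy((u64 __iomem *)(wq->sq.bar2_va + SGE_UDB_WCDOORBELL),
				 (u64 *)wqe);
		else
			/* generic path: bump PIDX via the kernel doorbell */
			writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid),
			       wq->sq.bar2_va + SGE_UDB_KDOORBELL);
	}
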
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a09b64b..88085f65432e 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o
+ verbs_txreq.o vnic_main.o vnic_sdma.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..794e6814a531 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
}
-/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
static inline void aspm_disable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_user_ctxt;
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
}
static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
- for (i = 0; i < dd->first_user_ctxt; i++)
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index d037f72e4d96..39279fd630bc 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
+#include "debugfs.h"
#define NUM_IB_PORTS 1
@@ -125,9 +126,16 @@ struct flag_table {
#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES 256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ * 0 - Verbs
+ * 1 - User Fecn Handling
+ * 2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN 1
+#define RSM_INS_VNIC 2
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -138,8 +146,7 @@ struct flag_table {
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
-/* RSM fields */
-
+/* RSM fields for Verbs */
/* packet type */
#define IB_PACKET_TYPE 2ull
#define QW_SHIFT 6ull
@@ -169,6 +176,28 @@ struct flag_table {
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for Vnic */
+/* L2_TYPE: QW 0, OFFSET 61 - for match */
+#define L2_TYPE_QW 0ull
+#define L2_TYPE_BIT_OFFSET 61ull
+#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off))
+#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET)
+#define L2_TYPE_MASK 3ull
+#define L2_16B_VALUE 2ull
+
+/* L4_TYPE QW 1, OFFSET 0 - for match */
+#define L4_TYPE_QW 1ull
+#define L4_TYPE_BIT_OFFSET 0ull
+#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off))
+#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET)
+#define L4_16B_TYPE_MASK 0xFFull
+#define L4_16B_ETH_VALUE 0x78ull
+
+/* 16B VESWID - for select */
+#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull))
+/* 16B ENTROPY - for select */
+#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull))
+
/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
num, \
@@ -1045,6 +1074,8 @@ static void dc_start(struct hfi1_devdata *);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index);
/*
* Error interrupt table entry. This is used as input to the interrupt
@@ -6379,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
*
* The expectation is that the caller of this routine would have taken
* care of properly transitioning the link into the correct state.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_shutdown(struct hfi1_devdata *dd)
+static void _dc_shutdown(struct hfi1_devdata *dd)
{
- unsigned long flags;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
- if (dd->dc_shutdown) {
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ if (dd->dc_shutdown)
return;
- }
+
dd->dc_shutdown = 1;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/* Shutdown the LCB */
lcb_shutdown(dd, 1);
/*
@@ -6401,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}
+static void dc_shutdown(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_shutdown(dd);
+ mutex_unlock(&dd->dc8051_lock);
+}
+
/*
* Calling this after the DC has been brought out of reset should not
* do any damage.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_start(struct hfi1_devdata *dd)
+static void _dc_start(struct hfi1_devdata *dd)
{
- unsigned long flags;
- int ret;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
if (!dd->dc_shutdown)
- goto done;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ return;
+
/* Take the 8051 out of reset */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* Wait until 8051 is ready */
- ret = wait_fm_ready(dd, TIMEOUT_8051_START);
- if (ret) {
+ if (wait_fm_ready(dd, TIMEOUT_8051_START))
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
__func__);
- }
+
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
dd->dc_shutdown = 0;
-done:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+}
+
+static void dc_start(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_start(dd);
+ mutex_unlock(&dd->dc8051_lock);
}
/*
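
The shutdown/start rework above also changes the dc8051 lock from a spinlock to a mutex and splits each entry point into a public wrapper plus a `_`-prefixed body that asserts, rather than takes, the lock. That lets do_8051_command() (below) call _dc_shutdown()/_dc_start() without the old drop-and-retake dance. The idiom in isolation, with hypothetical demo_* names:

	#include <linux/lockdep.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(demo_lock);
	static int demo_state;

	/* caller must hold demo_lock */
	static void _demo_set_state(int v)
	{
		lockdep_assert_held(&demo_lock);
		demo_state = v;
	}

	static void demo_set_state(int v)
	{
		mutex_lock(&demo_lock);
		_demo_set_state(v);
		mutex_unlock(&demo_lock);
	}
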
@@ -6701,7 +6741,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
int i;
/* enable all kernel contexts */
- for (i = 0; i < dd->n_krcv_queues; i++) {
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+		/* Ensure all non-user contexts (including vnic) are enabled */
+ if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER))
+ continue;
+
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
@@ -7077,7 +7123,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
- /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */
+ /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */
if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
__func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
@@ -7165,7 +7211,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
- (dd->dc8051_ver < dc8051_ver(0, 19))) {
+ (dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
@@ -7277,15 +7323,6 @@ void handle_verify_cap(struct work_struct *work)
lcb_shutdown(dd, 0);
adjust_lcb_for_fpga_serdes(dd);
- /*
- * These are now valid:
- * remote VerifyCap fields in the general LNI config
- * CSR DC8051_STS_REMOTE_GUID
- * CSR DC8051_STS_REMOTE_NODE_TYPE
- * CSR DC8051_STS_REMOTE_FM_SECURITY
- * CSR DC8051_STS_REMOTE_PORT_NO
- */
-
read_vc_remote_phy(dd, &power_management, &continious);
read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
&partner_supported_crc);
@@ -7350,7 +7387,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
@@ -7416,20 +7453,6 @@ void handle_verify_cap(struct work_struct *work)
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
set_8051_lcb_access(dd);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_fm_security =
- read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
- DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
- dd_dev_info(dd,
- "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
- ppd->neighbor_guid, ppd->neighbor_type,
- ppd->mgmt_allowed, ppd->neighbor_fm_security);
if (ppd->mgmt_allowed)
add_full_mgmt_pkey(ppd);
@@ -7897,6 +7920,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
+ if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+ reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
/* report any remaining errors */
if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
@@ -7995,7 +8021,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
if (likely(source < dd->num_rcv_contexts)) {
rcd = dd->rcd[source];
if (rcd) {
- if (source < dd->first_user_ctxt)
+ /* Check for non-user contexts, including vnic */
+ if ((source < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
@@ -8023,7 +8051,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
rcd = dd->rcd[source];
if (rcd) {
/* only pay attention to user urgent interrupts */
- if (source >= dd->first_user_ctxt)
+ if ((source >= dd->first_dyn_alloc_ctxt) &&
+ (!rcd->sc || (rcd->sc->type == SC_USER)))
handle_user_interrupt(rcd);
return; /* OK */
}
@@ -8156,10 +8185,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
- } else
+ } else {
dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
sde->this_idx);
-
+ }
return IRQ_HANDLED;
}
@@ -8344,6 +8373,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
}
/*
+ * Provide a cache for some of the LCB registers in case the LCB is
+ * unavailable.
+ * (The LCB is unavailable in certain link states, for example.)
+ */
+struct lcb_datum {
+ u32 off;
+ u64 val;
+};
+
+static struct lcb_datum lcb_cache[] = {
+ { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
+ { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
+ { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
+};
+
+static void update_lcb_cache(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
+
+ /* Update if we get good data */
+ if (likely(ret != -EBUSY))
+ lcb_cache[i].val = val;
+ }
+}
+
+static int read_lcb_cache(u32 off, u64 *val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ if (lcb_cache[i].off == off) {
+ *val = lcb_cache[i].val;
+ return 0;
+ }
+ }
+
+ pr_warn("%s bad offset 0x%x\n", __func__, off);
+ return -1;
+}
+
+/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
*/
@@ -8354,9 +8429,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
- /* if going up or down, no access */
- if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
- return -EBUSY;
+ /* if going up or down, check the cache, otherwise, no access */
+ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
+ if (read_lcb_cache(addr, data))
+ return -EBUSY;
+ return 0;
+ }
+
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
@@ -8371,7 +8450,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
- (dd->dc8051_ver < dc8051_ver(0, 20))) {
+ (dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
@@ -8420,16 +8499,11 @@ static int do_8051_command(
{
u64 reg, completed;
int return_code;
- unsigned long flags;
unsigned long timeout;
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
- /*
- * Alternative to holding the lock for a long time:
- * - keep busy wait - have other users bounce off
- */
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
@@ -8455,10 +8529,8 @@ static int do_8051_command(
return_code = -ENXIO;
goto fail;
}
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
- dc_shutdown(dd);
- dc_start(dd);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ _dc_shutdown(dd);
+ _dc_start(dd);
}
/*
@@ -8539,8 +8611,7 @@ static int do_8051_command(
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
-
+ mutex_unlock(&dd->dc8051_lock);
return return_code;
}
@@ -8677,13 +8748,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
- *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
- *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
+ *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
+ STS_FM_VERSION_MAJOR_MASK;
+ *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
+ STS_FM_VERSION_MINOR_MASK;
+
+ read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
+ *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
+ STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
@@ -8891,8 +8969,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
- u64 reg;
- unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
@@ -8915,19 +8991,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
- /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
- timeout = jiffies + msecs_to_jiffies(10);
- while (1) {
- reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
- if (reg)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for LINK_TRANSFER_ACTIVE\n");
- return -ETIMEDOUT;
- }
- udelay(2);
- }
+ ret = wait_link_transfer_active(dd, 10);
+ if (ret)
+ return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
@@ -9091,7 +9157,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
@@ -9274,7 +9340,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable on fire\n",
+ dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
__func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
@@ -9494,8 +9560,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
- /* report success if not a QSFP */
- if (ppd->port_type != PORT_TYPE_QSFP)
+ /*
+ * Report success if not a QSFP or, if it is a QSFP, but the cable is
+ * not present
+ */
+ if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
@@ -10082,6 +10151,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
+/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
+{
+ u64 reg;
+ unsigned long timeout;
+
+ /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
+ timeout = jiffies + msecs_to_jiffies(wait_ms);
+ while (1) {
+ reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+ if (reg)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(dd,
+ "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+ return -ETIMEDOUT;
+ }
+ udelay(2);
+ }
+ return 0;
+}
+
+/* called when the logical link state is not down as it should be */
+static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+
+ /*
+ * Bring link up in LCB loopback
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
+ DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+
+ write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
+ write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
+ write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
+ write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
+
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
+ (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
+ udelay(3);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
+ write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+
+ wait_link_transfer_active(dd, 100);
+
+ /*
+ * Bring the link down again.
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
+
+ /* call again to adjust ppd->statusp, if needed */
+ get_logical_state(ppd);
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
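
wait_link_transfer_active() factors the LINK_TRANSFER_ACTIVE busy-wait out of do_quick_linkup() so force_logical_link_state_down() can reuse it with a different timeout (10 ms vs 100 ms above). The poll-with-deadline shape, as a self-contained sketch (demo_read_status() is a stand-in for the CSR read):

	#include <linux/delay.h>
	#include <linux/errno.h>
	#include <linux/jiffies.h>

	static bool demo_read_status(void)
	{
		return false;	/* stand-in for read_csr(dd, DC_LCB_STS_...) */
	}

	static int demo_wait_active(int wait_ms)
	{
		unsigned long timeout = jiffies + msecs_to_jiffies(wait_ms);

		while (!demo_read_status()) {
			if (time_after(jiffies, timeout))
				return -ETIMEDOUT;
			udelay(2);	/* short spin between polls */
		}
		return 0;
	}
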
@@ -10098,6 +10225,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
int do_transition;
int do_wait;
+ update_lcb_cache(dd);
+
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
pstate = read_physical_state(dd);
@@ -10135,15 +10264,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
return ret;
}
- /* make sure the logical state is also down */
- wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+ /* make sure the logical state is also down */
+ ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+ if (ret)
+ force_logical_link_state_down(ppd);
+
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
if (ppd->port_type == PORT_TYPE_QSFP &&
@@ -10380,11 +10512,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
- ppd->host_link_state = HLS_UP_INIT;
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
- /* logical state didn't change, stay at going_up */
- ppd->host_link_state = HLS_GOING_UP;
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
@@ -10398,6 +10527,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
handle_linkup_change(dd, 1);
+ ppd->host_link_state = HLS_UP_INIT;
}
break;
case HLS_UP_ARMED:
@@ -11853,6 +11983,10 @@ static void free_cntrs(struct hfi1_devdata *dd)
dd->scntrs = NULL;
kfree(dd->cntrnames);
dd->cntrnames = NULL;
+ if (dd->update_cntr_wq) {
+ destroy_workqueue(dd->update_cntr_wq);
+ dd->update_cntr_wq = NULL;
+ }
}
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
@@ -12008,7 +12142,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
}
-static void update_synth_timer(unsigned long opaque)
+static void do_update_synth_timer(struct work_struct *work)
{
u64 cur_tx;
u64 cur_rx;
@@ -12017,8 +12151,8 @@ static void update_synth_timer(unsigned long opaque)
int i, j, vl;
struct hfi1_pportdata *ppd;
struct cntr_entry *entry;
-
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata,
+ update_cntr_work);
/*
* Rather than keep beating on the CSRs pick a minimal set that we can
@@ -12101,7 +12235,13 @@ static void update_synth_timer(unsigned long opaque)
} else {
hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
}
+}
+static void update_synth_timer(unsigned long opaque)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+
+ queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}
@@ -12337,6 +12477,13 @@ static int init_cntrs(struct hfi1_devdata *dd)
if (init_cpu_counters(dd))
goto bail;
+ dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d",
+ WQ_MEM_RECLAIM, dd->unit);
+ if (!dd->update_cntr_wq)
+ goto bail;
+
+ INIT_WORK(&dd->update_cntr_work, do_update_synth_timer);
+
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
return 0;
bail:
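
The counter rework above moves the heavy CSR aggregation out of the timer callback: update_synth_timer() now only queues update_cntr_work on the ordered WQ_MEM_RECLAIM workqueue and re-arms itself, while do_update_synth_timer() does the real work in process context. The split, sketched with the pre-4.15 timer API the driver uses here (demo_* names hypothetical):

	#include <linux/jiffies.h>
	#include <linux/timer.h>
	#include <linux/workqueue.h>

	/* created at init, e.g. with alloc_ordered_workqueue() */
	static struct workqueue_struct *demo_wq;
	static struct work_struct demo_work;
	static struct timer_list demo_timer;

	/* process context: slow, possibly sleeping, counter aggregation */
	static void demo_do_update(struct work_struct *work)
	{
	}

	/* softirq context: must stay cheap and must not sleep */
	static void demo_timer_fn(unsigned long opaque)
	{
		queue_work(demo_wq, &demo_work);
		mod_timer(&demo_timer, jiffies + HZ);	/* re-arm */
	}
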
@@ -12726,7 +12873,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues;
+ last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
+
+ /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
+ dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
/*
* Sanity check - the code expects all SDMA chip source
@@ -12740,7 +12890,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
const char *err_info;
irq_handler_t handler;
irq_handler_t thread = NULL;
- void *arg;
+ void *arg = NULL;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
@@ -12767,24 +12917,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = dd->rcd[idx];
- /* no interrupt if no rcd */
- if (!rcd)
- continue;
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
+ if (rcd) {
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+ handler = receive_context_interrupt;
+ thread = receive_context_thread;
+ arg = rcd;
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+ me->type = IRQ_RCVCTXT;
+ rcd->msix_intr = i;
+ }
} else {
/* not in our expected range - complain, then
* ignore it
@@ -12822,6 +12973,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
return ret;
}
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ if (!dd->num_msix_entries) {
+ synchronize_irq(dd->pcidev->irq);
+ return;
+ }
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->msix.vector);
+ }
+}
+
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->msix.vector, me->arg);
+
+ me->arg = NULL;
+}
+
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me;
+ int idx = rcd->ctxt;
+ void *arg = rcd;
+ int ret;
+
+ rcd->msix_intr = dd->vnic.msix_idx++;
+ me = &dd->msix_entries[rcd->msix_intr];
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
+ me->name[sizeof(me->name) - 1] = 0;
+ me->type = IRQ_RCVCTXT;
+
+ remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
+
+ ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ receive_context_thread, 0, me->name, arg);
+ if (ret) {
+ dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
+ me->msix.vector, idx, ret);
+ return;
+ }
+ /*
+ * assign arg after request_irq call, so it will be
+ * cleaned up
+ */
+ me->arg = arg;
+
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret) {
+ dd_dev_err(dd,
+ "unable to pin IRQ %d\n", ret);
+ free_irq(me->msix.vector, me->arg);
+ }
+}
+
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
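
hfi1_set_vnic_msix_info() above wires a dynamically created VNIC context to its MSI-X vector at runtime rather than at probe: request_threaded_irq() installs the usual hard and threaded receive handlers, and me->arg is assigned only after the request succeeds so hfi1_reset_vnic_msix_info() can use it as the "irq is live" marker. The request pattern in isolation (demo_* names hypothetical):

	#include <linux/interrupt.h>

	static irqreturn_t demo_hard_irq(int irq, void *data)
	{
		return IRQ_WAKE_THREAD;	/* hand off to the threaded handler */
	}

	static irqreturn_t demo_thread_irq(int irq, void *data)
	{
		/* sleepable receive processing happens here */
		return IRQ_HANDLED;
	}

	static int demo_setup_irq(unsigned int vector, void *ctx, void **arg_slot)
	{
		int ret;

		ret = request_threaded_irq(vector, demo_hard_irq, demo_thread_irq,
					   0, "demo_rcv", ctx);
		if (ret)
			return ret;
		*arg_slot = ctx;	/* set only on success; teardown tests it */
		return 0;
	}
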
@@ -12853,7 +13082,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
if (!entries) {
@@ -12918,7 +13147,8 @@ fail:
*
* num_rcv_contexts - number of contexts being used
* n_krcv_queues - number of kernel contexts
- * first_user_ctxt - first non-kernel context in array of contexts
+ * first_dyn_alloc_ctxt - first dynamically allocated context
+ * in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
*/
@@ -12995,10 +13225,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
total_contexts = num_kernel_contexts + num_user_contexts;
}
- /* the first N are kernel contexts, the rest are user contexts */
+ /* Accommodate VNIC contexts */
+ if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts)
+ total_contexts += HFI1_NUM_VNIC_CTXT;
+
+ /* the first N are kernel contexts, the rest are user/vnic contexts */
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
- dd->first_user_ctxt = num_kernel_contexts;
+ dd->first_dyn_alloc_ctxt = num_kernel_contexts;
dd->num_user_contexts = num_user_contexts;
dd->freectxts = num_user_contexts;
dd_dev_info(dd,
@@ -13454,11 +13688,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
- for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
- write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
- write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
- write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
- }
+ for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++)
+ clear_rsm_rule(dd, i);
for (i = 0; i < 32; i++)
write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
@@ -13817,6 +14048,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
(u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
}
+/*
+ * Clear a receive side mapping rule.
+ */
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0);
+}
+
/* return the number of RSM map table entries that will be used for QOS */
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np)
@@ -13932,7 +14173,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
rrd.value2 = LRH_SC_VALUE;
/* add rule 0 */
- add_rsm_rule(dd, 0, &rrd);
+ add_rsm_rule(dd, RSM_INS_VERBS, &rrd);
/* mark RSM map entries as used */
rmt->used += rmt_entries;
@@ -13962,7 +14203,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
/*
* RSM will extract the destination context as an index into the
* map table. The destination contexts are a sequential block
- * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
+ * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
* Map entries are accessed as offset + extracted value. Adjust
* the added offset so this sequence can be placed anywhere in
* the table - as long as the entries themselves do not wrap.
@@ -13970,9 +14211,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
* start with that to allow for a "negative" offset.
*/
offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
- (int)dd->first_user_ctxt);
+ (int)dd->first_dyn_alloc_ctxt);
- for (i = dd->first_user_ctxt, idx = rmt->used;
+ for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
i < dd->num_rcv_contexts; i++, idx++) {
/* replace with identity mapping */
regoff = (idx % 8) * 8;
@@ -14006,11 +14247,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
rrd.value2 = 1;
/* add rule 1 */
- add_rsm_rule(dd, 1, &rrd);
+ add_rsm_rule(dd, RSM_INS_FECN, &rrd);
rmt->used += dd->num_user_contexts;
}
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ u8 i, j;
+ u8 ctx_id = 0;
+ u64 reg;
+ u32 regoff;
+ struct rsm_rule_data rrd;
+
+ if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
+ dd->vnic.rmt_start);
+ return;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
+ dd->vnic.rmt_start,
+ dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+
+ /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
+ regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ reg = read_csr(dd, regoff);
+ for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
+ /* Update map register with vnic context */
+ j = (dd->vnic.rmt_start + i) % 8;
+ reg &= ~(0xffllu << (j * 8));
+ reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
+ /* Wrap the vnic ctxt index around num_ctxt */
+ ctx_id %= dd->vnic.num_ctxt;
+ /* Write back map register */
+ if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ dev_dbg(&(dd)->pcidev->dev,
+ "Vnic rsm map reg[%d] =0x%llx\n",
+ regoff - RCV_RSM_MAP_TABLE, reg);
+
+ write_csr(dd, regoff, reg);
+ regoff += 8;
+ if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ reg = read_csr(dd, regoff);
+ }
+ }
+
+ /* Add rule for vnic */
+ rrd.offset = dd->vnic.rmt_start;
+ rrd.pkt_type = 4;
+ /* Match 16B packets */
+ rrd.field1_off = L2_TYPE_MATCH_OFFSET;
+ rrd.mask1 = L2_TYPE_MASK;
+ rrd.value1 = L2_16B_VALUE;
+ /* Match ETH L4 packets */
+ rrd.field2_off = L4_TYPE_MATCH_OFFSET;
+ rrd.mask2 = L4_16B_TYPE_MASK;
+ rrd.value2 = L4_16B_ETH_VALUE;
+ /* Calc context from veswid and entropy */
+ rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
+ rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ rrd.index2_off = L2_16B_ENTROPY_OFFSET;
+ rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+
+ /* Enable RSM if not already enabled */
+ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
+{
+ clear_rsm_rule(dd, RSM_INS_VNIC);
+
+ /* Disable RSM if used only by vnic */
+ if (dd->vnic.rmt_start == 0)
+ clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
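
The map-table update loop in hfi1_init_vnic_rsm() above packs contexts one per byte: 256 one-byte entries, eight per 64-bit register, so entry e occupies byte (e % 8) of the register at RCV_RSM_MAP_TABLE + (e / 8) * 8. A toy version of the read-modify-write on one register (helper name invented for illustration):

#include <stdint.h>

static uint64_t set_rmt_entry(uint64_t reg, unsigned int entry, uint8_t ctxt)
{
	unsigned int shift = (entry % 8) * 8;

	reg &= ~(0xffull << shift);     /* clear the old context byte */
	reg |= (uint64_t)ctxt << shift; /* insert the new receive context */
	return reg;                     /* caller writes this back via write_csr() */
}
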
static void init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
@@ -14023,6 +14337,8 @@ static void init_rxe(struct hfi1_devdata *dd)
init_qos(dd, rmt);
init_user_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
+ /* record number of used rsm map entries for vnic */
+ dd->vnic.rmt_start = rmt->used;
kfree(rmt);
/*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 043fd21dc5f3..b9dbf16d7703 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -394,7 +394,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
-#define LINK_DOWN_REASON 0x16
+#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
+#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
@@ -524,10 +525,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
-#define STS_FM_VERSION_A_SHIFT 16
-#define STS_FM_VERSION_A_MASK 0xff
-#define STS_FM_VERSION_B_SHIFT 24
-#define STS_FM_VERSION_B_MASK 0xff
+#define STS_FM_VERSION_MINOR_SHIFT 16
+#define STS_FM_VERSION_MINOR_MASK 0xff
+#define STS_FM_VERSION_MAJOR_SHIFT 24
+#define STS_FM_VERSION_MAJOR_MASK 0xff
+#define STS_FM_VERSION_PATCH_SHIFT 24
+#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
@@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@@ -1358,6 +1362,8 @@ int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 1b783bbee4bb..995d62c7f9a7 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -331,12 +331,15 @@ struct diag_pkt {
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_FECN_SHIFT 31
-#define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
-#define HFI1_BECN_SHIFT 30
-#define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
+
+/*
+ * 0xF8 - the top 4 bits select the multicast range and the next bit
+ * the collective range.
+ * Example: For a 24 bit LID space,
+ * Multicast range: 0xF00000 to 0xF7FFFF
+ * Collective range: 0xF80000 to 0xFFFFFE
+ */
+#define HFI1_MCAST_NR 0x4 /* Number of top bits set */
+#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */
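
A worked example (illustration only, not driver code) reproducing the ranges in the comment above from the two counts, for a given LID width:

#include <stdint.h>

#define MCAST_NR      0x4 /* HFI1_MCAST_NR above */
#define COLLECTIVE_NR 0x1 /* HFI1_COLLECTIVE_NR above */

static void lid_ranges(unsigned int lid_bits) /* 24 in the example above */
{
	uint32_t top = 1u << lid_bits;                             /* 0x1000000 */
	uint32_t mcast = top - (top >> MCAST_NR);                  /* 0xF00000 */
	uint32_t coll = top - (top >> (MCAST_NR + COLLECTIVE_NR)); /* 0xF80000 */

	/* multicast: [mcast, coll - 1]; collective: [coll, top - 2] */
	(void)mcast;
	(void)coll;
}
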
#define HFI1_PSM_IOC_BASE_SEQ 0x0
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 7fe9dd885746..e9fa3c293e42 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,6 +1,6 @@
#ifdef CONFIG_DEBUG_FS
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -51,8 +51,12 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/fault-inject.h>
#include "hfi.h"
+#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
@@ -170,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
- for (j = 0; j < dd->first_user_ctxt; j++) {
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
@@ -196,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -210,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
++*pos;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
+#ifdef CONFIG_FAULT_INJECTION
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault_opcode->n_rxfaults[i] &&
+ !ibd->fault_opcode->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
+ (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_opcode->dir);
+ kfree(ibd->fault_opcode);
+ ibd->fault_opcode = NULL;
+}
+
+static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
+ if (!ibd->fault_opcode)
+ return -ENOMEM;
+
+ ibd->fault_opcode->attr.interval = 1;
+ ibd->fault_opcode->attr.require_end = ULONG_MAX;
+ ibd->fault_opcode->attr.stacktrace_depth = 32;
+ ibd->fault_opcode->attr.dname = NULL;
+ ibd->fault_opcode->attr.verbose = 0;
+ ibd->fault_opcode->fault_by_opcode = false;
+ ibd->fault_opcode->opcode = 0;
+ ibd->fault_opcode->mask = 0xff;
+
+ ibd->fault_opcode->dir =
+ fault_create_debugfs_attr("fault_opcode",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_opcode->dir)) {
+ kfree(ibd->fault_opcode);
+ return -ENOENT;
+ }
+
+ DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
+ if (!debugfs_create_bool("fault_by_opcode", 0600,
+ ibd->fault_opcode->dir,
+ &ibd->fault_opcode->fault_by_opcode))
+ goto fail;
+ if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->opcode))
+ goto fail;
+ if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->mask))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_opcode_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_packet->dir);
+ kfree(ibd->fault_packet);
+ ibd->fault_packet = NULL;
+}
+
+static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
+ if (!ibd->fault_packet)
+ return -ENOMEM;
+
+ ibd->fault_packet->attr.interval = 1;
+ ibd->fault_packet->attr.require_end = ULONG_MAX;
+ ibd->fault_packet->attr.stacktrace_depth = 32;
+ ibd->fault_packet->attr.dname = NULL;
+ ibd->fault_packet->attr.verbose = 0;
+ ibd->fault_packet->fault_by_packet = false;
+
+ ibd->fault_packet->dir =
+ fault_create_debugfs_attr("fault_packet",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_packet->dir)) {
+ kfree(ibd->fault_packet);
+ return -ENOENT;
+ }
+
+ if (!debugfs_create_bool("fault_by_packet", 0600,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->fault_by_packet))
+ goto fail;
+ if (!debugfs_create_u64("fault_stats", 0400,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->n_faults))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_packet_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ fault_exit_opcode_debugfs(ibd);
+ fault_exit_packet_debugfs(ibd);
+}
+
+static int fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ int ret = 0;
+
+ ret = fault_init_opcode_debugfs(ibd);
+ if (ret)
+ return ret;
+
+ ret = fault_init_packet_debugfs(ibd);
+ if (ret)
+ fault_exit_opcode_debugfs(ibd);
+
+ return ret;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return ibd->fault_suppress_err;
+}
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
+{
+ bool ret = false;
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
+ return false;
+ if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
+ return false;
+ ret = should_fail(&ibd->fault_opcode->attr, 1);
+ if (ret) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ if (rx)
+ ibd->fault_opcode->n_rxfaults[opcode]++;
+ else
+ ibd->fault_opcode->n_txfaults[opcode]++;
+ }
+ return ret;
+}
+
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
+ struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
+ bool ret = false;
+
+ if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
+ return false;
+
+ ret = should_fail(&ibd->fault_packet->attr, 1);
+ if (ret) {
+ ++ibd->fault_packet->n_faults;
+ trace_hfi1_fault_packet(packet);
+ }
+ return ret;
+}
+#endif
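
The opcode check in hfi1_dbg_fault_opcode() above reduces to one masked comparison; a minimal sketch:

#include <stdbool.h>
#include <stdint.h>

static bool opcode_matches(uint8_t pkt_opcode, uint8_t opcode, uint8_t mask)
{
	/*
	 * mask = 0xff faults one exact opcode; clearing mask bits widens
	 * the match to a whole opcode group.
	 */
	return (pkt_opcode & mask) == opcode;
}
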
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
}
+
+#ifdef CONFIG_FAULT_INJECTION
+ debugfs_create_bool("fault_suppress_err", 0600,
+ ibd->hfi1_ibdev_dbg,
+ &ibd->fault_suppress_err);
+ fault_init_debugfs(ibd);
+#endif
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
+#ifdef CONFIG_FAULT_INJECTION
+ fault_exit_debugfs(ibd);
+#endif
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..38c38a98156d 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+struct fault_opcode {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_opcode;
+ u64 n_rxfaults[256];
+ u64 n_txfaults[256];
+ u8 opcode;
+ u8 mask;
+};
+
+struct fault_packet {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_packet;
+ u64 n_faults;
+};
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+#else
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+#endif
+
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+{
+}
+
+static inline void hfi1_dbg_init(void)
{
}
-void hfi1_dbg_init(void)
+static inline void hfi1_dbg_exit(void)
{
}
-void hfi1_dbg_exit(void)
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
+ return false;
}
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 3881c951f6af..500b129ed565 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,6 +59,8 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
+#include "debugfs.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -283,7 +285,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
+ int lnh = ib_get_lnh(rhdr);
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -295,7 +297,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* For TIDERR and RC QPs preemptively schedule a NAK */
struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(rhdr->lrh[1]);
+ u16 lid = ib_get_dlid(rhdr);
u32 qp_num;
u32 rcv_flags = 0;
@@ -396,7 +398,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = hdr2sc(rhdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
@@ -414,7 +416,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UD;
break;
case IB_QPT_UC:
- rlid = be16_to_cpu(rhdr->lrh[3]);
+ rlid = ib_get_slid(rhdr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
@@ -460,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
- u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
+ u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
@@ -471,7 +473,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- rlid = be16_to_cpu(hdr->lrh[3]);
+ rlid = ib_get_slid(hdr);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
@@ -491,16 +493,16 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc(hdr, pkt->rhf);
+ sc = hfi1_9B_get_sc5(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
+ if (do_cnp && (bth1 & IB_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
- if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
+ if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
@@ -621,8 +623,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
hdr = packet->hdr;
-
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
@@ -634,7 +635,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
}
bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
+ is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
if (!is_ecn)
goto next;
@@ -652,7 +653,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
rcu_read_unlock();
/* turn off BECN, FECN */
- bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
+ bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
update_ps_mdata(&mdata, rcd);
@@ -872,20 +873,42 @@ bail:
return last;
}
-static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
+static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_nodma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_nodma_rtail;
}
-static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
+static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_dma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_dma_rtail;
}
@@ -895,8 +918,13 @@ void set_all_slowpath(struct hfi1_devdata *dd)
int i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ if ((i < dd->first_dyn_alloc_ctxt) ||
+ (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ rcd->do_interrupt = &handle_receive_interrupt;
+ }
}
static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
@@ -908,7 +936,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
- if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
+ if (etype == RHF_RCV_TYPE_IB &&
+ hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (hwstate != LSTATE_ACTIVE) {
@@ -1006,7 +1035,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
if (needset) {
dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_all_nodma_rtail(dd);
+ set_nodma_rtail(dd, rcd->ctxt);
needset = 0;
}
} else {
@@ -1028,7 +1057,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
if (needset) {
dd_dev_info(dd,
"Switching to DMA_RTAIL\n");
- set_all_dma_rtail(dd);
+ set_dma_rtail(dd, rcd->ctxt);
needset = 0;
}
}
@@ -1077,10 +1106,10 @@ void receive_interrupt_work(struct work_struct *work)
set_link_state(ppd, HLS_UP_ACTIVE);
/*
- * Interrupt all kernel contexts that could have had an
- * interrupt during auto activation.
+ * Interrupt all statically allocated kernel contexts that could
+ * have had an interrupt during auto activation.
*/
- for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+ for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++)
force_recv_intr(dd->rcd[i]);
}
@@ -1294,7 +1323,8 @@ int hfi1_reset_device(int unit)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd)
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
if (!dd->rcd[i] || !dd->rcd[i]->cnt)
continue;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
@@ -1354,6 +1384,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
+
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@@ -1363,6 +1396,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
+ if (unlikely(
+ (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ (packet->rhf & RHF_DC_ERR))))
+ return RHF_RCV_CONTINUE;
+
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
@@ -1372,15 +1410,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+ /* Packet received in VNIC context via RSM */
+ if (packet->rcd->is_vnic)
+ return true;
+
+ if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+ (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+ return true;
+
+ return false;
+}
+
int process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (unlikely(rhf_err_flags(packet->rhf)))
+ if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+ } else if (hfi1_is_vnic_packet(packet)) {
+ hfi1_vnic_bypass_rcv(packet);
+ return RHF_RCV_CONTINUE;
+ }
- dd_dev_err(dd,
- "Bypass packets are not supported in normal operation. Dropping\n");
+ dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
@@ -1398,6 +1452,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
+ /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+ if (unlikely(
+ hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ rhf_rcv_type_err(packet->rhf) == 3))
+ return RHF_RCV_CONTINUE;
+
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1409,6 +1469,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1421,6 +1483,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f78c739b330a..3d9bce4bfcc7 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -586,8 +586,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* knows where it's own bitmap is within the page.
*/
memaddr = (unsigned long)(dd->events +
- ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
+ ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
memlen = PAGE_SIZE;
/*
* v3.7 removes VM_RESERVED but the effect is kept by
@@ -597,6 +597,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vmf = 1;
break;
case STATUS:
+ if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
+ ret = -EPERM;
+ goto done;
+ }
memaddr = kvirt_to_phys((void *)dd->status);
memlen = PAGE_SIZE;
flags |= VM_IO | VM_DONTEXPAND;
@@ -756,7 +760,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
*/
- ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
*ev = 0;
@@ -909,12 +913,18 @@ static int find_shared_ctxt(struct file *fp,
if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
continue;
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
struct hfi1_ctxtdata *uctxt = dd->rcd[i];
/* Skip ctxts which are not yet open */
if (!uctxt || !uctxt->cnt)
continue;
+
+ /* Skip dynamically allocated kernel contexts */
+ if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
+ continue;
+
/* Skip ctxt if it doesn't match the requested one */
if (memcmp(uctxt->uuid, uinfo->uuid,
sizeof(uctxt->uuid)) ||
@@ -960,7 +970,8 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return -EIO;
}
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
if (!dd->rcd[ctxt])
break;
@@ -1306,7 +1317,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
*/
binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
fd->subctxt, 0);
- offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
+ offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
sizeof(*dd->events));
binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
@@ -1400,12 +1411,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
}
spin_lock_irqsave(&dd->uctxt_lock, flags);
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
+ for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
ctxt++) {
uctxt = dd->rcd[ctxt];
if (uctxt) {
unsigned long *evs = dd->events +
- (uctxt->ctxt - dd->first_user_ctxt) *
+ (uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS;
int i;
/*
@@ -1477,7 +1488,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
if (!dd->events)
return 0;
- evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + subctxt;
for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
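
The events mappings in the file_ops.c hunks above all use the same flat addressing: dd->events holds HFI1_MAX_SHARED_CTXTS slots per dynamically allocated context, one per subcontext. A standalone sketch (the helper name and uint64_t word type are stand-ins for the driver's fields):

#include <stdint.h>

static uint64_t *ctxt_events(uint64_t *events, unsigned int first_dyn_alloc_ctxt,
			     unsigned int max_shared_ctxts,
			     unsigned int ctxt, unsigned int subctxt)
{
	/* one event word per (context, subcontext) pair */
	return events +
	       (ctxt - first_dyn_alloc_ctxt) * max_shared_ctxts + subctxt;
}
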
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 0dd50cdb039a..4042c11b2742 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
- u8 ver_a, ver_b;
+ u8 ver_major;
+ u8 ver_minor;
+ u8 ver_patch;
/*
* DC Reset sequence
@@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
- read_misc_status(dd, &ver_a, &ver_b);
- dd_dev_info(dd, "8051 firmware version %d.%d\n",
- (int)ver_b, (int)ver_a);
- dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
+ read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
+ dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
+ (int)ver_major, (int)ver_minor, (int)ver_patch);
+ dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 40d7559fa723..14063bd30c2a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/completion.h>
@@ -66,6 +67,7 @@
#include <linux/i2c-algo-bit.h>
#include <rdma/ib_hdrs.h>
#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -278,6 +280,8 @@ struct hfi1_ctxtdata {
struct hfi1_devdata *dd;
/* so functions that need physical port can get it easily */
struct hfi1_pportdata *ppd;
+ /* associated msix interrupt */
+ u32 msix_intr;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subctxt_uregbase;
/* An array of pages for the eager receive buffers * N */
@@ -337,6 +341,12 @@ struct hfi1_ctxtdata {
* packets with the wrong interrupt handler.
*/
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+ /* Indicates that this is a vnic context */
+ bool is_vnic;
+
+ /* vnic queue index this context is mapped to */
+ u8 vnic_q_idx;
};
/*
@@ -474,7 +484,7 @@ struct rvt_sge_state;
#define HFI1_PART_ENFORCE_OUT 0x2
/* how often we check for synthetic counter wrap around */
-#define SYNTH_CNT_TIME 2
+#define SYNTH_CNT_TIME 3
/* Counter flags */
#define CNTR_NORMAL 0x0 /* Normal counters, just read register */
@@ -808,6 +818,32 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
};
+/* sizes for both the QP and RSM map tables */
+#define NUM_MAP_ENTRIES 256
+#define NUM_MAP_REGS 32
+
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT 8
+
+/* Number of VNIC RSM entries */
+#define NUM_VNIC_MAP_ENTRIES 8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+ struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+ struct kmem_cache *txreq_cache;
+ u8 num_vports;
+ struct idr vesw_idr;
+ u8 rmt_start;
+ u8 num_ctxt;
+ u32 msix_idx;
+};
+
+struct hfi1_vnic_vport_info;
+
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a hfi1_pportdata struct.
*/
@@ -926,8 +962,9 @@ struct hfi1_devdata {
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
/* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
spinlock_t uctxt_lock; /* rcd and user context changes */
- /* exclusive access to 8051 */
- spinlock_t dc8051_lock;
+ struct mutex dc8051_lock; /* exclusive access to 8051 */
+ struct workqueue_struct *update_cntr_wq;
+ struct work_struct update_cntr_work;
/* exclusive access to 8051 memory */
spinlock_t dc8051_memlock;
int dc8051_timed_out; /* remember if the 8051 timed out */
@@ -1020,7 +1057,7 @@ struct hfi1_devdata {
u8 qos_shift;
u16 irev; /* implementation revision */
- u16 dc8051_ver; /* 8051 firmware version */
+ u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
@@ -1031,6 +1068,7 @@ struct hfi1_devdata {
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
u32 num_msix_entries;
+ u32 first_dyn_msix_idx;
/* INTx information */
u32 requested_intx_irq; /* did we request one? */
@@ -1115,6 +1153,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
@@ -1126,8 +1167,8 @@ struct hfi1_devdata {
u16 flags;
/* Number of physical ports available */
u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
+ /* Lowest context number which can be used by user processes or VNIC */
+ u8 first_dyn_alloc_ctxt;
/* adding a new field here would make it part of this cacheline */
/* seqlock for sc2vl */
@@ -1167,15 +1208,24 @@ struct hfi1_devdata {
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
- struct rhashtable sdma_rht;
+ struct rhashtable *sdma_rht;
struct kobject kobj;
+
+ /* vnic data */
+ struct hfi1_vnic_data vnic;
};
+static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
+{
+ return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
+}
+
/* 8051 firmware version helper */
-#define dc8051_ver(a, b) ((a) << 8 | (b))
-#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
-#define dc8051_ver_min(a) (a & 0x00ff)
+#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
+#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
+#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
+#define dc8051_ver_patch(a) ((a) & 0x0000ff)
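
A quick round trip of the widened version macros above (the 1.27.0 value is only an example):

#include <stdint.h>

#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
#define dc8051_ver_patch(a) ((a) & 0x0000ff)

static const uint32_t ver = dc8051_ver(1, 27, 0); /* 0x00011b00 */
/* dc8051_ver_maj(ver) == 1, dc8051_ver_min(ver) == 27, dc8051_ver_patch(ver) == 0 */
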
/* f_put_tid types */
#define PT_EXPECTED 0
@@ -1235,6 +1285,9 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
void set_all_slowpath(struct hfi1_devdata *dd);
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1254,16 +1307,24 @@ int hfi1_reset_device(int);
/* return the driver's idea of the logical OPA port state */
static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
{
- return ppd->lstate; /* use the cached value */
+ /*
+ * The driver does some processing from the time the logical
+ * link state is at INIT to the time the SM can be notified
+ * as such. Return IB_PORT_DOWN until the software state
+ * is ready.
+ */
+ if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP))
+ return IB_PORT_DOWN;
+ else
+ return ppd->lstate;
}
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
+static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
{
- return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf_dc_info(rhf))) << 4);
+ return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
}
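
A sketch of the renamed helper above: the low four SC bits come from the 9B LRH (ib_get_sc()), and the RHF's DC info bit supplies SC[4]. The parameters stand in for ib_get_sc() and rhf_dc_info():

#include <stdint.h>

static uint8_t get_sc5(uint8_t sc4, int dc_info_bit)
{
	return (sc4 & 0xf) | ((!!dc_info_bit) << 4); /* 5-bit service channel */
}
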
#define HFI1_JKEY_WIDTH 16
@@ -1597,9 +1658,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
+ if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) {
hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return bth1 & HFI1_FECN_SMASK;
+ return !!(bth1 & IB_FECN_SMASK);
}
return false;
}
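
Both congestion bits tested above live in BTH word 1; the removed HFI1_* macros earlier in this merge show the same positions (FECN bit 31, BECN bit 30). A standalone sketch:

#include <stdbool.h>
#include <stdint.h>

#define FECN_SMASK (1u << 31) /* forward ECN: receiver should return a CNP */
#define BECN_SMASK (1u << 30) /* backward ECN: sender should back off */

static bool has_ecn(uint32_t bth1)
{
	return bth1 & (FECN_SMASK | BECN_SMASK); /* take the slow path only when set */
}
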
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index f40864e9a3b2..4d6b9f82efa3 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -65,6 +65,7 @@
#include "verbs.h"
#include "aspm.h"
#include "affinity.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -139,7 +140,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
/* create one or more kernel contexts */
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
struct hfi1_pportdata *ppd;
struct hfi1_ctxtdata *rcd;
@@ -214,9 +215,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
u32 base;
if (dd->rcv_entries.nctxt_extra >
- dd->num_rcv_contexts - dd->first_user_ctxt)
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
- (dd->num_rcv_contexts - dd->first_user_ctxt));
+ (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
if (rcd) {
u32 rcvtids, max_entries;
@@ -238,27 +239,29 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* Calculate the context's RcvArray entry starting point.
* We do this here because we have to take into account all
* the RcvArray entries that previous context would have
- * taken and we have to account for any extra groups
- * assigned to the kernel or user contexts.
+ * taken and we have to account for any extra groups assigned
+ * to the static (kernel) or dynamic (vnic/user) contexts.
*/
- if (ctxt < dd->first_user_ctxt) {
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
if (ctxt < kctxt_ngroups) {
base = ctxt * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base = kctxt_ngroups +
(ctxt * dd->rcv_entries.ngroups);
+ }
} else {
- u16 ct = ctxt - dd->first_user_ctxt;
+ u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
kctxt_ngroups);
if (ct < dd->rcv_entries.nctxt_extra) {
base += ct * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base += dd->rcv_entries.nctxt_extra +
(ct * dd->rcv_entries.ngroups);
+ }
}
rcd->eager_base = base * dd->rcv_entries.group_size;
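
The base computation above gives the first nctxt_extra contexts in each pool one extra RcvArray group; a minimal sketch of the per-pool arithmetic (helper name invented for illustration):

#include <stdint.h>

static uint32_t pool_base(uint16_t ct, uint16_t ngroups, uint16_t nextra)
{
	if (ct < nextra)              /* this context gets one extra group */
		return ct * (ngroups + 1);
	return nextra + ct * ngroups; /* past all of the "extra" contexts */
}
/* eager_base = (groups preceding this pool + pool_base(...)) * group_size */
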
@@ -322,7 +325,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
- if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+ /* Applicable only for statically created kernel contexts */
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
GFP_KERNEL, numa);
if (!rcd->opstats)
@@ -482,6 +486,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
default_pkey_idx = 1;
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
+
if (loopback) {
hfi1_early_err(&pdev->dev,
"Faking data partition 0x8001 in idx %u\n",
@@ -585,7 +592,7 @@ static void enable_chip(struct hfi1_devdata *dd)
* Enable kernel ctxts' receive and receive interrupt.
* Other ctxts done as user opens and initializes them.
*/
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
@@ -679,6 +686,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+ dd->process_vnic_dma_send = hfi1_vnic_send_dma;
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
@@ -714,7 +722,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
}
/* dd->rcd can be NULL if early initialization failed */
- for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
* Set up the (kernel) rcvhdr queue and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
@@ -1078,11 +1086,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->uctxt_lock);
spin_lock_init(&dd->hfi1_diag_trans_lock);
spin_lock_init(&dd->sc_init_lock);
- spin_lock_init(&dd->dc8051_lock);
spin_lock_init(&dd->dc8051_memlock);
seqlock_init(&dd->sc2vl_lock);
spin_lock_init(&dd->sde_map_lock);
spin_lock_init(&dd->pio_map_lock);
+ mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
dd->int_counter = alloc_percpu(u64);
@@ -1425,6 +1433,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
+ /* Validate dev ids */
+ if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+ ent->device == PCI_DEVICE_ID_INTEL1)) {
+ hfi1_early_err(&pdev->dev,
+ "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
+ ret = -ENODEV;
+ goto bail;
+ }
+
/* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
if (ret)
@@ -1470,15 +1488,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto bail;
- if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
- ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
- ret = -ENODEV;
- goto clean_bail;
- }
-
/*
* Do device-specific initialization, function table setup, dd
* allocation, etc.
@@ -1497,6 +1506,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
+ /* setup vnic */
+ hfi1_vnic_setup(dd);
+
ret = hfi1_register_ib_device(dd);
/*
@@ -1530,6 +1542,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
+ hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1574,6 +1587,9 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
+ /* cleanup vnic */
+ hfi1_vnic_cleanup(dd);
+
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
@@ -1613,8 +1629,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
sizeof(u32));
- gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
- GFP_USER : GFP_KERNEL;
+ if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ gfp_flags = GFP_KERNEL;
+ else
+ gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_zalloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..232014d46f79 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -131,19 +131,24 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
set_up_vl15(dd, dd->vau, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_port_number =
- read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
- ppd->neighbor_guid,
- ppd->neighbor_type);
}
+ ppd->neighbor_guid =
+ read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
+ ppd->neighbor_type =
+ read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
+ DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
+ ppd->neighbor_port_number =
+ read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
+ DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+ ppd->neighbor_fm_security =
+ read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
+ DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
+ dd_dev_info(dd,
+ "Neighbor Guid %llx, Type %d, Port Num %d\n",
+ ppd->neighbor_guid, ppd->neighbor_type,
+ ppd->neighbor_port_number);
+
/* physical link went up */
ppd->linkup = 1;
ppd->offline_disabled_reason =
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 09cda3c35e82..836d00b04547 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include "mad.h"
#include "trace.h"
#include "qp.h"
+#include "vnic.h"
/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
pi->port_packet_format.supported =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
pi->port_packet_format.enabled =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
/* flit_control.interleave is (OPA V1, version .76):
* bits use
@@ -701,7 +704,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
+ pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
+ OPA_CAP_MASK3_IsEthOnFabricSupported);
+ /* Driver does not support mcast/collective configuration */
+ pi->opa_cap_mask &=
+ cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+ pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
+ << 3 | (HFI1_MCAST_NR & 0x7));
/* HFI supports a replay buffer 128 LTPs in size */
pi->replay_depth.buffer = 0x80;
@@ -1146,16 +1155,6 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->linkinit_reason =
(pi->partenforce_filterraw &
OPA_PI_MASK_LINKINIT_REASON);
- /* enable/disable SW pkey checking as per FM control */
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
- ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
-
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
- ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
/* Must be a valid unicast LID address. */
if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
@@ -1465,25 +1464,15 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
}
-static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
-{
- u64 *val = data;
-
- *val++ = read_csr(dd, SEND_SC2VLT0);
- *val++ = read_csr(dd, SEND_SC2VLT1);
- *val++ = read_csr(dd, SEND_SC2VLT2);
- *val++ = read_csr(dd, SEND_SC2VLT3);
- return 0;
-}
-
#define ILLEGAL_VL 12
/*
* filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
* for SC15, which must map to VL15). If we don't remap things this
* way it is possible for VL15 counters to increment when we try to
* send on a SC which is mapped to an invalid VL.
+ * When getting the table convert ILLEGAL_VL back to VL15.
*/
-static void filter_sc2vlt(void *data)
+static void filter_sc2vlt(void *data, bool set)
{
int i;
u8 *pd = data;
@@ -1491,8 +1480,14 @@ static void filter_sc2vlt(void *data)
for (i = 0; i < OPA_MAX_SCS; i++) {
if (i == 15)
continue;
- if ((pd[i] & 0x1f) == 0xf)
- pd[i] = ILLEGAL_VL;
+
+ if (set) {
+ if ((pd[i] & 0x1f) == 0xf)
+ pd[i] = ILLEGAL_VL;
+ } else {
+ if ((pd[i] & 0x1f) == ILLEGAL_VL)
+ pd[i] = 0xf;
+ }
}
}
@@ -1500,7 +1495,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
{
u64 *val = data;
- filter_sc2vlt(data);
+ filter_sc2vlt(data, true);
write_csr(dd, SEND_SC2VLT0, *val++);
write_csr(dd, SEND_SC2VLT1, *val++);
@@ -1512,6 +1507,19 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
return 0;
}
+static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
+{
+ u64 *val = (u64 *)data;
+
+ *val++ = read_csr(dd, SEND_SC2VLT0);
+ *val++ = read_csr(dd, SEND_SC2VLT1);
+ *val++ = read_csr(dd, SEND_SC2VLT2);
+ *val++ = read_csr(dd, SEND_SC2VLT3);
+
+ filter_sc2vlt((u64 *)data, false);
+ return 0;
+}
+
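
A standalone sketch of the filter_sc2vlt() round trip: on set, a request to map any SC other than SC15 to VL15 (0xf) is stored as ILLEGAL_VL; on get, ILLEGAL_VL is reported back as VL15 so the FM sees the value it wrote:

#include <stdint.h>

#define ILLEGAL_VL 12

static uint8_t filter_entry(uint8_t vl, int set)
{
	if (set && (vl & 0x1f) == 0xf)
		return ILLEGAL_VL; /* store VL15 as an unused VL */
	if (!set && (vl & 0x1f) == ILLEGAL_VL)
		return 0xf;        /* report it back as VL15 */
	return vl;
}
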
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
@@ -1986,31 +1994,6 @@ struct opa_pma_mad {
u8 data[2024];
} __packed;
-struct opa_class_port_info {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- __be32 cap_mask2_resp_time;
-
- u8 redirect_gid[16];
- __be32 redirect_tc_fl;
- __be32 redirect_lid;
- __be32 redirect_sl_qp;
- __be32 redirect_qkey;
-
- u8 trap_gid[16];
- __be32 trap_tc_fl;
- __be32 trap_lid;
- __be32 trap_hl_qp;
- __be32 trap_qkey;
-
- __be16 trap_pkey;
- __be16 redirect_pkey;
-
- u8 trap_sl_rsvd;
- u8 reserved[3];
-} __packed;
-
struct opa_port_status_req {
__u8 port_num;
__u8 reserved[3];
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index c81556e84831..93faf86d54b6 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -553,7 +553,7 @@ pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
dd_dev_info(dd,
- "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n",
words, ret);
}
return ret;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 615be68e40b3..ed72b5aca139 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
+ int req_type = type;
dma_addr_t dma;
unsigned long flags;
u64 reg;
@@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are dynamically allocated.
+ * Hence, pick a user context for VNIC.
+ */
+ if (type == SC_VNIC)
+ type = SC_USER;
+
spin_lock_irqsave(&dd->sc_lock, flags);
ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
if (ret) {
@@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are used by the kernel driver.
+ * Hence, mark them as kernel contexts.
+ */
+ if (req_type == SC_VNIC) {
+ dd->send_contexts[sw_index].type = SC_KERNEL;
+ type = SC_KERNEL;
+ }
+
sci = &dd->send_contexts[sw_index];
sci->sc = sc;
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 867e5ffc3595..99ca5edb0b43 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,7 +1,7 @@
#ifndef _PIO_H
#define _PIO_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,12 @@
#define SC_USER 3 /* must be the last one: it may take all left */
#define SC_MAX 4 /* count of send context types */
+/*
+ * SC_VNIC types are allocated (dynamically) from the user context pool,
+ * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL).
+ */
+#define SC_VNIC SC_MAX
+
/* invalid send context index */
#define INVALID_SCI 0xff
@@ -195,7 +201,7 @@ struct sc_config_sizes {
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | ksc[n] -> sc n |
+ * |--------------------------| --/ | ksc[n-1] -> sc n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
@@ -208,21 +214,21 @@ struct sc_config_sizes {
* |--------------------------| |--------------------|
* | map[vls - 1] |- | * |
* +--------------------------+ \- |--------------------|
- * \- | ksc[m] -> sc m+n |
+ * \- | ksc[m-1] -> sc m+n |
* \ +--------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | ksc[0] -> sc 1+m+n |
- * \- |--------------------|
- * >| ksc[1] -> sc 2+m+n |
- * |--------------------|
- * | * |
- * |--------------------|
- * | ksc[o] -> sc o+m+n |
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | ksc[0] -> sc 1+m+n |
+ * \- |----------------------|
+ * >| ksc[1] -> sc 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | ksc[o-1] -> sc o+m+n |
+ * +----------------------+
*
*/
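Taken together, the pio.c and pio.h changes above make SC_VNIC a pure request-time alias: the hardware slot is carved out of the SC_USER pool, while the software bookkeeping records the context as SC_KERNEL. A caller-side sketch, assuming only the sc_alloc() behavior added above (variable names are illustrative):

	/* Allocate a VNIC send context; illustrative names only. */
	struct send_context *sc;

	sc = sc_alloc(dd, SC_VNIC, rcvhdrqentsize, numa_node);
	if (!sc)
		return -ENOMEM;
	/*
	 * On return the hardware context came from the SC_USER pool, but
	 * dd->send_contexts[sc->sw_index].type is SC_KERNEL, so the rest
	 * of the driver treats it as a kernel context.
	 */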
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 7382be11afca..9b3333fd9dc0 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -773,7 +773,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT);
+ ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
/* Don't try to send ACKs if the link isn't ACTIVE */
@@ -994,12 +994,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
return;
/* Find out where the BTH is */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
WARN_ON(!qp->s_rdma_ack_cnt);
@@ -1028,13 +1028,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1076,12 +1080,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
rvt_put_swqe(wqe);
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -1516,7 +1524,7 @@ read_middle:
if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
goto ack_done;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 0 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1540,7 +1548,7 @@ read_middle:
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1922,7 +1930,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int diff;
struct ib_reth *reth;
unsigned long flags;
- int ret, is_fecn = 0;
+ int ret;
+ bool is_fecn = false;
bool copy_last = false;
u32 rkey;
@@ -1934,7 +1943,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2065,7 +2074,7 @@ no_immediate_data:
wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (bth0 >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2378,7 +2387,7 @@ void hfi1_rc_hdrerr(
return;
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
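The rc.c conversions above replace open-coded BTH field extraction with named accessors. Their behavior can be inferred directly from the expressions they replace; a sketch of the presumed equivalents (the real definitions live in the shared IB headers, not in this diff):

	/* Inferred from the removed expressions; not the header source. */
	static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr)
	{
		return (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
	}

	static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
	{
		return (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
	}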
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index aa15bcbfb079..ccf8d8037355 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -238,18 +238,18 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
qp->alt_ah_attr.grh.dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
+ if (ib_get_slid(hdr) != qp->alt_ah_attr.dlid ||
ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -273,18 +273,18 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
qp->remote_ah_attr.grh.dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if (ib_get_slid(hdr) != qp->remote_ah_attr.dlid ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
@@ -775,7 +775,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
if (qp->s_flags & RVT_S_ECN) {
qp->s_flags &= ~RVT_S_ECN;
/* we recently received a FECN, so return a BECN */
- bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
+ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
}
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
@@ -784,23 +784,29 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+void hfi1_do_send_from_rvt(struct rvt_qp *qp)
+{
+ hfi1_do_send(qp, false);
+}
+
void _hfi1_do_send(struct work_struct *work)
{
struct iowait *wait = container_of(work, struct iowait, iowork);
struct rvt_qp *qp = iowait_to_qp(wait);
- hfi1_do_send(qp);
+ hfi1_do_send(qp, true);
}
/**
* hfi1_do_send - perform a send on a QP
* @qp: a pointer to the QP
+ * @in_thread: true if in a workqueue thread
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
-void hfi1_do_send(struct rvt_qp *qp)
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
@@ -868,8 +874,10 @@ void hfi1_do_send(struct rvt_qp *qp)
qp->s_hdrwords = 0;
/* allow other tasks to run */
if (unlikely(time_after(jiffies, timeout))) {
- if (workqueue_congested(cpu,
- ps.ppd->hfi1_wq)) {
+ if (!in_thread ||
+ workqueue_congested(
+ cpu,
+ ps.ppd->hfi1_wq)) {
spin_lock_irqsave(
&qp->s_lock,
ps.flags);
@@ -882,11 +890,9 @@ void hfi1_do_send(struct rvt_qp *qp)
*ps.ppd->dd->send_schedule);
return;
}
- if (!irqs_disabled()) {
- cond_resched();
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- }
+ cond_resched();
+ this_cpu_inc(
+ *ps.ppd->dd->send_schedule);
timeout = jiffies + (timeout_int) / 8;
}
spin_lock_irqsave(&qp->s_lock, ps.flags);
@@ -909,8 +915,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
last = qp->s_last;
old_last = last;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
@@ -920,7 +928,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
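The ruc.c changes above split hfi1_do_send() into two entry points that differ only in the new in_thread flag, which gates the voluntary yield. A call-path summary, using only names from this diff:

	/*
	 * rdmavt callback path (may run with IRQs disabled; never
	 * cond_resched()s, always defers to the workqueue on timeout):
	 *     driver_f.do_send -> hfi1_do_send_from_rvt(qp)
	 *                      -> hfi1_do_send(qp, false)
	 *
	 * workqueue path (own thread; may cond_resched() on timeout
	 * when the workqueue is not congested):
	 *     _hfi1_do_send(work) -> hfi1_do_send(qp, true)
	 */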
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 5cde1ecda0fe..bfd0d5187e9b 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
@@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
continue;
}
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ if (vl >= ARRAY_SIZE(rht_node->map)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
@@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;
- ret = rhashtable_insert_fast(&dd->sdma_rht,
+ ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
@@ -1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
if (cpumask_test_cpu(cpu, mask))
continue;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
@@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
if (empty) {
- ret = rhashtable_remove_fast(&dd->sdma_rht,
+ ret = rhashtable_remove_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);
@@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s,
struct sdma_rht_node *rht_node;
int i, j;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params);
if (!rht_node)
return;
@@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
synchronize_rcu();
kfree(dd->per_sdma);
dd->per_sdma = NULL;
+
+ if (dd->sdma_rht) {
+ rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
+ kfree(dd->sdma_rht);
+ dd->sdma_rht = NULL;
+ }
}
/**
@@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
{
unsigned this_idx;
struct sdma_engine *sde;
+ struct rhashtable *tmp_sdma_rht;
u16 descq_cnt;
void *curr_head;
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
size_t num_engines = dd->chip_sdma_engines;
+ int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
HFI1_CAP_CLEAR(SDMA_AHG);
@@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
/* alloc memory for array of send engines */
dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
if (!dd->per_sdma)
- return -ENOMEM;
+ return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
if (!sdma_desct_intr)
@@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->flags |= HFI1_HAS_SEND_DMA;
dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
dd->num_sdma = num_engines;
- if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
+ ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
+ if (ret < 0)
goto bail;
- if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
+ if (!tmp_sdma_rht) {
+ ret = -ENOMEM;
goto bail;
+ }
+
+ ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
+ if (ret < 0)
+ goto bail;
+ dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
bail:
sdma_clean(dd, num_engines);
- return -ENOMEM;
+ return ret;
}
/**
@@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
- rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
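The sdma.c changes above convert dd->sdma_rht from an embedded struct rhashtable to a dynamically allocated pointer, so teardown can use a NULL check to tell whether the table was ever initialized, and the free moves from sdma_exit() into sdma_clean(). The pattern in isolation (a sketch, not the driver code):

	/* init: publish the table only after rhashtable_init() succeeds */
	struct rhashtable *rht = kzalloc(sizeof(*rht), GFP_KERNEL);

	if (!rht)
		return -ENOMEM;
	ret = rhashtable_init(rht, &sdma_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	dd->sdma_rht = rht;

	/* teardown: the NULL check makes cleanup safe to call anytime */
	if (dd->sdma_rht) {
		rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
		kfree(dd->sdma_rht);
		dd->sdma_rht = NULL;
	}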
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 21f1e2834f37..64f10b8b5db8 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -966,34 +966,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | sde[n] -> eng n |
+ * |--------------------------| --/ | sde[n-1] -> eng n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
- * |--------------------------| +--------------------+
- * | map[1] |--- | mask |
- * |--------------------------| \---- |--------------------|
- * | * | \-- | sde[0] -> eng 1+n |
- * | * | \---- |--------------------|
- * | * | \->| sde[1] -> eng 2+n |
- * |--------------------------| |--------------------|
- * | map[vls - 1] |- | * |
- * +--------------------------+ \- |--------------------|
- * \- | sde[m] -> eng m+n |
- * \ +--------------------+
+ * |--------------------------| +---------------------+
+ * | map[1] |--- | mask |
+ * |--------------------------| \---- |---------------------|
+ * | * | \-- | sde[0] -> eng 1+n |
+ * | * | \---- |---------------------|
+ * | * | \->| sde[1] -> eng 2+n |
+ * |--------------------------| |---------------------|
+ * | map[vls - 1] |- | * |
+ * +--------------------------+ \- |---------------------|
+ * \- | sde[m-1] -> eng m+n |
+ * \ +---------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | sde[0] -> eng 1+m+n|
- * \- |--------------------|
- * >| sde[1] -> eng 2+m+n|
- * |--------------------|
- * | * |
- * |--------------------|
- * | sde[o] -> eng o+m+n|
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | sde[0] -> eng 1+m+n |
+ * \- |----------------------|
+ * >| sde[1] -> eng 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | sde[o-1] -> eng o+m+n|
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 919a5474e651..50d140d25e38 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -542,7 +542,7 @@ static ssize_t show_nctxts(struct device *device,
* give a more accurate picture of total contexts available.
*/
return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_rcv_contexts - dd->first_user_ctxt,
+ min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt,
(u32)dd->sc_sizes[SC_USER].count));
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index e86798af6903..eafae487face 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
- u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (lnh == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 382fcda3a5f6..090f6b506953 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__entry->pkey =
be32_to_cpu(ohdr->bth[0]) & 0xffff;
__entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
+ IB_FECN_MASK;
__entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
+ IB_BECN_MASK;
__entry->qpn =
be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
__entry->a =
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index d308454af7fd..deac77ddaeab 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+#ifdef CONFIG_FAULT_INJECTION
+TRACE_EVENT(hfi1_fault_opcode,
+ TP_PROTO(struct rvt_qp *qp, u8 opcode),
+ TP_ARGS(qp, opcode),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, opcode)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->opcode = opcode;
+ ),
+ TP_printk("[%s] qpn 0x%x opcode 0x%x",
+ __get_str(dev), __entry->qpn, __entry->opcode)
+);
+
+TRACE_EVENT(hfi1_fault_packet,
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+#endif
+
#endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 5ea5005f9f41..8ce476570462 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
-DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 415d6be42c5d..2c9ac57657d3 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -633,6 +633,49 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
+TRACE_EVENT(
+ hfi1_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 4b2a8400c823..f0bdb100e005 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Compare the PSN versus the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
@@ -433,7 +433,7 @@ no_immediate_data:
wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -528,7 +528,7 @@ rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -555,7 +555,7 @@ rdma_last_imm:
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 13ea4eb6ef3d..45bc3f04793e 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -537,7 +537,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
bth0 = pkey | (IB_OPCODE_CNP << 24);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
+ ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -680,7 +680,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc(hdr, packet->rhf);
+ u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
@@ -688,18 +688,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = be16_to_cpu(hdr->lrh[1]);
+ dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = be16_to_cpu(hdr->lrh[3]);
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ slid = ib_get_slid(hdr);
+ pkey = ib_bth_get_pkey(ohdr);
+ opcode = ib_bth_get_opcode(ohdr);
+ sl = ib_get_sl(hdr);
+ extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- opcode &= 0xff;
-
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4a8295399e71..35c6e7ec8ad6 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -200,8 +200,9 @@ int hfi1_user_exp_rcv_init(struct file *fp)
if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
- fd->invalid_tids = kzalloc(uctxt->expected_count *
- sizeof(u32), GFP_KERNEL);
+ fd->invalid_tids = kcalloc(uctxt->expected_count,
+ sizeof(*fd->invalid_tids),
+ GFP_KERNEL);
if (!fd->invalid_tids) {
ret = -ENOMEM;
goto done;
@@ -578,6 +579,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
u32 *tidinfo;
unsigned tididx;
+ if (unlikely(tinfo->tidcnt > fd->tid_used))
+ return -EINVAL;
+
tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
sizeof(tidinfo[0]) * tinfo->tidcnt);
if (IS_ERR(tidinfo))
@@ -607,7 +611,7 @@ int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
unsigned long *ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
u32 *array;
int ret = 0;
@@ -1011,8 +1015,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
* process in question.
*/
ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
fdata->invalid_tid_idx++;
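Both user_exp_rcv.c hunks above compute the same event-flag slot, now offset from first_dyn_alloc_ctxt instead of first_user_ctxt. A hypothetical helper showing the indexing scheme (not present in the diff; for illustration only):

	static unsigned long *ctxt_event_flags(struct hfi1_devdata *dd,
					       u16 ctxt, u16 subctxt)
	{
		/* HFI1_MAX_SHARED_CTXTS event slots per receive context */
		return dd->events +
		       ((ctxt - dd->first_dyn_alloc_ctxt) *
			HFI1_MAX_SHARED_CTXTS) + subctxt;
	}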
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 68295a12b771..e341e6dcc388 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
{
unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
+ unsigned int usr_ctxts =
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
bool can_lock = capable(CAP_IPC_LOCK);
/*
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index e6811c4edc73..0749689d7643 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -376,7 +376,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
{
struct hfi1_filedata *fd;
int ret = 0;
- unsigned memsize;
char buf[64];
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
@@ -401,13 +400,15 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
if (!pq)
goto pq_nomem;
- memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kzalloc(memsize, GFP_KERNEL);
+ pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+ sizeof(*pq->reqs),
+ GFP_KERNEL);
if (!pq->reqs)
goto pq_reqs_nomem;
- memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
- pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
+ pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+ sizeof(*pq->req_in_use),
+ GFP_KERNEL);
if (!pq->req_in_use)
goto pq_reqs_no_in_use;
@@ -442,8 +443,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
if (!cq)
goto cq_nomem;
- memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
- cq->comps = vmalloc_user(memsize);
+ cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
+ * hfi1_sdma_comp_ring_size));
if (!cq->comps)
goto cq_comps_nomem;
@@ -704,7 +705,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
INIT_LIST_HEAD(&req->iovs[i].list);
- memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
+ memcpy(&req->iovs[i].iov,
+ iovec + idx++,
+ sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->status = ret;
@@ -1615,9 +1618,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
{
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
- cq->comps[idx].status = state;
if (state == ERROR)
cq->comps[idx].errcode = -ret;
+ smp_wmb(); /* make sure errcode is visible first */
+ cq->comps[idx].status = state;
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 222315fadab1..7174a18ebaac 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -60,6 +60,8 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
+#include "debugfs.h"
+#include "vnic.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -297,6 +299,22 @@ static inline bool wss_exceeds_threshold(void)
}
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+
+/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -501,6 +519,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL;
}
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_FAULT_INJECTION
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+ /*
+ * In order to drop non-IB traffic we
+	 * set PbcInsertHcrc to NONE (0x2).
+ * The packet will still be delivered
+ * to the receiving node but a
+ * KHdrHCRCErr (KDETH packet with a bad
+ * HCRC) will be triggered and the
+ * packet will not be delivered to the
+ * correct context.
+ */
+ pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+ else
+ /*
+ * In order to drop regular verbs
+ * traffic we set the PbcTestEbp
+ * flag. The packet will still be
+ * delivered to the receiving node but
+	 * a 'late ebp error' will be
+	 * triggered and the packet dropped.
+ */
+ pbc |= PBC_TEST_EBP;
+#endif
+ return pbc;
+}
+
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@@ -525,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
u16 lid;
/* Check for GRH */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
@@ -544,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
trace_input_ibhdr(rcd->dd, hdr);
- opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ opcode = ib_bth_get_opcode(packet->ohdr);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* Get the destination QP number. */
qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = be16_to_cpu(hdr->lrh[1]);
+ lid = ib_get_dlid(hdr);
if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
struct rvt_mcast *mcast;
@@ -557,7 +604,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (lnh != HFI1_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
@@ -583,6 +630,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
goto drop;
}
+ if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+ true))) {
+ rcu_read_unlock();
+ goto drop;
+ }
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
if (likely(packet_handler))
@@ -781,7 +833,6 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
-
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
@@ -800,7 +851,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
int ret;
@@ -809,12 +859,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd,
- pbc_flags,
+ pbc,
qp->srate_mbps,
vl,
plen);
@@ -917,7 +971,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u64 pbc_flags = 0;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@@ -942,9 +995,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ struct verbs_txreq *tx = ps->s_txreq;
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@@ -1173,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hdr = &ps->s_txreq->phdr.hdr;
/* locate the pkey within the headers */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
else
@@ -1220,17 +1278,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- u16 ver = dd->dc8051_ver;
+ u32 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
- rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
- (u64)dc8051_ver_min(ver);
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
+ ((u64)(dc8051_ver_min(ver)) << 16) |
+ (u64)dc8051_ver_patch(ver);
+
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
+ IB_DEVICE_MEM_MGT_EXTENSIONS |
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1504,10 +1565,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
- u16 ver = dd_from_dev(dev)->dc8051_ver;
+ u32 ver = dd_from_dev(dev)->dc8051_ver;
- snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
- dc8051_ver_min(ver));
+ snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
static const char * const driver_cntr_names[] = {
@@ -1524,6 +1585,7 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
+static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
static const char **dev_cntr_names;
static const char **port_cntr_names;
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
@@ -1578,6 +1640,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
{
int i, err;
+ mutex_lock(&cntr_names_lock);
if (!cntr_names_initialized) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1586,8 +1649,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
num_driver_cntrs,
&num_dev_cntrs,
&dev_cntr_names);
- if (err)
+ if (err) {
+ mutex_unlock(&cntr_names_lock);
return NULL;
+ }
for (i = 0; i < num_driver_cntrs; i++)
dev_cntr_names[num_dev_cntrs + i] =
@@ -1601,10 +1666,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
if (err) {
kfree(dev_cntr_names);
dev_cntr_names = NULL;
+ mutex_unlock(&cntr_names_lock);
return NULL;
}
cntr_names_initialized = 1;
}
+ mutex_unlock(&cntr_names_lock);
if (!port_num)
return rdma_alloc_hw_stats_struct(
@@ -1707,6 +1774,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
+ ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
+ ibdev->free_rdma_netdev = hfi1_vnic_free_rn;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1751,7 +1820,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
@@ -1823,9 +1892,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+ mutex_lock(&cntr_names_lock);
kfree(dev_cntr_names);
kfree(port_cntr_names);
+ dev_cntr_names = NULL;
+ port_cntr_names = NULL;
cntr_names_initialized = 0;
+ mutex_unlock(&cntr_names_lock);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -1859,7 +1932,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc(hdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
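The verbs.c counter-name changes above wrap both the lazy one-time initialization in alloc_hw_stats() and the teardown in hfi1_unregister_ib_device() with cntr_names_lock, and teardown now resets the pointers so a later device registration can re-run the init. The resulting lifetime, reduced to a sketch (build_cntr_names is an illustrative stand-in for the init helper, which is not visible in this diff):

	/* Lazy init, serialized against concurrent init and teardown. */
	mutex_lock(&cntr_names_lock);
	if (!cntr_names_initialized) {
		if (build_cntr_names(dd)) {	/* illustrative helper */
			mutex_unlock(&cntr_names_lock);
			return NULL;
		}
		cntr_names_initialized = 1;
	}
	mutex_unlock(&cntr_names_lock);

	/* Teardown: reset state so a re-registered device can re-init. */
	mutex_lock(&cntr_names_lock);
	kfree(dev_cntr_names);
	kfree(port_cntr_names);
	dev_cntr_names = NULL;
	port_cntr_names = NULL;
	cntr_names_initialized = 0;
	mutex_unlock(&cntr_names_lock);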
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 3a0b589e41c2..46b00ed9f2dc 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -195,6 +195,11 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault_opcode *fault_opcode;
+ struct fault_packet *fault_packet;
+ bool fault_suppress_err;
+#endif
#endif
};
@@ -350,7 +355,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
void _hfi1_do_send(struct work_struct *work);
-void hfi1_do_send(struct rvt_qp *qp);
+void hfi1_do_send_from_rvt(struct rvt_qp *qp);
+
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
new file mode 100644
index 000000000000..e2c455299b53
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -0,0 +1,184 @@
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/opa_vnic.h>
+#include "hfi.h"
+#include "sdma.h"
+
+#define HFI1_VNIC_MAX_TXQ 16
+#define HFI1_VNIC_MAX_PAD 12
+
+/* L2 header definitions */
+#define HFI1_L2_TYPE_OFFSET 0x7
+#define HFI1_L2_TYPE_SHFT 0x5
+#define HFI1_L2_TYPE_MASK 0x3
+
+#define HFI1_GET_L2_TYPE(hdr) \
+ ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \
+ HFI1_L2_TYPE_MASK)
+
+/* L4 type definitions */
+#define HFI1_L4_TYPE_OFFSET 8
+
+#define HFI1_GET_L4_TYPE(data) \
+ (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET))
+
+/* L4 header definitions */
+#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN
+
+#define HFI1_VNIC_GET_L4_HDR(data) \
+ (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET)))
+
+#define HFI1_VNIC_GET_VESWID(data) \
+ (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF)
+
+/* Service class */
+#define HFI1_VNIC_SC_OFFSET_LOW 6
+#define HFI1_VNIC_SC_OFFSET_HI 7
+#define HFI1_VNIC_SC_SHIFT 4
+
+#define HFI1_VNIC_MAX_QUEUE 16
+
+/**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd: device data pointer
+ * @sde: sdma engine
+ * @vinfo: vnic info pointer
+ * @wait: iowait structure
+ * @stx: sdma tx request
+ * @state: vnic Tx ring SDMA state
+ * @q_idx: vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+ struct hfi1_devdata *dd;
+ struct sdma_engine *sde;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct iowait wait;
+ struct sdma_txreq stx;
+ unsigned int state;
+ u8 q_idx;
+};
+
+/**
+ * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
+ * @idx: queue index
+ * @vinfo: pointer to vport information
+ * @netdev: network device
+ * @napi: netdev napi structure
+ * @skbq: queue of received socket buffers
+ */
+struct hfi1_vnic_rx_queue {
+ u8 idx;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct napi_struct napi;
+ struct sk_buff_head skbq;
+};
+
+/**
+ * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information
+ * @dd: device data pointer
+ * @netdev: net device pointer
+ * @flags: state flags
+ * @lock: vport lock
+ * @num_tx_q: number of transmit queues
+ * @num_rx_q: number of receive queues
+ * @vesw_id: virtual switch id
+ * @rxq: Array of receive queues
+ * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
+ */
+struct hfi1_vnic_vport_info {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ unsigned long flags;
+
+ /* Lock used around state updates */
+ struct mutex lock;
+
+ u8 num_tx_q;
+ u8 num_rx_q;
+ u16 vesw_id;
+ struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
+
+ struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE];
+ struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ];
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(vinfo->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(vinfo->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(vinfo->netdev, format, ## arg)
+
+/* vnic hfi1 internal functions */
+void hfi1_vnic_setup(struct hfi1_devdata *dd);
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx);
+
+/* vnic rdma netdev operations */
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+void hfi1_vnic_free_rn(struct net_device *netdev);
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
+
+#endif /* _HFI1_VNIC_H */
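The accessor macros in vnic.h above pull fields straight out of a raw bypass buffer. A short usage fragment, assuming buf points at the start of the OPA VNIC L2 header (the variable names are illustrative):

	u8 l2_type  = HFI1_GET_L2_TYPE(buf);	/* 2-bit type in byte 7 */
	u8 l4_type  = HFI1_GET_L4_TYPE(buf);	/* type byte at offset 8 */
	u16 vesw_id = HFI1_VNIC_GET_VESWID(buf);/* low 12 bits of L4 hdr */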
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
new file mode 100644
index 000000000000..392f4d57f3e3
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -0,0 +1,907 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC functionality
+ */
+
+#include <linux/io.h>
+#include <linux/if_vlan.h>
+
+#include "vnic.h"
+
+#define HFI_TX_TIMEOUT_MS 1000
+
+#define HFI1_VNIC_RCV_Q_SIZE 1024
+
+#define HFI1_VNIC_UP 0
+
+static DEFINE_SPINLOCK(vport_cntr_lock);
+
+static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops = 0;
+ int ret;
+
+ ret = hfi1_init_ctxt(uctxt->sc);
+ if (ret)
+ goto done;
+
+ uctxt->do_interrupt = &handle_receive_interrupt;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
+
+ if (uctxt->rcvhdrtail_kvaddr)
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
+
+ uctxt->is_vnic = true;
+done:
+ return ret;
+}
+
+static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ unsigned int ctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
+ if (!dd->rcd[ctxt])
+ break;
+
+ if (ctxt == dd->num_rcv_contexts)
+ return -EBUSY;
+
+ uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
+ if (!uctxt) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ uctxt->seq_cnt = 1;
+
+ /* Allocate and enable a PIO send context */
+ uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
+ uctxt->numa_id);
+
+ ret = uctxt->sc ? 0 : -ENOMEM;
+ if (ret)
+ goto bail;
+
+ dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
+ uctxt->sc->sw_index, uctxt->sc->hw_context);
+ ret = sc_enable(uctxt->sc);
+ if (ret)
+ goto bail;
+
+ if (dd->num_msix_entries)
+ hfi1_set_vnic_msix_info(uctxt);
+
+ hfi1_stats.sps_ctxts++;
+ dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
+ *vnic_ctxt = uctxt;
+
+ return ret;
+bail:
+ /*
+ * hfi1_free_ctxtdata() also releases send_context
+ * structure if uctxt->sc is not null
+ */
+ dd->rcd[uctxt->ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
+ return ret;
+}
+
+static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned long flags;
+
+ dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
+ flush_wc();
+
+ if (dd->num_msix_entries)
+ hfi1_reset_vnic_msix_info(uctxt);
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ /*
+ * VNIC contexts are allocated from user context pool.
+ * Release them back to user context pool.
+ *
+ * Reset context integrity checks to default.
+ * (writes to CSRs probably belong in chip.c)
+ */
+ write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
+ hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
+ sc_disable(uctxt->sc);
+
+ dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ dd->rcd[uctxt->ctxt] = NULL;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+
+ hfi1_stats.sps_ctxts--;
+ hfi1_free_ctxtdata(dd, uctxt);
+}
+
+void hfi1_vnic_setup(struct hfi1_devdata *dd)
+{
+ idr_init(&dd->vnic.vesw_idr);
+}
+
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
+{
+ idr_destroy(&dd->vnic.vesw_idr);
+}
+
+#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
+ u64 *src64, *dst64; \
+ for (src64 = &qstats->x_grp.unicast, \
+ dst64 = &stats->x_grp.unicast; \
+ dst64 <= &stats->x_grp.s_1519_max;) { \
+ *dst64++ += *src64++; \
+ } \
+ } while (0)
+
+/* hfi1_vnic_update_stats - update statistics */
+static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
+ struct opa_vnic_stats *stats)
+{
+ struct net_device *netdev = vinfo->netdev;
+ u8 i;
+
+ /* add tx counters on different queues */
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
+ stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
+ stats->tx_drop_state += qstats->tx_drop_state;
+ stats->tx_dlid_zero += qstats->tx_dlid_zero;
+
+ SUM_GRP_COUNTERS(stats, qstats, tx_grp);
+ stats->netstats.tx_packets += qnstats->tx_packets;
+ stats->netstats.tx_bytes += qnstats->tx_bytes;
+ }
+
+ /* add rx counters on different queues */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
+ stats->netstats.rx_nohandler += qnstats->rx_nohandler;
+ stats->rx_drop_state += qstats->rx_drop_state;
+ stats->rx_oversize += qstats->rx_oversize;
+ stats->rx_runt += qstats->rx_runt;
+
+ SUM_GRP_COUNTERS(stats, qstats, rx_grp);
+ stats->netstats.rx_packets += qnstats->rx_packets;
+ stats->netstats.rx_bytes += qnstats->rx_bytes;
+ }
+
+ stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
+ stats->netstats.tx_carrier_errors +
+ stats->tx_drop_state + stats->tx_dlid_zero;
+ stats->netstats.tx_dropped = stats->netstats.tx_errors;
+
+ stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
+ stats->netstats.rx_nohandler +
+ stats->rx_drop_state + stats->rx_oversize +
+ stats->rx_runt;
+ stats->netstats.rx_dropped = stats->netstats.rx_errors;
+
+ netdev->stats.tx_packets = stats->netstats.tx_packets;
+ netdev->stats.tx_bytes = stats->netstats.tx_bytes;
+ netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
+ netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
+ netdev->stats.tx_errors = stats->netstats.tx_errors;
+ netdev->stats.tx_dropped = stats->netstats.tx_dropped;
+
+ netdev->stats.rx_packets = stats->netstats.rx_packets;
+ netdev->stats.rx_bytes = stats->netstats.rx_bytes;
+ netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
+ netdev->stats.multicast = stats->rx_grp.mcastbcast;
+ netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
+ netdev->stats.rx_errors = stats->netstats.rx_errors;
+ netdev->stats.rx_dropped = stats->netstats.rx_dropped;
+}
+
+/* update_len_counters - update pkt's len histogram counters */
+static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
+ int len)
+{
+ /* account for 4 byte FCS */
+ if (len >= 1515)
+ grp->s_1519_max++;
+ else if (len >= 1020)
+ grp->s_1024_1518++;
+ else if (len >= 508)
+ grp->s_512_1023++;
+ else if (len >= 252)
+ grp->s_256_511++;
+ else if (len >= 124)
+ grp->s_128_255++;
+ else if (len >= 61)
+ grp->s_65_127++;
+ else
+ grp->s_64++;
+}
+
+/* hfi1_vnic_update_tx_counters - update transmit counters */
+static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.tx_packets++;
+ stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(tx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ tx_grp->mcastbcast++;
+ else
+ tx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ tx_grp->vlan++;
+ else
+ tx_grp->untagged++;
+}
+
+/* hfi1_vnic_update_rx_counters - update receive counters */
+static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.rx_packets++;
+ stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(rx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ rx_grp->mcastbcast++;
+ else
+ rx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ rx_grp->vlan++;
+ else
+ rx_grp->untagged++;
+}
+
+/* ndo_get_stats64 handler, overloaded for the opa_vnic implementation:
+ * the stats argument is the leading member of a larger opa_vnic_stats */
+static void hfi1_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_update_stats(vinfo, vstats);
+}
+
+static u64 create_bypass_pbc(u32 vl, u32 dw_len)
+{
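+ /* build a PBC for a bypass packet: no HCRC, insert the bypass ICRC,
+ * request a credit return, and encode the VL and dword length */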
+ u64 pbc;
+
+ pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
+ | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
+ | PBC_PACKET_BYPASS
+ | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
+ | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
+
+ return pbc;
+}
+
+/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
+static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
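+ /*
+ * Note: stop the queue first and restart it only if descriptors are
+ * still available; stopping before the check closes the race with the
+ * sdma wakeup callback, which only wakes a stopped queue.
+ */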
+ netif_stop_subqueue(vinfo->netdev, q_idx);
+ if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+ return;
+
+ netif_start_subqueue(vinfo->netdev, q_idx);
+}
+
+static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ u8 pad_len, q_idx = skb->queue_mapping;
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct opa_vnic_skb_mdata *mdata;
+ u32 pkt_len, total_len;
+ int err = -EINVAL;
+ u64 pbc;
+
+ v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
+ if (unlikely(!netif_oper_up(netdev))) {
+ vinfo->stats[q_idx].tx_drop_state++;
+ goto tx_finish;
+ }
+
+ /* take out the metadata */
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ skb_pull(skb, sizeof(*mdata));
+ if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
+ vinfo->stats[q_idx].tx_dlid_zero++;
+ goto tx_finish;
+ }
+
+ /* add tail padding (for 8-byte size alignment) and icrc */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
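+ /*
+ * Example, assuming OPA_VNIC_ICRC_TAIL_LEN is 5 (4-byte ICRC plus a
+ * 1-byte tail): for skb->len = 60, -(60 + 5) & 0x7 = 7, so pad_len
+ * becomes 7 + 5 = 12 and the 72-byte total is 8-byte aligned.
+ */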
+
+ /*
+ * pkt_len is how much data we have to write, in dwords, including
+ * header and data. total_len is the length of the packet in dwords
+ * plus the PBC, and should not include the CRC.
+ */
+ pkt_len = (skb->len + pad_len) >> 2;
+ total_len = pkt_len + 2; /* PBC + packet */
+
+ pbc = create_bypass_pbc(mdata->vl, total_len);
+
+ skb_get(skb);
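+ /* extra reference: one copy is released by the dma send/completion
+ * path, the other below once the tx counters have been updated */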
+ v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
+ err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
+ if (unlikely(err)) {
+ if (err == -ENOMEM)
+ vinfo->stats[q_idx].netstats.tx_fifo_errors++;
+ else if (err != -EBUSY)
+ vinfo->stats[q_idx].netstats.tx_carrier_errors++;
+ }
+ /* remove the header before updating tx counters */
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ if (unlikely(err == -EBUSY)) {
+ hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_BUSY;
+ }
+
+tx_finish:
+ /* update tx counters */
+ hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+static u16 hfi1_vnic_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ struct sdma_engine *sde;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
+ return sde->this_idx;
+}
+
+/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
+static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
+ struct sk_buff *skb)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
+ int rc = -EFAULT;
+
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ /* validate packet length */
+ if (unlikely(skb->len > max_len))
+ vinfo->stats[rxq->idx].rx_oversize++;
+ else if (unlikely(skb->len < ETH_ZLEN))
+ vinfo->stats[rxq->idx].rx_runt++;
+ else
+ rc = 0;
+ return rc;
+}
+
+static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+{
+ unsigned char *pad_info;
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&rxq->skbq);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* remove tail padding and icrc; the low 3 bits of the last byte
+ * carry the alignment pad length written by the transmit side */
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ return skb;
+}
+
+/* hfi1_vnic_handle_rx - handle skb receive */
+static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
+ int *work_done, int work_to_do)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ struct sk_buff *skb;
+ int rc;
+
+ while (1) {
+ if (*work_done >= work_to_do)
+ break;
+
+ skb = hfi1_vnic_get_skb(rxq);
+ if (unlikely(!skb))
+ break;
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
+ (*work_done)++;
+ }
+}
+
+/* hfi1_vnic_napi - napi receive polling callback function */
+static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_vnic_rx_queue *rxq = container_of(napi,
+ struct hfi1_vnic_rx_queue, napi);
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int work_done = 0;
+
+ v_dbg("napi %d budget %d\n", rxq->idx, budget);
+ hfi1_vnic_handle_rx(rxq, &work_done, budget);
+
+ v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
+ if (work_done < budget)
+ napi_complete(napi);
+
+ return work_done;
+}
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_devdata *dd = packet->rcd->dd;
+ struct hfi1_vnic_vport_info *vinfo = NULL;
+ struct hfi1_vnic_rx_queue *rxq;
+ struct sk_buff *skb;
+ int l4_type, vesw_id = -1;
+ u8 q_idx;
+
+ l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
+ if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
+ vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
+ vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);
+
+ /*
+ * In case of invalid vesw id, count the error on
+ * the first available vport.
+ */
+ if (unlikely(!vinfo)) {
+ struct hfi1_vnic_vport_info *vinfo_tmp;
+ int id_tmp = 0;
+
+ vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
+ if (vinfo_tmp) {
+ spin_lock(&vport_cntr_lock);
+ vinfo_tmp->stats[0].netstats.rx_nohandler++;
+ spin_unlock(&vport_cntr_lock);
+ }
+ }
+ }
+
+ if (unlikely(!vinfo)) {
+ dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
+ l4_type, vesw_id, packet->rcd->ctxt);
+ return;
+ }
+
+ q_idx = packet->rcd->vnic_q_idx;
+ rxq = &vinfo->rxq[q_idx];
+ if (unlikely(!netif_oper_up(vinfo->netdev))) {
+ vinfo->stats[q_idx].rx_drop_state++;
+ skb_queue_purge(&rxq->skbq);
+ return;
+ }
+
+ if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
+ if (unlikely(!skb)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ memcpy(skb->data, packet->ebuf, packet->tlen);
+ skb_put(skb, packet->tlen);
+ skb_queue_tail(&rxq->skbq, skb);
+
+ if (napi_schedule_prep(&rxq->napi)) {
+ v_dbg("napi %d scheduling\n", q_idx);
+ __napi_schedule(&rxq->napi);
+ }
+}
+
+static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct net_device *netdev = vinfo->netdev;
+ int i, rc;
+
+ /* ensure virtual eth switch id is valid */
+ if (!vinfo->vesw_id)
+ return -EINVAL;
+
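+ /* reserve exactly this port's vesw_id in the idr */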
+ rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
+ vinfo->vesw_id + 1, GFP_NOWAIT);
+ if (rc < 0)
+ return rc;
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ skb_queue_head_init(&rxq->skbq);
+ napi_enable(&rxq->napi);
+ }
+
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+ set_bit(HFI1_VNIC_UP, &vinfo->flags);
+
+ return 0;
+}
+
+static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ u8 i;
+
+ clear_bit(HFI1_VNIC_UP, &vinfo->flags);
+ netif_carrier_off(vinfo->netdev);
+ netif_tx_disable(vinfo->netdev);
+ idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
+
+ /* ensure irqs see the change */
+ hfi1_vnic_synchronize_irq(dd);
+
+ /* remove unread skbs */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ napi_disable(&rxq->napi);
+ skb_queue_purge(&rxq->skbq);
+ }
+}
+
+static int hfi1_netdev_open(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ int rc;
+
+ mutex_lock(&vinfo->lock);
+ rc = hfi1_vnic_up(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return rc;
+}
+
+static int hfi1_netdev_close(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
+ hfi1_vnic_down(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return 0;
+}
+
+static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ int rc;
+
+ rc = allocate_vnic_ctxt(dd, vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = setup_vnic_ctxt(dd, *vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
+ deallocate_vnic_ctxt(dd, *vnic_ctxt);
+ *vnic_ctxt = NULL;
+ }
+
+ return rc;
+}
+
+static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i, rc = 0;
+
+ mutex_lock(&hfi1_mutex);
+ if (!dd->vnic.num_vports) {
+ rc = hfi1_vnic_txreq_init(dd);
+ if (rc)
+ goto txreq_fail;
+
+ dd->vnic.msix_idx = dd->first_dyn_msix_idx;
+ }
+
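+ /* allocate additional vnic contexts, beyond those already set up, to
+ * cover this port's rx queues */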
+ for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
+ rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
+ if (rc)
+ break;
+ dd->vnic.ctxt[i]->vnic_q_idx = i;
+ }
+
+ if (i < vinfo->num_rx_q) {
+ /*
+ * If the required number of contexts could not be
+ * allocated, release the contexts that were allocated
+ * by this call before bailing out.
+ */
+ while (i-- > dd->vnic.num_ctxt) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ goto alloc_fail;
+ }
+
+ if (dd->vnic.num_ctxt != i) {
+ dd->vnic.num_ctxt = i;
+ hfi1_init_vnic_rsm(dd);
+ }
+
+ dd->vnic.num_vports++;
+ hfi1_vnic_sdma_init(vinfo);
+alloc_fail:
+ if (!dd->vnic.num_vports)
+ hfi1_vnic_txreq_deinit(dd);
+txreq_fail:
+ mutex_unlock(&hfi1_mutex);
+ return rc;
+}
+
+static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i;
+
+ mutex_lock(&hfi1_mutex);
+ if (--dd->vnic.num_vports == 0) {
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ hfi1_deinit_vnic_rsm(dd);
+ dd->vnic.num_ctxt = 0;
+ hfi1_vnic_txreq_deinit(dd);
+ }
+ mutex_unlock(&hfi1_mutex);
+}
+
+static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ bool reopen = false;
+
+ /*
+ * If the vesw_id is being changed and the vnic port is up,
+ * reset the vnic port to ensure the new vesw_id gets picked up
+ */
+ if (id != vinfo->vesw_id) {
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
+ hfi1_vnic_down(vinfo);
+ reopen = true;
+ }
+
+ vinfo->vesw_id = id;
+ if (reopen)
+ hfi1_vnic_up(vinfo);
+
+ mutex_unlock(&vinfo->lock);
+ }
+}
+
+/* netdev ops */
+static const struct net_device_ops hfi1_netdev_ops = {
+ .ndo_open = hfi1_netdev_open,
+ .ndo_stop = hfi1_netdev_close,
+ .ndo_start_xmit = hfi1_netdev_start_xmit,
+ .ndo_select_queue = hfi1_vnic_select_queue,
+ .ndo_get_stats64 = hfi1_vnic_get_stats64,
+};
+
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int i, size, rc;
+
+ if (!port_num || (port_num > dd->num_pports))
+ return ERR_PTR(-EINVAL);
+
+ if (type != RDMA_NETDEV_OPA_VNIC)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
+ netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
+ dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+
+ rn = netdev_priv(netdev);
+ vinfo = opa_vnic_dev_priv(netdev);
+ vinfo->dd = dd;
+ vinfo->num_tx_q = dd->chip_sdma_engines;
+ vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
+ vinfo->netdev = netdev;
+ rn->set_id = hfi1_vnic_set_vesw_id;
+
+ netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
+ netdev->hw_features = netdev->features;
+ netdev->vlan_features = netdev->features;
+ netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
+ netdev->netdev_ops = &hfi1_netdev_ops;
+ mutex_init(&vinfo->lock);
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ rxq->idx = i;
+ rxq->vinfo = vinfo;
+ rxq->netdev = netdev;
+ netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
+ }
+
+ rc = hfi1_vnic_init(vinfo);
+ if (rc)
+ goto init_fail;
+
+ return netdev;
+init_fail:
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+ return ERR_PTR(rc);
+}
+
+void hfi1_vnic_free_rn(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_deinit(vinfo);
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+}
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
new file mode 100644
index 000000000000..51a817d3aa14
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC SDMA functionality
+ */
+
+#include "sdma.h"
+#include "vnic.h"
+
+#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0)
+#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1)
+
+#define HFI1_VNIC_TXREQ_NAME_LEN 32
+#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
+#define HFI1_VNIC_SDMA_RETRY_COUNT 1
+
+/*
+ * struct vnic_txreq - VNIC transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma: vnic sdma pointer
+ * @skb: skb to send
+ * @pad: pad buffer
+ * @plen: pad length
+ * @pbc_val: pbc value
+ * @retry_count: tx retry count
+ */
+struct vnic_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_vnic_sdma *sdma;
+
+ struct sk_buff *skb;
+ unsigned char pad[HFI1_VNIC_MAX_PAD];
+ u16 plen;
+ __le64 pbc_val;
+
+ u32 retry_count;
+};
+
+static void vnic_sdma_complete(struct sdma_txreq *txreq,
+ int status)
+{
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+ struct hfi1_vnic_sdma *vnic_sdma = tx->sdma;
+
+ sdma_txclean(vnic_sdma->dd, txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx);
+}
+
+static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
+ struct vnic_txreq *tx)
+{
+ int i, ret = 0;
+
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ tx->skb->data,
+ skb_headlen(tx->skb));
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i];
+
+ /* combine physically contiguous fragments later? */
+ ret = sdma_txadd_page(sde->dd,
+ &tx->txreq,
+ skb_frag_page(frag),
+ frag->page_offset,
+ skb_frag_size(frag));
+ if (unlikely(ret))
+ goto bail_txadd;
+ }
+
+ if (tx->plen)
+ ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
+ tx->pad + HFI1_VNIC_MAX_PAD - tx->plen,
+ tx->plen);
+
+bail_txadd:
+ return ret;
+}
+
+static int build_vnic_tx_desc(struct sdma_engine *sde,
+ struct vnic_txreq *tx,
+ u64 pbc)
+{
+ int ret = 0;
+ u16 hdrbytes = 2 << 2; /* PBC: 2 dwords = 8 bytes */
+
+ ret = sdma_txinit_ahg(
+ &tx->txreq,
+ 0,
+ hdrbytes + tx->skb->len + tx->plen,
+ 0,
+ 0,
+ NULL,
+ 0,
+ vnic_sdma_complete);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add pbc */
+ tx->pbc_val = cpu_to_le64(pbc);
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ &tx->pbc_val,
+ hdrbytes);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add the ulp payload */
+ ret = build_vnic_ulp_payload(sde, tx);
+bail_txadd:
+ return ret;
+}
+
+/* set up the last plen bytes of the pad buffer */
+static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen)
+{
+ pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN;
+}
+
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+ struct sdma_engine *sde = vnic_sdma->sde;
+ struct vnic_txreq *tx;
+ int ret = -ECOMM;
+
+ if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE))
+ goto tx_err;
+
+ if (unlikely(!sde || !sdma_running(sde)))
+ goto tx_err;
+
+ tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC);
+ if (unlikely(!tx)) {
+ ret = -ENOMEM;
+ goto tx_err;
+ }
+
+ tx->sdma = vnic_sdma;
+ tx->skb = skb;
+ hfi1_vnic_update_pad(tx->pad, plen);
+ tx->plen = plen;
+ ret = build_vnic_tx_desc(sde, tx, pbc);
+ if (unlikely(ret))
+ goto free_desc;
+ tx->retry_count = 0;
+
+ ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
+ /* When -ECOMM, sdma callback will be called with ABORT status */
+ if (unlikely(ret && ret != -ECOMM))
+ goto free_desc;
+
+ return ret;
+
+free_desc:
+ sdma_txclean(dd, &tx->txreq);
+ kmem_cache_free(dd->vnic.txreq_cache, tx);
+tx_err:
+ if (ret != -EBUSY)
+ dev_kfree_skb_any(skb);
+ return ret;
+}
+
+/*
+ * hfi1_vnic_sdma_sleep - vnic sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds the Tx queue's
+ * wait structure to the sdma engine's dmawait list so that the queue is
+ * woken up when descriptors become available.
+ */
+static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
+ struct iowait *wait,
+ struct sdma_txreq *txreq,
+ unsigned int seq)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+
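+ /* if the engine made progress, retry (at most once) before deferring */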
+ if (sdma_progress(sde, seq, txreq))
+ if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
+ return -EAGAIN;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
+ write_seqlock(&dev->iowait_lock);
+ if (list_empty(&vnic_sdma->wait.list))
+ list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
+ write_sequnlock(&dev->iowait_lock);
+ return -EBUSY;
+}
+
+/*
+ * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors become available and the
+ * Tx queue's wait structure was previously added to the sdma engine's
+ * dmawait list. It notifies the upper driver about the Tx queue wakeup.
+ */
+static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+ if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx))
+ netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx);
+}
+
+inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+
+ return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE);
+}
+
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ int i;
+
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
+
+ iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ hfi1_vnic_sdma_wakeup, NULL);
+ vnic_sdma->sde = &vinfo->dd->per_sdma[i];
+ vnic_sdma->dd = vinfo->dd;
+ vnic_sdma->vinfo = vinfo;
+ vnic_sdma->q_idx = i;
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+
+ /* Add a free descriptor watermark for wakeups */
+ if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ INIT_LIST_HEAD(&vnic_sdma->stx.list);
+ vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
+ list_add_tail(&vnic_sdma->stx.list,
+ &vnic_sdma->wait.tx_head);
+ }
+ }
+}
+
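+/* kmem_cache constructor: runs when slab pages are populated, not on
+ * every allocation, so a txreq is zeroed only once in its lifetime */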
+static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj)
+{
+ struct vnic_txreq *tx = (struct vnic_txreq *)obj;
+
+ memset(tx, 0, sizeof(*tx));
+}
+
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd)
+{
+ char buf[HFI1_VNIC_TXREQ_NAME_LEN];
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit);
+ dd->vnic.txreq_cache = kmem_cache_create(buf,
+ sizeof(struct vnic_txreq),
+ 0, SLAB_HWCACHE_ALIGN,
+ hfi1_vnic_txreq_kmem_cache_ctor);
+ if (!dd->vnic.txreq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd)
+{
+ kmem_cache_destroy(dd->vnic.txreq_cache);
+ dd->vnic.txreq_cache = NULL;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 8c1f7a6f84d2..b94dcd823ad1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -299,9 +299,9 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
int i;
- hr_cmd->context = kmalloc(hr_cmd->max_cmds *
- sizeof(struct hns_roce_cmd_context),
- GFP_KERNEL);
+ hr_cmd->context = kmalloc_array(hr_cmd->max_cmds,
+ sizeof(*hr_cmd->context),
+ GFP_KERNEL);
if (!hr_cmd->context)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 589496c8fb9e..b89fd711019e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -219,8 +219,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
return PTR_ERR(*umem);
ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
- ilog2((unsigned int)(*umem)->page_size),
- &buf->hr_mtt);
+ (*umem)->page_shift, &buf->hr_mtt);
if (ret)
goto err_buf;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b8111b0c8877..e637beb209f7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -33,6 +33,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
#include <linux/etherdevice.h>
+#include <linux/of.h>
#include <rdma/ib_umem.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
@@ -1851,6 +1852,7 @@ void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
u32 doorbell[2];
doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1);
+ doorbell[1] = 0;
roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 4139abee3b54..dc5c97c8f070 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -127,11 +127,12 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
-
- buddy->bits = kzalloc((buddy->max_order + 1) * sizeof(long *),
- GFP_KERNEL);
- buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof(int *),
- GFP_KERNEL);
+ buddy->bits = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->bits),
+ GFP_KERNEL);
+ buddy->num_free = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->num_free),
+ GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
@@ -503,7 +504,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) + umem->page_size * k;
+ pages[i++] = sg_dma_address(sg) +
+ (k << umem->page_shift);
if (i == PAGE_SIZE / sizeof(u64)) {
ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
pages);
@@ -563,9 +565,9 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
n = ib_umem_page_count(mr->umem);
- if (mr->umem->page_size != HNS_ROCE_HEM_PAGE_SIZE) {
- dev_err(dev, "Just support 4K page size but is 0x%x now!\n",
- mr->umem->page_size);
+ if (mr->umem->page_shift != HNS_ROCE_HEM_PAGE_SHIFT) {
+ dev_err(dev, "Just support 4K page size but is 0x%lx now!\n",
+ BIT(mr->umem->page_shift));
ret = -EINVAL;
goto err_umem;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 3f44f2f91f03..054c52699090 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -437,8 +437,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem),
- ilog2((unsigned int)hr_qp->umem->page_size),
- &hr_qp->mtt);
+ hr_qp->umem->page_shift, &hr_qp->mtt);
if (ret) {
dev_err(dev, "hns_roce_mtt_init error for create qp\n");
goto err_buf;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 95a0586a4da8..f3bc01bce483 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -3184,9 +3184,8 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev)
INIT_LIST_HEAD(&cm_core->connected_nodes);
INIT_LIST_HEAD(&cm_core->listen_nodes);
- init_timer(&cm_core->tcp_timer);
- cm_core->tcp_timer.function = i40iw_cm_timer_tick;
- cm_core->tcp_timer.data = (unsigned long)cm_core;
+ setup_timer(&cm_core->tcp_timer, i40iw_cm_timer_tick,
+ (unsigned long)cm_core);
spin_lock_init(&cm_core->ht_lock);
spin_lock_init(&cm_core->listen_list_lock);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 70c3e9e79508..409a3781e735 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -844,10 +844,9 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
iwqp = (struct i40iw_qp *)qp->back_qp;
i40iw_add_ref(&iwqp->ibqp);
- init_timer(&iwqp->terminate_timer);
- iwqp->terminate_timer.function = i40iw_terminate_timeout;
+ setup_timer(&iwqp->terminate_timer, i40iw_terminate_timeout,
+ (unsigned long)iwqp);
iwqp->terminate_timer.expires = jiffies + HZ;
- iwqp->terminate_timer.data = (unsigned long)iwqp;
add_timer(&iwqp->terminate_timer);
}
@@ -1436,9 +1435,8 @@ void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
{
struct i40iw_vsi_pestat *devstat = vsi->pestat;
- init_timer(&devstat->stats_timer);
- devstat->stats_timer.function = i40iw_hw_stats_timeout;
- devstat->stats_timer.data = (unsigned long)vsi;
+ setup_timer(&devstat->stats_timer, i40iw_hw_stats_timeout,
+ (unsigned long)vsi);
mod_timer(&devstat->stats_timer,
jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 9b2849979756..378c75759be4 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1345,7 +1345,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
{
struct ib_umem *region = iwmr->region;
struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- int chunk_pages, entry, pg_shift, i;
+ int chunk_pages, entry, i;
struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
struct i40iw_pble_info *pinfo;
struct scatterlist *sg;
@@ -1354,14 +1354,14 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
- pg_shift = ffs(region->page_size) - 1;
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
- chunk_pages = sg_dma_len(sg) >> pg_shift;
+ chunk_pages = sg_dma_len(sg) >> region->page_shift;
if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
!iwpbl->qp_mr.sq_page)
iwpbl->qp_mr.sq_page = sg_page(sg);
for (i = 0; i < chunk_pages; i++) {
- pg_addr = sg_dma_address(sg) + region->page_size * i;
+ pg_addr = sg_dma_address(sg) +
+ (i << region->page_shift);
if ((entry + i) == 0)
*pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
@@ -1847,7 +1847,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
iwmr->ibmr.device = pd->device;
ucontext = to_ucontext(pd->uobject->context);
- iwmr->page_size = region->page_size;
+ iwmr->page_size = PAGE_SIZE;
iwmr->page_msk = PAGE_MASK;
if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM))
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 6a0fec357dae..4f5a143fc0a7 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -147,7 +147,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
return PTR_ERR(*umem);
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
- ilog2((*umem)->page_size), &buf->mtt);
+ (*umem)->page_shift, &buf->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index fba94df28cf1..521d0def2d9e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1173,7 +1173,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* need to protect from a race on closing the vma as part of
* mlx4_ib_vma_close().
*/
- down_read(&owning_mm->mmap_sem);
+ down_write(&owning_mm->mmap_sem);
for (i = 0; i < HW_BAR_COUNT; i++) {
vma = context->hw_bar_info[i].vma;
if (!vma)
@@ -1187,11 +1187,13 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
BUG_ON(1);
}
+ context->hw_bar_info[i].vma->vm_flags &=
+ ~(VM_SHARED | VM_MAYSHARE);
/* context going to be destroyed, should not access ops any more */
context->hw_bar_info[i].vma->vm_ops = NULL;
}
- up_read(&owning_mm->mmap_sem);
+ up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
@@ -2867,23 +2869,19 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
- dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- if (!iboe->nb.notifier_call) {
- iboe->nb.notifier_call = mlx4_ib_netdev_event;
- err = register_netdevice_notifier(&iboe->nb);
- if (err) {
- iboe->nb.notifier_call = NULL;
- goto err_notif;
- }
- }
- if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
- if (err) {
- goto err_notif;
- }
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err)
+ goto err_notif;
+ }
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
@@ -2941,6 +2939,7 @@ err_counter:
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
err_map:
+ mlx4_ib_free_eqs(dev, ibdev);
iounmap(ibdev->uar_map);
err_uar:
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index e010fe459e67..8772d88d324d 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -1102,7 +1102,8 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
while ((p = rb_first(&ctx->mcg_table)) != NULL) {
group = rb_entry(p, struct mcast_group, node);
if (atomic_read(&group->refcount))
- mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
+ mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
+ atomic_read(&group->refcount), group);
force_clean_group(group);
}
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 433bcdbdd680..e6f77f63da75 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -107,7 +107,7 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
- umem->page_size * k;
+ (k << umem->page_shift);
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
@@ -155,7 +155,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
n = ib_umem_page_count(mr->umem);
- shift = ilog2(mr->umem->page_size);
+ shift = mr->umem->page_shift;
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
@@ -239,7 +239,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
goto release_mpt_entry;
}
n = ib_umem_page_count(mmr->umem);
- shift = ilog2(mmr->umem->page_size);
+ shift = mmr->umem->page_shift;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
virt_addr, length, n, shift,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c34eebc7db65..8f382318f888 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -745,7 +745,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
- ilog2(qp->umem->page_size), &qp->mtt);
+ qp->umem->page_shift, &qp->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 7dd3f267f06b..e32dd58937a8 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -122,7 +122,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- ilog2(srq->umem->page_size), &srq->mtt);
+ srq->umem->page_shift, &srq->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index cdc2d3017da7..18d5e1db93ed 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -46,3 +46,14 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
null_mkey);
return err;
}
+
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size)
+{
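+ /* query (and optionally clear) the device congestion statistics via
+ * the QUERY_CONG_STATISTICS firmware command */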
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ MLX5_SET(query_cong_statistics_in, in, clear, reset);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 7ca8a7b6434d..fa09228193a6 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,4 +37,6 @@
#include <linux/mlx5/driver.h>
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 31803b367104..94c049b62c2f 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -172,6 +172,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
u16 wqe_ctr;
+ u8 roce_packet_type;
+ bool vlan_present;
u8 g;
if (qp->ibqp.srq || qp->ibqp.xrcd) {
@@ -223,7 +225,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
break;
}
wc->slid = be16_to_cpu(cqe->slid);
- wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
@@ -237,10 +238,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->pkey_index = 0;
}
- if (ll != IB_LINK_LAYER_ETHERNET)
+ if (ll != IB_LINK_LAYER_ETHERNET) {
+ wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
return;
+ }
+
+ vlan_present = cqe->l4_l3_hdr_type & 0x1;
+ roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
+ if (vlan_present) {
+ wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
+ wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->sl = 0;
+ }
- switch (wc->sl & 0x3) {
+ switch (roce_packet_type) {
case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
wc->network_hdr_type = RDMA_NETWORK_IB;
break;
@@ -818,7 +831,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
return 0;
err_cqb:
- kfree(cqb);
+ kfree(*cqb);
err_db:
mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4dc0a8785fe0..9f3ba320ce70 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,6 +57,7 @@
#include <linux/mlx5/fs.h>
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
+#include "cmd.h"
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "2.2-1"
@@ -165,15 +166,82 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
return ndev;
}
-static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
- struct ib_port_attr *props)
+static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+ u8 *active_width)
+{
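+ /* map the negotiated ethernet protocol to an equivalent IB
+ * width/speed pair, e.g. 40GbE reports as 4X QDR (4 x 10 Gb/s) */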
+ switch (eth_proto_oper) {
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
+ case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_SDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
+ u32 eth_prot_oper;
- /* props being zeroed by the caller, avoid zeroing it here */
+ /* Possible bad flows are checked before filling out props, so in
+ * case of an error props will still be zeroed out.
+ */
+ if (mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num))
+ return;
+
+ translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
+ &props->active_width);
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
@@ -191,7 +259,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
ndev = mlx5_ib_get_netdev(device, port_num);
if (!ndev)
- return 0;
+ return;
if (mlx5_lag_is_active(dev->mdev)) {
rcu_read_lock();
@@ -214,11 +282,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
-
- props->active_width = IB_WIDTH_4X; /* TODO */
- props->active_speed = IB_SPEED_QDR; /* TODO */
-
- return 0;
}
static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
@@ -924,7 +987,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return mlx5_query_hca_port(ibdev, port, props);
case MLX5_VPORT_ACCESS_METHOD_NIC:
- return mlx5_query_port_roce(ibdev, port, props);
+ mlx5_query_port_roce(ibdev, port, props);
+ return 0;
default:
return -EINVAL;
@@ -1478,7 +1542,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* need to protect from a race on closing the vma as part of
* mlx5_ib_vma_close.
*/
- down_read(&owning_mm->mmap_sem);
+ down_write(&owning_mm->mmap_sem);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
vma = vma_private->vma;
@@ -1488,11 +1552,12 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* context going to be destroyed, should
* not access ops any more.
*/
+ vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
vma->vm_ops = NULL;
list_del(&vma_private->list);
kfree(vma_private);
}
- up_read(&owning_mm->mmap_sem);
+ up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
@@ -1726,6 +1791,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1735,8 +1801,11 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-static int parse_flow_attr(u32 *match_c, u32 *match_v,
- const union ib_flow_spec *ib_spec, u32 *tag_id)
+#define IPV4_VERSION 4
+#define IPV6_VERSION 6
+static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
+ u32 *match_v, const union ib_flow_spec *ib_spec,
+ u32 *tag_id, bool *is_drop)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@@ -1744,17 +1813,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
misc_parameters);
void *headers_c;
void *headers_v;
+ int match_ipv;
if (ib_spec->type & IB_FLOW_SPEC_INNER) {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
inner_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
} else {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
}
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
@@ -1810,10 +1884,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
@@ -1842,10 +1923,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
@@ -1937,6 +2025,12 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
*tag_id = ib_spec->flow_tag.tag_id;
break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ *is_drop = true;
+ break;
default:
return -EINVAL;
}
@@ -1967,26 +2061,60 @@ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
is_multicast_ether_addr(eth_spec->val.dst_mac);
}
-static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
{
union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- bool has_ipv4_spec = false;
- bool eth_type_ipv4 = true;
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
/* Validate that ethertype is correct */
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if (ib_spec->type == IB_FLOW_SPEC_ETH &&
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
ib_spec->eth.mask.ether_type) {
- if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
- ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
- eth_type_ipv4 = false;
- } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
- has_ipv4_spec = true;
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
}
ib_spec = (void *)ib_spec + ib_spec->size;
}
- return !has_ipv4_spec || eth_type_ipv4;
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
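+ /* the spec is valid if either the ethertype or the L3 spec is absent;
+ * otherwise the ethertype must agree with the L3 spec type (or be
+ * MPLS when the device can match on ip_version) */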
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
}
static void put_flow_table(struct mlx5_ib_dev *dev,
@@ -2038,8 +2166,8 @@ enum flow_table_type {
MLX5_IB_FT_TX
};
-#define MLX5_FS_MAX_TYPES 10
-#define MLX5_FS_MAX_ENTRIES 32000UL
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr,
enum flow_table_type ft_type)
@@ -2048,11 +2176,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
+ int max_table_size;
int num_entries;
int num_groups;
int priority;
int err = 0;
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_is_multicast_only(flow_attr) &&
!dont_trap)
@@ -2091,6 +2222,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
if (!ns)
return ERR_PTR(-ENOTSUPP);
+ if (num_entries > max_table_size)
+ return ERR_PTR(-ENOMEM);
+
ft = prio->flow_table;
if (!ft) {
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
@@ -2118,12 +2252,15 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination *rule_dst = dst;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ bool is_drop = false;
int err = 0;
+ int dest_num = 1;
- if (!is_valid_attr(flow_attr))
+ if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
spec = mlx5_vzalloc(sizeof(*spec));
@@ -2136,8 +2273,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
INIT_LIST_HEAD(&handler->list);
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(spec->match_criteria,
- spec->match_value, ib_flow, &flow_tag);
+ err = parse_flow_attr(dev->mdev, spec->match_criteria,
+ spec->match_value,
+ ib_flow, &flow_tag, &is_drop);
if (err < 0)
goto free;
@@ -2145,8 +2283,14 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_drop) {
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ rule_dst = NULL;
+ dest_num = 0;
+ } else {
+ flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ }
if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
@@ -2159,7 +2303,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
- dst, 1);
+ rule_dst, dest_num);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -2315,7 +2459,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int err;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOSPC);
+ return ERR_PTR(-ENOMEM);
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
@@ -3133,7 +3277,7 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-struct mlx5_ib_q_counter {
+struct mlx5_ib_counter {
const char *name;
size_t offset;
};
@@ -3141,18 +3285,18 @@ struct mlx5_ib_q_counter {
#define INIT_Q_COUNTER(_name) \
{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+static const struct mlx5_ib_counter basic_q_cnts[] = {
INIT_Q_COUNTER(rx_write_requests),
INIT_Q_COUNTER(rx_read_requests),
INIT_Q_COUNTER(rx_atomic_requests),
INIT_Q_COUNTER(out_of_buffer),
};
-static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
INIT_Q_COUNTER(out_of_sequence),
};
-static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
INIT_Q_COUNTER(duplicate_request),
INIT_Q_COUNTER(rnr_nak_retry_err),
INIT_Q_COUNTER(packet_seq_err),
@@ -3160,22 +3304,31 @@ static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
INIT_Q_COUNTER(local_ack_timeout_err),
};
-static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
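+/* Note: congestion counters are 64 bits split into _high/_low dwords in
+ * the firmware layout; pointing at _high lets each value be read as a
+ * single big-endian 64-bit quantity */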
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
for (i = 0; i < dev->num_ports; i++) {
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnts.set_id);
- kfree(dev->port[i].q_cnts.names);
- kfree(dev->port[i].q_cnts.offsets);
+ dev->port[i].cnts.set_id);
+ kfree(dev->port[i].cnts.names);
+ kfree(dev->port[i].cnts.offsets);
}
}
-static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
- const char ***names,
- size_t **offsets,
- u32 *num)
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts)
{
u32 num_counters;
@@ -3186,27 +3339,32 @@ static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
+ cnts->num_q_counters = num_counters;
- *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
- if (!*names)
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+
+ cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ if (!cnts->names)
return -ENOMEM;
- *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
- if (!*offsets)
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
goto err_names;
- *num = num_counters;
-
return 0;
err_names:
- kfree(*names);
+ kfree(cnts->names);
return -ENOMEM;
}
-static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
{
int i;
int j = 0;
@@ -3229,9 +3387,16 @@ static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
offsets[j] = retrans_q_cnts[i].offset;
}
}
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ names[j] = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
}
-static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
int i;
int ret;
@@ -3240,7 +3405,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
struct mlx5_ib_port *port = &dev->port[i];
ret = mlx5_core_alloc_q_counter(dev->mdev,
- &port->q_cnts.set_id);
+ &port->cnts.set_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
@@ -3248,15 +3413,12 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
goto dealloc_counters;
}
- ret = __mlx5_ib_alloc_q_counters(dev,
- &port->q_cnts.names,
- &port->q_cnts.offsets,
- &port->q_cnts.num_counters);
+ ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
if (ret)
goto dealloc_counters;
- mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
- port->q_cnts.offsets);
+ mlx5_ib_fill_counters(dev, port->cnts.names,
+ port->cnts.offsets);
}
return 0;
@@ -3264,7 +3426,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
dealloc_counters:
while (--i >= 0)
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnts.set_id);
+ dev->port[i].cnts.set_id);
return ret;
}
@@ -3279,44 +3441,93 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
if (port_num == 0)
return NULL;
- return rdma_alloc_hw_stats_struct(port->q_cnts.names,
- port->q_cnts.num_counters,
+ return rdma_alloc_hw_stats_struct(port->cnts.names,
+ port->cnts.num_q_counters +
+ port->cnts.num_cong_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
+static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_port *port,
+ struct rdma_hw_stats *stats)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
__be32 val;
- int ret;
- int i;
-
- if (!stats)
- return -ENOSYS;
+ int ret, i;
out = mlx5_vzalloc(outlen);
if (!out)
return -ENOMEM;
ret = mlx5_core_query_q_counter(dev->mdev,
- port->q_cnts.set_id, 0,
+ port->cnts.set_id, 0,
out, outlen);
if (ret)
goto free;
- for (i = 0; i < port->q_cnts.num_counters; i++) {
- val = *(__be32 *)(out + port->q_cnts.offsets[i]);
+ for (i = 0; i < port->cnts.num_q_counters; i++) {
+ val = *(__be32 *)(out + port->cnts.offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
free:
kvfree(out);
- return port->q_cnts.num_counters;
+ return ret;
+}
+
+static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_port *port,
+ struct rdma_hw_stats *stats)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ void *out;
+ int ret, i;
+ int offset = port->cnts.num_q_counters;
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < port->cnts.num_cong_counters; i++) {
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ port->cnts.offsets[i + offset]));
+ }
+
+free:
+ kvfree(out);
+ return ret;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ int ret, num_counters;
+
+ if (!stats)
+ return -EINVAL;
+
+ ret = mlx5_ib_query_q_counters(dev, port, stats);
+ if (ret)
+ return ret;
+ num_counters = port->cnts.num_q_counters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ ret = mlx5_ib_query_cong_counters(dev, port, stats);
+ if (ret)
+ return ret;
+ num_counters += port->cnts.num_cong_counters;
+ }
+
+ return num_counters;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
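A note on the congestion-counter hunk above: INIT_CONG_COUNTER records the offset of the `_name ## _high` field, and mlx5_ib_query_cong_counters() then reads a full __be64 from that offset. This works if, as the macro suggests, each counter in the firmware output is a 64-bit big-endian value laid out as adjacent _high/_low 32-bit words, so the offset of _high is also the offset of the whole value. A minimal sketch of the idea, with a hypothetical layout (not the real query_cong_statistics_out definition):

	/* Hypothetical layout: one 64-bit BE counter split into two words. */
	struct cong_stats_sketch {
		__be32 rp_cnp_handled_high;	/* most significant 32 bits */
		__be32 rp_cnp_handled_low;	/* least significant 32 bits */
	};

	static u64 read_cong_counter(const void *out, size_t off_of_high)
	{
		/* A big-endian u64 begins at its most significant word, so
		 * an 8-byte read from the _high offset yields the value. */
		return be64_to_cpup((const __be64 *)((const u8 *)out + off_of_high));
	}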
@@ -3523,14 +3734,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_rsrc;
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- err = mlx5_ib_alloc_q_counters(dev);
+ err = mlx5_ib_alloc_counters(dev);
if (err)
goto err_odp;
}
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
- goto err_q_cnt;
+ goto err_cnt;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
@@ -3574,9 +3785,9 @@ err_bfreg:
err_uar_page:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
-err_q_cnt:
+err_cnt:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_q_counters(dev);
+ mlx5_ib_dealloc_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
@@ -3610,7 +3821,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_q_counters(dev);
+ mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 778d8a18925f..914f212e7ef6 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -59,15 +59,14 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
u64 pfn;
struct scatterlist *sg;
int entry;
- unsigned long page_shift = ilog2(umem->page_size);
+ unsigned long page_shift = umem->page_shift;
- /* With ODP we must always match OS page size. */
if (umem->odp_data) {
- *count = ib_umem_page_count(umem);
- *shift = PAGE_SHIFT;
- *ncont = *count;
+ *ncont = ib_umem_page_count(umem);
+ *count = *ncont << (page_shift - PAGE_SHIFT);
+ *shift = page_shift;
if (order)
- *order = ilog2(roundup_pow_of_two(*count));
+ *order = ilog2(roundup_pow_of_two(*ncont));
return;
}
@@ -156,7 +155,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, size_t offset, size_t num_pages,
__be64 *pas, int access_flags)
{
- unsigned long umem_page_shift = ilog2(umem->page_size);
+ unsigned long umem_page_shift = umem->page_shift;
int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
int i, k, idx;
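The ODP branch of mlx5_ib_cont_pages() above now distinguishes device pages from OS pages: *ncont counts pages of the umem's own page size, while *count is re-derived in OS pages from the shift difference. A worked example with assumed sizes:

	/* Sketch: assume PAGE_SHIFT == 12 (4 KiB OS pages) and an ODP umem
	 * with page_shift == 16 (64 KiB pages). */
	unsigned long page_shift = 16;
	unsigned long ncont = 8;				/* 8 umem pages */
	unsigned long count = ncont << (page_shift - 12);	/* 128 OS pages */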
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ce8ba617d46e..93c646691208 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -513,6 +513,7 @@ struct mlx5_ib_mr {
struct mlx5_ib_mw {
struct ib_mw ibmw;
struct mlx5_core_mkey mmkey;
+ int ndescs;
};
struct mlx5_ib_umr_context {
@@ -595,15 +596,16 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
-struct mlx5_ib_q_counters {
+struct mlx5_ib_counters {
const char **names;
size_t *offsets;
- u32 num_counters;
+ u32 num_q_counters;
+ u32 num_cong_counters;
u16 set_id;
};
struct mlx5_ib_port {
- struct mlx5_ib_q_counters q_cnts;
+ struct mlx5_ib_counters cnts;
};
struct mlx5_roce {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b8f9382a8b7d..93c0e82aa491 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1009,7 +1009,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
}
if (!xlt) {
- uctx = to_mucontext(mr->ibmr.uobject->context);
+ uctx = to_mucontext(mr->ibmr.pd->uobject->context);
mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
size = PAGE_SIZE;
xlt = (void *)uctx->upd_xlt_page;
@@ -1045,8 +1045,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
+ npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
- npages = populate_xlt(mr, idx, pages_iter, xlt,
+ npages = populate_xlt(mr, idx, npages, xlt,
page_shift, size, flags);
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
@@ -1687,6 +1688,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
mw->mmkey.type = MLX5_MKEY_MW;
mw->ibmw.rkey = mw->mmkey.key;
+ mw->ndescs = ndescs;
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d7b12f0750e2..ae0746754008 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -57,7 +57,7 @@ static int check_parent(struct ib_umem_odp *odp,
{
struct mlx5_ib_mr *mr = odp->private;
- return mr && mr->parent == parent;
+ return mr && mr->parent == parent && !odp->dying;
}
static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
@@ -158,13 +158,6 @@ static void mr_leaf_free_action(struct work_struct *work)
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- if (!READ_ONCE(odp->dying)) {
- mr->parent = imr;
- if (atomic_dec_and_test(&imr->num_leaf_free))
- wake_up(&imr->q_leaf_free);
- return;
- }
-
ib_umem_release(odp->umem);
if (imr->live)
mlx5_ib_update_xlt(imr, idx, 1, 0,
@@ -206,8 +199,8 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* but they will write 0s as well, so no difference in the end result.
*/
- for (addr = start; addr < end; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The heuristic here can be improved to
@@ -225,8 +218,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
if (in_block && umr_offset == 0) {
mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx,
- PAGE_SHIFT,
+ idx - blk_start_idx, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
@@ -235,8 +227,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
}
if (in_block)
mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx + 1,
- PAGE_SHIFT,
+ idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
/*
@@ -297,24 +288,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
return;
}
-static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
- u32 key)
-{
- u32 base_key = mlx5_base_mkey(key);
- struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
- struct mlx5_ib_mr *mr;
-
- if (!mmkey || mmkey->key != key || mmkey->type != MLX5_MKEY_MR)
- return NULL;
-
- mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
-
- if (!mr->live)
- return NULL;
-
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault,
int error)
@@ -436,8 +409,6 @@ next_mr:
nentries++;
}
- odp->dying = 0;
-
/* Return first odp if region not covered by single one */
if (likely(!result))
result = odp;
@@ -522,77 +493,38 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
-/*
- * Handle a single data segment in a page-fault WQE or RDMA region.
- *
- * Returns number of pages retrieved on success. The caller may continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- * page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- * abort the page fault handling.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
- u32 key, u64 io_virt, size_t bcnt,
- u32 *bytes_committed,
- u32 *bytes_mapped)
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt, u32 *bytes_mapped)
{
- int srcu_key;
- unsigned int current_seq = 0;
- u64 start_idx;
- int npages = 0, ret = 0;
- struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
+ int npages = 0, page_shift, np;
+ u64 start_idx, page_mask;
struct ib_umem_odp *odp;
- int implicit = 0;
+ int current_seq;
size_t size;
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(dev, key);
- /*
- * If we didn't find the MR, it means the MR was closed while we were
- * handling the ODP event. In this case we return -EFAULT so that the
- * QP will be closed.
- */
- if (!mr || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
- ret = -EFAULT;
- goto srcu_unlock;
- }
- if (!mr->umem->odp_data) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped +=
- (bcnt - *bytes_committed);
- goto srcu_unlock;
- }
-
- /*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
- */
- io_virt += *bytes_committed;
- bcnt -= *bytes_committed;
+ int ret;
if (!mr->umem->odp_data->page_list) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
- if (IS_ERR(odp)) {
- ret = PTR_ERR(odp);
- goto srcu_unlock;
- }
+ if (IS_ERR(odp))
+ return PTR_ERR(odp);
mr = odp->private;
- implicit = 1;
} else {
odp = mr->umem->odp_data;
}
next_mr:
+ size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
+ page_shift = mr->umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+ if (mr->umem->writable)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+
current_seq = READ_ONCE(odp->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
@@ -600,51 +532,43 @@ next_mr:
*/
smp_rmb();
- size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
- start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
-
- if (mr->umem->writable)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
-
ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
access_mask, current_seq);
if (ret < 0)
- goto srcu_unlock;
+ goto out;
- if (ret > 0) {
- int np = ret;
-
- mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_xlt(mr, start_idx, np,
- PAGE_SHIFT,
- MLX5_IB_UPD_XLT_ATOMIC);
- } else {
- ret = -EAGAIN;
- }
- mutex_unlock(&odp->umem_mutex);
- if (ret < 0) {
- if (ret != -EAGAIN)
- mlx5_ib_err(dev, "Failed to update mkey page tables\n");
- goto srcu_unlock;
- }
+ np = ret;
- if (bytes_mapped) {
- u32 new_mappings = np * PAGE_SIZE -
- (io_virt - round_down(io_virt, PAGE_SIZE));
- *bytes_mapped += min_t(u32, new_mappings, size);
- }
+ mutex_lock(&odp->umem_mutex);
+ if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ /*
+ * No need to check whether the MTTs really belong to
+ * this MR, since ib_umem_odp_map_dma_pages already
+ * checks this.
+ */
+ ret = mlx5_ib_update_xlt(mr, start_idx, np,
+ page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+ } else {
+ ret = -EAGAIN;
+ }
+ mutex_unlock(&odp->umem_mutex);
- npages += np;
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+ goto out;
+ }
+
+ if (bytes_mapped) {
+ u32 new_mappings = (np << page_shift) -
+ (io_virt - round_down(io_virt, 1 << page_shift));
+ *bytes_mapped += min_t(u32, new_mappings, size);
}
+ npages += np << (page_shift - PAGE_SHIFT);
bcnt -= size;
+
if (unlikely(bcnt)) {
struct ib_umem_odp *next;
@@ -653,17 +577,18 @@ next_mr:
if (unlikely(!next || next->umem->address != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
- ret = -EAGAIN;
- goto srcu_unlock_no_wait;
+ return -EAGAIN;
}
odp = next;
mr = odp->private;
goto next_mr;
}
-srcu_unlock:
+ return npages;
+
+out:
if (ret == -EAGAIN) {
- if (implicit || !odp->dying) {
+ if (mr->parent || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
@@ -679,7 +604,155 @@ srcu_unlock:
}
}
-srcu_unlock_no_wait:
+ return ret;
+}
+
+struct pf_frame {
+ struct pf_frame *next;
+ u32 key;
+ u64 io_virt;
+ size_t bcnt;
+ int depth;
+};
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns number of OS pages retrieved on success. The caller may continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ * page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ * abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+ u32 key, u64 io_virt, size_t bcnt,
+ u32 *bytes_committed,
+ u32 *bytes_mapped)
+{
+ int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ struct pf_frame *head = NULL, *frame;
+ struct mlx5_core_mkey *mmkey;
+ struct mlx5_ib_mw *mw;
+ struct mlx5_ib_mr *mr;
+ struct mlx5_klm *pklm;
+ u32 *out = NULL;
+ size_t offset;
+
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+
+next_mr:
+ mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ if (!mmkey || mmkey->key != key) {
+ mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_MR:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (!mr->live || !mr->ibmr.pd) {
+ mlx5_ib_dbg(dev, "got dead MR\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+ if (ret < 0)
+ goto srcu_unlock;
+
+ npages += ret;
+ ret = 0;
+ break;
+
+ case MLX5_MKEY_MW:
+ mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+
+ if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
+ mlx5_ib_dbg(dev, "indirection level exceeded\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
+ sizeof(*pklm) * (mw->ndescs - 2);
+
+ if (outlen > cur_outlen) {
+ kfree(out);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+ cur_outlen = outlen;
+ }
+
+ pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
+ bsf0_klm0_pas_mtt0_1);
+
+ ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+ if (ret)
+ goto srcu_unlock;
+
+ offset = io_virt - MLX5_GET64(query_mkey_out, out,
+ memory_key_mkey_entry.start_addr);
+
+ for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+ if (offset >= be32_to_cpu(pklm->bcount)) {
+ offset -= be32_to_cpu(pklm->bcount);
+ continue;
+ }
+
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+
+ frame->key = be32_to_cpu(pklm->key);
+ frame->io_virt = be64_to_cpu(pklm->va) + offset;
+ frame->bcnt = min_t(size_t, bcnt,
+ be32_to_cpu(pklm->bcount) - offset);
+ frame->depth = depth + 1;
+ frame->next = head;
+ head = frame;
+
+ bcnt -= frame->bcnt;
+ }
+ break;
+
+ default:
+ mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ if (head) {
+ frame = head;
+ head = frame->next;
+
+ key = frame->key;
+ io_virt = frame->io_virt;
+ bcnt = frame->bcnt;
+ depth = frame->depth;
+ kfree(frame);
+
+ goto next_mr;
+ }
+
+srcu_unlock:
+ while (head) {
+ frame = head;
+ head = frame->next;
+ kfree(frame);
+ }
+ kfree(out);
+
srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
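The rewritten pagefault_single_data_segment() above replaces recursion through indirect memory windows with an explicit stack: each KLM entry that still needs service is pushed as a pf_frame and popped back at next_mr, so kernel stack use stays constant no matter how deep the indirection (which is still bounded by max_indirection). A minimal standalone sketch of the same pattern; visit() is a hypothetical stand-in that resolves one mkey level and may kzalloc() and push child frames onto *head:

	struct frame {
		struct frame *next;
		u32 key;
	};

	static int walk_iterative(u32 root_key)
	{
		struct frame *head = NULL, *f;
		u32 key = root_key;
		int ret;

	next:
		ret = visit(key, &head);	/* may push children onto head */
		if (ret)
			goto unwind;

		if (head) {			/* pop the next pending frame */
			f = head;
			head = f->next;
			key = f->key;
			kfree(f);
			goto next;
		}
		return 0;

	unwind:
		while (head) {			/* free anything still queued */
			f = head;
			head = f->next;
			kfree(f);
		}
		return ret;
	}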
@@ -997,9 +1070,6 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
resume_with_error = 0;
goto resolve_page_fault;
} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
- if (ret != -ENOENT)
- mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
- ret, pfault->wqe.wq_num, pfault->type);
goto resolve_page_fault;
}
@@ -1059,8 +1129,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
} else if (ret < 0 || pages_in_range(address, length) > ret) {
mlx5_ib_page_fault_resume(dev, pfault, 1);
if (ret != -ENOENT)
- mlx5_ib_warn(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
- ret, pfault->token, pfault->type);
+ mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
+ ret, pfault->token, pfault->type);
return;
}
@@ -1081,8 +1151,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
prefetch_len,
&bytes_committed, NULL);
if (ret < 0 && ret != -EAGAIN) {
- mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
- ret, pfault->token, address, prefetch_len);
+ mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
+ ret, pfault->token, address, prefetch_len);
}
}
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ed6320186f89..4e5a811d33c7 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2799,7 +2799,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->port) - 1;
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
+ cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2827,7 +2827,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
+ raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -4965,7 +4965,8 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
- MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+ MLX5_SET(rqc, rqc, counter_set_id,
+ dev->port->cnts.set_id);
} else
pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
dev->ib_dev.name);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index c7f49bbb0c72..9d83a53c0c67 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -367,12 +367,16 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
goto out;
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param =
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
if (status) {
@@ -450,8 +454,12 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
err = mthca_status_to_errno(context->status);
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param = context->out_param;
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
out:
spin_lock(&dev->cmd.context_lock);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 22d0e6ee5af6..e1b8940558d2 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -937,7 +937,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err;
}
- shift = ffs(mr->umem->page_size) - 1;
+ shift = mr->umem->page_shift;
n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
@@ -959,8 +959,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) +
- mr->umem->page_size * k;
+ pages[i++] = sg_dma_address(sg) + (k << shift);
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 19acd13c6cb1..8f9d8b4ad583 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1849,9 +1849,8 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
wqe_count -= counter;
nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
} while (wqe_count);
- init_timer(&nesvnic->rq_wqes_timer);
- nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
- nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
+ setup_timer(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout,
+ (unsigned long)nesvnic);
nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
{
@@ -3055,7 +3054,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
barrier();
- opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX];
+ opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
else
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 33624f17c347..77226cf4ea02 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -1040,9 +1040,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct
mgtvnic->mgt.rx_skb[counter] = skb;
}
- init_timer(&mgtvnic->rq_wqes_timer);
- mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout;
- mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic;
+ setup_timer(&mgtvnic->rq_wqes_timer, nes_mgt_rq_wqes_timeout,
+ (unsigned long)mgtvnic);
wqe_count = NES_MGT_WQ_COUNT - 1;
mgtvnic->mgt.rq_head = wqe_count;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index ccf0a4cffe9c..c5eb78f4ca8b 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1308,9 +1308,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
init_completion(&nesqp->rq_drained);
nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
- init_timer(&nesqp->terminate_timer);
- nesqp->terminate_timer.function = nes_terminate_timeout;
- nesqp->terminate_timer.data = (unsigned long)nesqp;
+ setup_timer(&nesqp->terminate_timer, nes_terminate_timeout,
+ (unsigned long)nesqp);
/* update the QP table */
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
@@ -2165,9 +2164,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
- " offset = %u, page size = %u.\n",
+ " offset = %u, page size = %lu.\n",
(unsigned long int)start, (unsigned long int)virt, (u32)length,
- ib_umem_offset(region), region->page_size);
+ ib_umem_offset(region), BIT(region->page_shift));
skip_pages = ((u32)ib_umem_offset(region)) >> 12;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c52edeafd616..c57e387b55a2 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -914,21 +914,18 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
+ shift = umem->page_shift;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
/* store the page address in pbe */
pbe->pa_lo =
- cpu_to_le32(sg_dma_address
- (sg) +
- (umem->page_size * pg_cnt));
+ cpu_to_le32(sg_dma_address(sg) +
+ (pg_cnt << shift));
pbe->pa_hi =
- cpu_to_le32(upper_32_bits
- ((sg_dma_address
- (sg) +
- umem->page_size * pg_cnt)));
+ cpu_to_le32(upper_32_bits(sg_dma_address(sg) +
+ (pg_cnt << shift)));
pbe_cnt += 1;
total_num_pbes += 1;
pbe++;
@@ -978,7 +975,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
if (status)
goto umem_err;
- mr->hwmr.pbe_size = mr->umem->page_size;
+ mr->hwmr.pbe_size = BIT(mr->umem->page_shift);
mr->hwmr.fbo = ib_umem_offset(mr->umem);
mr->hwmr.va = usr_addr;
mr->hwmr.len = len;
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ced0461d6e9f..ef11e770f822 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -340,43 +340,58 @@ static void qedr_remove_sysfiles(struct qedr_dev *dev)
static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
{
struct pci_dev *bridge;
- u32 val;
-
- dev->atomic_cap = IB_ATOMIC_NONE;
+ u32 ctl2, cap2;
+ u16 flags;
+ int rc;
bridge = pdev->bus->self;
if (!bridge)
- return;
-
- /* Check whether we are connected directly or via a switch */
- while (bridge && bridge->bus->parent) {
- DP_DEBUG(dev, QEDR_MSG_INIT,
- "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n",
- bridge->bus->number, bridge->bus->primary);
- /* Need to check Atomic Op Routing Supported all the way to
- * root complex.
- */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
- pcie_capability_clear_word(pdev,
- PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- return;
- }
+ goto disable;
+
+ /* Check atomic routing support all the way to root complex */
+ while (bridge->bus->parent) {
+ rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags);
+ if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2))
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2);
+ if (rc)
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl2);
+ if (rc)
+ goto disable;
+
+ if (!(cap2 & PCI_EXP_DEVCAP2_ATOMIC_ROUTE) ||
+ (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK))
+ goto disable;
bridge = bridge->bus->parent->self;
}
- bridge = pdev->bus->self;
- /* according to bridge capability */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
- pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- dev->atomic_cap = IB_ATOMIC_GLOB;
- } else {
- pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- }
+ rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags);
+ if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2))
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2);
+ if (rc || !(cap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64))
+ goto disable;
+
+ /* Set atomic operations */
+ pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_GLOB;
+
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability enabled\n");
+
+ return;
+
+disable:
+ pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_NONE;
+
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability disabled\n");
+
}
static const struct qed_rdma_ops *qed_ops;
@@ -423,14 +438,21 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
cq->arm_flags = 0;
- if (cq->ibcq.comp_handler)
+ if (!cq->destroyed && cq->ibcq.comp_handler)
(*cq->ibcq.comp_handler)
(&cq->ibcq, cq->ibcq.cq_context);
+ /* The CQ's CNQ notification counter is checked before
+ * destroying the CQ in a busy-wait loop that waits for all of
+ * the CQ's CNQ interrupts to be processed. It is increased
+ * here, only after the completion handler, to ensure that the
+ * handler is not running when the CQ is destroyed.
+ */
+ cq->cnq_notif++;
+
sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
cnq->n_comp++;
-
}
qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index,
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 5cb9195513bd..bf02ae4c8891 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -272,6 +272,8 @@ struct qedr_cq {
u32 cq_cons;
struct qedr_userq q;
+ u8 destroyed;
+ u16 cnq_notif;
};
struct qedr_pd {
@@ -429,7 +431,8 @@ struct qedr_mr {
RDMA_CQE_RESPONDER_IMM_FLG_SHIFT)
#define QEDR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \
RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT)
-#define QEDR_RESP_RDMA_IMM (QEDR_RESP_IMM | QEDR_RESP_RDMA)
+#define QEDR_RESP_INV (RDMA_CQE_RESPONDER_INV_FLG_MASK << \
+ RDMA_CQE_RESPONDER_INV_FLG_SHIFT)
static inline void qedr_inc_sw_cons(struct qedr_qp_hwq_info *info)
{
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 2091902848e6..a3afd2b21cc5 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -681,16 +681,16 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
+ shift = umem->page_shift;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
/* store the page address in pbe */
pbe->lo = cpu_to_le32(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
+ (pg_cnt << shift));
addr = upper_32_bits(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
+ (pg_cnt << shift));
pbe->hi = cpu_to_le32(addr);
pbe_cnt++;
total_num_pbes++;
@@ -822,6 +822,17 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct qedr_cq *cq = get_qedr_cq(ibcq);
unsigned long sflags;
+ struct qedr_dev *dev;
+
+ dev = get_qedr_dev(ibcq->device);
+
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return -EINVAL;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return 0;
@@ -987,35 +998,82 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
return 0;
}
+#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
+#define QEDR_DESTROY_CQ_ITER_DURATION (10)
+
int qedr_destroy_cq(struct ib_cq *ibcq)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams;
struct qed_rdma_destroy_cq_in_params iparams;
struct qedr_cq *cq = get_qedr_cq(ibcq);
+ int iter;
+ int rc;
+
+ DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
- DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid);
+ cq->destroyed = 1;
/* GSIs CQs are handled by driver, so they don't exist in the FW */
- if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
- int rc;
+ if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+ goto done;
- iparams.icid = cq->icid;
- rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams,
- &oparams);
- if (rc)
- return rc;
- dev->ops->common->chain_free(dev->cdev, &cq->pbl);
- }
+ iparams.icid = cq->icid;
+ rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+ if (rc)
+ return rc;
+
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
if (ibcq->uobject && ibcq->uobject->context) {
qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
}
+ /* We don't want the IRQ handler to handle a non-existing CQ so we
+ * wait until all CNQ interrupts, if any, are received. This will always
+ * happen, and it will happen very fast. If not, then a serious error
+ * has occurred. That is why we can use a long delay.
+ * We spin for a short time so we don't lose time on context switching
+ * in case all the completions are handled in that span. Otherwise
+ * we sleep for a while and check again. Since the CNQ may be
+ * associated with (only) the current CPU we use msleep to allow the
+ * current CPU to be freed.
+ * The CNQ notification is increased in qedr_irq_handler().
+ */
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ udelay(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
+ }
+
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ msleep(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
+ }
+
+ if (oparams.num_cq_notif != cq->cnq_notif)
+ goto err;
+
+ /* Note that we don't need to have explicit code to wait for the
+ * completion of the event handler because it is invoked from the EQ.
+ * Since the destroy CQ ramrod has also been received on the EQ we can
+ * be certain that there's no event handler in process.
+ */
+done:
+ cq->sig = ~cq->sig;
+
kfree(cq);
return 0;
+
+err:
+ DP_ERR(dev,
+ "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
+ cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
+
+ return -EINVAL;
}
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
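The destroy path above uses a two-phase wait: udelay() first, on the chance that the remaining CNQ notifications arrive within microseconds, then msleep(), which yields the CPU the CNQ interrupt may be affine to. The generic shape of that pattern, with done() as a hypothetical condition check:

	/* Sketch: spin briefly, then sleep-poll; done() is hypothetical. */
	static bool wait_two_phase(void)
	{
		int iter;

		for (iter = 10; iter && !done(); iter--)
			udelay(10);		/* phase 1: cheap busy-wait */

		for (iter = 10; iter && !done(); iter--)
			msleep(10);		/* phase 2: yield the CPU */

		return done();
	}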
@@ -2190,7 +2248,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
- mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
+ mr->hw_mr.page_size_log = mr->umem->page_shift;
mr->hw_mr.fbo = ib_umem_offset(mr->umem);
mr->hw_mr.length = len;
mr->hw_mr.vaddr = usr_addr;
@@ -2625,6 +2683,8 @@ static int qedr_prepare_reg(struct qedr_qp *qp,
fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
fwqe1->l_key = wr->key;
+ fwqe2->access_ctrl = 0;
+
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
!!(wr->access & IB_ACCESS_REMOTE_READ));
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
@@ -3271,57 +3331,81 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
return cnt;
}
-static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
- struct qedr_cq *cq, struct ib_wc *wc,
- struct rdma_cqe_responder *resp, u64 wr_id)
+static inline int qedr_cqe_resp_status_to_ib(u8 status)
{
- enum ib_wc_status wc_status = IB_WC_SUCCESS;
- u8 flags;
-
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
-
- switch (resp->status) {
+ switch (status) {
case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
- wc_status = IB_WC_LOC_ACCESS_ERR;
- break;
+ return IB_WC_LOC_ACCESS_ERR;
case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
- wc_status = IB_WC_LOC_LEN_ERR;
- break;
+ return IB_WC_LOC_LEN_ERR;
case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
- wc_status = IB_WC_LOC_QP_OP_ERR;
- break;
+ return IB_WC_LOC_QP_OP_ERR;
case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
- wc_status = IB_WC_LOC_PROT_ERR;
- break;
+ return IB_WC_LOC_PROT_ERR;
case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
- wc_status = IB_WC_MW_BIND_ERR;
- break;
+ return IB_WC_MW_BIND_ERR;
case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
- wc_status = IB_WC_REM_INV_RD_REQ_ERR;
- break;
+ return IB_WC_REM_INV_RD_REQ_ERR;
case RDMA_CQE_RESP_STS_OK:
- wc_status = IB_WC_SUCCESS;
- wc->byte_len = le32_to_cpu(resp->length);
+ return IB_WC_SUCCESS;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
+ struct ib_wc *wc)
+{
+ wc->status = IB_WC_SUCCESS;
+ wc->byte_len = le32_to_cpu(resp->length);
- flags = resp->flags & QEDR_RESP_RDMA_IMM;
+ if (resp->flags & QEDR_RESP_IMM) {
+ wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->wc_flags |= IB_WC_WITH_IMM;
- if (flags == QEDR_RESP_RDMA_IMM)
+ if (resp->flags & QEDR_RESP_RDMA)
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) {
- wc->ex.imm_data =
- le32_to_cpu(resp->imm_data_or_inv_r_Key);
- wc->wc_flags |= IB_WC_WITH_IMM;
- }
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- DP_ERR(dev, "Invalid CQE status detected\n");
+ if (resp->flags & QEDR_RESP_INV)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_INV) {
+ wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+
+ if (resp->flags & QEDR_RESP_RDMA)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_RDMA) {
+ return -EINVAL;
}
- /* fill WC */
- wc->status = wc_status;
+ return 0;
+}
+
+static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct qedr_cq *cq, struct ib_wc *wc,
+ struct rdma_cqe_responder *resp, u64 wr_id)
+{
+ /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+
+ if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
+ if (qedr_set_ok_cqe_resp_wc(resp, wc))
+ DP_ERR(dev,
+ "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
+ cq, cq->icid, resp->flags);
+
+ } else {
+ wc->status = qedr_cqe_resp_status_to_ib(resp->status);
+ if (wc->status == IB_WC_GENERAL_ERR)
+ DP_ERR(dev,
+ "CQ %p (icid=%d) contains an invalid CQE status %d\n",
+ cq, cq->icid, resp->status);
+ }
+
+ /* Fill the rest of the WC */
wc->vendor_err = 0;
wc->src_qp = qp->id;
wc->qp = &qp->ibqp;
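For a successful responder CQE, qedr_set_ok_cqe_resp_wc() above accepts only a few flag combinations; the table below is reconstructed from its branches:

	/* Legal responder-CQE flag combinations (from the code above):
	 *   (none)       -> plain IB_WC_RECV
	 *   IMM          -> IB_WC_RECV               + IB_WC_WITH_IMM
	 *   IMM | RDMA   -> IB_WC_RECV_RDMA_WITH_IMM + IB_WC_WITH_IMM
	 *   INV          -> IB_WC_RECV               + IB_WC_WITH_INVALIDATE
	 * IMM|INV, INV|RDMA and RDMA alone are rejected with -EINVAL.
	 */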
@@ -3416,6 +3500,13 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
int update = 0;
int done = 0;
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return 0;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return qedr_gsi_poll_cq(ibcq, num_entries, wc);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 06de1cbcf67d..e423b71e6ea0 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3295,13 +3295,11 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->rhdrhead_intr_off = 1ULL << 32;
/* setup the stats timer; the add_timer is done at end of init */
- init_timer(&dd->stats_timer);
- dd->stats_timer.function = qib_get_6120_faststats;
- dd->stats_timer.data = (unsigned long) dd;
+ setup_timer(&dd->stats_timer, qib_get_6120_faststats,
+ (unsigned long)dd);
- init_timer(&dd->cspec->pma_timer);
- dd->cspec->pma_timer.function = pma_6120_timer;
- dd->cspec->pma_timer.data = (unsigned long) ppd;
+ setup_timer(&dd->cspec->pma_timer, pma_6120_timer,
+ (unsigned long)ppd);
dd->ureg_align = qib_read_kreg32(dd, kr_palign);
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 55a18384c22d..c3679c48e61c 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4074,9 +4074,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
if (!qib_mini_init)
qib_write_kreg(dd, kr_rcvbthqp, QIB_KD_QP);
- init_timer(&ppd->cpspec->chase_timer);
- ppd->cpspec->chase_timer.function = reenable_7220_chase;
- ppd->cpspec->chase_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->cpspec->chase_timer, reenable_7220_chase,
+ (unsigned long)ppd);
qib_num_cfg_vls = 1; /* if any 7220's, only one VL */
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index af9f596bb68b..bb2439fff8fa 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6611,9 +6611,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
if (!qib_mini_init)
write_7322_init_portregs(ppd);
- init_timer(&cp->chase_timer);
- cp->chase_timer.function = reenable_chase;
- cp->chase_timer.data = (unsigned long)ppd;
+ setup_timer(&cp->chase_timer, reenable_chase,
+ (unsigned long)ppd);
ppd++;
}
@@ -6639,9 +6638,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
(u64) rcv_int_count << IBA7322_HDRHEAD_PKTINT_SHIFT;
/* setup the stats timer; the add_timer is done at end of init */
- init_timer(&dd->stats_timer);
- dd->stats_timer.function = qib_get_7322_faststats;
- dd->stats_timer.data = (unsigned long) dd;
+ setup_timer(&dd->stats_timer, qib_get_7322_faststats,
+ (unsigned long)dd);
dd->ureg_align = 0x10000; /* 64KB alignment */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index b50240b1d5a4..6c16ba1107ba 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -233,9 +233,8 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
- init_timer(&ppd->symerr_clear_timer);
- ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
- ppd->symerr_clear_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->symerr_clear_timer, qib_clear_symerror_on_linkup,
+ (unsigned long)ppd);
ppd->qib_wq = NULL;
ppd->ibport_data.pmastats =
@@ -429,9 +428,8 @@ static int loadtime_init(struct qib_devdata *dd)
qib_get_eeprom_info(dd);
/* setup time (don't start yet) to verify we got interrupt */
- init_timer(&dd->intrchk_timer);
- dd->intrchk_timer.function = verify_interrupt;
- dd->intrchk_timer.data = (unsigned long) dd;
+ setup_timer(&dd->intrchk_timer, verify_interrupt,
+ (unsigned long)dd);
done:
return ret;
}
@@ -755,9 +753,8 @@ done:
continue;
if (dd->flags & QIB_HAS_SEND_DMA)
ret = qib_setup_sdma(ppd);
- init_timer(&ppd->hol_timer);
- ppd->hol_timer.function = qib_hol_event;
- ppd->hol_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->hol_timer, qib_hol_event,
+ (unsigned long)ppd);
ppd->hol_state = QIB_HOL_UP;
}
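The nes and qib conversions in this series all follow the same shape: the open-coded init_timer()/function/data triplet collapses into a single setup_timer() call. The two forms are equivalent (the timer and callback names below are illustrative):

	/* Before: three statements */
	init_timer(&dd->stats_timer);
	dd->stats_timer.function = my_faststats;
	dd->stats_timer.data = (unsigned long)dd;

	/* After: one call, same effect */
	setup_timer(&dd->stats_timer, my_faststats, (unsigned long)dd);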
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 12658e3fe154..023498745b8a 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -938,7 +938,10 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@@ -983,7 +986,10 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 17655cc3e6fe..6e1adf709483 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -769,7 +769,10 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 83f8b5f24381..68d449cdb794 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -114,6 +114,19 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_qib_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+};
+
+/*
* System image GUID.
*/
__be64 ib_qib_sys_image_guid;
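The designated-initializer table gives an O(1) translation from the stored work-request opcode to its completion opcode; the qib_rc.c and qib_ruc.c hunks above use it when building the WC:

	/* Usage, as in the qib_rc.c/qib_ruc.c hunks above: */
	rvt_qp_swqe_complete(qp, wqe,
			     ib_qib_wc_opcode[wqe->wr.opcode],
			     IB_WC_SUCCESS);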
@@ -343,7 +356,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
if (lnh != QIB_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (mcast == NULL)
goto drop;
this_cpu_inc(ibp->pmastats->n_multicast_rcv);
diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h
index b54986de5f0c..ddd81294fa46 100644
--- a/drivers/infiniband/hw/usnic/usnic_common_util.h
+++ b/drivers/infiniband/hw/usnic/usnic_common_util.h
@@ -34,21 +34,7 @@
#ifndef USNIC_CMN_UTIL_H
#define USNIC_CMN_UTIL_H
-static inline void
-usnic_mac_to_gid(const char *const mac, char *raw_gid)
-{
- raw_gid[0] = 0xfe;
- raw_gid[1] = 0x80;
- memset(&raw_gid[2], 0, 6);
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
-}
+#include <net/addrconf.h>
static inline void
usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
@@ -57,27 +43,7 @@ usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
raw_gid[1] = 0x80;
memset(&raw_gid[2], 0, 2);
memcpy(&raw_gid[4], &inaddr, 4);
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
-}
-
-static inline void
-usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
-{
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
+ addrconf_addr_eui48(&raw_gid[8], mac);
}
#endif /* USNIC_COMMON_UTIL_H */
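The bytes the removed helpers wrote are the standard MAC-to-modified-EUI-64 expansion, which addrconf_addr_eui48() now performs: splice 0xff,0xfe between the OUI and NIC halves of the MAC and flip the universal/local bit of the first octet. For an illustrative MAC:

	/* Sketch: addrconf_addr_eui48(&raw_gid[8], mac) with
	 *   mac            = 00:1a:2b:3c:4d:5e        (illustrative)
	 * produces
	 *   raw_gid[8..15] = 02:1a:2b:ff:fe:3c:4d:5e
	 * i.e. 0x00 ^ 0x02 = 0x02 (U/L bit flipped), ff:fe spliced in. */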
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 04443242e258..32956f9f5715 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -44,6 +44,7 @@
#include "usnic_vnic.h"
#include "usnic_ib_verbs.h"
#include "usnic_log.h"
+#include "usnic_ib_sysfs.h"
static ssize_t usnic_ib_show_board(struct device *device,
struct device_attribute *attr,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 3284730d3c09..44a93326f7b5 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -46,6 +46,7 @@
#include "usnic_log.h"
#include "usnic_uiom.h"
#include "usnic_transport.h"
+#include "usnic_ib_verbs.h"
#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
@@ -151,7 +152,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
struct usnic_vnic *vnic;
struct usnic_ib_qp_grp *qp_grp;
struct device *dev, **dev_list;
- int i, found = 0;
+ int i;
BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
@@ -173,8 +174,13 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
us_ibdev->ib_dev.name,
pci_name(usnic_vnic_get_pdev(
vnic)));
- found = 1;
- break;
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
+ vf, pd,
+ res_spec,
+ trans_spec);
+
+ spin_unlock(&vf->lock);
+ goto qp_grp_check;
}
spin_unlock(&vf->lock);
@@ -182,34 +188,30 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
usnic_uiom_free_dev_list(dev_list);
}
- if (!found) {
- /* Try to find resources on an unused vf */
- list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
- spin_lock(&vf->lock);
- vnic = vf->vnic;
- if (vf->qp_grp_ref_cnt == 0 &&
- usnic_vnic_check_room(vnic, res_spec) == 0) {
- found = 1;
- break;
- }
+ /* Try to find resources on an unused vf */
+ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (vf->qp_grp_ref_cnt == 0 &&
+ usnic_vnic_check_room(vnic, res_spec) == 0) {
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf,
+ pd, res_spec,
+ trans_spec);
+
spin_unlock(&vf->lock);
+ goto qp_grp_check;
}
+ spin_unlock(&vf->lock);
}
- if (!found) {
- usnic_info("No free qp grp found on %s\n",
- us_ibdev->ib_dev.name);
- return ERR_PTR(-ENOMEM);
- }
+ usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name);
+ return ERR_PTR(-ENOMEM);
- qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
- trans_spec);
- spin_unlock(&vf->lock);
+qp_grp_check:
if (IS_ERR_OR_NULL(qp_grp)) {
usnic_err("Failed to allocate qp_grp\n");
return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
}
-
return qp_grp;
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
index 948b5ccd2a70..6ef4df6c8c4a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
@@ -194,7 +194,7 @@ int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
len = sg_dma_len(sg) >> PAGE_SHIFT;
for (j = 0; j < len; j++) {
dma_addr_t addr = sg_dma_address(sg) +
- umem->page_size * j;
+ (j << umem->page_shift);
ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
if (ret)
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 16c446142c2a..b0f09fb45c72 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -119,7 +119,7 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
spin_lock_irqsave(&dev->n_ahs_lock, flags);
if (dev->n_ahs_allocated == dev->dparms.props.max_ah) {
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
return ERR_PTR(-ENOMEM);
}
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 7aa7a4e312f1..0ae2ff8cf81e 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -50,6 +50,7 @@
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
+#include "trace.h"
/**
* rvt_cq_enter - add a new entry to the completion queue
@@ -93,6 +94,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
}
return;
}
+ trace_rvt_cq_enter(cq, entry, head);
if (cq->ip) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
@@ -482,6 +484,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail == wc->head)
break;
/* The kernel doesn't need a RMB since it has the lock. */
+ trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
*entry = wc->kqueue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 05c8c2afb0e3..1f12b69a0d07 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -100,10 +100,11 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
/**
* mcast_alloc - allocate the multicast GID structure
* @mgid: the multicast GID
+ * @lid: the multicast LID (host order)
*
* A list of QPs will be attached to this structure.
*/
-static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
+static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid, u16 lid)
{
struct rvt_mcast *mcast;
@@ -111,7 +112,9 @@ static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
if (!mcast)
goto bail;
- mcast->mgid = *mgid;
+ mcast->mcast_addr.mgid = *mgid;
+ mcast->mcast_addr.lid = lid;
+
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
@@ -131,15 +134,19 @@ static void rvt_mcast_free(struct rvt_mcast *mcast)
}
/**
- * rvt_mcast_find - search the global table for the given multicast GID
+ * rvt_mcast_find - search the global table for the given multicast GID/LID
+ * NOTE: It is valid to have 1 MLID with multiple MGIDs. It is not valid
+ * to have 1 MGID with multiple MLIDs.
* @ibp: the IB port structure
* @mgid: the multicast GID to search for
+ * @lid: the multicast LID portion of the multicast address (host order)
*
* The caller is responsible for decrementing the reference count if found.
*
* Return: NULL if not found.
*/
-struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
+ u16 lid)
{
struct rb_node *n;
unsigned long flags;
@@ -153,15 +160,18 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
mcast = rb_entry(n, struct rvt_mcast, rb_node);
- ret = memcmp(mgid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
+ ret = memcmp(mgid->raw, mcast->mcast_addr.mgid.raw,
+ sizeof(*mgid));
if (ret < 0) {
n = n->rb_left;
} else if (ret > 0) {
n = n->rb_right;
} else {
- atomic_inc(&mcast->refcount);
- found = mcast;
+ /* MGID/MLID must match */
+ if (mcast->mcast_addr.lid == lid) {
+ atomic_inc(&mcast->refcount);
+ found = mcast;
+ }
break;
}
}
@@ -177,7 +187,8 @@ EXPORT_SYMBOL(rvt_mcast_find);
*
* Return: zero if both were added. Return EEXIST if the GID was already in
* the table but the QP was added. Return ESRCH if the QP was already
- * attached and neither structure was added.
+ * attached and neither structure was added. Return EINVAL if the MGID was
+ * found, but the MLID did NOT match.
*/
static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp)
@@ -195,8 +206,9 @@ static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
pn = *n;
tmcast = rb_entry(pn, struct rvt_mcast, rb_node);
- ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
- sizeof(union ib_gid));
+ ret = memcmp(mcast->mcast_addr.mgid.raw,
+ tmcast->mcast_addr.mgid.raw,
+ sizeof(mcast->mcast_addr.mgid));
if (ret < 0) {
n = &pn->rb_left;
continue;
@@ -206,6 +218,11 @@ static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
continue;
}
+ if (tmcast->mcast_addr.lid != mcast->mcast_addr.lid) {
+ ret = EINVAL;
+ goto bail;
+ }
+
/* Search the QP list to see if this is already there. */
list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
if (p->qp == mqp->qp) {
@@ -276,7 +293,7 @@ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
* Allocate data structures since it's better to do this outside of
* spin locks and it will most likely be needed.
*/
- mcast = rvt_mcast_alloc(gid);
+ mcast = rvt_mcast_alloc(gid, lid);
if (!mcast)
return -ENOMEM;
@@ -296,6 +313,10 @@ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
/* Exceeded the maximum number of mcast groups. */
ret = -ENOMEM;
goto bail_mqp;
+ case EINVAL:
+ /* Invalid MGID/MLID pair */
+ ret = -EINVAL;
+ goto bail_mqp;
default:
break;
}
@@ -344,14 +365,20 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
mcast = rb_entry(n, struct rvt_mcast, rb_node);
- ret = memcmp(gid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
- if (ret < 0)
+ ret = memcmp(gid->raw, mcast->mcast_addr.mgid.raw,
+ sizeof(*gid));
+ if (ret < 0) {
n = n->rb_left;
- else if (ret > 0)
+ } else if (ret > 0) {
n = n->rb_right;
- else
+ } else {
+ /* MGID/MLID must match */
+ if (mcast->mcast_addr.lid != lid) {
+ spin_unlock_irq(&ibp->lock);
+ return -EINVAL;
+ }
break;
+ }
}
/* Search the QP list. */
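The hunks above change the multicast red-black tree from a pure MGID key to an MGID key plus an MLID validity check: the tree is still ordered by memcmp() over the 16-byte GID, and on an exact GID hit the stored LID must also match, otherwise rvt_mcast_find() returns NULL and attach/detach fail with -EINVAL. A minimal userspace sketch of the same search, with hypothetical mcast_node/find_mcast names standing in for the kernel structures:

#include <stdint.h>
#include <string.h>

struct mcast_node {
	uint8_t mgid[16];               /* tree key, compared bytewise */
	uint16_t lid;                   /* must match once the key matches */
	struct mcast_node *left, *right;
};

/*
 * Walk a binary tree ordered by memcmp() over the MGID; on an exact
 * GID hit, require the LID to match as well (one MLID may serve many
 * MGIDs, but a given MGID maps to exactly one MLID).
 */
static struct mcast_node *find_mcast(struct mcast_node *n,
				     const uint8_t mgid[16], uint16_t lid)
{
	while (n) {
		int ret = memcmp(mgid, n->mgid, 16);

		if (ret < 0)
			n = n->left;
		else if (ret > 0)
			n = n->right;
		else
			return n->lid == lid ? n : NULL;
	}
	return NULL;
}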
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index ae30b6838d79..aa5f9ea318e4 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -191,8 +191,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
tmr = rcu_access_pointer(dev->dma_mr);
if (!tmr) {
- rcu_assign_pointer(dev->dma_mr, mr);
mr->lkey_published = 1;
+ /* Ensure published is written first */
+ rcu_assign_pointer(dev->dma_mr, mr);
rvt_get_mr(mr);
}
goto success;
@@ -224,8 +225,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
mr->lkey |= 1 << 8;
rkt->gen++;
}
- rcu_assign_pointer(rkt->table[r], mr);
mr->lkey_published = 1;
+ /* Ensure published is written first */
+ rcu_assign_pointer(rkt->table[r], mr);
success:
spin_unlock_irqrestore(&rkt->lock, flags);
out:
@@ -253,23 +255,24 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
spin_lock_irqsave(&rkt->lock, flags);
if (!lkey) {
if (mr->lkey_published) {
- RCU_INIT_POINTER(dev->dma_mr, NULL);
+ mr->lkey_published = 0;
+ /* ensure published is written before pointer */
+ rcu_assign_pointer(dev->dma_mr, NULL);
rvt_put_mr(mr);
}
} else {
if (!mr->lkey_published)
goto out;
r = lkey >> (32 - dev->dparms.lkey_table_size);
- RCU_INIT_POINTER(rkt->table[r], NULL);
+ mr->lkey_published = 0;
+ /* ensure published is written before pointer */
+ rcu_assign_pointer(rkt->table[r], NULL);
}
- mr->lkey_published = 0;
freed++;
out:
spin_unlock_irqrestore(&rkt->lock, flags);
- if (freed) {
- synchronize_rcu();
+ if (freed)
percpu_ref_kill(&mr->refcount);
- }
}
static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
@@ -405,8 +408,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
- if (is_power_of_2(umem->page_size))
- mr->mr.page_shift = ilog2(umem->page_size);
+ mr->mr.page_shift = umem->page_shift;
m = 0;
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -418,8 +420,9 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto bail_inval;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
+ mr->mr.map[m]->segs[n].length = BIT(umem->page_shift);
+ trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr,
+ BIT(umem->page_shift));
n++;
if (n == RVT_SEGSZ) {
m++;
@@ -822,16 +825,21 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
goto ok;
}
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
- if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
- mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ if (!mr)
goto bail;
+ rvt_get_mr(mr);
+ if (!READ_ONCE(mr->lkey_published))
+ goto bail_unref;
+
+ if (unlikely(atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ goto bail_unref;
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
- goto bail;
- rvt_get_mr(mr);
+ goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@@ -867,6 +875,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = n;
ok:
return 1;
+bail_unref:
+ rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;
@@ -922,15 +932,20 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
}
mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
- if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
- mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ if (!mr)
goto bail;
+ rvt_get_mr(mr);
+ /* ensure mr read is ordered before the test */
+ if (!READ_ONCE(mr->lkey_published))
+ goto bail_unref;
+ if (unlikely(atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ goto bail_unref;
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
- goto bail;
- rvt_get_mr(mr);
+ goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@@ -966,6 +981,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
sge->n = n;
ok:
return 1;
+bail_unref:
+ rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;
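The mr.c hunks reorder lkey publication so that mr->lkey_published is written before the RCU pointer becomes visible (rcu_assign_pointer() is a release store), and the readers in rvt_lkey_ok()/rvt_rkey_ok() now take a reference first, re-check the flag with READ_ONCE(), and back out through the new bail_unref label if the MR was unpublished in the meantime. A C11-atomics analogue of the publish/unpublish side (not the kernel code, just the same release-ordering idea):

#include <stdatomic.h>
#include <stddef.h>

struct mr {
	int lkey_published;
};

static _Atomic(struct mr *) table_slot;

/*
 * Publisher: set the flag first, then make the pointer visible with a
 * release store, mirroring rcu_assign_pointer(). A reader that sees
 * the pointer via an acquire load is guaranteed to see
 * lkey_published == 1.
 */
static void publish(struct mr *mr)
{
	mr->lkey_published = 1;
	atomic_store_explicit(&table_slot, mr, memory_order_release);
}

/*
 * Unpublisher: clear the flag before retracting the pointer, so a
 * reader that grabbed the pointer just before the NULL store can
 * notice the cleared flag and bail (the kernel's bail_unref path).
 */
static void unpublish(struct mr *mr)
{
	mr->lkey_published = 0;
	atomic_store_explicit(&table_slot, NULL, memory_order_release);
}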
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index f5ad8d4bfb39..728f5f1218c8 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -117,23 +117,6 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
- [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
- [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
- [IB_WR_REG_MR] = IB_WC_REG_MR
-};
-EXPORT_SYMBOL(ib_rvt_wc_opcode);
-
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@@ -1789,11 +1772,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
0);
qp->s_next_psn = wqe->lpsn + 1;
}
- trace_rvt_post_one_wr(qp, wqe);
- if (unlikely(reserved_op))
+ if (unlikely(reserved_op)) {
+ wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
rvt_qp_wqe_reserve(qp, wqe);
- else
+ } else {
+ wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
qp->s_avail--;
+ }
+ trace_rvt_post_one_wr(qp, wqe);
smp_wmb(); /* see request builders */
qp->s_head = next;
@@ -2069,8 +2055,12 @@ static void rvt_rc_timeout(unsigned long arg)
spin_lock_irqsave(&qp->r_lock, flags);
spin_lock(&qp->s_lock);
if (qp->s_flags & RVT_S_TIMER) {
+ struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+
qp->s_flags &= ~RVT_S_TIMER;
+ rvp->n_rc_timeouts++;
del_timer(&qp->s_timer);
+ trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
if (rdi->driver_f.notify_restart_rc)
rdi->driver_f.notify_restart_rc(qp,
qp->s_last_psn + 1,
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index e2d23acb6a7d..bb4b1e710f22 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -52,3 +52,5 @@
#include "trace_qp.h"
#include "trace_tx.h"
#include "trace_mr.h"
+#include "trace_cq.h"
+#include "trace_rc.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
new file mode 100644
index 000000000000..a315850aa9bb
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_CQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_CQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_cq.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_cq
+
+#define wc_opcode_name(opcode) { IB_WC_##opcode, #opcode }
+#define show_wc_opcode(opcode) \
+__print_symbolic(opcode, \
+ wc_opcode_name(SEND), \
+ wc_opcode_name(RDMA_WRITE), \
+ wc_opcode_name(RDMA_READ), \
+ wc_opcode_name(COMP_SWAP), \
+ wc_opcode_name(FETCH_ADD), \
+ wc_opcode_name(LSO), \
+ wc_opcode_name(LOCAL_INV), \
+ wc_opcode_name(REG_MR), \
+ wc_opcode_name(MASKED_COMP_SWAP), \
+ wc_opcode_name(RECV), \
+ wc_opcode_name(RECV_RDMA_WITH_IMM))
+
+#define CQ_PRN \
+"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
+
+DECLARE_EVENT_CLASS(
+ rvt_cq_entry_template,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(cq->rdi)
+ __field(u64, wr_id)
+ __field(u32, status)
+ __field(u32, opcode)
+ __field(u32, qpn)
+ __field(u32, length)
+ __field(u32, idx)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(cq->rdi)
+ __entry->wr_id = wc->wr_id;
+ __entry->status = wc->status;
+ __entry->opcode = wc->opcode;
+ __entry->length = wc->byte_len;
+ __entry->qpn = wc->qp->qp_num;
+ __entry->idx = idx;
+ ),
+ TP_printk(
+ CQ_PRN,
+ __get_str(dev),
+ __entry->idx,
+ __entry->wr_id,
+ __entry->status,
+ __entry->opcode, show_wc_opcode(__entry->opcode),
+ __entry->length,
+ __entry->qpn
+ )
+);
+
+DEFINE_EVENT(
+ rvt_cq_entry_template, rvt_cq_enter,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx));
+
+DEFINE_EVENT(
+ rvt_cq_entry_template, rvt_cq_poll,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx));
+
+#endif /* __RVT_TRACE_CQ_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_cq
+#include <trace/define_trace.h>
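trace_cq.h follows the standard kernel tracepoint layout: the guard admits a second pass when TRACE_HEADER_MULTI_READ is set, and the TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE defines tell <trace/define_trace.h> where to re-read the header. The tracepoint bodies are emitted by exactly one compilation unit that defines CREATE_TRACE_POINTS before the include; rdmavt's trace.c follows this convention:

/* In exactly one .c file: */
#define CREATE_TRACE_POINTS
#include "trace.h"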
diff --git a/drivers/infiniband/sw/rdmavt/trace_rc.h b/drivers/infiniband/sw/rdmavt/trace_rc.h
new file mode 100644
index 000000000000..995276933a55
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_rc.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_RC_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_rc
+
+DECLARE_EVENT_CLASS(rvt_rc_template,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, s_flags)
+ __field(u32, psn)
+ __field(u32, s_psn)
+ __field(u32, s_next_psn)
+ __field(u32, s_sending_psn)
+ __field(u32, s_sending_hpsn)
+ __field(u32, r_psn)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ __entry->psn = psn;
+ __entry->s_psn = qp->s_psn;
+ __entry->s_next_psn = qp->s_next_psn;
+ __entry->s_sending_psn = qp->s_sending_psn;
+ __entry->s_sending_hpsn = qp->s_sending_hpsn;
+ __entry->r_psn = qp->r_psn;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->s_flags,
+ __entry->psn,
+ __entry->s_psn,
+ __entry->s_next_psn,
+ __entry->s_sending_psn,
+ __entry->s_sending_hpsn,
+ __entry->r_psn
+ )
+);
+
+DEFINE_EVENT(rvt_rc_template, rvt_rc_timeout,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+#endif /* __RVT_TRACE_RC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rc
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index 0e03173662d8..a613a2223751 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -71,10 +71,20 @@ __print_symbolic(opcode, \
wr_opcode_name(RDMA_READ_WITH_INV), \
wr_opcode_name(LOCAL_INV), \
wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+ wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD), \
+ wr_opcode_name(RESERVED1), \
+ wr_opcode_name(RESERVED2), \
+ wr_opcode_name(RESERVED3), \
+ wr_opcode_name(RESERVED4), \
+ wr_opcode_name(RESERVED5), \
+ wr_opcode_name(RESERVED6), \
+ wr_opcode_name(RESERVED7), \
+ wr_opcode_name(RESERVED8), \
+ wr_opcode_name(RESERVED9), \
+ wr_opcode_name(RESERVED10))
#define POS_PRN \
-"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
TRACE_EVENT(
rvt_post_one_wr,
@@ -83,7 +93,9 @@ TRACE_EVENT(
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
+ __field(struct rvt_swqe *, wqe)
__field(u32, qpn)
+ __field(u32, qpt)
__field(u32, psn)
__field(u32, lpsn)
__field(u32, length)
@@ -92,11 +104,17 @@ TRACE_EVENT(
__field(u32, avail)
__field(u32, head)
__field(u32, last)
+ __field(u32, ssn)
+ __field(int, send_flags)
+ __field(pid_t, pid)
+ __field(int, num_sge)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wqe = wqe;
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
__entry->psn = wqe->psn;
__entry->lpsn = wqe->lpsn;
__entry->length = wqe->length;
@@ -105,20 +123,30 @@ TRACE_EVENT(
__entry->avail = qp->s_avail;
__entry->head = qp->s_head;
__entry->last = qp->s_last;
+ __entry->pid = qp->pid;
+ __entry->ssn = wqe->ssn;
+ __entry->send_flags = wqe->wr.send_flags;
+ __entry->num_sge = wqe->wr.num_sge;
),
TP_printk(
POS_PRN,
__get_str(dev),
+ __entry->wqe,
__entry->wr_id,
+ __entry->send_flags,
__entry->qpn,
+ __entry->qpt,
__entry->psn,
__entry->lpsn,
+ __entry->ssn,
__entry->length,
__entry->opcode, show_wr_opcode(__entry->opcode),
__entry->size,
__entry->avail,
__entry->head,
- __entry->last
+ __entry->last,
+ __entry->pid,
+ __entry->num_sge
)
);
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 6332dedc11e8..320bffc980d8 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -2,6 +2,7 @@ config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
depends on NET_UDP_TUNNEL
+ depends on CRYPTO_CRC32
select DMA_VIRT_OPS
---help---
This driver implements the InfiniBand RDMA transport over
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index ec35ff022a42..3f12beb7076f 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -20,4 +20,5 @@ rdma_rxe-y := \
rxe_mcast.o \
rxe_task.o \
rxe_net.o \
- rxe_sysfs.o
+ rxe_sysfs.o \
+ rxe_hw_counters.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index b12dd9b5a89d..c21c913f911a 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -64,6 +65,8 @@ static void rxe_cleanup(struct rxe_dev *rxe)
rxe_pool_cleanup(&rxe->mc_elem_pool);
rxe_cleanup_ports(rxe);
+
+ crypto_free_shash(rxe->tfm);
}
/* called when all references have been dropped */
@@ -178,7 +181,8 @@ static int rxe_init_ports(struct rxe_dev *rxe)
return -ENOMEM;
port->pkey_tbl[0] = 0xffff;
- port->port_guid = rxe_port_guid(rxe);
+ addrconf_addr_eui48((unsigned char *)&port->port_guid,
+ rxe->ndev->dev_addr);
spin_lock_init(&port->port_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index a696af81e4a5..ecdba2fce083 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -50,6 +50,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+#include <crypto/hash.h>
#include "rxe_net.h"
#include "rxe_opcode.h"
@@ -64,6 +65,25 @@
#define RXE_ROCE_V2_SPORT (0xc000)
+static inline u32 rxe_crc32(struct rxe_dev *rxe,
+ u32 crc, void *next, size_t len)
+{
+ int err;
+
+ SHASH_DESC_ON_STACK(shash, rxe->tfm);
+
+ shash->tfm = rxe->tfm;
+ shash->flags = 0;
+ *(u32 *)shash_desc_ctx(shash) = crc;
+ err = crypto_shash_update(shash, next, len);
+ if (unlikely(err)) {
+ pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
+ return crc32_le(crc, next, len);
+ }
+
+ return *(u32 *)shash_desc_ctx(shash);
+}
+
int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
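rxe_crc32() routes ICRC computation through a crypto_shash transform allocated with crypto_alloc_shash("crc32", 0, 0), so accelerated CRC32 implementations can be picked up; it seeds the digest context with the running CRC and falls back to the software crc32_le() if the update fails. For reference, crc32_le() is the reflected CRC-32 over polynomial 0xEDB88320 with no implicit complement; a minimal bit-at-a-time equivalent:

#include <stdint.h>
#include <stddef.h>

/*
 * Bitwise equivalent of the kernel's crc32_le(): LSB-first CRC-32
 * over the reflected polynomial 0xEDB88320. Callers handle seeding
 * and final inversion themselves; the RoCE ICRC, for instance,
 * starts from ~0 and complements the result at the end (see the
 * rxe_rcv() hunk below).
 */
static uint32_t crc32_le_sw(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320u : 0);
	}
	return crc;
}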
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 4cd55d5617f7..9eb12c2e3c74 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -154,6 +154,8 @@ void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_tail(&qp->resp_pkts, skb);
must_sched = skb_queue_len(&qp->resp_pkts) > 1;
+ if (must_sched != 0)
+ rxe_counter_inc(rxe, RXE_CNT_COMPLETER_SCHED);
rxe_run_task(&qp->comp.task, must_sched);
}
@@ -236,6 +238,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
{
unsigned int mask = pkt->mask;
u8 syn;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
/* Check the sequence only */
switch (qp->comp.opcode) {
@@ -298,6 +301,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
return COMPST_WRITE_SEND;
case AETH_RNR_NAK:
+ rxe_counter_inc(rxe, RXE_CNT_RCV_RNR);
return COMPST_RNR_RETRY;
case AETH_NAK:
@@ -307,6 +311,8 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
* before
*/
if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
+ rxe_counter_inc(rxe,
+ RXE_CNT_RCV_SEQ_ERR);
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
@@ -534,6 +540,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
int rxe_completer(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_send_wqe *wqe = wqe;
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
@@ -683,8 +690,10 @@ int rxe_completer(void *arg)
if (psn_compare(qp->req.psn,
qp->comp.psn) > 0) {
/* tell the requester to retry the
- * send send queue next time around
+ * send queue next time around
*/
+ rxe_counter_inc(rxe,
+ RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
@@ -699,6 +708,7 @@ int rxe_completer(void *arg)
goto exit;
} else {
+ rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
wqe->status = IB_WC_RETRY_EXC_ERR;
state = COMPST_ERROR;
}
@@ -720,6 +730,8 @@ int rxe_completer(void *arg)
skb = NULL;
goto exit;
} else {
+ rxe_counter_inc(rxe,
+ RXE_CNT_RNR_RETRY_EXCEEDED);
wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
state = COMPST_ERROR;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
new file mode 100644
index 000000000000..7ef90aad7dfd
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_hw_counters.h"
+
+const char * const rxe_counter_name[] = {
+ [RXE_CNT_SENT_PKTS] = "sent_pkts",
+ [RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
+ [RXE_CNT_DUP_REQ] = "duplicate_request",
+ [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence",
+ [RXE_CNT_RCV_RNR] = "rcvd_rnr_err",
+ [RXE_CNT_SND_RNR] = "send_rnr_err",
+ [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err",
+ [RXE_CNT_COMPLETER_SCHED] = "ack_deferred",
+ [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err",
+ [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err",
+ [RXE_CNT_COMP_RETRY] = "completer_retry_err",
+ [RXE_CNT_SEND_ERR] = "send_err",
+};
+
+int rxe_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ struct rxe_dev *dev = to_rdev(ibdev);
+ unsigned int cnt;
+
+ if (!port || !stats)
+ return -EINVAL;
+
+ for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++)
+ stats->value[cnt] = dev->stats_counters[cnt];
+
+ return ARRAY_SIZE(rxe_counter_name);
+}
+
+struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS);
+ /* We support only per-port stats */
+ if (!port_num)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(rxe_counter_name,
+ ARRAY_SIZE(rxe_counter_name),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
new file mode 100644
index 000000000000..f44df1b76742
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_HW_COUNTERS_H
+#define RXE_HW_COUNTERS_H
+
+/*
+ * When adding counters to the enum, also add
+ * them to the rxe_counter_name[] array.
+ */
+enum rxe_counters {
+ RXE_CNT_SENT_PKTS,
+ RXE_CNT_RCVD_PKTS,
+ RXE_CNT_DUP_REQ,
+ RXE_CNT_OUT_OF_SEQ_REQ,
+ RXE_CNT_RCV_RNR,
+ RXE_CNT_SND_RNR,
+ RXE_CNT_RCV_SEQ_ERR,
+ RXE_CNT_COMPLETER_SCHED,
+ RXE_CNT_RETRY_EXCEEDED,
+ RXE_CNT_RNR_RETRY_EXCEEDED,
+ RXE_CNT_COMP_RETRY,
+ RXE_CNT_SEND_ERR,
+ RXE_NUM_OF_COUNTERS
+};
+
+struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num);
+int rxe_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index);
+#endif /* RXE_HW_COUNTERS_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 413b56b23a06..39e0be31aab1 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -87,10 +87,10 @@ u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);
length = hdr_size + RXE_BTH_BYTES;
- crc = crc32_le(crc, pshdr, length);
+ crc = rxe_crc32(pkt->rxe, crc, pshdr, length);
/* And finish to compute the CRC on the remainder of the headers. */
- crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,
- rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
+ crc = rxe_crc32(pkt->rxe, crc, pkt->hdr + RXE_BTH_BYTES,
+ rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
return crc;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 183a9d379b41..ed809138fd55 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -145,7 +145,6 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
int rxe_loopback(struct sk_buff *skb);
int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb);
-__be64 rxe_port_guid(struct rxe_dev *rxe);
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
@@ -153,7 +152,6 @@ int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
struct device *rxe_dma_device(struct rxe_dev *rxe);
-__be64 rxe_node_guid(struct rxe_dev *rxe);
int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
@@ -278,6 +276,7 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
if (err) {
rxe->xmit_errors++;
+ rxe_counter_inc(rxe, RXE_CNT_SEND_ERR);
return err;
}
@@ -287,6 +286,7 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
rxe_run_task(&qp->comp.task, 1);
}
+ rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
goto done;
drop:
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 37eea7441ca4..ced15c4446bd 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -191,10 +191,8 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
goto err1;
}
- WARN_ON_ONCE(!is_power_of_2(umem->page_size));
-
- mem->page_shift = ilog2(umem->page_size);
- mem->page_mask = umem->page_size - 1;
+ mem->page_shift = umem->page_shift;
+ mem->page_mask = BIT(umem->page_shift) - 1;
num_buf = 0;
map = mem->map;
@@ -210,7 +208,7 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
}
buf->addr = (uintptr_t)vaddr;
- buf->size = umem->page_size;
+ buf->size = BIT(umem->page_shift);
num_buf++;
buf++;
@@ -370,7 +368,8 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
((void *)(uintptr_t)iova) : addr;
if (crcp)
- *crcp = crc32_le(*crcp, src, length);
+ crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ *crcp, src, length);
memcpy(dest, src, length);
@@ -403,7 +402,8 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
bytes = length;
if (crcp)
- crc = crc32_le(crc, src, bytes);
+ crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ crc, src, bytes);
memcpy(dest, src, bytes);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index d8610960630a..c3a140ed4df2 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -84,34 +84,6 @@ struct rxe_dev *get_rxe_by_name(const char *name)
struct rxe_recv_sockets recv_sockets;
-static __be64 rxe_mac_to_eui64(struct net_device *ndev)
-{
- unsigned char *mac_addr = ndev->dev_addr;
- __be64 eui64;
- unsigned char *dst = (unsigned char *)&eui64;
-
- dst[0] = mac_addr[0] ^ 2;
- dst[1] = mac_addr[1];
- dst[2] = mac_addr[2];
- dst[3] = 0xff;
- dst[4] = 0xfe;
- dst[5] = mac_addr[3];
- dst[6] = mac_addr[4];
- dst[7] = mac_addr[5];
-
- return eui64;
-}
-
-__be64 rxe_node_guid(struct rxe_dev *rxe)
-{
- return rxe_mac_to_eui64(rxe->ndev);
-}
-
-__be64 rxe_port_guid(struct rxe_dev *rxe)
-{
- return rxe_mac_to_eui64(rxe->ndev);
-}
-
struct device *rxe_dma_device(struct rxe_dev *rxe)
{
struct net_device *ndev;
@@ -210,6 +182,39 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif
+static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
+ struct rxe_qp *qp,
+ struct rxe_av *av)
+{
+ struct dst_entry *dst = NULL;
+
+ if (qp_type(qp) == IB_QPT_RC)
+ dst = sk_dst_get(qp->sk->sk);
+
+ if (!dst || !(dst->obsolete && dst->ops->check(dst, 0))) {
+ if (dst)
+ dst_release(dst);
+
+ if (av->network_type == RDMA_NETWORK_IPV4) {
+ struct in_addr *saddr;
+ struct in_addr *daddr;
+
+ saddr = &av->sgid_addr._sockaddr_in.sin_addr;
+ daddr = &av->dgid_addr._sockaddr_in.sin_addr;
+ dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ } else if (av->network_type == RDMA_NETWORK_IPV6) {
+ struct in6_addr *saddr6;
+ struct in6_addr *daddr6;
+
+ saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
+ daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
+ dst = rxe_find_route6(rxe->ndev, saddr6, daddr6);
+ }
+ }
+
+ return dst;
+}
+
static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct udphdr *udph;
@@ -301,7 +306,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
skb_scrub_packet(skb, xnet);
skb_clear_hash(skb);
- skb_dst_set(skb, dst);
+ skb_dst_set(skb, dst_clone(dst));
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
skb_push(skb, sizeof(struct iphdr));
@@ -349,13 +354,14 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
+ struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
bool xnet = false;
__be16 df = htons(IP_DF);
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ dst = rxe_find_route(rxe, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -369,17 +375,24 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
av->grh.traffic_class, av->grh.hop_limit, df, xnet);
+
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_set(qp->sk->sk, dst);
+ else
+ dst_release(dst);
+
return 0;
}
static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
- struct dst_entry *dst;
+ struct rxe_qp *qp = pkt->qp;
+ struct dst_entry *dst = NULL;
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route6(rxe->ndev, saddr, daddr);
+ dst = rxe_find_route(rxe, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -394,6 +407,12 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
av->grh.traffic_class,
av->grh.hop_limit);
+
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_set(qp->sk->sk, dst);
+ else
+ dst_release(dst);
+
return 0;
}
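The rxe_net.c changes cache the routing decision on the RC QP's socket: rxe_find_route() first tries sk_dst_get(), falls back to a fresh IPv4/IPv6 lookup when the cached entry is missing or no longer valid, and prepare4()/prepare6() store the result back with sk_dst_set() (non-RC QPs release it instead). A toy model of that check-then-refresh pattern, with hypothetical names and a generation counter standing in for dst validity:

#include <stdlib.h>

struct route {
	unsigned int gen;       /* snapshot of the table generation */
	int refcnt;
};

static unsigned int table_gen;  /* bumped whenever routing state changes */

static struct route *route_lookup(void)
{
	struct route *rt = calloc(1, sizeof(*rt));

	if (rt) {
		rt->gen = table_gen;
		rt->refcnt = 1;
	}
	return rt;
}

static void route_put(struct route *rt)
{
	if (rt && --rt->refcnt == 0)
		free(rt);
}

/*
 * Fast path reuses the cached entry while it is still current; slow
 * path drops the stale entry, looks up a fresh route, and caches it
 * for the next send.
 */
static struct route *get_route(struct route **cache)
{
	struct route *rt = *cache;

	if (rt && rt->gen == table_gen)
		return rt;
	route_put(rt);
	rt = route_lookup();
	*cache = rt;
	return rt;
}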
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index f98a19e61a3d..1a123edf555e 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -273,10 +273,11 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
rxe_init_task(rxe, &qp->comp.task, qp,
rxe_completer, "comp");
- setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp);
- setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
-
+ if (init->qp_type == IB_QPT_RC) {
+ setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp);
+ setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp);
+ }
return 0;
}
@@ -804,8 +805,10 @@ void rxe_qp_destroy(struct rxe_qp *qp)
qp->qp_timeout_jiffies = 0;
rxe_cleanup_task(&qp->resp.task);
- del_timer_sync(&qp->retrans_timer);
- del_timer_sync(&qp->rnr_nak_timer);
+ if (qp_type(qp) == IB_QPT_RC) {
+ del_timer_sync(&qp->retrans_timer);
+ del_timer_sync(&qp->rnr_nak_timer);
+ }
rxe_cleanup_task(&qp->req.task);
rxe_cleanup_task(&qp->comp.task);
@@ -846,6 +849,14 @@ void rxe_qp_cleanup(struct rxe_pool_entry *arg)
qp->resp.mr = NULL;
}
+ if (qp_type(qp) == IB_QPT_RC) {
+ struct dst_entry *dst = NULL;
+
+ dst = sk_dst_get(qp->sk->sk);
+ if (dst)
+ dst_release(dst);
+ }
+
free_rd_atomic_resources(qp);
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 50886031096f..fb8c83e055e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -387,8 +387,8 @@ int rxe_rcv(struct sk_buff *skb)
pack_icrc = be32_to_cpu(*icrcp);
calc_icrc = rxe_icrc_hdr(pkt, skb);
- calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt),
- payload_size(pkt));
+ calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
+ payload_size(pkt));
calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
@@ -403,6 +403,8 @@ int rxe_rcv(struct sk_buff *skb)
goto drop;
}
+ rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
+
if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
rxe_rcv_mcast_pkt(rxe, skb);
else
@@ -417,4 +419,3 @@ drop:
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(rxe_rcv);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 9f95f50b2909..7ee465d1a1e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -32,6 +32,7 @@
*/
#include <linux/skbuff.h>
+#include <crypto/hash.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -483,8 +484,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
- crc = crc32_le(crc, tmp, paylen);
-
+ crc = rxe_crc32(rxe, crc, tmp, paylen);
memcpy(payload_addr(pkt), tmp, paylen);
wqe->dma.resid -= paylen;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c9dd385ce62e..ec11a9c25f23 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -149,6 +149,7 @@ static enum resp_states check_psn(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
int diff = psn_compare(pkt->psn, qp->resp.psn);
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
switch (qp_type(qp)) {
case IB_QPT_RC:
@@ -157,9 +158,11 @@ static enum resp_states check_psn(struct rxe_qp *qp,
return RESPST_CLEANUP;
qp->resp.sent_psn_nak = 1;
+ rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
return RESPST_ERR_PSN_OUT_OF_SEQ;
} else if (diff < 0) {
+ rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
return RESPST_DUPLICATE_REQUEST;
}
@@ -1223,6 +1226,7 @@ void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
int rxe_responder(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
@@ -1311,6 +1315,7 @@ int rxe_responder(void *arg)
break;
case RESPST_ERR_RNR:
if (qp_type(qp) == IB_QPT_RC) {
+ rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
/* RC - class B */
send_ack(qp, pkt, AETH_RNR_NAK |
(~AETH_TYPE_MASK &
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 5113e502f6f9..dbede3026138 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -32,9 +32,11 @@
*/
#include <linux/dma-mapping.h>
+#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
+#include "rxe_hw_counters.h"
static int rxe_query_device(struct ib_device *dev,
struct ib_device_attr *attr,
@@ -1237,7 +1239,8 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
dev->dev.parent = rxe_dma_device(rxe);
dev->local_dma_lkey = 0;
- dev->node_guid = rxe_node_guid(rxe);
+ addrconf_addr_eui48((unsigned char *)&dev->node_guid,
+ rxe->ndev->dev_addr);
dev->dev.dma_ops = &dma_virt_ops;
dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
@@ -1318,6 +1321,15 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->map_mr_sg = rxe_map_mr_sg;
dev->attach_mcast = rxe_attach_mcast;
dev->detach_mcast = rxe_detach_mcast;
+ dev->get_hw_stats = rxe_ib_get_hw_stats;
+ dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;
+
+ rxe->tfm = crypto_alloc_shash("crc32", 0, 0);
+ if (IS_ERR(rxe->tfm)) {
+ pr_err("failed to allocate crc algorithm err:%ld\n",
+ PTR_ERR(rxe->tfm));
+ return PTR_ERR(rxe->tfm);
+ }
err = ib_register_device(dev, NULL);
if (err) {
@@ -1339,6 +1351,8 @@ int rxe_register_device(struct rxe_dev *rxe)
err2:
ib_unregister_device(dev);
err1:
+ crypto_free_shash(rxe->tfm);
+
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index e100c500ae85..5a180fbe40d9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -38,6 +38,7 @@
#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
+#include "rxe_hw_counters.h"
static inline int pkey_match(u16 key1, u16 key2)
{
@@ -401,10 +402,18 @@ struct rxe_dev {
spinlock_t mmap_offset_lock; /* guard mmap_offset */
int mmap_offset;
+ u64 stats_counters[RXE_NUM_OF_COUNTERS];
+
struct rxe_port port;
struct list_head list;
+ struct crypto_shash *tfm;
};
+static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt)
+{
+ rxe->stats_counters[cnt]++;
+}
+
static inline struct rxe_dev *to_rdev(struct ib_device *dev)
{
return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf03098..c28af1823a2d 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP) += srp/
obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
obj-$(CONFIG_INFINIBAND_ISER) += iser/
obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bed233bf45c3..7c8de226e7f7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -52,7 +52,6 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <linux/sched.h>
-
/* constants */
enum ipoib_flush_level {
@@ -153,6 +152,13 @@ static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
skb_pull(skb, IPOIB_HARD_LEN);
}
+static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
+{
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ return rn->clnt_priv;
+}
+
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
@@ -404,6 +410,7 @@ struct ipoib_dev_priv {
struct timer_list poll_timer;
unsigned max_send_sge;
bool sm_fullmember_sendonly_support;
+ const struct net_device_ops *rn_ops;
};
struct ipoib_ah {
@@ -482,27 +489,28 @@ int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn);
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn);
void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *format);
+void ipoib_ib_tx_timer_func(unsigned long ctx);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
+int ipoib_ib_dev_open_default(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_ib_dev_up(struct net_device *dev);
void ipoib_ib_dev_down(struct net_device *dev);
-void ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop_default(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -562,8 +570,10 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
struct ipoib_path *path);
#endif
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
- union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey);
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid);
void ipoib_mcast_remove_list(struct list_head *remove_list);
void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
struct list_head *remove_list);
@@ -587,7 +597,7 @@ void __exit ipoib_netlink_fini(void);
void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
int ipoib_set_mode(struct net_device *dev, const char *buf);
-void ipoib_setup(struct net_device *dev);
+void ipoib_setup_common(struct net_device *dev);
void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
@@ -607,14 +617,14 @@ extern int ipoib_max_conn_qp;
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(hwaddr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
@@ -637,13 +647,13 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return !!priv->cm.srq;
}
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return priv->cm.max_cm_mtu;
}
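With the switch to the core RDMA netdev infrastructure, netdev_priv() on an IPoIB interface now yields the provider's struct rdma_netdev, whose clnt_priv member points back at the IPoIB private data; the new ipoib_priv() accessor above encapsulates that hop, which is why every netdev_priv() call in the following hunks is converted. A hypothetical mock of the two-layer private-data scheme:

#include <stddef.h>

struct rdma_netdev_mock {
	void *clnt_priv;        /* -> the ULP's private struct */
	/* provider-specific state would follow */
};

struct ipoib_priv_mock {
	unsigned long flags;
};

/* The accessor hides the extra indirection from the ULP code. */
static struct ipoib_priv_mock *priv_of(struct rdma_netdev_mock *rn)
{
	return rn->clnt_priv;
}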
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0cdf2b7f272f..21303c07ca55 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -92,7 +92,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -118,7 +118,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -145,7 +145,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
u64 mapping[IPOIB_CM_RX_SG],
gfp_t gfp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int i;
@@ -196,7 +196,7 @@ partial_error:
static void ipoib_cm_free_rx_ring(struct net_device *dev,
struct ipoib_cm_rx_buf *rx_ring)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i)
@@ -235,7 +235,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
struct ipoib_cm_rx *p = ctx;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
unsigned long flags;
if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
@@ -251,7 +251,7 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_cm_rx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->recv_cq, /* For drain WR */
@@ -276,7 +276,7 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
@@ -331,7 +331,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < priv->cm.num_frags; ++i)
@@ -349,7 +349,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
struct ipoib_cm_rx *rx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct {
struct ib_recv_wr wr;
struct ib_sge sge[IPOIB_CM_RX_SG];
@@ -422,7 +422,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
struct ib_qp *qp, struct ib_cm_req_event_param *req,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_rep_param rep = {};
@@ -442,7 +442,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct net_device *dev = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned psn;
int ret;
@@ -515,7 +515,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
/* Fall through */
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
- priv = netdev_priv(p->dev);
+ priv = ipoib_priv(p->dev);
if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
ipoib_warn(priv, "unable to move qp to error state\n");
/* Fall through */
@@ -559,7 +559,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx_buf *rx_ring;
unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
@@ -708,7 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
@@ -786,7 +786,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
struct ipoib_tx_buf *tx_req;
@@ -855,7 +855,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
int ipoib_cm_dev_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
@@ -887,7 +887,7 @@ err_cm:
static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *rx, *n;
LIST_HEAD(list);
@@ -910,7 +910,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
void ipoib_cm_dev_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned long begin;
int ret;
@@ -969,7 +969,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct ipoib_cm_tx *p = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_cm_data *data = event->private_data;
struct sk_buff_head skqueue;
struct ib_qp_attr qp_attr;
@@ -1037,7 +1037,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.send_cq = priv->recv_cq,
.recv_cq = priv->recv_cq,
@@ -1070,7 +1070,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
u32 qpn,
struct ib_sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_req_param req = {};
@@ -1105,7 +1105,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
static int ipoib_cm_modify_tx_init(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
@@ -1130,7 +1130,7 @@ static int ipoib_cm_modify_tx_init(struct net_device *dev,
static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
struct ib_sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
int ret;
p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
@@ -1186,7 +1186,7 @@ err_tx:
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_tx_buf *tx_req;
unsigned long begin;
@@ -1236,7 +1236,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event)
{
struct ipoib_cm_tx *tx = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -1287,7 +1287,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
struct ipoib_neigh *neigh)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx;
tx = kzalloc(sizeof *tx, GFP_ATOMIC);
@@ -1306,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
unsigned long flags;
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
@@ -1441,7 +1441,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
unsigned int mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb))
@@ -1490,7 +1490,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
static ssize_t show_mode(struct device *d, struct device_attribute *attr,
char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+ struct net_device *dev = to_net_dev(d);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
return sprintf(buf, "connected\n");
@@ -1503,7 +1504,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
{
struct net_device *dev = to_net_dev(d);
int ret;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
return -EPERM;
@@ -1532,7 +1533,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_srq_init_attr srq_init_attr = {
.srq_type = IB_SRQT_BASIC,
.attr = {
@@ -1561,7 +1562,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1622,7 +1623,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!priv->cm.srq)
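
The netdev_priv() -> ipoib_priv() conversion running through these hunks relies on an accessor this series adds in ipoib.h (not shown in this diff); a minimal sketch of its assumed shape, with the rdma_netdev wrapper now occupying netdev_priv() and the IPoIB private data hung off clnt_priv:

static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
{
	struct rdma_netdev *rn = netdev_priv(dev);

	return rn->clnt_priv;
}
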
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index bac455a1942d..379c02fb4181 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -60,7 +60,7 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
- struct ipoib_dev_priv *priv = netdev_priv(netdev);
+ struct ipoib_dev_priv *priv = ipoib_priv(netdev);
ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
sizeof(drvinfo->fw_version));
@@ -77,7 +77,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
static int ipoib_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
@@ -88,7 +88,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
static int ipoib_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
/*
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740e2691..a15664b880ed 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -261,7 +261,7 @@ static const struct file_operations ipoib_path_fops = {
void ipoib_create_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
char name[IFNAMSIZ + sizeof "_path"];
snprintf(name, sizeof name, "%s_mcg", dev->name);
@@ -279,10 +279,13 @@ void ipoib_create_debug_files(struct net_device *dev)
void ipoib_delete_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
+ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
+ priv->mcg_dentry = priv->path_dentry = NULL;
}
int ipoib_register_debugfs(void)
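
The hunk above makes ipoib_delete_debug_files() safe to call more than once: debugfs_remove() ignores a NULL dentry, and resetting both pointers turns a second delete (e.g. from the rename handling in the netdev notifier added further down) into a no-op. A sketch of the same idempotent-teardown pattern, with hypothetical names:

static struct dentry *my_dentry;	/* hypothetical */

static void my_delete_debug_file(void)
{
	WARN_ONCE(!my_dentry, "null debug file\n");
	debugfs_remove(my_dentry);	/* NULL-safe in the debugfs API */
	my_dentry = NULL;		/* a second call becomes a no-op */
}
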
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 12c4f84a6639..2bfcfb1554d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -71,7 +71,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
ah = (struct ipoib_ah *)vah;
} else {
ah->ah = vah;
- ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
+ ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
}
return ah;
@@ -80,7 +80,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
void ipoib_free_ah(struct kref *kref)
{
struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
- struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
unsigned long flags;
@@ -99,7 +99,7 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int ret;
@@ -121,7 +121,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int buf_size;
u64 *mapping;
@@ -153,7 +153,7 @@ error:
static int ipoib_ib_post_receives(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i) {
@@ -172,7 +172,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
@@ -381,7 +381,7 @@ free_res:
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req;
@@ -485,14 +485,14 @@ poll_more:
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
{
struct net_device *dev = dev_ptr;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
napi_schedule(&priv->napi);
}
static void drain_tx_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
netif_tx_lock(dev);
while (poll_tx(priv))
@@ -506,14 +506,14 @@ static void drain_tx_cq(struct net_device *dev)
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr);
mod_timer(&priv->poll_timer, jiffies);
}
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
- struct ib_ah *address, u32 qpn,
+ struct ib_ah *address, u32 dqpn,
struct ipoib_tx_buf *tx_req,
void *head, int hlen)
{
@@ -523,7 +523,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
ipoib_build_sge(priv, tx_req);
priv->tx_wr.wr.wr_id = wr_id;
- priv->tx_wr.remote_qpn = qpn;
+ priv->tx_wr.remote_qpn = dqpn;
priv->tx_wr.ah = address;
if (head) {
@@ -537,10 +537,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
}
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn)
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
@@ -554,7 +554,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
} else {
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -563,7 +563,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
- return;
+ return -1;
}
phead = NULL;
hlen = 0;
@@ -574,7 +574,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
@@ -582,12 +582,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
}
- ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
- skb->len, address, qpn);
+ ipoib_dbg_data(priv,
+ "sending packet, length=%d address=%p dqpn=0x%06x\n",
+ skb->len, address, dqpn);
/*
* We put the skb into the tx_ring _before_ we call post_send()
@@ -601,7 +602,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -620,7 +621,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
skb_dst_drop(skb);
rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, tx_req, phead, hlen);
+ address, dqpn, tx_req, phead, hlen);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
@@ -629,21 +630,24 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
+ rc = 0;
} else {
netif_trans_update(dev);
- address->last_send = priv->tx_head;
+ rc = priv->tx_head;
++priv->tx_head;
}
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
while (poll_tx(priv))
; /* nothing */
+
+ return rc;
}
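
ipoib_send() now reports what happened instead of updating ah->last_send itself: a negative value means the skb was dropped and freed, otherwise the value is the tx_head slot consumed. The caller records it through the rdma_netdev send hook, as the ipoib_main.c hunks below do (dev, skb, ah and daddr assumed in scope here):

struct rdma_netdev *rn = netdev_priv(dev);

/* rn->send points at ipoib_send() for the default software device */
ah->last_send = rn->send(dev, skb, ah->ah, IPOIB_QPN(daddr));
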
static void __ipoib_reap_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -677,7 +681,7 @@ void ipoib_reap_ah(struct work_struct *work)
static void ipoib_flush_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(priv->wq);
@@ -686,30 +690,124 @@ static void ipoib_flush_ah(struct net_device *dev)
static void ipoib_stop_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
ipoib_flush_ah(dev);
}
-static void ipoib_ib_tx_timer_func(unsigned long ctx)
+static int recvs_pending(struct net_device *dev)
{
- drain_tx_cq((struct net_device *)ctx);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int pending = 0;
+ int i;
+
+ for (i = 0; i < ipoib_recvq_size; ++i)
+ if (priv->rx_ring[i].skb)
+ ++pending;
+
+ return pending;
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_stop_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int ret;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct ib_qp_attr qp_attr;
+ unsigned long begin;
+ struct ipoib_tx_buf *tx_req;
+ int i;
- ipoib_pkey_dev_check_presence(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_disable(&priv->napi);
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
- (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
- return -1;
+ ipoib_cm_dev_stop(dev);
+
+ /*
+ * Move our QP to the error state and then reinitialize
+ * when all work requests have completed or have been flushed.
+ */
+ qp_attr.qp_state = IB_QPS_ERR;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+
+ /* Wait for all sends and receives to complete */
+ begin = jiffies;
+
+ while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ ipoib_warn(priv,
+ "timing out; %d sends %d receives not completed\n",
+ priv->tx_head - priv->tx_tail,
+ recvs_pending(dev));
+
+ /*
+ * assume the HW is wedged and just free up
+ * all our pending work requests.
+ */
+ while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+ tx_req = &priv->tx_ring[priv->tx_tail &
+ (ipoib_sendq_size - 1)];
+ ipoib_dma_unmap_tx(priv, tx_req);
+ dev_kfree_skb_any(tx_req->skb);
+ ++priv->tx_tail;
+ --priv->tx_outstanding;
+ }
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ struct ipoib_rx_buf *rx_req;
+
+ rx_req = &priv->rx_ring[i];
+ if (!rx_req->skb)
+ continue;
+ ipoib_ud_dma_unmap_rx(priv,
+ priv->rx_ring[i].mapping);
+ dev_kfree_skb_any(rx_req->skb);
+ rx_req->skb = NULL;
+ }
+
+ goto timeout;
+ }
+
+ ipoib_drain_cq(dev);
+
+ msleep(1);
}
+ ipoib_dbg(priv, "All sends and receives done.\n");
+
+timeout:
+ del_timer_sync(&priv->poll_timer);
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+
+ ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+ return 0;
+}
+
+int ipoib_ib_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ priv->rn_ops->ndo_stop(dev);
+
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_flush_ah(dev);
+
+ return 0;
+}
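
ipoib_ib_dev_stop() is now a thin wrapper: the datapath teardown is delegated through rn_ops->ndo_stop, which is ipoib_ib_dev_stop_default() above for the software device but can be replaced by an accelerated provider. A hypothetical provider-side sketch (names are illustrative, not from this patch):

static int foo_rn_stop(struct net_device *dev)
{
	/* quiesce hardware rings here instead of draining a software QP */
	return 0;
}

static const struct net_device_ops foo_rn_ops = {
	.ndo_stop = foo_rn_stop,
};
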
+
+void ipoib_ib_tx_timer_func(unsigned long ctx)
+{
+ drain_tx_cq((struct net_device *)ctx);
+}
+
+int ipoib_ib_dev_open_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -719,33 +817,60 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- goto dev_stop;
+ goto out;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
- goto dev_stop;
+ goto out;
+ }
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+
+ return 0;
+out:
+ return -1;
+}
+
+int ipoib_ib_dev_open(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
+ return -1;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ if (priv->rn_ops->ndo_open(dev)) {
+ pr_warn("%s: Failed to open dev\n", dev->name);
+ goto dev_stop;
+ }
+
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0;
+
dev_stop:
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ cancel_delayed_work(&priv->ah_reap_task);
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ napi_enable(&priv->napi);
ipoib_ib_dev_stop(dev);
return -1;
}
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!(priv->pkey & 0x7fff) ||
ib_find_pkey(priv->ca, priv->port, priv->pkey,
@@ -757,7 +882,7 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev)
void ipoib_ib_dev_up(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_pkey_dev_check_presence(dev);
@@ -773,7 +898,7 @@ void ipoib_ib_dev_up(struct net_device *dev)
void ipoib_ib_dev_down(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "downing ib_dev\n");
@@ -786,22 +911,9 @@ void ipoib_ib_dev_down(struct net_device *dev)
ipoib_flush_paths(dev);
}
-static int recvs_pending(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int pending = 0;
- int i;
-
- for (i = 0; i < ipoib_recvq_size; ++i)
- if (priv->rx_ring[i].skb)
- ++pending;
-
- return pending;
-}
-
void ipoib_drain_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i, n;
/*
@@ -838,107 +950,6 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-void ipoib_ib_dev_stop(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_qp_attr qp_attr;
- unsigned long begin;
- struct ipoib_tx_buf *tx_req;
- int i;
-
- if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_disable(&priv->napi);
-
- ipoib_cm_dev_stop(dev);
-
- /*
- * Move our QP to the error state and then reinitialize in
- * when all work requests have completed or have been flushed.
- */
- qp_attr.qp_state = IB_QPS_ERR;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
-
- /* Wait for all sends and receives to complete */
- begin = jiffies;
-
- while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
- if (time_after(jiffies, begin + 5 * HZ)) {
- ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
- priv->tx_head - priv->tx_tail, recvs_pending(dev));
-
- /*
- * assume the HW is wedged and just free up
- * all our pending work requests.
- */
- while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
- tx_req = &priv->tx_ring[priv->tx_tail &
- (ipoib_sendq_size - 1)];
- ipoib_dma_unmap_tx(priv, tx_req);
- dev_kfree_skb_any(tx_req->skb);
- ++priv->tx_tail;
- --priv->tx_outstanding;
- }
-
- for (i = 0; i < ipoib_recvq_size; ++i) {
- struct ipoib_rx_buf *rx_req;
-
- rx_req = &priv->rx_ring[i];
- if (!rx_req->skb)
- continue;
- ipoib_ud_dma_unmap_rx(priv,
- priv->rx_ring[i].mapping);
- dev_kfree_skb_any(rx_req->skb);
- rx_req->skb = NULL;
- }
-
- goto timeout;
- }
-
- ipoib_drain_cq(dev);
-
- msleep(1);
- }
-
- ipoib_dbg(priv, "All sends and receives done.\n");
-
-timeout:
- del_timer_sync(&priv->poll_timer);
- qp_attr.qp_state = IB_QPS_RESET;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to RESET state\n");
-
- ipoib_flush_ah(dev);
-
- ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
-}
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- priv->ca = ca;
- priv->port = port;
- priv->qp = NULL;
-
- if (ipoib_transport_dev_init(dev, ca)) {
- printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
- return -ENODEV;
- }
-
- setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
- (unsigned long) dev);
-
- if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
- ipoib_transport_dev_cleanup(dev);
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
/*
* Takes whatever value which is in pkey index 0 and updates priv->pkey
* returns 0 if the pkey value was changed.
@@ -967,6 +978,19 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
+
+ /*
+ * Update the broadcast address in the priv->broadcast object,
+ * in case it already exists; otherwise nobody else will update it.
+ */
+ if (priv->broadcast) {
+ spin_lock_irq(&priv->lock);
+ memcpy(priv->broadcast->mcmember.mgid.raw,
+ priv->dev->broadcast + 4,
+ sizeof(union ib_gid));
+ spin_unlock_irq(&priv->lock);
+ }
+
return 0;
}
@@ -1216,7 +1240,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
void ipoib_ib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "cleaning up ib_dev\n");
/*
@@ -1236,7 +1260,13 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_stop_ah(dev);
- ipoib_transport_dev_cleanup(dev);
-}
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ priv->rn_ops->ndo_uninit(dev);
+
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
+}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index d1d3fb7a6127..05a07224fe7a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,9 +108,36 @@ static struct ib_client ipoib_client = {
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_info *ni = ptr;
+ struct net_device *dev = ni->dev;
+
+ if (dev->netdev_ops->ndo_open != ipoib_open)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_CHANGENAME:
+ ipoib_delete_debug_files(dev);
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_UNREGISTER:
+ ipoib_delete_debug_files(dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
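
With CONFIG_INFINIBAND_IPOIB_DEBUG set, the debugfs files now follow netdev lifecycle events, so a rename recreates them under the new name. Note how the notifier recognizes IPoIB devices by comparing ndo_open rather than by any private flag; a minimal sketch of the same filtering idiom for another hypothetical client:

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (dev->netdev_ops->ndo_open != my_open)	/* not one of ours */
		return NOTIFY_DONE;

	if (event == NETDEV_CHANGENAME)
		pr_info("%s: renamed\n", dev->name);

	return NOTIFY_DONE;
}
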
+
int ipoib_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "bringing up interface\n");
@@ -157,7 +184,7 @@ err_disable:
static int ipoib_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "stopping interface\n");
@@ -195,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev)
static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
@@ -205,7 +232,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* dev->mtu > 2K ==> connected mode */
if (ipoib_cm_admin_enabled(dev)) {
@@ -468,7 +495,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
int ipoib_set_mode(struct net_device *dev, const char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
!strcmp(buf, "connected\n")) ||
@@ -505,7 +532,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->path_tree.rb_node;
struct ipoib_path *path;
int ret;
@@ -529,7 +556,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid)
static int __path_add(struct net_device *dev, struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->path_tree.rb_node;
struct rb_node *pn = NULL;
struct ipoib_path *tpath;
@@ -564,7 +591,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- ipoib_dbg(netdev_priv(dev), "path_free\n");
+ ipoib_dbg(ipoib_priv(dev), "path_free\n");
/* remove all neigh connected to this path */
ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -598,7 +625,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
int ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_path *path;
int ret = 1;
@@ -635,7 +662,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
void ipoib_mark_paths_invalid(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
spin_lock_irq(&priv->lock);
@@ -650,77 +677,6 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
spin_unlock_irq(&priv->lock);
}
-struct classport_info_context {
- struct ipoib_dev_priv *priv;
- struct completion done;
- struct ib_sa_query *sa_query;
-};
-
-static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
- void *context)
-{
- struct classport_info_context *cb_ctx = context;
- struct ipoib_dev_priv *priv;
-
- WARN_ON(!context);
-
- priv = cb_ctx->priv;
-
- if (status || !rec) {
- pr_debug("device: %s failed query classport_info status: %d\n",
- priv->dev->name, status);
- /* keeps the default, will try next mcast_restart */
- priv->sm_fullmember_sendonly_support = false;
- goto out;
- }
-
- if (ib_get_cpi_capmask2(rec) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
- pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = true;
- } else {
- pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = false;
- }
-
-out:
- complete(&cb_ctx->done);
-}
-
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
-{
- struct classport_info_context *callback_context;
- int ret;
-
- callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
- if (!callback_context)
- return -ENOMEM;
-
- callback_context->priv = priv;
- init_completion(&callback_context->done);
-
- ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
- priv->ca, priv->port, 3000,
- GFP_KERNEL,
- classport_info_query_cb,
- callback_context,
- &callback_context->sa_query);
- if (ret < 0) {
- pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
- priv->dev->name, ret);
- kfree(callback_context);
- return ret;
- }
-
- /* waiting for the callback to finish before returnning */
- wait_for_completion(&callback_context->done);
- kfree(callback_context);
-
- return ret;
-}
-
static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
{
struct ipoib_pseudo_header *phdr;
@@ -731,7 +687,7 @@ static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
void ipoib_flush_paths(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -765,7 +721,7 @@ static void path_rec_completion(int status,
{
struct ipoib_path *path = path_ptr;
struct net_device *dev = path->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah = NULL;
struct ipoib_ah *old_ah = NULL;
struct ipoib_neigh *neigh, *tn;
@@ -858,7 +814,7 @@ static void path_rec_completion(int status,
static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path;
if (!priv->broadcast)
@@ -886,7 +842,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
static int path_rec_start(struct net_device *dev,
struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "Start path record lookup for %pI6\n",
path->pathrec.dgid.raw);
@@ -917,7 +873,8 @@ static int path_rec_start(struct net_device *dev,
static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -964,7 +921,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
} else {
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(daddr));
ipoib_neigh_put(neigh);
return;
}
@@ -998,7 +956,8 @@ err_drop:
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
struct ipoib_pseudo_header *phdr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
unsigned long flags;
@@ -1042,7 +1001,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
be16_to_cpu(path->pathrec.dlid));
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
@@ -1058,7 +1018,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_neigh *neigh;
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
@@ -1122,7 +1083,8 @@ send_using_neigh:
goto unref;
}
} else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
+ neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
goto unref;
}
@@ -1144,7 +1106,7 @@ unref:
static void ipoib_timeout(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
@@ -1178,7 +1140,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
static void ipoib_set_mcast_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
@@ -1190,7 +1152,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
static int ipoib_get_iflink(const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
@@ -1218,7 +1180,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh = NULL;
@@ -1347,7 +1309,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh;
@@ -1404,7 +1366,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
{
/* neigh reference count was dropped to zero */
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
if (neigh->ah)
ipoib_put_ah(neigh->ah);
@@ -1414,7 +1376,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
}
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
- ipoib_dbg(netdev_priv(dev),
+ ipoib_dbg(ipoib_priv(dev),
"neigh free for %06x %pI6\n",
IPOIB_QPN(neigh->daddr),
neigh->daddr + 4);
@@ -1436,7 +1398,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp)
void ipoib_neigh_free(struct ipoib_neigh *neigh)
{
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh __rcu **np;
@@ -1519,7 +1481,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
unsigned long flags;
@@ -1605,7 +1567,7 @@ out_unlock:
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int stopped;
ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
@@ -1622,10 +1584,26 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
wait_for_completion(&priv->ntbl.deleted);
}
+void ipoib_dev_uninit_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+ ipoib_transport_dev_cleanup(dev);
+
+ ipoib_cm_dev_cleanup(dev);
+
+ kfree(priv->rx_ring);
+ vfree(priv->tx_ring);
+
+ priv->rx_ring = NULL;
+ priv->tx_ring = NULL;
+}
+
+static int ipoib_dev_init_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
@@ -1636,46 +1614,111 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
- ca->name, ipoib_sendq_size);
+ priv->ca->name, ipoib_sendq_size);
goto out_rx_ring_cleanup;
}
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
- if (ipoib_ib_dev_init(dev, ca, port))
+ if (ipoib_transport_dev_init(dev, priv->ca)) {
+ pr_warn("%s: ipoib_transport_dev_init failed\n",
+ priv->ca->name);
goto out_tx_ring_cleanup;
+ }
+
+ /* after the QP is created, set the device address */
+ priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+ priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
+ priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+
+ setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+ (unsigned long)dev);
+
+ return 0;
+
+out_tx_ring_cleanup:
+ vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+ kfree(priv->rx_ring);
+
+out:
+ return -ENOMEM;
+}
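
The three bytes written above embed the 24-bit QP number into the interface hardware address as soon as the QP exists; this is what IPOIB_QPN() recovers from a neighbour address on the send path. A worked example for a hypothetical qp_num of 0x0004d2:

dev_addr[1] = (0x0004d2 >> 16) & 0xff;	/* 0x00 */
dev_addr[2] = (0x0004d2 >> 8) & 0xff;	/* 0x04 */
dev_addr[3] = 0x0004d2 & 0xff;		/* 0xd2 */
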
+
+int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret = -ENOMEM;
+
+ priv->ca = ca;
+ priv->port = port;
+ priv->qp = NULL;
/*
- * Must be after ipoib_ib_dev_init so we can allocate a per
- * device wq there and use it here
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single thread workqueue
*/
- if (ipoib_neigh_hash_init(priv) < 0)
+ priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
+ if (!priv->wq) {
+ pr_warn("%s: failed to allocate device WQ\n", dev->name);
+ goto out;
+ }
+
+ /* create the PD, which is used for both the control and data path */
+ priv->pd = ib_alloc_pd(priv->ca, 0);
+ if (IS_ERR(priv->pd)) {
+ pr_warn("%s: failed to allocate PD\n", ca->name);
+ goto clean_wq;
+ }
+
+ ret = priv->rn_ops->ndo_init(dev);
+ if (ret) {
+ pr_warn("%s failed to init HW resource\n", dev->name);
+ goto out_free_pd;
+ }
+
+ if (ipoib_neigh_hash_init(priv) < 0) {
+ pr_warn("%s failed to init neigh hash\n", dev->name);
goto out_dev_uninit;
+ }
+
+ if (dev->flags & IFF_UP) {
+ if (ipoib_ib_dev_open(dev)) {
+ pr_warn("%s failed to open device\n", dev->name);
+ ret = -ENODEV;
+ goto out_dev_uninit;
+ }
+ }
return 0;
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
-out_tx_ring_cleanup:
- vfree(priv->tx_ring);
+out_free_pd:
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
-out_rx_ring_cleanup:
- kfree(priv->rx_ring);
+clean_wq:
+ if (priv->wq) {
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
out:
- return -ENOMEM;
+ return ret;
}
void ipoib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
LIST_HEAD(head);
ASSERT_RTNL();
- ipoib_delete_debug_files(dev);
-
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
/* Stop GC on child */
@@ -1685,24 +1728,21 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
- /*
- * Must be before ipoib_ib_dev_cleanup or we delete an in use
- * work queue
- */
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
- kfree(priv->rx_ring);
- vfree(priv->tx_ring);
-
- priv->rx_ring = NULL;
- priv->tx_ring = NULL;
+ /* no more work may be queued on priv->wq */
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
}
@@ -1710,7 +1750,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat
static int ipoib_get_vf_config(struct net_device *dev, int vf,
struct ifla_vf_info *ivf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int err;
err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
@@ -1724,7 +1764,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
return -EINVAL;
@@ -1735,7 +1775,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
static int ipoib_get_vf_stats(struct net_device *dev, int vf,
struct ifla_vf_stats *vf_stats)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
}
@@ -1773,21 +1813,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_get_iflink = ipoib_get_iflink,
};
-void ipoib_setup(struct net_device *dev)
+void ipoib_setup_common(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
- dev->netdev_ops = &ipoib_netdev_ops_vf;
- else
- dev->netdev_ops = &ipoib_netdev_ops_pf;
-
dev->header_ops = &ipoib_header_ops;
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
-
dev->watchdog_timeo = HZ;
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
@@ -1801,11 +1832,14 @@ void ipoib_setup(struct net_device *dev)
netif_keep_dst(dev);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+}
- priv->dev = dev;
+static void ipoib_build_priv(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ priv->dev = dev;
spin_lock_init(&priv->lock);
-
init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
@@ -1823,22 +1857,99 @@ void ipoib_setup(struct net_device *dev)
INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
+static const struct net_device_ops ipoib_netdev_default_pf = {
+ .ndo_init = ipoib_dev_init_default,
+ .ndo_uninit = ipoib_dev_uninit_default,
+ .ndo_open = ipoib_ib_dev_open_default,
+ .ndo_stop = ipoib_ib_dev_stop_default,
+};
+
+static struct net_device
+*ipoib_create_netdev_default(struct ib_device *hca,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
{
struct net_device *dev;
+ struct rdma_netdev *rn;
- dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
- NET_NAME_UNKNOWN, ipoib_setup);
+ dev = alloc_netdev((int)sizeof(struct rdma_netdev),
+ name,
+ name_assign_type, setup);
if (!dev)
return NULL;
- return netdev_priv(dev);
+ rn = netdev_priv(dev);
+
+ rn->send = ipoib_send;
+ rn->attach_mcast = ipoib_mcast_attach;
+ rn->detach_mcast = ipoib_mcast_detach;
+ rn->hca = hca;
+
+ dev->netdev_ops = &ipoib_netdev_default_pf;
+
+ return dev;
+}
+
+static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+
+ if (hca->alloc_rdma_netdev) {
+ dev = hca->alloc_rdma_netdev(hca, port,
+ RDMA_NETDEV_IPOIB, name,
+ NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+ if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
+ return NULL;
+ }
+
+ if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP)
+ dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+
+ return dev;
+}
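
ipoib_get_netdev() first offers the HCA a chance to supply an accelerated rdma netdev, falling back to the software device when the hook is absent or returns -EOPNOTSUPP. A hedged provider-side sketch (driver name and body are hypothetical; only the signature mirrors the call above):

static struct net_device *
foo_alloc_rdma_netdev(struct ib_device *ibdev, u8 port_num,
		      enum rdma_netdev_t type, const char *name,
		      unsigned char name_assign_type,
		      void (*setup)(struct net_device *))
{
	if (type != RDMA_NETDEV_IPOIB)
		return ERR_PTR(-EOPNOTSUPP);

	/* ... allocate and wire up an accelerated netdev here ... */
	return ERR_PTR(-EOPNOTSUPP);	/* no acceleration in this sketch */
}
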
+
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+ struct ipoib_dev_priv *priv;
+ struct rdma_netdev *rn;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ dev = ipoib_get_netdev(hca, port, name);
+ if (!dev)
+ goto free_priv;
+
+ priv->rn_ops = dev->netdev_ops;
+
+ /* FIXME: this should be done after the capability query */
+ if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+ dev->netdev_ops = &ipoib_netdev_ops_vf;
+ else
+ dev->netdev_ops = &ipoib_netdev_ops_pf;
+
+ rn = netdev_priv(dev);
+ rn->clnt_priv = priv;
+ ipoib_build_priv(dev);
+
+ return priv;
+free_priv:
+ kfree(priv);
+ return NULL;
}
static ssize_t show_pkey(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "0x%04x\n", priv->pkey);
}
@@ -1847,14 +1958,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
static ssize_t show_umcast(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
{
- struct ipoib_dev_priv *priv = netdev_priv(ndev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
if (umcast_val > 0) {
set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1927,7 +2039,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sockaddr_storage *ss = addr;
int ret;
@@ -2000,7 +2112,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+ priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
@@ -2016,7 +2128,7 @@ static struct net_device *ipoib_add_port(const char *format,
struct ib_port_attr attr;
int result = -ENOMEM;
- priv = ipoib_intf_alloc(format);
+ priv = ipoib_intf_alloc(hca, port, format);
if (!priv)
goto alloc_mem_failed;
@@ -2090,8 +2202,6 @@ static struct net_device *ipoib_add_port(const char *format,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
@@ -2106,7 +2216,6 @@ static struct net_device *ipoib_add_port(const char *format,
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
@@ -2146,7 +2255,7 @@ static void ipoib_add_one(struct ib_device *device)
continue;
dev = ipoib_add_port("ib%d", device, p);
if (!IS_ERR(dev)) {
- priv = netdev_priv(dev);
+ priv = ipoib_priv(dev);
list_add_tail(&priv->list, dev_list);
count++;
}
@@ -2186,11 +2295,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
unregister_netdev(priv->dev);
free_netdev(priv->dev);
+ kfree(priv);
}
kfree(dev_list);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+ .notifier_call = ipoib_netdev_event,
+};
+#endif
+
static int __init ipoib_init_module(void)
{
int ret;
@@ -2243,6 +2359,9 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_client;
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
return 0;
err_client:
@@ -2260,6 +2379,9 @@ err_fs:
static void __exit ipoib_cleanup_module(void)
{
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 69e146cdc306..3385869443ec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
- ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+ ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
/* remove all neigh connected to this mcast */
@@ -158,7 +158,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
while (n) {
@@ -182,7 +182,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid
static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
while (*n) {
@@ -212,7 +212,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
struct ib_sa_mcmember_rec *mcmember)
{
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_ah *ah;
int ret;
int set_qkey = 0;
@@ -260,8 +261,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
- ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid, set_qkey);
+ ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid),
+ set_qkey, priv->qkey);
if (ret < 0) {
ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
mcast->mcmember.mgid.raw);
@@ -331,7 +333,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
- int ret;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
@@ -344,11 +345,9 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
* because the broadcast group must always be joined first and is always
* re-joined if the SM changes substantially.
*/
- ret = ipoib_check_sm_sendonly_fullmember_support(priv);
- if (ret < 0)
- pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
- priv->dev->name, ret);
-
+ priv->sm_fullmember_sendonly_support =
+ ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
+ priv->ca, priv->port);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
@@ -375,7 +374,7 @@ static int ipoib_mcast_join_complete(int status,
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
@@ -477,7 +476,7 @@ out_locked:
*/
static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_sa_multicast *multicast;
struct ib_sa_mcmember_rec rec = {
.join_state = 1
@@ -489,6 +488,9 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return -EINVAL;
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
rec.mgid = mcast->mcmember.mgid;
@@ -647,8 +649,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
if (mcast->backoff == 1 ||
time_after_eq(jiffies, mcast->delay_until)) {
/* Found the next unjoined group */
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
if (ipoib_mcast_join(dev, mcast)) {
spin_unlock_irq(&priv->lock);
return;
@@ -668,17 +668,15 @@ out:
queue_delayed_work(priv->wq, &priv->mcast_task,
delay_until - jiffies);
}
- if (mcast) {
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (mcast)
ipoib_mcast_join(dev, mcast);
- }
+
spin_unlock_irq(&priv->lock);
}
void ipoib_mcast_start_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "starting multicast thread\n");
@@ -690,7 +688,7 @@ void ipoib_mcast_start_thread(struct net_device *dev)
int ipoib_mcast_stop_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -706,7 +704,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
@@ -720,8 +719,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
mcast->mcmember.mgid.raw);
/* Remove ourselves from the multicast group */
- ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
- be16_to_cpu(mcast->mcmember.mlid));
+ ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid));
if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -762,7 +761,8 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_mcast *mcast;
unsigned long flags;
void *mgid = daddr + 4;
@@ -825,7 +825,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
}
}
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
+ IB_MULTICAST_QPN);
if (neigh)
ipoib_neigh_put(neigh);
return;
@@ -837,7 +838,7 @@ unlock:
void ipoib_mcast_dev_flush(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
LIST_HEAD(remove_list);
struct ipoib_mcast *mcast, *tmcast;
unsigned long flags;
@@ -1029,7 +1030,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_mcast *mcast;
int ret = 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index cdc7df4fdb8a..28884781311b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -44,7 +44,7 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
u16 val;
if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
@@ -107,7 +107,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
ipoib_warn(ppriv, "child creation disallowed for child devices\n");
@@ -129,7 +129,8 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
*/
child_pkey |= 0x8000;
- err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
+ err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
+ child_pkey, IPOIB_RTNL_CHILD);
if (!err && data)
err = ipoib_changelink(dev, tb, data);
@@ -140,8 +141,8 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
{
struct ipoib_dev_priv *priv, *ppriv;
- priv = netdev_priv(dev);
- ppriv = netdev_priv(priv->parent);
+ priv = ipoib_priv(dev);
+ ppriv = ipoib_priv(priv->parent);
down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
@@ -161,7 +162,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.maxtype = IFLA_IPOIB_MAX,
.policy = ipoib_policy,
.priv_size = sizeof(struct ipoib_dev_priv),
- .setup = ipoib_setup,
+ .setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
.changelink = ipoib_changelink,
.dellink = ipoib_unregister_child_dev,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 189dcd1709d2..bb64baf25309 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -35,9 +35,10 @@
#include "ipoib.h"
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr *qp_attr = NULL;
int ret;
u16 pkey_index;
@@ -56,7 +57,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int
goto out;
/* set correct QKey for QP */
- qp_attr->qkey = priv->qkey;
+ qp_attr->qkey = qkey;
ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
if (ret) {
ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
@@ -74,9 +75,20 @@ out:
return ret;
}
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
+ ret = ib_detach_mcast(priv->qp, mgid, mlid);
+
+ return ret;
+}
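
Multicast attach and detach now also route through the rdma_netdev ops (rn->attach_mcast / rn->detach_mcast in the ipoib_multicast.c hunks above), with the two functions here serving as the software defaults over the UD QP. An accelerated provider could override them along these lines (hypothetical sketch; only the signature mirrors the patch):

static int foo_attach_mcast(struct net_device *dev, struct ib_device *hca,
			    union ib_gid *gid, u16 mlid, int set_qkey,
			    u32 qkey)
{
	/* program the hardware multicast filter instead of the UD QP */
	return 0;
}
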
+
int ipoib_init_qp(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
struct ib_qp_attr qp_attr;
int attr_mask;
@@ -130,7 +142,7 @@ out_fail:
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr init_attr = {
.cap = {
.max_send_wr = ipoib_sendq_size,
@@ -147,22 +159,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size;
int i;
- priv->pd = ib_alloc_pd(priv->ca, 0);
- if (IS_ERR(priv->pd)) {
- printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
- return -ENODEV;
- }
-
- /*
- * the various IPoIB tasks assume they will never race against
- * themselves, so always use a single thread workqueue
- */
- priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
- if (!priv->wq) {
- printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
- goto out_free_pd;
- }
-
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
@@ -173,7 +169,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
if (ret != -ENOSYS)
- goto out_free_wq;
+ return -ENODEV;
cq_attr.cqe = size;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
@@ -212,10 +208,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_send_cq;
}
- priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
- priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
- priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
-
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
@@ -247,26 +239,18 @@ out_free_recv_cq:
out_cm_dev_cleanup:
ipoib_cm_dev_cleanup(dev);
-out_free_wq:
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
-
-out_free_pd:
- ib_dealloc_pd(priv->pd);
-
return -ENODEV;
}
void ipoib_transport_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (priv->qp) {
if (ib_destroy_qp(priv->qp))
ipoib_warn(priv, "ib_qp_destroy failed\n");
priv->qp = NULL;
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
if (ib_destroy_cq(priv->send_cq))
@@ -274,16 +258,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
if (ib_destroy_cq(priv->recv_cq))
ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
-
- ipoib_cm_dev_cleanup(dev);
-
- if (priv->wq) {
- flush_workqueue(priv->wq);
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
- }
-
- ib_dealloc_pd(priv->pd);
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 3e10e3dac2e7..36dc4fcaa3cd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -44,7 +44,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
char *buf)
{
struct net_device *dev = to_net_dev(d);
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return sprintf(buf, "%s\n", priv->parent->name);
}
@@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
/* RTNL children don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
if (ipoib_cm_add_mode_attr(priv->dev))
@@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
sysfs_failed:
result = -ENOMEM;
- ipoib_delete_debug_files(priv->dev);
unregister_netdevice(priv->dev);
register_failed:
@@ -128,14 +125,15 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
snprintf(intf_name, sizeof intf_name, "%s.%04x",
ppriv->dev->name, pkey);
- priv = ipoib_intf_alloc(intf_name);
+
+ priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv)
return -ENOMEM;
@@ -183,7 +181,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 81ae2e30dd12..12ed62ce9ff7 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -612,7 +612,7 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
iser_conn, rkey);
if (unlikely(!iser_conn->snd_w_inv)) {
- iser_err("conn %p: unexepected remote invalidation, "
+ iser_err("conn %p: unexpected remote invalidation, "
"terminating connection\n", iser_conn);
return -EPROTO;
}
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000000000000..48132ab5e6b9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+ tristate "Intel OPA VNIC support"
+ depends on X86_64 && INFINIBAND
+ ---help---
+	  This is the Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+	  driver for the Ethernet over Omni-Path feature. It implements the
+	  HW-independent VNIC functionality and interfaces with the Linux stack
+	  for the data path and with IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000000000000..8061b287cfe4
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,7 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+ opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000000000000..2e8fee982436
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC encapsulation/decapsulation functions.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include "opa_vnic_internal.h"
+
+/* OPA 16B Header fields */
+#define OPA_16B_LID_MASK 0xFFFFFull
+#define OPA_16B_SLID_HIGH_SHFT 8
+#define OPA_16B_SLID_MASK 0xF00ull
+#define OPA_16B_DLID_MASK 0xF000ull
+#define OPA_16B_DLID_HIGH_SHFT 12
+#define OPA_16B_LEN_SHFT 20
+#define OPA_16B_SC_SHFT 20
+#define OPA_16B_RC_SHFT 25
+#define OPA_16B_PKEY_SHFT 16
+
+#define OPA_VNIC_L4_HDR_SHFT 16
+
+/* L2+L4 hdr len is 20 bytes (5 quad words) */
+#define OPA_VNIC_HDR_QW_LEN 5
+
+static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
+ u16 pkey, u16 entropy, u8 sc, u8 rc,
+ u8 l4_type, u16 l4_hdr)
+{
+ /* h[1]: LT=1, 16B L2=10 */
+ u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};
+
+ h[2] = l4_type;
+ h[3] = entropy;
+ h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT;
+
+ /* Extract and set 4 upper bits and 20 lower bits of the lids */
+ h[0] |= (slid & OPA_16B_LID_MASK);
+ h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);
+
+ h[1] |= (dlid & OPA_16B_LID_MASK);
+ h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);
+
+ h[0] |= (len << OPA_16B_LEN_SHFT);
+ h[1] |= (rc << OPA_16B_RC_SHFT);
+ h[1] |= (sc << OPA_16B_SC_SHFT);
+ h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);
+
+ memcpy(hdr, h, OPA_VNIC_HDR_LEN);
+}
+
+/*
+ * The mac table is implemented as a simple hash table, keyed by the last
+ * octet of the mac address.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mactbl)
+ return;
+
+ vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+ hash_del(&node->hlist);
+ kfree(node);
+ }
+ kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+ u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+ struct hlist_head *mactbl;
+
+ mactbl = kzalloc(size, GFP_KERNEL);
+ if (!mactbl)
+ return ERR_PTR(-ENOMEM);
+
+ vnic_hash_init(mactbl);
+ return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+ struct hlist_head *mactbl;
+
+ mutex_lock(&adapter->mactbl_lock);
+ mactbl = rcu_access_pointer(adapter->mactbl);
+ rcu_assign_pointer(adapter->mactbl, NULL);
+ synchronize_rcu();
+ opa_vnic_free_mac_tbl(mactbl);
+ mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function queries a specific section of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ int bkt;
+ u16 loffset, lnum_entries;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (!mactbl)
+ goto get_mac_done;
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ vnic_hash_for_each(mactbl, bkt, node, hlist) {
+ struct __opa_vnic_mactable_entry *nentry = &node->entry;
+ struct opa_veswport_mactable_entry *entry;
+
+ if ((node->index < loffset) ||
+ (node->index >= (loffset + lnum_entries)))
+ continue;
+
+ /* populate entry in the tbl corresponding to the index */
+ entry = &tbl->tbl_entries[node->index - loffset];
+ memcpy(entry->mac_addr, nentry->mac_addr,
+ ARRAY_SIZE(entry->mac_addr));
+ memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+ ARRAY_SIZE(entry->mac_addr_mask));
+ entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+ }
+ tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+ rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ * - Allocate a new mac (hash) table.
+ * - Add the specified entries to the new table.
+ * (except the ones that are requested to be deleted).
+ * - Add all the other entries from the old mac table.
+ * - If there is a failure, free the new table and return.
+ * - Switch to the new table.
+ * - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node, *new_node;
+ struct hlist_head *new_mactbl, *old_mactbl;
+ int i, bkt, rc = 0;
+ u8 key;
+ u16 loffset, lnum_entries;
+
+ mutex_lock(&adapter->mactbl_lock);
+ /* allocate new mac table */
+ new_mactbl = opa_vnic_alloc_mac_tbl();
+ if (IS_ERR(new_mactbl)) {
+ mutex_unlock(&adapter->mactbl_lock);
+ return PTR_ERR(new_mactbl);
+ }
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ /* add updated entries to the new mac table */
+ for (i = 0; i < lnum_entries; i++) {
+ struct __opa_vnic_mactable_entry *nentry;
+ struct opa_veswport_mactable_entry *entry =
+ &tbl->tbl_entries[i];
+ u8 *mac_addr = entry->mac_addr;
+ u8 empty_mac[ETH_ALEN] = { 0 };
+
+ v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
+ loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
+ mac_addr[3], mac_addr[4], mac_addr[5],
+ entry->dlid_sd);
+
+ /* if the entry is being removed, do not add it */
+ if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
+ continue;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ node->index = loffset + i;
+ nentry = &node->entry;
+ memcpy(nentry->mac_addr, entry->mac_addr,
+ ARRAY_SIZE(nentry->mac_addr));
+ memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
+ ARRAY_SIZE(nentry->mac_addr_mask));
+ nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
+ key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &node->hlist, key);
+ }
+
+ /* add other entries from current mac table to new mac table */
+ old_mactbl = rcu_access_pointer(adapter->mactbl);
+ if (!old_mactbl)
+ goto switch_tbl;
+
+ vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
+ if ((node->index >= loffset) &&
+ (node->index < (loffset + lnum_entries)))
+ continue;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ new_node->index = node->index;
+ memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
+ key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &new_node->hlist, key);
+ }
+
+switch_tbl:
+ /* switch to new table */
+ rcu_assign_pointer(adapter->mactbl, new_mactbl);
+ synchronize_rcu();
+
+ adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
+updt_done:
+ /* upon failure, free the new table; otherwise, free the old table */
+ if (rc)
+ opa_vnic_free_mac_tbl(new_mactbl);
+ else
+ opa_vnic_free_mac_tbl(old_mactbl);
+
+ mutex_unlock(&adapter->mactbl_lock);
+ return rc;
+}
+
+/* opa_vnic_chk_mac_tbl - check mac table for dlid */
+static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct ethhdr *mac_hdr)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ u32 dlid = 0;
+ u8 key;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (unlikely(!mactbl))
+ goto chk_done;
+
+ key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_for_each_possible(mactbl, node, hlist, key) {
+ struct __opa_vnic_mactable_entry *entry = &node->entry;
+
+ /* if related to source mac, skip */
+ if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
+ continue;
+
+ if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
+ ARRAY_SIZE(node->entry.mac_addr))) {
+ /* mac address found */
+ dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
+ break;
+ }
+ }
+
+chk_done:
+ rcu_read_unlock();
+ return dlid;
+}
+
+/* opa_vnic_get_dlid - find and return the DLID */
+static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
+ struct sk_buff *skb, u8 def_port)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u32 dlid;
+
+ dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
+ if (dlid)
+ return dlid;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+ dlid = info->vesw.u_mcast_dlid;
+ } else {
+ if (is_local_ether_addr(mac_hdr->h_dest)) {
+ dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+ ((uint32_t)mac_hdr->h_dest[4] << 8) |
+ mac_hdr->h_dest[3];
+ if (unlikely(!dlid))
+ v_warn("Null dlid in MAC address\n");
+ } else if (def_port != OPA_VNIC_INVALID_PORT) {
+ dlid = info->vesw.u_ucast_dlid[def_port];
+ }
+ }
+
+ return dlid;
+}
+
+/* opa_vnic_get_sc - return the service class */
+static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
+ struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u16 vlan_tci;
+ u8 sc;
+
+ if (!__vlan_get_tag(skb, &vlan_tci)) {
+ u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.pcp_to_sc_mc[pcp];
+ else
+ sc = info->vport.pcp_to_sc_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.non_vlan_sc_mc;
+ else
+ sc = info->vport.non_vlan_sc_uc;
+ }
+
+ return sc;
+}
+
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct __opa_veswport_info *info = &adapter->info;
+ u8 vl;
+
+ if (skb_vlan_tag_present(skb)) {
+ u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.pcp_to_vl_mc[pcp];
+ else
+ vl = info->vport.pcp_to_vl_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.non_vlan_vl_mc;
+ else
+ vl = info->vport.non_vlan_vl_uc;
+ }
+
+ return vl;
+}
+
+/* opa_vnic_calc_entropy - calculate the packet entropy */
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ u16 hash16;
+
+ /*
+ * Get flow based 16-bit hash and then XOR the upper and lower bytes
+ * to get the entropy.
+ * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+ */
+ hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+ return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+}
+
+/* opa_vnic_get_def_port - get default port based on entropy */
+static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
+ u8 entropy)
+{
+ u8 flow_id;
+
+ /* Add the upper and lower 4-bits of entropy to get the flow id */
+ flow_id = ((entropy & 0xf) + (entropy >> 4));
+ return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
+}
+
+/* Calculate packet length including OPA header, crc and padding */
+static inline int opa_vnic_wire_length(struct sk_buff *skb)
+{
+ u32 pad_len;
+
+ /* padding for 8 bytes size alignment */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ return (skb->len + pad_len) >> 3;
+}
+
+/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct opa_vnic_skb_mdata *mdata;
+ u8 def_port, sc, entropy, *hdr;
+ u16 len, l4_hdr;
+ u32 dlid;
+
+ hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
+
+ entropy = opa_vnic_calc_entropy(adapter, skb);
+ def_port = opa_vnic_get_def_port(adapter, entropy);
+ len = opa_vnic_wire_length(skb);
+ dlid = opa_vnic_get_dlid(adapter, skb, def_port);
+ sc = opa_vnic_get_sc(info, skb);
+ l4_hdr = info->vesw.vesw_id;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ mdata->entropy = entropy;
+ mdata->flags = 0;
+ if (unlikely(!dlid)) {
+ mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
+ return;
+ }
+
+ opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
+ info->vesw.pkey, entropy, sc, 0,
+ OPA_VNIC_L4_ETHR, l4_hdr);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
new file mode 100644
index 000000000000..4c434b9dd84c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -0,0 +1,489 @@
+#ifndef _OPA_VNIC_ENCAP_H
+#define _OPA_VNIC_ENCAP_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains all OPA VNIC declarations required for encapsulation
+ * and decapsulation of Ethernet packets.
+ */
+
+#include <linux/types.h>
+#include <rdma/ib_mad.h>
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION 0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA 0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO 0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES 0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS 0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS 0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS 0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
+/* VNIC configured and operational state values */
+#define OPA_VNIC_STATE_DROP_ALL 0x1
+#define OPA_VNIC_STATE_FORWARDING 0x3
+
+#define OPA_VESW_MAX_NUM_DEF_PORT 16
+#define OPA_VNIC_MAX_NUM_PCP 8
+
+#define OPA_VNIC_EMA_DATA (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd) (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd) ((dlid_sd) >> 8)
+
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN 2
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+ __be16 fabric_id;
+ __be16 vesw_id;
+
+ u8 rsvd0[6];
+ __be16 def_port_mask;
+
+ u8 rsvd1[2];
+ __be16 pkey;
+
+ u8 rsvd2[4];
+ __be32 u_mcast_dlid;
+ __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ __be16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+ __be32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ __be16 max_mac_tbl_ent;
+ __be16 max_smac_ent;
+ __be32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ __be32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ __be16 uc_macs_gen_count;
+ __be16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct opa_veswport_info - OPA vnic port information
+ * @vesw: OPA vnic switch information
+ * @vport: OPA vnic per port information
+ *
+ * On the host, each virtual ethernet port belongs
+ * to a different virtual ethernet switch.
+ */
+struct opa_veswport_info {
+ struct opa_vesw_info vesw;
+ struct opa_per_veswport_info vport;
+};
+
+/**
+ * struct opa_veswport_mactable_entry - single entry in the forwarding table
+ * @mac_addr: MAC address
+ * @mac_addr_mask: MAC address bit mask
+ * @dlid_sd: Matching DLID and side data
+ *
+ * On the host each virtual ethernet port will have
+ * a forwarding table. These tables are used to
+ * map a MAC to a LID and other data. For more
+ * details see struct opa_veswport_mactable_entries.
+ * This is the structure of a single mactable entry
+ */
+struct opa_veswport_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ __be32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_veswport_mactable - Forwarding table array
+ * @offset: mac table starting offset
+ * @num_entries: Number of entries to get or set
+ * @mac_tbl_digest: mac table digest
+ * @tbl_entries[]: Array of table entries
+ *
+ * The EM sends down this structure in a MAD indicating
+ * the starting offset in the forwarding table that this
+ * entry is to be loaded into and the number of entries
+ * that this MAD instance contains.
+ * The mac_tbl_digest has been added to this MAD structure. It will be set
+ * by the EM and used to check for any discrepancies between this value and
+ * the value maintained by the EM in case the VNIC port is deleted or
+ * unloaded.
+ * A new instantiation of a VNIC will always have a value of zero.
+ * This value is stored as part of the vnic adapter structure and will be
+ * accessed by the GET and SET routines for both the mactable entries and the
+ * veswport info.
+ */
+struct opa_veswport_mactable {
+ __be16 offset;
+ __be16 num_entries;
+ __be32 mac_tbl_digest;
+ struct opa_veswport_mactable_entry tbl_entries[0];
+} __packed;
+
+/**
+ * struct opa_veswport_summary_counters - summary counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_packets: transmit packets
+ * @rx_packets: receive packets
+ * @tx_bytes: transmit bytes
+ * @rx_bytes: receive bytes
+ * @tx_unicast: unicast packets transmitted
+ * @tx_mcastbcast: multicast/broadcast packets transmitted
+ * @tx_untagged: non-vlan packets transmitted
+ * @tx_vlan: vlan packets transmitted
+ * @tx_64_size: transmit packet length is 64 bytes
+ * @tx_65_127: transmit packet length is >=65 and < 127 bytes
+ * @tx_128_255: transmit packet length is >=128 and < 255 bytes
+ * @tx_256_511: transmit packet length is >=256 and < 511 bytes
+ * @tx_512_1023: transmit packet length is >=512 and < 1023 bytes
+ * @tx_1024_1518: transmit packet length is >=1024 and < 1518 bytes
+ * @tx_1519_max: transmit packet length >= 1519 bytes
+ * @rx_unicast: unicast packets received
+ * @rx_mcastbcast: multicast/broadcast packets received
+ * @rx_untagged: non-vlan packets received
+ * @rx_vlan: vlan packets received
+ * @rx_64_size: received packet length is 64 bytes
+ * @rx_65_127: received packet length is >=65 and < 127 bytes
+ * @rx_128_255: received packet length is >=128 and < 255 bytes
+ * @rx_256_511: received packet length is >=256 and < 511 bytes
+ * @rx_512_1023: received packet length is >=512 and < 1023 bytes
+ * @rx_1024_1518: received packet length is >=1024 and < 1518 bytes
+ * @rx_1519_max: received packet length >= 1519 bytes
+ *
+ * All the above are counters of corresponding conditions.
+ */
+struct opa_veswport_summary_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+ __be64 tx_packets;
+ __be64 rx_packets;
+ __be64 tx_bytes;
+ __be64 rx_bytes;
+
+ __be64 tx_unicast;
+ __be64 tx_mcastbcast;
+
+ __be64 tx_untagged;
+ __be64 tx_vlan;
+
+ __be64 tx_64_size;
+ __be64 tx_65_127;
+ __be64 tx_128_255;
+ __be64 tx_256_511;
+ __be64 tx_512_1023;
+ __be64 tx_1024_1518;
+ __be64 tx_1519_max;
+
+ __be64 rx_unicast;
+ __be64 rx_mcastbcast;
+
+ __be64 rx_untagged;
+ __be64 rx_vlan;
+
+ __be64 rx_64_size;
+ __be64 rx_65_127;
+ __be64 rx_128_255;
+ __be64 rx_256_511;
+ __be64 rx_512_1023;
+ __be64 rx_1024_1518;
+ __be64 rx_1519_max;
+
+ __be64 reserved[16];
+} __packed;
+
+/**
+ * struct opa_veswport_error_counters - error counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_smac_filt: smac filter errors
+ * @tx_dlid_zero: transmit packets with invalid dlid
+ * @tx_logic: other transmit errors
+ * @tx_drop_state: packet transmission in non-forwarding port state
+ * @rx_bad_veswid: received packet with invalid vesw id
+ * @rx_runt: received ethernet packet with length < 64 bytes
+ * @rx_oversize: received ethernet packet with length > MTU size
+ * @rx_eth_down: received packets when interface is down
+ * @rx_drop_state: received packets in non-forwarding port state
+ * @rx_logic: other receive errors
+ *
+ * All the above are counters of corresponding error conditions.
+ */
+struct opa_veswport_error_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+
+ __be64 rsvd0;
+ __be64 tx_smac_filt;
+ __be64 rsvd1;
+ __be64 rsvd2;
+ __be64 rsvd3;
+ __be64 tx_dlid_zero;
+ __be64 rsvd4;
+ __be64 tx_logic;
+ __be64 rsvd5;
+ __be64 tx_drop_state;
+
+ __be64 rx_bad_veswid;
+ __be64 rsvd6;
+ __be64 rx_runt;
+ __be64 rx_oversize;
+ __be64 rsvd7;
+ __be64 rx_eth_down;
+ __be64 rx_drop_state;
+ __be64 rx_logic;
+ __be64 rsvd8;
+
+ __be64 rsvd9[16];
+} __packed;
+
+/**
+ * struct opa_veswport_trap - Trap message sent to EM by VNIC
+ * @fabric_id: 10 bit fabric id
+ * @veswid: 12 bit virtual ethernet switch id
+ * @veswportnum: logical port number on the Virtual switch
+ * @opaportnum: physical port num (redundant on host)
+ * @veswportindex: switch port index on opa port 0 based
+ * @opcode: operation
+ * @reserved: 32 bit for alignment
+ *
+ * The VNIC will send trap messages to the Ethernet manager to
+ * inform it about changes to the VNIC config, behaviour, etc.
+ * This is the format of the trap payload.
+ */
+struct opa_veswport_trap {
+ __be16 fabric_id;
+ __be16 veswid;
+ __be32 veswportnum;
+ __be16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ __be32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_iface_macs_entry - single entry in the mac list
+ * @mac_addr: MAC address
+ */
+struct opa_vnic_iface_mac_entry {
+ u8 mac_addr[ETH_ALEN];
+};
+
+/**
+ * struct opa_veswport_iface_macs - Msg to set globally administered MAC
+ * @start_idx: position of first entry (0 based)
+ * @num_macs_in_msg: number of MACs in this message
+ * @tot_macs_in_lst: The total number of MACs the agent has
+ * @gen_count: gen_count to indicate change
+ * @entry: The mac list entry
+ *
+ * The same attribute IDs and attribute modifiers used for locally
+ * administered addresses are used to set globally administered addresses.
+ */
+struct opa_veswport_iface_macs {
+ __be16 start_idx;
+ __be16 num_macs_in_msg;
+ __be16 tot_macs_in_lst;
+ __be16 gen_count;
+ struct opa_vnic_iface_mac_entry entry[0];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad - Generic VEMA MAD
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @data: MAD data
+ */
+struct opa_vnic_vema_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ u8 data[OPA_VNIC_EMA_DATA];
+};
+
+/**
+ * struct opa_vnic_notice_attr - Generic Notice MAD
+ * @gen_type: Generic/Specific bit and type of notice
+ * @oui_1: Vendor ID byte 1
+ * @oui_2: Vendor ID byte 2
+ * @oui_3: Vendor ID byte 3
+ * @trap_num: Trap number
+ * @toggle_count: Notice toggle bit and count value
+ * @issuer_lid: Trap issuer's lid
+ * @issuer_gid: Issuer GID (only if Report method)
+ * @raw_data: Trap message body
+ */
+struct opa_vnic_notice_attr {
+ u8 gen_type;
+ u8 oui_1;
+ u8 oui_2;
+ u8 oui_3;
+ __be16 trap_num;
+ __be16 toggle_count;
+ __be32 issuer_lid;
+ __be32 reserved;
+ u8 issuer_gid[16];
+ u8 raw_data[64];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @notice: Notice structure
+ */
+struct opa_vnic_vema_mad_trap {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ struct opa_vnic_notice_attr notice;
+};
+
+#endif /* _OPA_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
new file mode 100644
index 000000000000..d66540e24885
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC ethtool functions
+ */
+
+#include <linux/ethtool.h>
+
+#include "opa_vnic_internal.h"
+
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ struct {
+ int sizeof_stat;
+ int stat_offset;
+ };
+};
+
+#define VNIC_STAT(m) { FIELD_SIZEOF(struct opa_vnic_stats, m), \
+ offsetof(struct opa_vnic_stats, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+ /* NETDEV stats */
+ {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+ {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+ {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+ {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+ {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+ {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+ {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+ {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+ /* SUMMARY counters */
+ {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+ {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+ {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+ {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+ {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+ {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+ {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+ {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+ {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+ {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+ {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+ {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+ {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+ {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+ {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+ {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+ {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+ {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+ {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+ {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+ {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+ {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+ /* ERROR counters */
+ {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+ {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+ {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+ {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+ {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+ {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+ {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+ {"rx_oversize", VNIC_STAT(rx_oversize)},
+ {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, opa_vnic_driver_version,
+ sizeof(drvinfo->version));
+ strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+ sizeof(drvinfo->bus_info));
+}
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+ return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+ int i;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ for (i = 0; i < VNIC_STATS_LEN; i++) {
+ char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+ data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ mutex_unlock(&adapter->stats_lock);
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ int i;
+
+ if (stringset != ETH_SS_STATS)
+ return;
+
+ for (i = 0; i < VNIC_STATS_LEN; i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ vnic_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+}
+
+/* ethtool ops */
+static const struct ethtool_ops opa_vnic_ethtool_ops = {
+ .get_drvinfo = vnic_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = vnic_get_strings,
+ .get_sset_count = vnic_get_sset_count,
+ .get_ethtool_stats = vnic_get_ethtool_stats,
+};
+
+/* opa_vnic_set_ethtool_ops - set ethtool ops */
+void opa_vnic_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &opa_vnic_ethtool_ops;
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
new file mode 100644
index 000000000000..6bba886bec1f
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -0,0 +1,329 @@
+#ifndef _OPA_VNIC_INTERNAL_H
+#define _OPA_VNIC_INTERNAL_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC driver internal declarations
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/sizes.h>
+#include <rdma/opa_vnic.h>
+
+#include "opa_vnic_encap.h"
+
+#define OPA_VNIC_VLAN_PCP(vlan_tci) \
+ (((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
+
+/* Flow to default port redirection table size */
+#define OPA_VNIC_FLOW_TBL_SIZE 32
+
+/* Invalid port number */
+#define OPA_VNIC_INVALID_PORT 0xff
+
+struct opa_vnic_adapter;
+
+/**
+ * struct __opa_vesw_info - OPA vnic virtual switch info
+ *
+ * Same as opa_vesw_info without bitwise attribute.
+ */
+struct __opa_vesw_info {
+ u16 fabric_id;
+ u16 vesw_id;
+
+ u8 rsvd0[6];
+ u16 def_port_mask;
+
+ u8 rsvd1[2];
+ u16 pkey;
+
+ u8 rsvd2[4];
+ u32 u_mcast_dlid;
+ u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ u16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct __opa_per_veswport_info - OPA vnic per port info
+ *
+ * Same as opa_per_veswport_info without bitwise attribute.
+ */
+struct __opa_per_veswport_info {
+ u32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ u16 max_mac_tbl_ent;
+ u16 max_smac_ent;
+ u32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ u32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ u16 uc_macs_gen_count;
+ u16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct __opa_veswport_info - OPA vnic port info
+ *
+ * Same as opa_veswport_info without bitwise attribute.
+ */
+struct __opa_veswport_info {
+ struct __opa_vesw_info vesw;
+ struct __opa_per_veswport_info vport;
+};
+
+/**
+ * struct __opa_veswport_trap - OPA vnic trap info
+ *
+ * Same as opa_veswport_trap without bitwise attribute.
+ */
+struct __opa_veswport_trap {
+ u16 fabric_id;
+ u16 veswid;
+ u32 veswportnum;
+ u16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ u32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
+ */
+struct opa_vnic_ctrl_port {
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_ops *ops;
+ u8 num_ports;
+};
+
+/**
+ * struct opa_vnic_adapter - OPA VNIC netdev private data structure
+ * @netdev: pointer to associated netdev
+ * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
+ * @rn_ops: rdma netdev's net_device_ops
+ * @port_num: OPA port number
+ * @vport_num: vesw port number
+ * @lock: adapter lock
+ * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
+ * @mactbl: hash table of MAC entries
+ * @mactbl_lock: mac table lock
+ * @stats_lock: statistics lock
+ * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
+ */
+struct opa_vnic_adapter {
+ struct net_device *netdev;
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_port *cport;
+ const struct net_device_ops *rn_ops;
+
+ u8 port_num;
+ u8 vport_num;
+
+ /* Lock used around concurrent updates to netdev */
+ struct mutex lock;
+
+ struct __opa_veswport_info info;
+ u8 vema_mac_addr[ETH_ALEN];
+ u32 umac_hash;
+ u32 mmac_hash;
+ struct hlist_head __rcu *mactbl;
+
+ /* Lock used to protect updates to mac table */
+ struct mutex mactbl_lock;
+
+ /* Lock used to protect access to vnic counters */
+ struct mutex stats_lock;
+
+ u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+ unsigned long trap_timeout;
+ u8 trap_count;
+};
+
+/* Same as opa_veswport_mactable_entry, but without bitwise attribute */
+struct __opa_vnic_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ u32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node
+ * @hlist: hash list handle
+ * @index: index of entry in the mac table
+ * @entry: entry in the table
+ */
+struct opa_vnic_mac_tbl_node {
+ struct hlist_node hlist;
+ u16 index;
+ struct __opa_vnic_mactable_entry entry;
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+ netdev_warn(adapter->netdev, format, ## arg)
+
+#define c_err(format, arg...) \
+ dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+ dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+ dev_dbg(&cport->ibdev->dev, format, ## arg)
+
+/* The maximum allowed entries in the mac table */
+#define OPA_VNIC_MAC_TBL_MAX_ENTRIES 2048
+/* Limit of smac entries in mac table */
+#define OPA_VNIC_MAX_SMAC_LIMIT 256
+
+/* The last octet of the MAC address is used as the key to the hash table */
+#define OPA_VNIC_MAC_HASH_IDX 5
+
+/* The VNIC MAC hash table is of size 2^8 */
+#define OPA_VNIC_MAC_TBL_HASH_BITS 8
+#define OPA_VNIC_MAC_TBL_SIZE BIT(OPA_VNIC_MAC_TBL_HASH_BITS)
+
+/* VNIC HASH MACROS */
+#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE)
+
+#define vnic_hash_add(hashtable, node, key) \
+ hlist_add_head(node, \
+ &hashtable[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))])
+
+#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
+
+#define vnic_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, \
+ &name[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))], member)
+
+#define vnic_hash_for_each(name, bkt, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry(obj, &name[bkt], member)
+
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num);
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs);
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs);
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event);
+void opa_vnic_set_ethtool_ops(struct net_device *netdev);
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid);
+
+#endif /* _OPA_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
new file mode 100644
index 000000000000..905f39dda5aa
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC) driver
+ * netdev functionality.
+ */
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+
+#include "opa_vnic_internal.h"
+
+#define OPA_TX_TIMEOUT_MS 1000
+
+#define OPA_VNIC_SKB_HEADROOM \
+ ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8)
+
+/* This function is overridden to provide the opa_vnic specific implementation */
+static void opa_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+ memcpy(stats, &vstats.netstats, sizeof(*stats));
+}
+
+/* opa_netdev_start_xmit - transmit function */
+static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+
+ v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len);
+	/* pad to ensure minimum ethernet packet length */
+ if (unlikely(skb->len < ETH_ZLEN)) {
+ if (skb_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
+ skb_put(skb, ETH_ZLEN - skb->len);
+ }
+
+ opa_vnic_encap_skb(adapter, skb);
+ return adapter->rn_ops->ndo_start_xmit(skb, netdev);
+}
+
+static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ int rc;
+
+ /* pass entropy and vl as metadata in skb */
+ mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+ mdata->entropy = opa_vnic_calc_entropy(adapter, skb);
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
+ accel_priv, fallback);
+ skb_pull(skb, sizeof(*mdata));
+ return rc;
+}
+
+/* opa_vnic_process_vema_config - process vema configuration updates */
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct rdma_netdev *rn = netdev_priv(adapter->netdev);
+ u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 };
+ struct net_device *netdev = adapter->netdev;
+ u8 i, port_count = 0;
+ u16 port_mask;
+
+ /* If the base_mac_addr is changed, update the interface mac address */
+ if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr))) {
+ struct sockaddr saddr;
+
+ memcpy(saddr.sa_data, info->vport.base_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr));
+ mutex_lock(&adapter->lock);
+ eth_mac_addr(netdev, &saddr);
+ memcpy(adapter->vema_mac_addr,
+ info->vport.base_mac_addr, ETH_ALEN);
+ mutex_unlock(&adapter->lock);
+ }
+
+ rn->set_id(netdev, info->vesw.vesw_id);
+
+ /* Handle MTU limit change */
+ rtnl_lock();
+ netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan,
+ netdev->min_mtu);
+ if (netdev->mtu > netdev->max_mtu)
+ dev_set_mtu(netdev, netdev->max_mtu);
+ rtnl_unlock();
+
+ /* Update flow to default port redirection table */
+ port_mask = info->vesw.def_port_mask;
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) {
+ if (port_mask & 1)
+ port_num[port_count++] = i;
+ port_mask >>= 1;
+ }
+
+ /*
+ * Build the flow table. Flow table is required when destination LID
+ * is not available. Up to OPA_VNIC_FLOW_TBL_SIZE flows are supported.
+ * Each flow needs a default port number to get its dlid from the
+ * u_ucast_dlid array.
+ */
+ for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++)
+ adapter->flow_tbl[i] = port_count ? port_num[i % port_count] :
+ OPA_VNIC_INVALID_PORT;
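+
+	/*
+	 * Illustrative example (with a hypothetical mask value): if
+	 * def_port_mask were 0x5, default ports 0 and 2 are enabled, so
+	 * port_num = {0, 2} and port_count = 2, giving
+	 * flow_tbl[i] = port_num[i % 2], i.e. 0, 2, 0, 2, ...
+	 * With an empty mask every entry is OPA_VNIC_INVALID_PORT.
+	 */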
+
+ /* Operational state can only be DROP_ALL or FORWARDING */
+ if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) {
+ info->vport.oper_state = OPA_VNIC_STATE_FORWARDING;
+ netif_dormant_off(netdev);
+ } else {
+ info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ netif_dormant_on(netdev);
+ }
+}
+
+/*
+ * Set the power on default values in adapter's vema interface structure.
+ */
+static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter)
+{
+ adapter->info.vport.max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES;
+ adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT;
+ adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+}
+
+/* opa_vnic_set_mac_addr - change mac address */
+static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct sockaddr *sa = addr;
+ int rc;
+
+ if (!memcmp(netdev->dev_addr, sa->sa_data, ETH_ALEN))
+ return 0;
+
+ mutex_lock(&adapter->lock);
+ rc = eth_mac_addr(netdev, addr);
+ mutex_unlock(&adapter->lock);
+ if (rc)
+ return rc;
+
+ adapter->info.vport.uc_macs_gen_count++;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+ return 0;
+}
+
+/*
+ * opa_vnic_mac_send_event - post event on possible mac list change
+ * Send trap when the digest of the uc/mc mac list differs from the
+ * previous run. The digest is evaluated similarly to a checksum.
+ */
+static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct netdev_hw_addr_list *hw_list;
+ u32 *ref_crc;
+ u32 l, crc = 0;
+
+ switch (event) {
+ case OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE:
+ hw_list = &netdev->uc;
+ adapter->info.vport.uc_macs_gen_count++;
+ ref_crc = &adapter->umac_hash;
+ break;
+ case OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE:
+ hw_list = &netdev->mc;
+ adapter->info.vport.mc_macs_gen_count++;
+ ref_crc = &adapter->mmac_hash;
+ break;
+ default:
+ return;
+ }
+ netdev_hw_addr_list_for_each(ha, hw_list) {
+ crc = crc32_le(crc, ha->addr, ETH_ALEN);
+ }
+ l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN;
+ crc = ~crc32_le(crc, (void *)&l, sizeof(l));
+
+ if (crc != *ref_crc) {
+ *ref_crc = crc;
+ opa_vnic_vema_report_event(adapter, event);
+ }
+}
+
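+/*
+ * A minimal sketch of the digest above in isolation, assuming the
+ * kernel crc32_le() semantics (caller-supplied seed, no implicit final
+ * inversion); the helper name is illustrative only:
+ *
+ *	static u32 mac_list_digest(const u8 (*macs)[ETH_ALEN], u32 n)
+ *	{
+ *		u32 i, len = n * ETH_ALEN, crc = 0;
+ *
+ *		for (i = 0; i < n; i++)
+ *			crc = crc32_le(crc, macs[i], ETH_ALEN);
+ *		return ~crc32_le(crc, (void *)&len, sizeof(len));
+ *	}
+ */
+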
+/* opa_vnic_set_rx_mode - handle uc/mc mac list change */
+static void opa_vnic_set_rx_mode(struct net_device *netdev)
+{
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE);
+}
+
+/* opa_netdev_open - activate network interface */
+static int opa_netdev_open(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_open(adapter->netdev);
+ if (rc) {
+ v_dbg("open failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* opa_netdev_close - disable network interface */
+static int opa_netdev_close(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_stop(adapter->netdev);
+ if (rc) {
+ v_dbg("close failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* netdev ops */
+static const struct net_device_ops opa_netdev_ops = {
+ .ndo_open = opa_netdev_open,
+ .ndo_stop = opa_netdev_close,
+ .ndo_start_xmit = opa_netdev_start_xmit,
+ .ndo_get_stats64 = opa_vnic_get_stats64,
+ .ndo_set_rx_mode = opa_vnic_set_rx_mode,
+ .ndo_select_queue = opa_vnic_select_queue,
+ .ndo_set_mac_address = opa_vnic_set_mac_addr,
+};
+
+/* opa_vnic_add_netdev - create vnic netdev interface */
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num)
+{
+ struct opa_vnic_adapter *adapter;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int rc;
+
+ netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+ else if (IS_ERR(netdev))
+ return ERR_CAST(netdev);
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter) {
+ rc = -ENOMEM;
+ goto adapter_err;
+ }
+
+ rn = netdev_priv(netdev);
+ rn->clnt_priv = adapter;
+ rn->hca = ibdev;
+ rn->port_num = port_num;
+ adapter->netdev = netdev;
+ adapter->ibdev = ibdev;
+ adapter->port_num = port_num;
+ adapter->vport_num = vport_num;
+ adapter->rn_ops = netdev->netdev_ops;
+
+ netdev->netdev_ops = &opa_netdev_ops;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM;
+ mutex_init(&adapter->lock);
+ mutex_init(&adapter->mactbl_lock);
+ mutex_init(&adapter->stats_lock);
+
+ SET_NETDEV_DEV(netdev, ibdev->dev.parent);
+
+ opa_vnic_set_ethtool_ops(netdev);
+
+ opa_vnic_set_pod_values(adapter);
+
+ rc = register_netdev(netdev);
+ if (rc)
+ goto netdev_err;
+
+ netif_carrier_off(netdev);
+ netif_dormant_on(netdev);
+ v_info("initialized\n");
+
+ return adapter;
+netdev_err:
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ mutex_destroy(&adapter->stats_lock);
+ kfree(adapter);
+adapter_err:
+ ibdev->free_rdma_netdev(netdev);
+
+ return ERR_PTR(rc);
+}
+
+/* opa_vnic_rem_netdev - remove vnic netdev interface */
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct ib_device *ibdev = adapter->ibdev;
+
+ v_info("removing\n");
+ unregister_netdev(netdev);
+ opa_vnic_release_mac_tbl(adapter);
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ mutex_destroy(&adapter->stats_lock);
+ kfree(adapter);
+ ibdev->free_rdma_netdev(netdev);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
new file mode 100644
index 000000000000..cee49aa6676c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -0,0 +1,1053 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC)
+ * Ethernet Management Agent (EMA) driver
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+
+#include "opa_vnic_internal.h"
+
+#define DRV_VERSION "1.0"
+char opa_vnic_driver_name[] = "opa_vnic";
+const char opa_vnic_driver_version[] = DRV_VERSION;
+
+/*
+ * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
+ * field in the class port info MAD.
+ */
+#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x) (((x) >> 3) & 0x1f)
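+/* e.g. trap_sl_rsvd = 0x2b yields trap SL (0x2b >> 3) & 0x1f = 5 */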
+
+/* Cap trap bursts to a reasonable limit good for normal cases */
+#define OPA_VNIC_TRAP_BURST_LIMIT 4
+
+/*
+ * VNIC trap rate-limit timeout, in microseconds.
+ * Derived from the cap_mask2 response time of 4.096 usec * 2^18
+ * (~1.0737 secs); see IB spec 13.4.6.2.1 PortInfoSubnetTimeout and
+ * 13.4.9 Traps.
+ */
+#define OPA_VNIC_TRAP_TIMEOUT ((4096 * (1UL << 18)) / 1000)
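+/*
+ * Worked out: 4096 * 2^18 = 1073741824; divided by 1000 this is
+ * 1073741 usec (~1.07 sec), the value handed to usecs_to_jiffies()
+ * when arming the rate-limit window.
+ */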
+
+#define OPA_VNIC_UNSUP_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
+
+#define OPA_VNIC_INVAL_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
+
+#define OPA_VNIC_CLASS_CAP_TRAP 0x1
+
+/* Maximum number of VNIC ports supported */
+#define OPA_VNIC_MAX_NUM_VPORT 255
+
+/**
+ * struct opa_vnic_vema_port -- VNIC VEMA port details
+ * @cport: pointer to port
+ * @mad_agent: pointer to mad agent for port
+ * @class_port_info: Class port info information.
+ * @tid: Transaction id
+ * @port_num: OPA port number
+ * @vport_idr: vnic ports idr
+ * @event_handler: ib event handler
+ * @lock: adapter interface lock
+ */
+struct opa_vnic_vema_port {
+ struct opa_vnic_ctrl_port *cport;
+ struct ib_mad_agent *mad_agent;
+ struct opa_class_port_info class_port_info;
+ u64 tid;
+ u8 port_num;
+ struct idr vport_idr;
+ struct ib_event_handler event_handler;
+
+ /* Lock to query/update network adapter */
+ struct mutex lock;
+};
+
+static void opa_vnic_vema_add_one(struct ib_device *device);
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data);
+
+static struct ib_client opa_vnic_client = {
+ .name = opa_vnic_driver_name,
+ .add = opa_vnic_vema_add_one,
+ .remove = opa_vnic_vema_rem_one,
+};
+
+/**
+ * vema_get_vport_num -- Get the vnic port number from the mad
+ * @recvd_mad: Received mad
+ *
+ * Return: the vnic port number
+ */
+static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad)
+{
+ return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff;
+}
+
+/**
+ * vema_get_vport_adapter -- Get vnic port adapter from recvd mad
+ * @recvd_mad: received mad
+ * @port: ptr to port struct on which MAD was recvd
+ *
+ * Return: vnic adapter
+ */
+static inline struct opa_vnic_adapter *
+vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_port *port)
+{
+ u8 vport_num = vema_get_vport_num(recvd_mad);
+
+ return idr_find(&port->vport_idr, vport_num);
+}
+
+/**
+ * vema_mac_tbl_req_ok -- Check if mac request has correct values
+ * @mac_tbl: mac table
+ *
+ * This function checks for the validity of the offset and number of
+ * entries required.
+ *
+ * Return: true if offset and num_entries are valid
+ */
+static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl)
+{
+ u16 offset, num_entries;
+ u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
+ sizeof(mac_tbl->tbl_entries[0]));
+
+ offset = be16_to_cpu(mac_tbl->offset);
+ num_entries = be16_to_cpu(mac_tbl->num_entries);
+
+ return ((num_entries <= req_entries) &&
+ (offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES));
+}
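+/*
+ * Example with hypothetical sizes: if OPA_VNIC_EMA_DATA were 1896
+ * bytes, sizeof(*mac_tbl) 8 bytes and each tbl_entries[] element 12
+ * bytes, req_entries would be (1896 - 8) / 12 = 157; a request for 60
+ * entries would pass the size check while one for 200 would not.
+ */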
+
+/*
+ * Return the power on default values in the port info structure
+ * in big endian format as required by MAD.
+ */
+static inline void vema_get_pod_values(struct opa_veswport_info *port_info)
+{
+ memset(port_info, 0, sizeof(*port_info));
+ port_info->vport.max_mac_tbl_ent =
+ cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES);
+ port_info->vport.max_smac_ent =
+ cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT);
+ port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+}
+
+/**
+ * vema_add_vport -- Add a new vnic port
+ * @port: ptr to opa_vnic_vema_port struct
+ * @vport_num: vnic port number (to be added)
+ *
+ * Return: pointer to the vnic adapter structure, or an error pointer on failure
+ */
+static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port,
+ u8 vport_num)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num);
+ if (!IS_ERR(adapter)) {
+ int rc;
+
+ adapter->cport = cport;
+ rc = idr_alloc(&port->vport_idr, adapter, vport_num,
+ vport_num + 1, GFP_NOWAIT);
+ if (rc < 0) {
+ opa_vnic_rem_netdev(adapter);
+ adapter = ERR_PTR(rc);
+ }
+ }
+
+ return adapter;
+}
+
+/**
+ * vema_get_class_port_info -- Get class info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function copies the latest class port info value set for the
+ * port and stores it for generating traps
+ */
+static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_class_port_info *port_info;
+
+ port_info = (struct opa_class_port_info *)rsp_mad->data;
+ memcpy(port_info, &port->class_port_info, sizeof(*port_info));
+	port_info->base_version = OPA_MGMT_BASE_VERSION;
+ port_info->class_version = OPA_EMA_CLASS_VERSION;
+
+ /*
+ * Set capability mask bit indicating agent generates traps,
+ * and set the maximum number of VNIC ports supported.
+ */
+ port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP |
+ (OPA_VNIC_MAX_NUM_VPORT << 8)));
+
+ /*
+ * Since a get routine is always sent by the EM first we
+ * set the expected response time to
+ * 4.096 usec * 2^18 == 1.0737 sec here.
+ */
+ port_info->cap_mask2_resp_time = cpu_to_be32(18);
+}
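+/*
+ * The response-time field encodes a value n meaning 4.096 usec * 2^n,
+ * so n = 18 above yields ~1.0737 sec.
+ */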
+
+/**
+ * vema_set_class_port_info -- Set class info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function updates the port class info for the specific vnic
+ * and sets up the response mad data
+ */
+static void vema_set_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ memcpy(&port->class_port_info, recvd_mad->data,
+ sizeof(port->class_port_info));
+
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_veswport_info -- Get veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ */
+static void vema_get_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ memset(port_info, 0, sizeof(*port_info));
+ opa_vnic_get_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_get_per_veswport_info(adapter,
+ &port_info->vport);
+ } else {
+ vema_get_pod_values(port_info);
+ }
+}
+
+/**
+ * vema_set_veswport_info -- Set veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function sets the veswport info for the vnic
+ */
+static void vema_set_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_veswport_info *port_info;
+ struct opa_vnic_adapter *adapter;
+ u8 vport_num;
+
+ vport_num = vema_get_vport_num(recvd_mad);
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ adapter = vema_add_vport(port, vport_num);
+ if (IS_ERR(adapter)) {
+ c_err("failed to add vport %d: %ld\n",
+ vport_num, PTR_ERR(adapter));
+ goto err_exit;
+ }
+ }
+
+ port_info = (struct opa_veswport_info *)recvd_mad->data;
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ return;
+
+err_exit:
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+}
+
+/**
+ * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function gets the MAC entries that are programmed into
+ * the VNIC MAC forwarding table. It checks for the validity of
+ * the index into the MAC table and the number of entries that
+ * are to be retrieved.
+ */
+static void vema_get_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data;
+ mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data;
+
+ if (vema_mac_tbl_req_ok(mac_tbl_in)) {
+ mac_tbl_out->offset = mac_tbl_in->offset;
+ mac_tbl_out->num_entries = mac_tbl_in->num_entries;
+ opa_vnic_query_mac_tbl(adapter, mac_tbl_out);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function sets the MAC entries in the VNIC forwarding table.
+ * It checks for the validity of the index and the number of forwarding
+ * table entries to be programmed.
+ */
+static void vema_set_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data;
+ if (vema_mac_tbl_req_ok(mac_tbl)) {
+ if (opa_vnic_update_mac_tbl(adapter, mac_tbl))
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ }
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_set_delete_vesw -- Reset VESW info to POD values
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function clears all the fields of veswport info for the requested vesw
+ * and sets them back to the power-on default values. It does not delete the
+ * vesw.
+ */
+static void vema_set_delete_vesw(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ vema_get_pod_values(port_info);
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ opa_vnic_release_mac_tbl(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ * @attr_id: Attribute ID indicating multicast or unicast mac list
+ */
+static void vema_get_mac_list(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad,
+ u16 attr_id)
+{
+ struct opa_veswport_iface_macs *macs_in, *macs_out;
+ int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data;
+ macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data;
+
+ macs_out->start_idx = macs_in->start_idx;
+ if (macs_in->num_macs_in_msg)
+ macs_out->num_macs_in_msg = macs_in->num_macs_in_msg;
+ else
+ macs_out->num_macs_in_msg = cpu_to_be16(max_entries);
+
+ if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS)
+ opa_vnic_query_mcast_macs(adapter, macs_out);
+ else
+ opa_vnic_query_ucast_macs(adapter, macs_out);
+}
+
+/**
+ * vema_get_summary_counters -- Gets summary counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_summary_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_summary_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data;
+ opa_vnic_get_summary_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get_error_counters -- Gets error counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_error_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_error_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_error_counters *)rsp_mad->data;
+ opa_vnic_get_error_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get -- Process received get MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_IFACE_UCAST_MACS:
+ /* fall through */
+ case OPA_EM_ATTR_IFACE_MCAST_MACS:
+ vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
+ break;
+ case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
+ vema_get_summary_counters(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS:
+ vema_get_error_counters(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_set -- Process received set MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_set(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_set_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_set_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_set_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_DELETE_VESW:
+ vema_set_delete_vesw(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_send -- Send handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Free all the data structures associated with the sent MAD
+ */
+static void vema_send(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_wc)
+{
+ ib_destroy_ah(mad_wc->send_buf->ah);
+ ib_free_send_mad(mad_wc->send_buf);
+}
+
+/**
+ * vema_recv -- Recv handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Handle only set and get methods and respond to other methods
+ * as unsupported. Allocate response buffer and address handle
+ * for the response MAD.
+ */
+static void vema_recv(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
+ struct ib_mad_recv_wc *mad_wc)
+{
+ struct opa_vnic_vema_port *port;
+ struct ib_ah *ah;
+ struct ib_mad_send_buf *rsp;
+ struct opa_vnic_vema_mad *vema_mad;
+
+ if (!mad_wc || !mad_wc->recv_buf.mad)
+ return;
+
+ port = mad_agent->context;
+ ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
+ mad_wc->recv_buf.grh, mad_agent->port_num);
+ if (IS_ERR(ah))
+ goto free_recv_mad;
+
+ rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
+ mad_wc->wc->pkey_index, 0,
+ IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(rsp))
+ goto err_rsp;
+
+ rsp->ah = ah;
+ vema_mad = rsp->mad;
+ memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
+ vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ vema_mad->mad_hdr.status = 0;
+
+ /* Lock ensures network adapter is not removed */
+ mutex_lock(&port->lock);
+
+ switch (mad_wc->recv_buf.mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ case IB_MGMT_METHOD_SET:
+ vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ default:
+ vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+ mutex_unlock(&port->lock);
+
+ if (!ib_post_send_mad(rsp, NULL)) {
+ /*
+		 * if the post send was successful, the ah and send mad
+		 * will be destroyed in the send handler
+ */
+ goto free_recv_mad;
+ }
+
+ ib_free_send_mad(rsp);
+
+err_rsp:
+ ib_destroy_ah(ah);
+free_recv_mad:
+ ib_free_recv_mad(mad_wc);
+}
+
+/**
+ * vema_get_port -- Gets the opa_vnic_vema_port
+ * @cport: pointer to control dev
+ * @port_num: Port number
+ *
+ * This function loops through the ports and returns
+ * the opa_vnic_vema_port structure that is associated
+ * with the OPA port number
+ *
+ * Return: ptr to requested opa_vnic_vema_port structure
+ * on success, NULL otherwise
+ */
+static struct opa_vnic_vema_port *
+vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
+{
+ struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport);
+
+ if (port_num > cport->num_ports)
+ return NULL;
+
+ return port + (port_num - 1);
+}
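+/*
+ * This pointer arithmetic assumes the per-port structures are laid out
+ * immediately after the control-port structure, matching the single
+ * kzalloc() in opa_vnic_vema_add_one():
+ *
+ *	[ struct opa_vnic_ctrl_port | port 1 | port 2 | ... | port N ]
+ *
+ * so vema_get_port(cport, i) is effectively
+ * &((struct opa_vnic_vema_port *)(cport + 1))[i - 1].
+ */
+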
+
+/**
+ * opa_vnic_vema_send_trap -- This function sends a trap to the EM
+ * @adapter: pointer to vnic port adapter
+ * @data: pointer to trap data filled by calling function
+ * @lid: issuer's LID (encap_slid from vesw_port_info)
+ *
+ * This function is called from the VNIC driver to send a trap if there
+ * is something the EM should be notified about. These events currently
+ * are
+ * 1) UNICAST INTERFACE MACADDRESS changes
+ * 2) MULTICAST INTERFACE MACADDRESS changes
+ * 3) ETHERNET LINK STATUS changes
+ * While allocating the send MAD, the remote side QPN used is 1,
+ * as this is the well-known QP.
+ *
+ */
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid)
+{
+ struct opa_vnic_ctrl_port *cport = adapter->cport;
+ struct ib_mad_send_buf *send_buf;
+ struct opa_vnic_vema_port *port;
+ struct ib_device *ibp;
+ struct opa_vnic_vema_mad_trap *trap_mad;
+ struct opa_class_port_info *class;
+ struct ib_ah_attr ah_attr;
+ struct ib_ah *ah;
+ struct opa_veswport_trap *trap;
+ u32 trap_lid;
+ u16 pkey_idx;
+
+ if (!cport)
+ goto err_exit;
+ ibp = cport->ibdev;
+ port = vema_get_port(cport, data->opaportnum);
+ if (!port || !port->mad_agent)
+ goto err_exit;
+
+ if (time_before(jiffies, adapter->trap_timeout)) {
+ if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) {
+ v_warn("Trap rate exceeded\n");
+ goto err_exit;
+ } else {
+ adapter->trap_count++;
+ }
+ } else {
+ adapter->trap_count = 0;
+ }
+
+ class = &port->class_port_info;
+ /* Set up address handle */
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.sl = GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd);
+ ah_attr.port_num = port->port_num;
+ trap_lid = be32_to_cpu(class->trap_lid);
+	/*
+	 * Check trap LID validity; it must not be zero.
+	 * The trap sink could change after we build the MAD, but since
+	 * traps are not guaranteed anyway, no lock is taken; the change
+	 * would take effect even with locking.
+	 */
+ if (!trap_lid) {
+ c_err("%s: Invalid dlid\n", __func__);
+ goto err_exit;
+ }
+
+ ah_attr.dlid = trap_lid;
+ ah = ib_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
+ c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
+ ah_attr.dlid, ah_attr.sl, ah_attr.port_num);
+ goto err_exit;
+ }
+
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL,
+ &pkey_idx) < 0) {
+ c_err("%s:full key not found, defaulting to partial\n",
+ __func__);
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL,
+ &pkey_idx) < 0)
+ pkey_idx = 1;
+ }
+
+ send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
+ IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(send_buf)) {
+ c_err("%s:Couldn't allocate send buf\n", __func__);
+ goto err_sndbuf;
+ }
+
+ send_buf->ah = ah;
+
+ /* Set up common MAD hdr */
+ trap_mad = send_buf->mad;
+ trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA;
+ trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION;
+ trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
+ port->tid++;
+ trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
+ trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
+
+ /* Set up vendor OUI */
+ trap_mad->oui[0] = INTEL_OUI_1;
+ trap_mad->oui[1] = INTEL_OUI_2;
+ trap_mad->oui[2] = INTEL_OUI_3;
+
+ /* Setup notice attribute portion */
+ trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1;
+ trap_mad->notice.oui_1 = INTEL_OUI_1;
+ trap_mad->notice.oui_2 = INTEL_OUI_2;
+ trap_mad->notice.oui_3 = INTEL_OUI_3;
+ trap_mad->notice.issuer_lid = cpu_to_be32(lid);
+
+ /* copy the actual trap data */
+ trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data;
+ trap->fabric_id = cpu_to_be16(data->fabric_id);
+ trap->veswid = cpu_to_be16(data->veswid);
+ trap->veswportnum = cpu_to_be32(data->veswportnum);
+ trap->opaportnum = cpu_to_be16(data->opaportnum);
+ trap->veswportindex = data->veswportindex;
+ trap->opcode = data->opcode;
+
+ /* If successful send set up rate limit timeout else bail */
+ if (ib_post_send_mad(send_buf, NULL)) {
+ ib_free_send_mad(send_buf);
+ } else {
+ if (adapter->trap_count)
+ return;
+ adapter->trap_timeout = jiffies +
+ usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
+ return;
+ }
+
+err_sndbuf:
+ ib_destroy_ah(ah);
+err_exit:
+ v_err("Aborting trap\n");
+}
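+
+/*
+ * A sketch of the burst rate limiting used above, in isolation (names
+ * illustrative, not part of the driver):
+ *
+ *	if (time_before(jiffies, timeout)) {
+ *		if (count >= OPA_VNIC_TRAP_BURST_LIMIT)
+ *			return;			- burst exhausted, drop
+ *		count++;
+ *	} else {
+ *		count = 0;			- window expired, reset
+ *	}
+ *	send the trap; the first trap of a window then arms:
+ *	timeout = jiffies + usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
+ */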
+
+static int vema_rem_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ opa_vnic_rem_netdev(adapter);
+ return 0;
+}
+
+static int vema_enable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_on(adapter->netdev);
+ return 0;
+}
+
+static int vema_disable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_off(adapter->netdev);
+ return 0;
+}
+
+static void opa_vnic_event(struct ib_event_handler *handler,
+ struct ib_event *record)
+{
+ struct opa_vnic_vema_port *port =
+ container_of(handler, struct opa_vnic_vema_port, event_handler);
+ struct opa_vnic_ctrl_port *cport = port->cport;
+
+ if (record->element.port_num != port->port_num)
+ return;
+
+ c_dbg("OPA_VNIC received event %d on device %s port %d\n",
+ record->event, record->device->name, record->element.port_num);
+
+ if (record->event == IB_EVENT_PORT_ERR)
+ idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
+ if (record->event == IB_EVENT_PORT_ACTIVE)
+ idr_for_each(&port->vport_idr, vema_enable_vport, NULL);
+}
+
+/**
+ * vema_unregister -- Unregisters agent
+ * @cport: pointer to control port
+ *
+ * This deletes the registration by VEMA for MADs
+ */
+static void vema_unregister(struct opa_vnic_ctrl_port *cport)
+{
+ int i;
+
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+
+ if (!port->mad_agent)
+ continue;
+
+ /* Lock ensures no MAD is being processed */
+ mutex_lock(&port->lock);
+ idr_for_each(&port->vport_idr, vema_rem_vport, NULL);
+ mutex_unlock(&port->lock);
+
+ ib_unregister_mad_agent(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ ib_unregister_event_handler(&port->event_handler);
+ }
+}
+
+/**
+ * vema_register -- Registers agent
+ * @cport: pointer to control port
+ *
+ * This function registers the handlers for the VEMA MADs
+ *
+ * Return: 0 on success, non-zero otherwise
+ */
+static int vema_register(struct opa_vnic_ctrl_port *cport)
+{
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = OPA_MGMT_CLASS_INTEL_EMA,
+ .mgmt_class_version = OPA_MGMT_BASE_VERSION,
+ .oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
+ };
+ int i;
+
+ set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+ set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+ /* register ib event handler and mad agent for each port on dev */
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+ int ret;
+
+ port->cport = cport;
+ port->port_num = i;
+
+ INIT_IB_EVENT_HANDLER(&port->event_handler,
+ cport->ibdev, opa_vnic_event);
+ ret = ib_register_event_handler(&port->event_handler);
+ if (ret) {
+ c_err("port %d: event handler register failed\n", i);
+ vema_unregister(cport);
+ return ret;
+ }
+
+ idr_init(&port->vport_idr);
+ mutex_init(&port->lock);
+ port->mad_agent = ib_register_mad_agent(cport->ibdev, i,
+ IB_QPT_GSI, &reg_req,
+ IB_MGMT_RMPP_VERSION,
+ vema_send, vema_recv,
+ port, 0);
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ vema_unregister(cport);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * opa_vnic_vema_add_one -- Handle new ib device
+ * @device: ib device pointer
+ *
+ * Allocate the vnic control port and initialize it.
+ */
+static void opa_vnic_vema_add_one(struct ib_device *device)
+{
+ struct opa_vnic_ctrl_port *cport;
+ int rc, size = sizeof(*cport);
+
+ if (!rdma_cap_opa_vnic(device))
+ return;
+
+ size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
+ cport = kzalloc(size, GFP_KERNEL);
+ if (!cport)
+ return;
+
+ cport->num_ports = device->phys_port_cnt;
+ cport->ibdev = device;
+
+ /* Initialize opa vnic management agent (vema) */
+ rc = vema_register(cport);
+ if (!rc)
+ c_info("VNIC client initialized\n");
+
+ ib_set_client_data(device, &opa_vnic_client, cport);
+}
+
+/**
+ * opa_vnic_vema_rem_one -- Handle ib device removal
+ * @device: ib device pointer
+ * @client_data: ib client data
+ *
+ * Uninitialize and free the vnic control port.
+ */
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data)
+{
+ struct opa_vnic_ctrl_port *cport = client_data;
+
+ if (!cport)
+ return;
+
+ c_info("removing VNIC client\n");
+ vema_unregister(cport);
+ kfree(cport);
+}
+
+static int __init opa_vnic_init(void)
+{
+ int rc;
+
+ pr_info("OPA Virtual Network Driver - v%s\n",
+ opa_vnic_driver_version);
+
+ rc = ib_register_client(&opa_vnic_client);
+ if (rc)
+ pr_err("VNIC driver register failed %d\n", rc);
+
+ return rc;
+}
+module_init(opa_vnic_init);
+
+static void opa_vnic_deinit(void)
+{
+ ib_unregister_client(&opa_vnic_client);
+}
+module_exit(opa_vnic_deinit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel OPA Virtual Network driver");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
new file mode 100644
index 000000000000..a51bf977f4d6
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC EMA Interface functions.
+ */
+
+#include "opa_vnic_internal.h"
+
+/**
+ * opa_vnic_vema_report_event - send trap to report the specified event
+ * @adapter: vnic port adapter
+ * @event: event to be reported
+ *
+ * This function calls the vema api to send a trap for the given event.
+ */
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct __opa_veswport_trap trap_data;
+
+ trap_data.fabric_id = info->vesw.fabric_id;
+ trap_data.veswid = info->vesw.vesw_id;
+ trap_data.veswportnum = info->vport.port_num;
+ trap_data.opaportnum = adapter->port_num;
+ trap_data.veswportindex = adapter->vport_num;
+ trap_data.opcode = event;
+
+ opa_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid);
+}
+
+/**
+ * opa_vnic_get_summary_counters - get summary counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination summary counters structure
+ *
+ * This function copies the summary counters maintained by the
+ * given adapter to the destination provided.
+ */
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+ __be64 *dst;
+ u64 *src;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets);
+ cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets);
+ cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes);
+ cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes);
+
+ /*
+ * This loop depends on layout of
+ * opa_veswport_summary_counters opa_vnic_stats structures.
+ */
+ for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast;
+ dst < &cntrs->reserved[0]; dst++, src++) {
+ *dst = cpu_to_be64(*src);
+ }
+}
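+
+/*
+ * Layout dependency illustrated: the destination fields from
+ * tx_unicast up to (but excluding) reserved[] must be declared in the
+ * same order as the source u64 counters starting at tx_grp.unicast,
+ * so the first pair copied is
+ *
+ *	cntrs->tx_unicast = cpu_to_be64(vstats.tx_grp.unicast);
+ *
+ * with each following field pair taken in declaration order.
+ */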
+
+/**
+ * opa_vnic_get_error_counters - get error counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination error counters structure
+ *
+ * This function copies the error counters maintained by the
+ * given adapter to the destination provided.
+ */
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_dlid_zero = cpu_to_be64(vstats.tx_dlid_zero);
+ cntrs->tx_drop_state = cpu_to_be64(vstats.tx_drop_state);
+ cntrs->tx_logic = cpu_to_be64(vstats.netstats.tx_fifo_errors +
+ vstats.netstats.tx_carrier_errors);
+
+ cntrs->rx_bad_veswid = cpu_to_be64(vstats.netstats.rx_nohandler);
+ cntrs->rx_runt = cpu_to_be64(vstats.rx_runt);
+ cntrs->rx_oversize = cpu_to_be64(vstats.rx_oversize);
+ cntrs->rx_drop_state = cpu_to_be64(vstats.rx_drop_state);
+ cntrs->rx_logic = cpu_to_be64(vstats.netstats.rx_fifo_errors);
+}
+
+/**
+ * opa_vnic_get_vesw_info -- Get the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vesw info structure
+ *
+ * This function copies the vesw info maintained by the
+ * given adapter to the destination provided.
+ */
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *src = &adapter->info.vesw;
+ int i;
+
+ info->fabric_id = cpu_to_be16(src->fabric_id);
+ info->vesw_id = cpu_to_be16(src->vesw_id);
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+ info->def_port_mask = cpu_to_be16(src->def_port_mask);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+ info->pkey = cpu_to_be16(src->pkey);
+
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+ info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]);
+
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]);
+
+ info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan);
+ memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4));
+}
+
+/**
+ * opa_vnic_set_vesw_info -- Set the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to vesw info structure
+ *
+ * This function updates the vesw info that is maintained by the
+ * given adapter with vesw info provided. Reserved fields are stored
+ * and returned back to EM as is.
+ */
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *dst = &adapter->info.vesw;
+ int i;
+
+ dst->fabric_id = be16_to_cpu(info->fabric_id);
+ dst->vesw_id = be16_to_cpu(info->vesw_id);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+ dst->def_port_mask = be16_to_cpu(info->def_port_mask);
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+ dst->pkey = be16_to_cpu(info->pkey);
+
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]);
+
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]);
+
+ dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan);
+ memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4));
+}
+
+/**
+ * opa_vnic_get_per_veswport_info -- Get the vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vport info structure
+ *
+ * This function copies the vesw per port info maintained by the
+ * given adapter to the destination provided.
+ * Note that the read only fields are not copied.
+ */
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *src = &adapter->info.vport;
+
+ info->port_num = cpu_to_be32(src->port_num);
+ info->eth_link_status = src->eth_link_status;
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+
+ memcpy(info->base_mac_addr, src->base_mac_addr,
+ ARRAY_SIZE(info->base_mac_addr));
+ info->config_state = src->config_state;
+ info->oper_state = src->oper_state;
+ info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent);
+ info->max_smac_ent = cpu_to_be16(src->max_smac_ent);
+ info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+
+ info->encap_slid = cpu_to_be32(src->encap_slid);
+ memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc,
+ ARRAY_SIZE(info->pcp_to_sc_uc));
+ memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc,
+ ARRAY_SIZE(info->pcp_to_vl_uc));
+ memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc,
+ ARRAY_SIZE(info->pcp_to_sc_mc));
+ memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc,
+ ARRAY_SIZE(info->pcp_to_vl_mc));
+ info->non_vlan_sc_uc = src->non_vlan_sc_uc;
+ info->non_vlan_vl_uc = src->non_vlan_vl_uc;
+ info->non_vlan_sc_mc = src->non_vlan_sc_mc;
+ info->non_vlan_vl_mc = src->non_vlan_vl_mc;
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+
+ info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count);
+ info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count);
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+}
+
+/**
+ * opa_vnic_set_per_veswport_info -- Set vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to vport info structure
+ *
+ * This function updates the vesw per port info that is maintained by the
+ * given adapter with vesw per port info provided. Reserved fields are
+ * stored and returned back to EM as is.
+ */
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *dst = &adapter->info.vport;
+
+ dst->port_num = be32_to_cpu(info->port_num);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+
+ memcpy(dst->base_mac_addr, info->base_mac_addr,
+ ARRAY_SIZE(dst->base_mac_addr));
+ dst->config_state = info->config_state;
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+
+ dst->encap_slid = be32_to_cpu(info->encap_slid);
+ memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc,
+ ARRAY_SIZE(dst->pcp_to_sc_uc));
+ memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc,
+ ARRAY_SIZE(dst->pcp_to_vl_uc));
+ memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc,
+ ARRAY_SIZE(dst->pcp_to_sc_mc));
+ memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc,
+ ARRAY_SIZE(dst->pcp_to_vl_mc));
+ dst->non_vlan_sc_uc = info->non_vlan_sc_uc;
+ dst->non_vlan_vl_uc = info->non_vlan_vl_uc;
+ dst->non_vlan_sc_mc = info->non_vlan_sc_mc;
+ dst->non_vlan_vl_mc = info->non_vlan_vl_mc;
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+}
+
+/**
+ * opa_vnic_query_mcast_macs - query multicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to mac list
+ *
+ * This function populates the provided mac list with the configured
+ * multicast addresses in the adapter.
+ */
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ netdev_for_each_mc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev));
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count);
+}
+
+/**
+ * opa_vnic_query_ucast_macs - query unicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to mac list
+ *
+ * This function populates the provided mac list with the configured
+ * unicast addresses in the adapter.
+ */
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ /* loop through dev_addrs list first */
+ for_each_dev_addr(adapter->netdev, ha) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ /* Do not include EM specified MAC address */
+ if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
+ ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
+ continue;
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ /* loop through uc list */
+ netdev_for_each_uc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
+ netdev_uc_count(adapter->netdev);
+ macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
+}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1beb1ec2fbdf..74b765ce48ab 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1011,8 +1011,7 @@ struct mlx4_mad_ifc {
#define mlx4_foreach_ib_transport_port(port, dev) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
- ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
- ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
+ ((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_ETH))
#define MLX4_INVALID_SLAVE_ID 0xFF
#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 7c50bd39b297..954f42c268a4 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -236,7 +236,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
u8 outer_dmac[0x1];
u8 outer_smac[0x1];
u8 outer_ether_type[0x1];
- u8 reserved_at_3[0x1];
+ u8 outer_ip_version[0x1];
u8 outer_first_prio[0x1];
u8 outer_first_cfi[0x1];
u8 outer_first_vid[0x1];
@@ -265,7 +265,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
u8 inner_dmac[0x1];
u8 inner_smac[0x1];
u8 inner_ether_type[0x1];
- u8 reserved_at_23[0x1];
+ u8 inner_ip_version[0x1];
u8 inner_first_prio[0x1];
u8 inner_first_cfi[0x1];
u8 inner_first_vid[0x1];
@@ -371,7 +371,7 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 cvlan_tag[0x1];
u8 svlan_tag[0x1];
u8 frag[0x1];
- u8 reserved_at_93[0x4];
+ u8 ip_version[0x4];
u8 tcp_flags[0x9];
u8 tcp_sport[0x10];
@@ -4735,17 +4735,17 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
u8 reserved_at_40[0x40];
- u8 cur_flows[0x20];
+ u8 rp_cur_flows[0x20];
u8 sum_flows[0x20];
- u8 cnp_ignored_high[0x20];
+ u8 rp_cnp_ignored_high[0x20];
- u8 cnp_ignored_low[0x20];
+ u8 rp_cnp_ignored_low[0x20];
- u8 cnp_handled_high[0x20];
+ u8 rp_cnp_handled_high[0x20];
- u8 cnp_handled_low[0x20];
+ u8 rp_cnp_handled_low[0x20];
u8 reserved_at_140[0x100];
@@ -4755,13 +4755,13 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
u8 accumulators_period[0x20];
- u8 ecn_marked_roce_packets_high[0x20];
+ u8 np_ecn_marked_roce_packets_high[0x20];
- u8 ecn_marked_roce_packets_low[0x20];
+ u8 np_ecn_marked_roce_packets_low[0x20];
- u8 cnps_sent_high[0x20];
+ u8 np_cnp_sent_high[0x20];
- u8 cnps_sent_low[0x20];
+ u8 np_cnp_sent_low[0x20];
u8 reserved_at_320[0x560];
};
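
The new ip_version bits are ordinary fields in the ifc bit layouts, so they are reached with the generic MLX5_SET()/MLX5_GET() accessors. A sketch, assuming match_c/match_v point at fte_match_set_lyr_2_4 buffers:

    static void example_match_ipv4(void *match_c, void *match_v)
    {
        /* mask the new 4-bit ip_version field and match IPv4 */
        MLX5_SET(fte_match_set_lyr_2_4, match_c, ip_version, 0xf);
        MLX5_SET(fte_match_set_lyr_2_4, match_v, ip_version, 4 /* IPv4 */);
    }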
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 1aeb25dd42a7..2452e6449532 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -103,12 +103,24 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
u32 addr_flags, bool sllao, bool tokenized,
__u32 valid_lft, u32 prefered_lft);
+static inline void addrconf_addr_eui48_base(u8 *eui, const char *const addr)
+{
+ memcpy(eui, addr, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ memcpy(eui + 5, addr + 3, 3);
+}
+
+static inline void addrconf_addr_eui48(u8 *eui, const char *const addr)
+{
+ addrconf_addr_eui48_base(eui, addr);
+ eui[0] ^= 2;
+}
+
static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
if (dev->addr_len != ETH_ALEN)
return -1;
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
/*
* The zSeries OSA network cards can be shared among various
@@ -123,14 +135,16 @@ static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
* case. Hence the resulting interface identifier has local
* scope according to RFC2373.
*/
+
+ addrconf_addr_eui48_base(eui, dev->dev_addr);
+
if (dev->dev_id) {
eui[3] = (dev->dev_id >> 8) & 0xFF;
eui[4] = dev->dev_id & 0xFF;
} else {
- eui[3] = 0xFF;
- eui[4] = 0xFE;
eui[0] ^= 2;
}
+
return 0;
}
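
addrconf_addr_eui48_base() is the classic EUI-48 to EUI-64 expansion — the MAC is split in half and ff:fe inserted — and addrconf_addr_eui48() additionally flips the universal/local bit. A worked example (illustration only):

    static void example_eui48(void)
    {
        /* 00:11:22:33:44:55 -> 02:11:22:ff:fe:33:44:55 */
        static const char mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        u8 eui[8];

        addrconf_addr_eui48(eui, mac);
        /* eui[0] == 0x02: universal/local bit toggled by eui[0] ^= 2 */
    }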
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index c755325f0831..5519f31f043a 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -74,6 +74,12 @@
#define IB_GRH_FLOW_MASK 0xFFFFF
#define IB_GRH_FLOW_SHIFT 0
#define IB_GRH_NEXT_HDR 0x1B
+#define IB_FECN_SHIFT 31
+#define IB_FECN_MASK 1
+#define IB_FECN_SMASK BIT(IB_FECN_SHIFT)
+#define IB_BECN_SHIFT 30
+#define IB_BECN_MASK 1
+#define IB_BECN_SMASK BIT(IB_BECN_SHIFT)
#define IB_AETH_CREDIT_SHIFT 24
#define IB_AETH_CREDIT_MASK 0x1F
@@ -181,4 +187,64 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth)
ib_u64_put(val, &ateth->compare_data);
}
+/*
+ * 9B/IB Packet Format
+ */
+#define IB_LNH_MASK 3
+#define IB_SC_MASK 0xf
+#define IB_SC_SHIFT 12
+#define IB_SL_MASK 0xf
+#define IB_SL_SHIFT 4
+
+static inline u8 ib_get_lnh(struct ib_header *hdr)
+{
+ return (be16_to_cpu(hdr->lrh[0]) & IB_LNH_MASK);
+}
+
+static inline u8 ib_get_sc(struct ib_header *hdr)
+{
+ return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK);
+}
+
+static inline u8 ib_get_sl(struct ib_header *hdr)
+{
+ return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK);
+}
+
+static inline u16 ib_get_dlid(struct ib_header *hdr)
+{
+ return (be16_to_cpu(hdr->lrh[1]));
+}
+
+static inline u16 ib_get_slid(struct ib_header *hdr)
+{
+ return (be16_to_cpu(hdr->lrh[3]));
+}
+
+/*
+ * BTH
+ */
+#define IB_BTH_OPCODE_MASK 0xff
+#define IB_BTH_OPCODE_SHIFT 24
+#define IB_BTH_PAD_MASK 3
+#define IB_BTH_PKEY_MASK 0xffff
+#define IB_BTH_PAD_SHIFT 20
+
+static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
+{
+ return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) &
+ IB_BTH_PAD_MASK);
+}
+
+static inline u16 ib_bth_get_pkey(struct ib_other_headers *ohdr)
+{
+ return (be32_to_cpu(ohdr->bth[0]) & IB_BTH_PKEY_MASK);
+}
+
+static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr)
+{
+ return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_OPCODE_SHIFT) &
+ IB_BTH_OPCODE_MASK);
+}
+
#endif /* IB_HDRS_H */
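
The BTH accessors are shift-and-mask over a host-order copy of the big-endian word. A sketch decoding a hypothetical bth[0] of cpu_to_be32(0x64301234):

    static void example_decode_bth(struct ib_other_headers *ohdr)
    {
        u8 opcode = ib_bth_get_opcode(ohdr);
        u8 pad = ib_bth_get_pad(ohdr);
        u16 pkey = ib_bth_get_pkey(ohdr);

        /* opcode = (0x64301234 >> 24) & 0xff   = 0x64 (UD SEND_ONLY)
         * pad    = (0x64301234 >> 20) & 0x3    = 3
         * pkey   =  0x64301234        & 0xffff = 0x1234
         */
    }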
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 981214b3790c..3d5f5d6031ec 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -262,6 +262,31 @@ struct ib_class_port_info {
__be32 trap_qkey;
};
+struct opa_class_port_info {
+ u8 base_version;
+ u8 class_version;
+ __be16 cap_mask;
+ __be32 cap_mask2_resp_time;
+
+ u8 redirect_gid[16];
+ __be32 redirect_tc_fl;
+ __be32 redirect_lid;
+ __be32 redirect_sl_qp;
+ __be32 redirect_qkey;
+
+ u8 trap_gid[16];
+ __be32 trap_tc_fl;
+ __be32 trap_lid;
+ __be32 trap_hl_qp;
+ __be32 trap_qkey;
+
+ __be16 trap_pkey;
+ __be16 redirect_pkey;
+
+ u8 trap_sl_rsvd;
+ u8 reserved[3];
+} __packed;
+
/**
* ib_get_cpi_resp_time - Returns the resp_time value from
* cap_mask2_resp_time in ib_class_port_info.
@@ -673,7 +698,7 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
* After invoking this routine, MAD services are no longer usable by the
* client on the associated QP.
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
/**
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
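
opa_class_port_info is a fixed wire layout; summing the fields (8 + 16 + 16 + 16 + 16 + 4 + 4) gives 80 bytes, which __packed preserves. A compile-time check of that assumption — an illustrative sketch, not part of the patch:

    static inline void example_check_opa_cpi(void)
    {
        BUILD_BUG_ON(sizeof(struct opa_class_port_info) != 80);
    }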
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index b13419ce99ff..36655899ee02 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -80,6 +80,8 @@ enum {
IB_OPCODE_UD = 0x60,
/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
IB_OPCODE_CNP = 0x80,
+ /* Manufacturer specific */
+ IB_OPCODE_MSP = 0xe0,
/* operations -- just used to define real constants */
IB_OPCODE_SEND_FIRST = 0x00,
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index fd0e53219f93..843b56288ecf 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -56,6 +56,7 @@ enum {
IB_SA_METHOD_GET_TRACE_TBL = 0x13
};
+#define OPA_SA_CLASS_VERSION 0x80
enum {
IB_SA_ATTR_CLASS_PORTINFO = 0x01,
IB_SA_ATTR_NOTICE = 0x02,
@@ -454,14 +455,8 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context,
struct ib_sa_query **sa_query);
-/* Support get SA ClassPortInfo */
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query);
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num);
#endif /* IB_SA_H */
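
The asynchronous ClassPortInfo query is replaced by a synchronous capability test. A hedged sketch of the expected ULP-side use, with IPoIB-style names assumed for illustration:

    if (ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
                                       priv->ca, priv->port))
        rec.join_state = SENDONLY_FULLMEMBER_JOIN;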
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 2d83cfd7e6ce..23159dd5be18 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -44,7 +44,7 @@ struct ib_umem {
struct ib_ucontext *context;
size_t length;
unsigned long address;
- int page_size;
+ int page_shift;
int writable;
int hugetlb;
struct work_struct work;
@@ -60,7 +60,7 @@ struct ib_umem {
/* Returns the offset of the umem start relative to the first page. */
static inline int ib_umem_offset(struct ib_umem *umem)
{
- return umem->address & ((unsigned long)umem->page_size - 1);
+ return umem->address & (BIT(umem->page_shift) - 1);
}
/* Returns the first page of an ODP umem. */
@@ -72,12 +72,12 @@ static inline unsigned long ib_umem_start(struct ib_umem *umem)
/* Returns the address of the page after the last one of an ODP umem. */
static inline unsigned long ib_umem_end(struct ib_umem *umem)
{
- return PAGE_ALIGN(umem->address + umem->length);
+ return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
}
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
- return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+ return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
}
#ifdef CONFIG_INFINIBAND_USER_MEM
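
Storing page_shift instead of page_size turns the offset and count math into shifts and masks and lets a umem use pages larger than PAGE_SIZE. A worked example with illustrative numbers — address 0x201300, length 0x3000, page_shift 12:

    /* ib_umem_offset()    = 0x201300 & 0xfff            = 0x300
     * ib_umem_end()       = ALIGN(0x204300, 0x1000)     = 0x205000
     * ib_umem_num_pages() = (0x205000 - 0x201000) >> 12 = 4
     * with page_shift = 16 (64 KiB pages) the same umem is a single page
     */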
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 542cd8b3414c..fb67554aabd6 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -84,7 +84,8 @@ struct ib_umem_odp {
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
+ int access);
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
unsigned long addr,
size_t size);
@@ -154,7 +155,8 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline int ib_umem_odp_get(struct ib_ucontext *context,
- struct ib_umem *umem)
+ struct ib_umem *umem,
+ int access)
{
return -EINVAL;
}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 43ac7ba75685..9d0d55d844d4 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -55,6 +55,7 @@
#include <net/ip.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/netdevice.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
@@ -225,6 +226,7 @@ enum ib_device_cap_flags {
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
};
enum ib_signature_prot_cap {
@@ -432,7 +434,8 @@ enum ib_port_speed {
IB_SPEED_QDR = 4,
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
- IB_SPEED_EDR = 32
+ IB_SPEED_EDR = 32,
+ IB_SPEED_HDR = 64
};
/**
@@ -499,6 +502,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
/* Address format 0x000FF000 */
#define RDMA_CORE_CAP_AF_IB 0x00001000
#define RDMA_CORE_CAP_ETH_AH 0x00002000
+#define RDMA_CORE_CAP_OPA_AH 0x00004000
/* Protocol 0xFFF00000 */
#define RDMA_CORE_CAP_PROT_IB 0x00100000
@@ -1337,6 +1341,7 @@ enum ib_access_flags {
IB_ACCESS_MW_BIND = (1<<4),
IB_ZERO_BASED = (1<<5),
IB_ACCESS_ON_DEMAND = (1<<6),
+ IB_ACCESS_HUGETLB = (1<<7),
};
/*
@@ -1358,6 +1363,17 @@ struct ib_fmr_attr {
struct ib_umem;
+enum rdma_remove_reason {
+ /* Userspace requested uobject deletion. Call could fail */
+ RDMA_REMOVE_DESTROY,
+ /* Context deletion. This call should delete the actual object itself */
+ RDMA_REMOVE_CLOSE,
+ /* Driver is being hot-unplugged. This call should delete the actual object itself */
+ RDMA_REMOVE_DRIVER_REMOVE,
+ /* Context is being cleaned-up, but commit was just completed */
+ RDMA_REMOVE_DURING_CLEANUP,
+};
+
struct ib_rdmacg_object {
#ifdef CONFIG_CGROUP_RDMA
struct rdma_cgroup *cg; /* owner rdma cgroup */
@@ -1366,19 +1382,16 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
- struct list_head pd_list;
- struct list_head mr_list;
- struct list_head mw_list;
- struct list_head cq_list;
- struct list_head qp_list;
- struct list_head srq_list;
- struct list_head ah_list;
- struct list_head xrcd_list;
- struct list_head rule_list;
- struct list_head wq_list;
- struct list_head rwq_ind_tbl_list;
+ struct ib_uverbs_file *ufile;
int closing;
+	/* protects the uobjects list */
+ struct mutex uobjects_lock;
+ struct list_head uobjects;
+ /* protects cleanup process from other actions */
+ struct rw_semaphore cleanup_rwsem;
+ enum rdma_remove_reason cleanup_reason;
+
struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct rb_root umem_tree;
@@ -1408,9 +1421,16 @@ struct ib_uobject {
struct ib_rdmacg_object cg_obj; /* rdmacg object */
int id; /* index into kernel idr */
struct kref ref;
- struct rw_semaphore mutex; /* protects .live */
+ atomic_t usecnt; /* protects exclusive access */
struct rcu_head rcu; /* kfree_rcu() overhead */
- int live;
+
+ const struct uverbs_obj_type *type;
+};
+
+struct ib_uobject_file {
+ struct ib_uobject uobj;
+ /* ufile contains the lock between context release and file close */
+ struct ib_uverbs_file *ufile;
};
struct ib_udata {
@@ -1663,6 +1683,7 @@ enum ib_flow_spec_type {
IB_FLOW_SPEC_INNER = 0x100,
/* Actions */
IB_FLOW_SPEC_ACTION_TAG = 0x1000,
+ IB_FLOW_SPEC_ACTION_DROP = 0x1001,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@ -1791,6 +1812,11 @@ struct ib_flow_spec_action_tag {
u32 tag_id;
};
+struct ib_flow_spec_action_drop {
+ enum ib_flow_spec_type type;
+ u16 size;
+};
+
union ib_flow_spec {
struct {
u32 type;
@@ -1803,6 +1829,7 @@ union ib_flow_spec {
struct ib_flow_spec_ipv6 ipv6;
struct ib_flow_spec_tunnel tunnel;
struct ib_flow_spec_action_tag flow_tag;
+ struct ib_flow_spec_action_drop drop;
};
struct ib_flow_attr {
@@ -1861,6 +1888,34 @@ struct ib_port_immutable {
u32 max_mad_size;
};
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+ RDMA_NETDEV_OPA_VNIC,
+ RDMA_NETDEV_IPOIB,
+};
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+ void *clnt_priv;
+ struct ib_device *hca;
+ u8 port_num;
+
+ /* control functions */
+ void (*set_id)(struct net_device *netdev, int id);
+ /* send packet */
+ int (*send)(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn);
+ /* multicast */
+ int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *gid, u16 mlid,
+ int set_qkey, u32 qkey);
+ int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *gid, u16 mlid);
+};
+
struct ib_device {
/* Do not access @dma_device directly from ULP nor from HW drivers. */
struct device *dma_device;
@@ -2114,6 +2169,20 @@ struct ib_device {
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+ /**
+ * rdma netdev operations
+ *
+	 * A driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
+ * doesn't support the specified rdma netdev type.
+ */
+ struct net_device *(*alloc_rdma_netdev)(
+ struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+ void (*free_rdma_netdev)(struct net_device *netdev);
struct module *owner;
struct device dev;
@@ -2536,6 +2605,21 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
}
/**
+ * rdma_cap_opa_ah - Check if the port of device supports
+ * OPA Address handles
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Return: true if we are running on an OPA device which supports
+ * the extended OPA addressing.
+ */
+static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
+{
+ return (device->port_immutable[port_num].core_cap_flags &
+ RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
+}
+
+/**
* rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
*
* @device: Device
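
alloc_rdma_netdev hands netdev creation to the device driver so it can size and wire its private data. A minimal sketch of a ULP-side caller, assuming an illustrative example_setup() callback:

    static void example_setup(struct net_device *dev); /* illustrative */

    static struct net_device *example_alloc_vnic(struct ib_device *ibdev,
                                                 u8 port)
    {
        if (!ibdev->alloc_rdma_netdev)
            return ERR_PTR(-EOPNOTSUPP);

        /* driver returns ERR_PTR(-EOPNOTSUPP) for unsupported types */
        return ibdev->alloc_rdma_netdev(ibdev, port, RDMA_NETDEV_OPA_VNIC,
                                        "veth%d", NET_NAME_UNKNOWN,
                                        example_setup);
    }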
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index 9303e0e4f508..b4f0ac02f283 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014-2017 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -127,6 +127,7 @@
#define OPA_LINK_WIDTH_3X 0x0004
#define OPA_LINK_WIDTH_4X 0x0008
+#define OPA_CAP_MASK3_IsEthOnFabricSupported (1 << 13)
#define OPA_CAP_MASK3_IsSnoopSupported (1 << 7)
#define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6)
#define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5)
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
new file mode 100644
index 000000000000..39d6890616a6
--- /dev/null
+++ b/include/rdma/opa_vnic.h
@@ -0,0 +1,141 @@
+#ifndef _OPA_VNIC_H
+#define _OPA_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains Intel Omni-Path (OPA) Virtual Network Interface
+ * Controller (VNIC) specific declarations.
+ */
+
+#include <rdma/ib_verbs.h>
+
+/* VNIC uses 16B header format */
+#define OPA_VNIC_L2_TYPE 0x2
+
+/* 16 header bytes + 2 reserved bytes */
+#define OPA_VNIC_L2_HDR_LEN (16 + 2)
+
+#define OPA_VNIC_L4_HDR_LEN 2
+
+#define OPA_VNIC_HDR_LEN (OPA_VNIC_L2_HDR_LEN + \
+ OPA_VNIC_L4_HDR_LEN)
+
+#define OPA_VNIC_L4_ETHR 0x78
+
+#define OPA_VNIC_ICRC_LEN 4
+#define OPA_VNIC_TAIL_LEN 1
+#define OPA_VNIC_ICRC_TAIL_LEN (OPA_VNIC_ICRC_LEN + OPA_VNIC_TAIL_LEN)
+
+#define OPA_VNIC_SKB_MDATA_LEN 4
+#define OPA_VNIC_SKB_MDATA_ENCAP_ERR 0x1
+
+/* opa vnic rdma netdev's private data structure */
+struct opa_vnic_rdma_netdev {
+ struct rdma_netdev rn; /* keep this first */
+ /* followed by device private data */
+ char *dev_priv[0];
+};
+
+static inline void *opa_vnic_priv(const struct net_device *dev)
+{
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ return rn->clnt_priv;
+}
+
+static inline void *opa_vnic_dev_priv(const struct net_device *dev)
+{
+ struct opa_vnic_rdma_netdev *oparn = netdev_priv(dev);
+
+ return oparn->dev_priv;
+}
+
+/* opa_vnic skb metadata structure */
+struct opa_vnic_skb_mdata {
+ u8 vl;
+ u8 entropy;
+ u8 flags;
+ u8 rsvd;
+} __packed;
+
+/* OPA VNIC group statistics */
+struct opa_vnic_grp_stats {
+ u64 unicast;
+ u64 mcastbcast;
+ u64 untagged;
+ u64 vlan;
+ u64 s_64;
+ u64 s_65_127;
+ u64 s_128_255;
+ u64 s_256_511;
+ u64 s_512_1023;
+ u64 s_1024_1518;
+ u64 s_1519_max;
+};
+
+struct opa_vnic_stats {
+ /* standard netdev statistics */
+ struct rtnl_link_stats64 netstats;
+
+ /* OPA VNIC statistics */
+ struct opa_vnic_grp_stats tx_grp;
+ struct opa_vnic_grp_stats rx_grp;
+ u64 tx_dlid_zero;
+ u64 tx_drop_state;
+ u64 rx_drop_state;
+ u64 rx_runt;
+ u64 rx_oversize;
+};
+
+static inline bool rdma_cap_opa_vnic(struct ib_device *device)
+{
+ return !!(device->attrs.device_cap_flags &
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC);
+}
+
+#endif /* _OPA_VNIC_H */
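
Keeping struct rdma_netdev first means netdev_priv() serves both the core view (opa_vnic_priv) and the driver view (opa_vnic_dev_priv). A sketch of how a driver might size the allocation, with an invented example_drv_priv:

    struct example_drv_priv { int state; }; /* invented for illustration */

    static struct net_device *example_alloc(void)
    {
        size_t sz = sizeof(struct opa_vnic_rdma_netdev) +
                    sizeof(struct example_drv_priv);

        /* opa_vnic_dev_priv() will then point at example_drv_priv */
        return alloc_netdev(sz, "veth%d", NET_NAME_UNKNOWN, ether_setup);
    }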
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 8fc1ca7b6f23..7f8956893526 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -516,6 +516,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
u32 len, u64 vaddr, u32 rkey, int acc);
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
struct rvt_sge *isge, struct ib_sge *sge, int acc);
-struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid);
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
+ u16 lid);
#endif /* DEF_RDMA_VT_H */
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index f3816396c76a..75a077c59742 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
#define DEF_RDMAVT_INCQP_H
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -435,9 +435,14 @@ struct rvt_mcast_qp {
struct rvt_qp *qp;
};
+struct rvt_mcast_addr {
+ union ib_gid mgid;
+ u16 lid;
+};
+
struct rvt_mcast {
struct rb_node rb_node;
- union ib_gid mgid;
+ struct rvt_mcast_addr mcast_addr;
struct list_head qp_list;
wait_queue_head_t wait;
atomic_t refcount;
@@ -526,7 +531,6 @@ static inline void rvt_qp_wqe_reserve(
struct rvt_qp *qp,
struct rvt_swqe *wqe)
{
- wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
atomic_inc(&qp->s_reserved_used);
}
@@ -550,7 +554,6 @@ static inline void rvt_qp_wqe_unreserve(
struct rvt_swqe *wqe)
{
if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
- wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
atomic_dec(&qp->s_reserved_used);
/* insure no compiler re-order up to s_last change */
smp_mb__after_atomic();
@@ -574,6 +577,7 @@ extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
static inline void rvt_qp_swqe_complete(
struct rvt_qp *qp,
struct rvt_swqe *wqe,
+ enum ib_wc_opcode opcode,
enum ib_wc_status status)
{
if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
@@ -586,7 +590,7 @@ static inline void rvt_qp_swqe_complete(
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
- wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode];
+ wc.opcode = opcode;
wc.qp = &qp->ibqp;
wc.byte_len = wqe->length;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
new file mode 100644
index 000000000000..7771ce966952
--- /dev/null
+++ b/include/rdma/uverbs_std_types.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UVERBS_STD_TYPES__
+#define _UVERBS_STD_TYPES__
+
+#include <rdma/uverbs_types.h>
+
+extern const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_wq;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_srq;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_ah;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_flow;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_mr;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_mw;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_pd;
+extern const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd;
+
+static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
+ bool write,
+ struct ib_ucontext *ucontext,
+ int id)
+{
+ return rdma_lookup_get_uobject(type, ucontext, id, write);
+}
+
+#define uobj_get_type(_type) uverbs_type_attrs_##_type.type
+
+#define uobj_get_read(_type, _id, _ucontext) \
+ __uobj_get(&(_type), false, _ucontext, _id)
+
+#define uobj_get_obj_read(_type, _id, _ucontext) \
+({ \
+ struct ib_uobject *uobj = \
+ __uobj_get(&uobj_get_type(_type), \
+ false, _ucontext, _id); \
+ \
+ (struct ib_##_type *)(IS_ERR(uobj) ? NULL : uobj->object); \
+})
+
+#define uobj_get_write(_type, _id, _ucontext) \
+ __uobj_get(&(_type), true, _ucontext, _id)
+
+static inline void uobj_put_read(struct ib_uobject *uobj)
+{
+ rdma_lookup_put_uobject(uobj, false);
+}
+
+#define uobj_put_obj_read(_obj) \
+ uobj_put_read((_obj)->uobject)
+
+static inline void uobj_put_write(struct ib_uobject *uobj)
+{
+ rdma_lookup_put_uobject(uobj, true);
+}
+
+static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj)
+{
+ return rdma_remove_commit_uobject(uobj);
+}
+
+static inline void uobj_alloc_commit(struct ib_uobject *uobj)
+{
+ rdma_alloc_commit_uobject(uobj);
+}
+
+static inline void uobj_alloc_abort(struct ib_uobject *uobj)
+{
+ rdma_alloc_abort_uobject(uobj);
+}
+
+static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ return rdma_alloc_begin_uobject(type, ucontext);
+}
+
+#define uobj_alloc(_type, ucontext) \
+ __uobj_alloc(&(_type), ucontext)
+
+#endif
+
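
These helpers replace the old idr_read_*/put_* pattern in uverbs handlers. A sketch of the expected calling convention (cmd.cq_handle is illustrative):

    struct ib_cq *cq = uobj_get_obj_read(cq, cmd.cq_handle, ucontext);

    if (!cq)
        return -EINVAL;
    /* ... use cq under the shared (read) lock ... */
    uobj_put_obj_read(cq);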
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
new file mode 100644
index 000000000000..351ea185df44
--- /dev/null
+++ b/include/rdma/uverbs_types.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UVERBS_TYPES_
+#define _UVERBS_TYPES_
+
+#include <linux/kernel.h>
+#include <rdma/ib_verbs.h>
+
+struct uverbs_obj_type;
+
+struct uverbs_obj_type_class {
+ /*
+	 * Get an ib_uobject that corresponds to the given id from ucontext.
+ * These functions could create or destroy objects if required.
+ * The action will be finalized only when commit, abort or put fops are
+ * called.
+ * The flow of the different actions is:
+	 * [alloc]:	Starts with alloc_begin. The handler's logic is then
+	 *		executed. If the handler is successful, alloc_commit
+	 *		is called and the object is inserted into the repository.
+ * Once alloc_commit completes the object is visible to
+ * other threads and userspace.
+	 *		Otherwise, alloc_abort is called and the object is
+ * destroyed.
+ * [lookup]: Starts with lookup_get which fetches and locks the
+	 *		object. After the handler has finished using the object, it
+ * needs to call lookup_put to unlock it. The exclusive
+ * flag indicates if the object is locked for exclusive
+ * access.
+ * [remove]: Starts with lookup_get with exclusive flag set. This
+ * locks the object for exclusive access. If the handler
+ * code completed successfully, remove_commit is called
+ * and the ib_uobject is removed from the context's
+ * uobjects repository and put. The object itself is
+	 *		destroyed as well. Once remove succeeds, new krefs to
+ * the object cannot be acquired by other threads or
+ * userspace and the hardware driver is removed from the
+ * object. Other krefs on the object may still exist.
+ * If the handler code failed, lookup_put should be
+ * called. This callback is used when the context
+ * is destroyed as well (process termination,
+ * reset flow).
+ */
+ struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext);
+ void (*alloc_commit)(struct ib_uobject *uobj);
+ void (*alloc_abort)(struct ib_uobject *uobj);
+
+ struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext, int id,
+ bool exclusive);
+ void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
+ /*
+	 * Must be called with the exclusive lock held. If successful, uobj is
+	 * invalid on return. On failure, the uobject is left completely
+	 * unchanged.
+ */
+ int __must_check (*remove_commit)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why);
+ u8 needs_kfree_rcu;
+};
+
+struct uverbs_obj_type {
+ const struct uverbs_obj_type_class * const type_class;
+ size_t obj_size;
+ unsigned int destroy_order;
+};
+
+/*
+ * Object type classes which support a detach state (the object is still alive
+ * but not attached to any context) need to make sure:
+ * (a) no call through to a driver after a detach is called
+ * (b) detach isn't called concurrently with context_cleanup
+ */
+
+struct uverbs_obj_idr_type {
+ /*
+	 * In idr based objects, uverbs_obj_type_class points to generic
+	 * idr operations. In order to specialize the underlying types (e.g. CQ,
+ * QPs, etc.), we add destroy_object specific callbacks.
+ */
+ struct uverbs_obj_type type;
+
+ /* Free driver resources from the uobject, make the driver uncallable,
+ * and move the uobject to the detached state. If the object was
+ * destroyed by the user's request, a failure should leave the uobject
+ * completely unchanged.
+ */
+ int __must_check (*destroy_object)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why);
+};
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive);
+void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext);
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
+int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
+int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
+
+struct uverbs_obj_fd_type {
+ /*
+	 * In fd based objects, uverbs_obj_type_class points to generic
+ * fd operations. In order to specialize the underlying types (e.g.
+ * completion_channel), we use fops, name and flags for fd creation.
+ * context_closed is called when the context is closed either when
+ * the driver is removed or the process terminated.
+ */
+ struct uverbs_obj_type type;
+ int (*context_closed)(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why);
+ const struct file_operations *fops;
+ const char *name;
+ int flags;
+};
+
+extern const struct uverbs_obj_type_class uverbs_idr_class;
+extern const struct uverbs_obj_type_class uverbs_fd_class;
+
+#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
+ sizeof(char))
+#define UVERBS_TYPE_ALLOC_FD(_size, _order) \
+ { \
+ .destroy_order = _order, \
+ .type_class = &uverbs_fd_class, \
+ .obj_size = (_size) + \
+ UVERBS_BUILD_BUG_ON((_size) < \
+ sizeof(struct ib_uobject_file)),\
+ }
+#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order) \
+ { \
+ .destroy_order = _order, \
+ .type_class = &uverbs_idr_class, \
+ .obj_size = (_size) + \
+ UVERBS_BUILD_BUG_ON((_size) < \
+ sizeof(struct ib_uobject)), \
+ }
+#define UVERBS_TYPE_ALLOC_IDR(_order) \
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order)
+#endif
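
A type is declared by pairing one of the ALLOC macros with a destroy callback. A sketch of how uverbs_std_types.c is expected to instantiate the CQ type (callback name assumed):

    const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
        .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
        .destroy_object = uverbs_free_cq, /* driver-destroy callback */
    };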
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index fff521c9458c..d56bb0051009 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -630,6 +630,7 @@
#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */
#define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */
#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */
+#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */
#define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */
#define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */
#define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */
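
The new bit lives in Device Control 2 and is set with the standard capability accessors. An illustrative sketch:

    /* block AtomicOp egress on a downstream port */
    pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
                             PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK);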
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 8edce2b65903..270c350bedc6 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -947,6 +947,17 @@ struct ib_uverbs_flow_spec_action_tag {
__u32 reserved1;
};
+struct ib_uverbs_flow_spec_action_drop {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+};
+
struct ib_uverbs_flow_tunnel_filter {
__be32 tunnel_id;
};
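
The drop action spec carries no payload beyond the common header. A provider-side sketch of filling it, using the kernel enum value added in ib_verbs.h above:

    struct ib_uverbs_flow_spec_action_drop drop = {
        .type = IB_FLOW_SPEC_ACTION_DROP,
        .size = sizeof(drop),
    };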
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
index 5016abc9ee97..c8c1d2d6df4d 100644
--- a/include/uapi/rdma/vmw_pvrdma-abi.h
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -222,7 +222,7 @@ struct pvrdma_sq_wqe_hdr {
__u32 opcode; /* operation type */
__u32 send_flags; /* wr flags */
union {
- __u32 imm_data;
+ __be32 imm_data;
__u32 invalidate_rkey;
} ex;
__u32 reserved;
@@ -273,7 +273,7 @@ struct pvrdma_cqe {
__u32 opcode;
__u32 status;
__u32 byte_len;
- __u32 imm_data;
+ __be32 imm_data;
__u32 src_qp;
__u32 wc_flags;
__u32 vendor_err;
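
Immediate data is opaque network-byte-order payload end to end; typing it __be32 lets sparse catch missing conversions at the boundaries. A sketch of the conversions at each side (variable names illustrative):

    wqe_hdr->ex.imm_data = cpu_to_be32(host_imm);  /* post side */
    host_imm = be32_to_cpu(cqe->imm_data);         /* completion side */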