diff options
author | Ilias Apalodimas <ilias.apalodimas@linaro.org> | 2017-10-19 12:08:20 +0300 |
---|---|---|
committer | Ilias Apalodimas <ilias.apalodimas@linaro.org> | 2017-10-19 12:08:20 +0300 |
commit | 57056106c9bf5defc74cf6535e2b89cccab9e698 (patch) | |
tree | 81718497caaf170150474c203af825eacfff9dc8 | |
parent | 68ecc9dd83383ef3554eadc07ead27f653028610 (diff) |
Updated to work with the kernel tree at https://github.com/apalos/linux/tree/kernel_410
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
-rw-r--r-- | README | 18 | ||||
-rw-r--r-- | api/vfio_api.c | 2 | ||||
-rw-r--r-- | patches/linux-r8169.patch | 723 | ||||
-rwxr-xr-x | run.sh | 4 | ||||
-rw-r--r-- | src/userspace_io.c | 14 |
5 files changed, 25 insertions, 736 deletions
@@ -1,7 +1,19 @@ + # Build -- apply kernel patch and recompile, should support every upstream kernel > - 4.10.x -- make +- Download and compile kernel from https://github.com/apalos/linux/tree/kernel_410 +- Make sure you include these kernel options: +CONFIG_VFIO_IOMMU_TYPE1=m +CONFIG_VFIO_VIRQFD=m +CONFIG_VFIO=m +CONFIG_VFIO_NOIOMMU=y +CONFIG_VFIO_PCI=m +CONFIG_VFIO_PCI_VGA=y +CONFIG_VFIO_PCI_MMAP=y +CONFIG_VFIO_PCI_INTX=y +CONFIG_VFIO_PCI_IGD=y +CONFIG_VFIO_MDEV=m +CONFIG_VFIO_MDEV_DEVICE=m +CONFIG_VFIO_MDEV_NET_DEVICE=m # run - ./run.sh create diff --git a/api/vfio_api.c b/api/vfio_api.c index a75f413..4c1270b 100644 --- a/api/vfio_api.c +++ b/api/vfio_api.c @@ -218,7 +218,7 @@ int vfio_init_dev(int grp, int container, struct vfio_group_status *grp_status, struct vfio_device_info *dev_info, char *grp_uuid) { int ret; - int device; + int device = -1; ret = ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU); if (!ret) { diff --git a/patches/linux-r8169.patch b/patches/linux-r8169.patch deleted file mode 100644 index 4894c74..0000000 --- a/patches/linux-r8169.patch +++ /dev/null @@ -1,723 +0,0 @@ -diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c -index 8f1623b..2f9cc53 100644 ---- a/drivers/net/ethernet/realtek/r8169.c -+++ b/drivers/net/ethernet/realtek/r8169.c -@@ -33,6 +33,12 @@ - #include <asm/io.h> - #include <asm/irq.h> - -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+#include <linux/mm.h> -+#include <linux/vfio.h> -+#include <linux/mdev.h> -+#endif -+ - #define RTL8169_VERSION "2.3LK-NAPI" - #define MODULENAME "r8169" - #define PFX MODULENAME ": " -@@ -7141,6 +7147,9 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, - u32 opts[2]; - int frags; - -+ if (dev->priv_flags & IFF_VFNETDEV) -+ goto vf_netdev_ok; -+ - if (unlikely(!TX_FRAGS_READY_FOR(tp, skb_shinfo(skb)->nr_frags))) { - netif_err(tp, drv, dev, "BUG! 
Tx Ring full when queue awake!\n"); - goto err_stop_0; -@@ -7218,6 +7227,9 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, - - return NETDEV_TX_OK; - -+vf_netdev_ok: -+ dev_kfree_skb_any(skb); -+ return NETDEV_TX_OK; - err_dma_1: - rtl8169_unmap_tx_skb(d, tp->tx_skb + entry, txd); - err_dma_0: -@@ -7383,6 +7395,11 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget - unsigned int cur_rx, rx_left; - unsigned int count; - -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+ if (dev->priv_flags & IFF_VFNETDEV) -+ return budget ; -+#endif -+ - cur_rx = tp->cur_rx; - - for (rx_left = min(budget, NUM_RX_DESC); rx_left > 0; rx_left--, cur_rx++) { -@@ -7567,6 +7584,11 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) - int work_done= 0; - u16 status; - -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+ if (dev->priv_flags & IFF_VFNETDEV) -+ return budget; -+#endif -+ - status = rtl_get_events(tp); - rtl_ack_events(tp, status & ~tp->event_slow); - -@@ -7690,11 +7712,19 @@ static int rtl_open(struct net_device *dev) - if (!tp->TxDescArray) - goto err_pm_runtime_put; - -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+ printk(KERN_INFO"TxDescArray @%p (%llx), virt_tophys=%llx\n", -+ tp->TxDescArray, tp->TxPhyAddr, virt_to_phys(tp->TxDescArray)); -+#endif - tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES, - &tp->RxPhyAddr, GFP_KERNEL); - if (!tp->RxDescArray) - goto err_free_tx_0; - -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+ printk(KERN_INFO"RxDescArray KVA(@%p) -> PA(%llx) <- IOVA(%llx)\n", -+ tp->RxDescArray, virt_to_phys(tp->RxDescArray), tp->RxPhyAddr); -+#endif - retval = rtl8169_init_ring(dev); - if (retval < 0) - goto err_free_rx_1; -@@ -8000,6 +8030,10 @@ static void rtl_remove_one(struct pci_dev *pdev) - struct net_device *dev = 
pci_get_drvdata(pdev); - struct rtl8169_private *tp = netdev_priv(dev); - -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+ mdev_unregister_device(&pdev->dev); -+#endif -+ - if ((tp->mac_version == RTL_GIGA_MAC_VER_27 || - tp->mac_version == RTL_GIGA_MAC_VER_28 || - tp->mac_version == RTL_GIGA_MAC_VER_31 || -@@ -8183,6 +8217,605 @@ static void rtl_hw_initialize(struct rtl8169_private *tp) - } - } - -+ -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+struct iovamap { -+ u64 iova; -+ void *vaddr; -+ struct device *dev; -+ u32 size; /* maximum of 32MB */ -+ enum dma_data_direction direction; /* DMA_FROM_DEVICE... */ -+}; -+ -+typedef struct netmdev { -+ union { -+ char page0[4096]; -+ struct { -+ struct net_device *netdev; -+ /* FIXME USE A LINKED LIST */ -+ int mappings_count; -+ struct iovamap mappings[128]; /* 3.5KB */ -+ }; -+ }; -+ union { -+ /* shadow features & statistics page */ -+ char page1[4096]; -+ struct { -+ netdev_features_t features; -+ netdev_features_t hw_features; -+ netdev_features_t wanted_features; -+ netdev_features_t vlan_features; -+ netdev_features_t hw_enc_features; -+ netdev_features_t mpls_features; -+ netdev_features_t gso_partial_features; -+ struct net_device_stats stats; -+ atomic_long_t rx_dropped; -+ atomic_long_t tx_dropped; -+ atomic_long_t rx_nohandler; -+ }; -+ }; -+} netmdev; -+ -+/* -+SYSFS structure for the controlling device -+*/ -+ -+static ssize_t available_instances_show(struct kobject *kobj, struct device *dev, -+ char *buf) -+{ -+ return scnprintf(buf, PAGE_SIZE, "%d\n", 1); -+} -+static MDEV_TYPE_ATTR_RO(available_instances); -+ -+static ssize_t device_api_show(struct kobject *kobj, struct device *dev, -+ char *buf) -+{ -+ return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); -+} -+static MDEV_TYPE_ATTR_RO(device_api); -+ -+static struct attribute *sysfs_vfnetdev_attributes[] = { -+ &mdev_type_attr_available_instances.attr, -+ 
&mdev_type_attr_device_api.attr, -+ NULL, -+}; -+ -+static struct attribute_group sysfs_vfnetdev_type = { -+ .name = "vfnetdev", -+ .attrs = sysfs_vfnetdev_attributes, -+}; -+ -+/* Only 1 supported for now */ -+static struct attribute_group *sysfs_type_list[] = { -+ &sysfs_vfnetdev_type, -+ NULL -+}; -+ -+/* -+ * libraries -+ */ -+static struct net_device *netmdev_get_netdev(struct mdev_device *mdev) -+{ -+ struct netmdev *netmdev; -+ -+ netmdev = mdev_get_drvdata(mdev); -+ if (!netmdev) -+ return NULL; -+ -+ return netmdev->netdev; -+} -+ -+static void r8169_pause_datapath(struct net_device *netdev) -+{ -+ void __iomem *ioaddr; -+ struct rtl8169_private *tp; -+ -+ if (!netdev) -+ return; -+ tp = netdev_priv(netdev); -+ if (!tp) -+ return; -+ -+ ioaddr = tp->mmio_addr; -+ RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb)); -+} -+ -+static void r8169_resume_datapath(struct net_device *netdev) -+{ -+ void __iomem *ioaddr; -+ struct rtl8169_private *tp; -+ -+ if (!netdev) -+ return; -+ -+ tp = netdev_priv(netdev); -+ -+ if (!tp) -+ return; -+ -+ ioaddr = tp->mmio_addr; -+ RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); -+} -+ -+/* helper macros copied from vfio-pci */ -+#define VFIO_PCI_OFFSET_SHIFT 40 -+#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) -+#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) -+#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) -+static int r8169_get_region(struct net_device *netdev, struct vfio_region_info *info) -+{ -+ struct rtl8169_private *tp; -+ struct pci_dev *pdev; -+ -+ if (!netdev) -+ return -EINVAL; -+ -+ tp = netdev_priv(netdev); -+ if (!tp) -+ return -EFAULT; -+ -+ pdev = tp->pci_dev; -+ -+ switch (info->index) { -+ case VFIO_PCI_CONFIG_REGION_INDEX: -+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); -+ info->size = pdev->cfg_size; -+ info->flags = VFIO_REGION_INFO_FLAG_READ | -+ VFIO_REGION_INFO_FLAG_WRITE; -+ break; -+ case VFIO_PCI_BAR0_REGION_INDEX ... 
VFIO_PCI_BAR5_REGION_INDEX: -+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); -+ info->size = pci_resource_len(pdev, info->index); -+ if (!info->size) { -+ info->flags = 0; -+ break; -+ } -+ -+ info->flags = VFIO_REGION_INFO_FLAG_READ | -+ VFIO_REGION_INFO_FLAG_WRITE; -+ /* FIXME not all BARS are/should be mappable */ -+ info->flags |= VFIO_REGION_INFO_FLAG_MMAP; -+#if 0 -+ if (vdev->bar_mmap_supported[info->index]) { -+ info.flags |= VFIO_REGION_INFO_FLAG_MMAP; -+ if (info->index == vdev->msix_bar) { -+ ret = msix_sparse_mmap_cap(vdev, &caps); -+ if (ret) -+ return ret; -+ } -+ } -+#endif -+ break; -+ case VFIO_PCI_NUM_REGIONS + 1: -+ case VFIO_PCI_NUM_REGIONS + 2: -+ if (info->index == VFIO_PCI_NUM_REGIONS + 1) { -+ info->offset = (__u64)(tp->RxDescArray); -+ info->size = R8169_RX_RING_BYTES; -+ } -+ else if (info->index == VFIO_PCI_NUM_REGIONS + 2) { -+ info->offset = (__u64)(tp->TxDescArray); -+ info->size = R8169_TX_RING_BYTES; -+ } -+ else return -EINVAL; -+ -+ info->flags = VFIO_REGION_INFO_FLAG_MMAP; -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ return 0; -+ -+} -+ -+/* -+ * SYSFS structure for created mdevices -+ */ -+static ssize_t netdev_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ struct mdev_device *mdev; -+ struct net_device *netdev; -+ -+ mdev = mdev_from_dev(dev); -+ if (!mdev) -+ return scnprintf(buf, PAGE_SIZE, "mdev not found\n"); -+ -+ netdev = netmdev_get_netdev(mdev); -+ if (!netdev) -+ return scnprintf(buf, PAGE_SIZE, "ndev-mdev not found\n"); -+ -+ return scnprintf(buf, PAGE_SIZE, "%.16s\n", netdev->name); -+} -+ -+static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct mdev_device *mdev; -+ struct net_device *port; -+ struct netmdev *netmdev; -+ char name[IFNAMSIZ+1]; -+ -+ if (count < 2) -+ return -EINVAL; -+ -+ mdev = mdev_from_dev(dev); -+ if (!mdev) -+ return -ENODEV; -+ -+ netmdev = mdev_get_drvdata(mdev); -+ if (netmdev) 
-+ return -ENODEV; -+ -+ netmdev = kzalloc(sizeof(*netmdev), GFP_KERNEL); -+ if (!netmdev) -+ return -ENOMEM; -+ mdev_set_drvdata(mdev, netmdev); -+ -+ if (count > IFNAMSIZ) -+ return -ENODEV; -+ -+ memset(name, 0, sizeof(name)); -+ scnprintf(name, IFNAMSIZ + 1, "%.*s", (int)count - 1, buf); -+ port = dev_get_by_name(&init_net, name); -+ if (!port) -+ return -ENODEV; -+ -+ /* FIXME find a way to check if this is the parent device */ -+ //if (&port->dev != mdev_parent_dev(mdev)) return -1; -+ -+ netmdev->netdev = port; -+ -+ return count; -+} -+ -+static DEVICE_ATTR_RW(netdev); -+static struct attribute *sysfs_mdev_vfnetdev_attributes[] = { -+ &dev_attr_netdev.attr, -+ NULL, -+}; -+ -+static struct attribute_group sysfs_mdev_vfnetdev_group = { -+ .name = "vfnetdev", -+ .attrs = sysfs_mdev_vfnetdev_attributes, -+}; -+ -+static const struct attribute_group *sysfs_mdev_groups[] = { -+ &sysfs_mdev_vfnetdev_group, -+ NULL, -+}; -+ -+ -+static int vf_netdev_create(struct kobject *kobj, struct mdev_device *mdev) -+{ -+ return 0; -+} -+ -+static int vf_netdev_remove(struct mdev_device *mdev) -+{ -+ struct netmdev *netmdev = mdev_get_drvdata(mdev); -+ struct net_device *port; -+ -+ printk(KERN_INFO"%s %d\n", __func__, __LINE__); -+ port = netmdev_get_netdev(mdev); -+ dev_put(port); -+ kfree(netmdev); -+ mdev_set_drvdata(mdev, NULL); -+ -+ return 0; -+} -+ -+static int vf_netdev_open(struct mdev_device *mdev) -+{ -+ //struct netmdev *netmdev = mdev_get_drvdata(mdev); -+ struct net_device *port; -+ -+ printk(KERN_INFO"%s %d\n", __func__, __LINE__); -+ /* TODO shadow stats to netmdev */ -+ port = netmdev_get_netdev(mdev); -+ r8169_pause_datapath(port); -+ /* barrier required? 
*/ -+ port->priv_flags |= IFF_VFNETDEV; -+ /* deallocate kernel buffers from ring */ -+ rtl8169_rx_clear(netdev_priv(port)); -+ -+ return 0; -+} -+ -+static void vf_netdev_release(struct mdev_device *mdev) -+{ -+ struct netmdev *nd = mdev_get_drvdata(mdev); -+ struct net_device *port; -+ int i; -+ int maps; -+ struct rtl8169_private *tp; -+ -+ if (!nd) -+ return; -+ -+ -+ -+ /* TODO export shadow stats to net_device */ -+ port = netmdev_get_netdev(mdev); -+ if (!port) -+ return; -+ -+ tp = netdev_priv(port); -+ /* replenish the rings with kernel buffers */ -+ /* barrier required? */ -+ //rtl_rx_close(tp); -+ -+ for (i = 0; i < NUM_RX_DESC; i++) { -+ rtl8169_make_unusable_by_asic(tp->RxDescArray + i); -+ } -+ -+ maps = nd->mappings_count; -+ -+ for (i = 0; i < maps; i++) { -+ dma_unmap_single(nd->mappings[i].dev, -+ nd->mappings[i].iova, nd->mappings[i].size, -+ nd->mappings[i].direction); -+ kfree(nd->mappings[i].vaddr); -+ nd->mappings_count--; -+ } -+ -+ rtl8169_rx_fill(tp); -+ r8169_resume_datapath(port); -+ rtl_reset_work(tp); -+ port->priv_flags &= ~IFF_VFNETDEV; -+ -+ return; -+} -+ -+static long vf_netdev_ioctl(struct mdev_device *mdev, unsigned int cmd, -+ unsigned long arg) -+{ -+ unsigned long minsz; -+ struct net_device *netdev; -+ struct netmdev *netmdev; -+ -+ if (!mdev) -+ return -EINVAL; -+ -+ netdev = netmdev_get_netdev(mdev); -+ netmdev = mdev_get_drvdata(mdev); -+ -+ if (!netdev || !netmdev) -+ return -ENODEV; -+ -+ switch (cmd) { -+ case VFIO_DEVICE_GET_INFO: -+ { -+ struct vfio_device_info info; -+ -+ minsz = offsetofend(struct vfio_device_info, num_irqs); -+ if (copy_from_user(&info, (void __user *)arg, minsz)) -+ return -EFAULT; -+ -+ if (info.argsz < minsz) -+ return -EINVAL; -+ -+ info.flags = VFIO_DEVICE_FLAGS_PCI; -+ /* -+ * FIXME - find the number of rx queues when not having -+ * CONFIG_SYSFS if not possible to do it in a generic way, plan -+ * for a callback -+ */ -+ /* rx_ring and tx_ring */ -+ info.num_regions = VFIO_PCI_NUM_REGIONS + 
netdev->num_tx_queues + 1; -+ info.num_irqs = 1; -+ -+ if (copy_to_user((void __user *)arg, &info, minsz)) -+ return -EFAULT; -+ -+ return 0; -+ } -+ case VFIO_DEVICE_GET_REGION_INFO: -+ { -+ struct vfio_region_info info; -+ int ret; -+ -+ minsz = offsetofend(struct vfio_region_info, offset); -+ -+ if (copy_from_user(&info, (void __user *)arg, minsz)) -+ return -EFAULT; -+ -+ if (info.argsz < minsz) -+ return -EINVAL; -+ -+ ret = r8169_get_region(netdev, &info); -+ -+ if (ret < 0) return ret; -+ -+ if (copy_to_user((void __user *)arg, &info, minsz)) -+ return -EFAULT; -+ -+ return 0; -+ } -+ case VFIO_IOMMU_MAP_DMA: -+ { -+ struct vfio_iommu_type1_dma_map map; -+ struct vm_area_struct *vma; -+ void *data; -+ struct device *parent_dev; -+ int node; -+ dma_addr_t mapping; -+ int ret = -EINVAL; -+ -+ /* allocate DMA area and map it where the userland asks -+ * userland need to mmap an area WITHOUT allocating pages: -+ * mmap(vaddr,size, PROT_READ | PROT_WRITE, MAP_SHARED | -+ * MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, -1, 0 -+ * MAP_NORESERVE ensures only VA space is booked, no pages are -+ * mapped * the mapping must be the entire area, not partial on -+ * the vma -+ */ -+ -+ if (netmdev->mappings_count >= 128) -+ return -EFAULT; -+ -+ minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); -+ -+ if (copy_from_user(&map, (void __user *)arg, minsz)) { -+ ret = -EFAULT; -+ goto out; -+ } -+ -+ if (map.argsz < minsz) -+ goto out; -+ -+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: find_vma(%llx)\n", map.vaddr); -+ /* -+ * locates the containing vma for the required map.vaddr -+ * the vma must point to the entire zone allocated by mmap in -+ * userland -+ */ -+ vma = find_vma(current->mm, map.vaddr); -+ if (!vma) -+ return -EFAULT; -+ if (map.vaddr >= vma->vm_end) -+ return -EFAULT; -+ -+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: found vma(%llx) -> start=%lx end=%lx pg_off=%lx\n", -+ map.vaddr, vma->vm_start, vma->vm_end, vma->vm_pgoff); -+ /* the iova will be returned as part 
of the ioctl to the userland */ -+ //parent_dev = &tp->pci_dev->dev; -+ parent_dev = mdev_parent_dev(mdev); -+ -+ node = netdev->dev.parent ? dev_to_node(netdev->dev.parent) : -1; -+ data = kmalloc_node(map.size, GFP_KERNEL, node); -+ if (!data) -+ /* return ret? */ -+ return -ENOMEM; -+ -+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: about to dma_map_single(%p, %p, %lld, DMA_FROM_DEVICE)\n", -+ parent_dev, data, map.size); -+ mapping = dma_map_single(parent_dev, data, map.size, -+ DMA_BIDIRECTIONAL); -+ if (unlikely(dma_mapping_error(parent_dev, mapping))) { -+ if (net_ratelimit()) -+ printk(KERN_ERR"Failed to dma_map_single buffer for userland!\n"); -+ kfree(data); -+ goto out; -+ } -+ map.iova = mapping; -+ ret = io_remap_pfn_range(vma, map.vaddr, -+ virt_to_phys(data) >> PAGE_SHIFT, -+ map.size, vma->vm_page_prot); -+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: io_remap_pfn_range %llx -> physmem <- @%llx, %lld:%d\n", -+ map.vaddr, map.iova, map.size, ret); -+ if (ret != 0) { -+ dma_unmap_single(parent_dev, mapping, map.size, -+ DMA_BIDIRECTIONAL); -+ kfree(data); -+ printk(KERN_ERR"VFIO_IOMMU_MAP_DMA: io_remap_pfn_range failed\n"); -+ return -EFAULT; -+ } -+ -+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: recording the mapping %d\n", -+ netmdev->mappings_count); -+ netmdev->mappings[netmdev->mappings_count].dev = parent_dev; -+ netmdev->mappings[netmdev->mappings_count].vaddr = data; -+ netmdev->mappings[netmdev->mappings_count].iova = mapping; -+ netmdev->mappings[netmdev->mappings_count].size = map.size; -+ netmdev->mappings_count++; -+ -+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: preparing response back to user\n"); -+ if (copy_to_user((void __user *)arg, &map, minsz)) -+ return -EFAULT; -+ -+ ret = 0; -+out: -+ return ret; -+ } -+ -+ case 500: { -+ r8169_resume_datapath(netdev); -+ return 0; -+ } -+ -+ } /* switch */ -+ -+ return -EINVAL; -+ -+} -+ -+static int vf_netdev_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) -+{ -+ struct net_device *netdev; -+ struct 
rtl8169_private *tp; -+ u64 req_len; -+ int ret = 0; -+ /* userland wants to access ring descrptors that was pre-allocated -+ * by the kernel -+ * note: userland need to user IOCTL MAP to CREATE packet buffers -+ */ -+ netdev = netmdev_get_netdev(mdev); -+ tp = netdev_priv(netdev); -+ -+ /* check that we try to map only authorized areas -+ * FIXME is there a way to check all the transmit and receive rings -+ * from an abstract netdev? -+ */ -+ /* Remap Rx/Tx descriptors */ -+ if (vma->vm_pgoff == ((__u64)tp->RxDescArray >> PAGE_SHIFT) || -+ vma->vm_pgoff == ((__u64)tp->TxDescArray >> PAGE_SHIFT)) { -+ -+ req_len = PAGE_ALIGN(vma->vm_end - vma->vm_start); -+ -+ vma->vm_private_data = NULL; -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ ret = remap_pfn_range(vma, vma->vm_start, -+ virt_to_phys((void *)(vma->vm_pgoff << PAGE_SHIFT)) >> PAGE_SHIFT, -+ req_len, vma->vm_page_prot); -+ -+ printk(KERN_INFO"vfnetdev_map %lx, @%llx, %lld:%d\n", -+ vma->vm_start, virt_to_phys((void *)(vma->vm_pgoff << PAGE_SHIFT)), -+ req_len, ret); -+ } else { -+ struct pci_dev *pdev = tp->pci_dev; -+ unsigned int index; -+ u64 phys_len, pgoff, req_start; -+ -+ index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); -+ phys_len = PAGE_ALIGN(pci_resource_len(pdev, index)); -+ req_len = vma->vm_end - vma->vm_start; -+ pgoff = vma->vm_pgoff & -+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); -+ req_start = pgoff << PAGE_SHIFT; -+ -+ if (req_start + req_len > phys_len) -+ return -EINVAL; -+ -+ vma->vm_private_data = NULL; -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; -+ -+ ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, -+ req_len, vma->vm_page_prot); -+ } -+ -+ return ret; -+} -+ -+static const struct mdev_parent_ops vf_netdev_ops = { -+ .supported_type_groups = sysfs_type_list, -+ .mdev_attr_groups = sysfs_mdev_groups, -+ .create = vf_netdev_create, -+ .remove = 
vf_netdev_remove, -+ -+ .open = vf_netdev_open, -+ .release = vf_netdev_release, -+ -+ .read = NULL, -+ .write = NULL, -+ .mmap = vf_netdev_mmap, -+ .ioctl = vf_netdev_ioctl, -+}; -+ -+#endif -+ - static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - { - const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; -@@ -8199,6 +8832,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - MODULENAME, RTL8169_VERSION); - } - -+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE) -+ if (mdev_register_device(&pdev->dev, &vf_netdev_ops) < 0) -+ printk(KERN_ERR"Could not register device\n"); -+ else -+ printk(KERN_INFO"Successfully registered vf-netdev device\n"); -+#endif - dev = alloc_etherdev(sizeof (*tp)); - if (!dev) { - rc = -ENOMEM; -diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h -index 2791467..f70a743 100644 ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -1399,6 +1399,7 @@ enum netdev_priv_flags { - IFF_RXFH_CONFIGURED = 1<<25, - IFF_PHONY_HEADROOM = 1<<26, - IFF_MACSEC = 1<<27, -+ IFF_VFNETDEV = 1<<28, - }; - - #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -15,7 +15,7 @@ if [ ! -e "/sys/class/net/$intf" ]; then exit 1 fi driver="$(basename $(readlink /sys/class/net/$intf/device/driver))" -sys_drv_name="${driver}-vfnetdev" +sys_drv_name="${driver}-netmdev" if [ ! 
-e "/sys/class/net/$intf/device/mdev_supported_types/$sys_drv_name/create" ]; then echo "interface $intf has no vfio-mdev support" exit 1 @@ -26,7 +26,7 @@ vf_create() { sudo sh -c "echo $dev_uuid > /sys/class/net/$intf/device/mdev_supported_types/$sys_drv_name/create" #the newly created mdev is not tied to any port of the parent deice yet echo "Bind $intf to the newly created mdevice $dev_uuid" - sudo sh -c "echo $intf > /sys/bus/mdev/devices/$dev_uuid/vfnetdev/netdev" + sudo sh -c "echo $intf > /sys/bus/mdev/devices/$dev_uuid/netmdev/netdev" #ensure the IOMMU group is readble by non root program vfio_group=$(basename $(readlink /sys/bus/mdev/devices/$dev_uuid/iommu_group)) user=$(whoami) diff --git a/src/userspace_io.c b/src/userspace_io.c index b7afb7e..4677409 100644 --- a/src/userspace_io.c +++ b/src/userspace_io.c @@ -132,22 +132,22 @@ int main(int argc, char *argv[]) device = vfio_init_dev(group, container, &group_status, &iommu_info, &device_info, group_uuid); + mmio = uio_map_mmio(exec_ops, device); + if (!mmio) { + printf("Cannot map MMIO\n"); + goto out; + } /* XXX FIXME check pointers and munmap properly */ - rxring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 1); + rxring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 2); if (!rxring) { printf("Cannot map RxRing\n"); goto out; } - txring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 2); + txring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 3); if (!txring) { printf("Cannot map TxRing\n"); goto out; } - mmio = uio_map_mmio(exec_ops, device); - if (!mmio) { - printf("Cannot map MMIO\n"); - goto out; - } /* FIXME decide on allocated areas, instead of getting 2MB per direction */ ret = iomem_alloc(device, 2 * 1024 * 1024, &iocur, &rx_data); |