aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlias Apalodimas <ilias.apalodimas@linaro.org>2017-10-19 12:08:20 +0300
committerIlias Apalodimas <ilias.apalodimas@linaro.org>2017-10-19 12:08:20 +0300
commit57056106c9bf5defc74cf6535e2b89cccab9e698 (patch)
tree81718497caaf170150474c203af825eacfff9dc8
parent68ecc9dd83383ef3554eadc07ead27f653028610 (diff)
updated to work with https://github.com/apalos/linux/tree/kernel_410
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
-rw-r--r--README18
-rw-r--r--api/vfio_api.c2
-rw-r--r--patches/linux-r8169.patch723
-rwxr-xr-xrun.sh4
-rw-r--r--src/userspace_io.c14
5 files changed, 25 insertions, 736 deletions
diff --git a/README b/README
index bd8c760..ff1e247 100644
--- a/README
+++ b/README
@@ -1,7 +1,19 @@
+
# Build
-- apply kernel patch and recompile, should support every upstream kernel >
- 4.10.x
-- make
+- Download and compile kernel from https://github.com/apalos/linux/tree/kernel_410
+- Make sure you include these kernel options:
+CONFIG_VFIO_IOMMU_TYPE1=m
+CONFIG_VFIO_VIRQFD=m
+CONFIG_VFIO=m
+CONFIG_VFIO_NOIOMMU=y
+CONFIG_VFIO_PCI=m
+CONFIG_VFIO_PCI_VGA=y
+CONFIG_VFIO_PCI_MMAP=y
+CONFIG_VFIO_PCI_INTX=y
+CONFIG_VFIO_PCI_IGD=y
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
+CONFIG_VFIO_MDEV_NET_DEVICE=m
# run
- ./run.sh create
diff --git a/api/vfio_api.c b/api/vfio_api.c
index a75f413..4c1270b 100644
--- a/api/vfio_api.c
+++ b/api/vfio_api.c
@@ -218,7 +218,7 @@ int vfio_init_dev(int grp, int container, struct vfio_group_status *grp_status,
struct vfio_device_info *dev_info, char *grp_uuid)
{
int ret;
- int device;
+ int device = -1;
ret = ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
if (!ret) {
diff --git a/patches/linux-r8169.patch b/patches/linux-r8169.patch
deleted file mode 100644
index 4894c74..0000000
--- a/patches/linux-r8169.patch
+++ /dev/null
@@ -1,723 +0,0 @@
-diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
-index 8f1623b..2f9cc53 100644
---- a/drivers/net/ethernet/realtek/r8169.c
-+++ b/drivers/net/ethernet/realtek/r8169.c
-@@ -33,6 +33,12 @@
- #include <asm/io.h>
- #include <asm/irq.h>
-
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+#include <linux/mm.h>
-+#include <linux/vfio.h>
-+#include <linux/mdev.h>
-+#endif
-+
- #define RTL8169_VERSION "2.3LK-NAPI"
- #define MODULENAME "r8169"
- #define PFX MODULENAME ": "
-@@ -7141,6 +7147,9 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
- u32 opts[2];
- int frags;
-
-+ if (dev->priv_flags & IFF_VFNETDEV)
-+ goto vf_netdev_ok;
-+
- if (unlikely(!TX_FRAGS_READY_FOR(tp, skb_shinfo(skb)->nr_frags))) {
- netif_err(tp, drv, dev, "BUG! Tx Ring full when queue awake!\n");
- goto err_stop_0;
-@@ -7218,6 +7227,9 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
-
- return NETDEV_TX_OK;
-
-+vf_netdev_ok:
-+ dev_kfree_skb_any(skb);
-+ return NETDEV_TX_OK;
- err_dma_1:
- rtl8169_unmap_tx_skb(d, tp->tx_skb + entry, txd);
- err_dma_0:
-@@ -7383,6 +7395,11 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
- unsigned int cur_rx, rx_left;
- unsigned int count;
-
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+ if (dev->priv_flags & IFF_VFNETDEV)
-+ return budget ;
-+#endif
-+
- cur_rx = tp->cur_rx;
-
- for (rx_left = min(budget, NUM_RX_DESC); rx_left > 0; rx_left--, cur_rx++) {
-@@ -7567,6 +7584,11 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
- int work_done= 0;
- u16 status;
-
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+ if (dev->priv_flags & IFF_VFNETDEV)
-+ return budget;
-+#endif
-+
- status = rtl_get_events(tp);
- rtl_ack_events(tp, status & ~tp->event_slow);
-
-@@ -7690,11 +7712,19 @@ static int rtl_open(struct net_device *dev)
- if (!tp->TxDescArray)
- goto err_pm_runtime_put;
-
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+ printk(KERN_INFO"TxDescArray @%p (%llx), virt_tophys=%llx\n",
-+ tp->TxDescArray, tp->TxPhyAddr, virt_to_phys(tp->TxDescArray));
-+#endif
- tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES,
- &tp->RxPhyAddr, GFP_KERNEL);
- if (!tp->RxDescArray)
- goto err_free_tx_0;
-
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+ printk(KERN_INFO"RxDescArray KVA(@%p) -> PA(%llx) <- IOVA(%llx)\n",
-+ tp->RxDescArray, virt_to_phys(tp->RxDescArray), tp->RxPhyAddr);
-+#endif
- retval = rtl8169_init_ring(dev);
- if (retval < 0)
- goto err_free_rx_1;
-@@ -8000,6 +8030,10 @@ static void rtl_remove_one(struct pci_dev *pdev)
- struct net_device *dev = pci_get_drvdata(pdev);
- struct rtl8169_private *tp = netdev_priv(dev);
-
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+ mdev_unregister_device(&pdev->dev);
-+#endif
-+
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-@@ -8183,6 +8217,605 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
- }
- }
-
-+
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+struct iovamap {
-+ u64 iova;
-+ void *vaddr;
-+ struct device *dev;
-+ u32 size; /* maximum of 32MB */
-+ enum dma_data_direction direction; /* DMA_FROM_DEVICE... */
-+};
-+
-+typedef struct netmdev {
-+ union {
-+ char page0[4096];
-+ struct {
-+ struct net_device *netdev;
-+ /* FIXME USE A LINKED LIST */
-+ int mappings_count;
-+ struct iovamap mappings[128]; /* 3.5KB */
-+ };
-+ };
-+ union {
-+ /* shadow features & statistics page */
-+ char page1[4096];
-+ struct {
-+ netdev_features_t features;
-+ netdev_features_t hw_features;
-+ netdev_features_t wanted_features;
-+ netdev_features_t vlan_features;
-+ netdev_features_t hw_enc_features;
-+ netdev_features_t mpls_features;
-+ netdev_features_t gso_partial_features;
-+ struct net_device_stats stats;
-+ atomic_long_t rx_dropped;
-+ atomic_long_t tx_dropped;
-+ atomic_long_t rx_nohandler;
-+ };
-+ };
-+} netmdev;
-+
-+/*
-+SYSFS structure for the controlling device
-+*/
-+
-+static ssize_t available_instances_show(struct kobject *kobj, struct device *dev,
-+ char *buf)
-+{
-+ return scnprintf(buf, PAGE_SIZE, "%d\n", 1);
-+}
-+static MDEV_TYPE_ATTR_RO(available_instances);
-+
-+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
-+ char *buf)
-+{
-+ return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
-+}
-+static MDEV_TYPE_ATTR_RO(device_api);
-+
-+static struct attribute *sysfs_vfnetdev_attributes[] = {
-+ &mdev_type_attr_available_instances.attr,
-+ &mdev_type_attr_device_api.attr,
-+ NULL,
-+};
-+
-+static struct attribute_group sysfs_vfnetdev_type = {
-+ .name = "vfnetdev",
-+ .attrs = sysfs_vfnetdev_attributes,
-+};
-+
-+/* Only 1 supported for now */
-+static struct attribute_group *sysfs_type_list[] = {
-+ &sysfs_vfnetdev_type,
-+ NULL
-+};
-+
-+/*
-+ * libraries
-+ */
-+static struct net_device *netmdev_get_netdev(struct mdev_device *mdev)
-+{
-+ struct netmdev *netmdev;
-+
-+ netmdev = mdev_get_drvdata(mdev);
-+ if (!netmdev)
-+ return NULL;
-+
-+ return netmdev->netdev;
-+}
-+
-+static void r8169_pause_datapath(struct net_device *netdev)
-+{
-+ void __iomem *ioaddr;
-+ struct rtl8169_private *tp;
-+
-+ if (!netdev)
-+ return;
-+ tp = netdev_priv(netdev);
-+ if (!tp)
-+ return;
-+
-+ ioaddr = tp->mmio_addr;
-+ RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
-+}
-+
-+static void r8169_resume_datapath(struct net_device *netdev)
-+{
-+ void __iomem *ioaddr;
-+ struct rtl8169_private *tp;
-+
-+ if (!netdev)
-+ return;
-+
-+ tp = netdev_priv(netdev);
-+
-+ if (!tp)
-+ return;
-+
-+ ioaddr = tp->mmio_addr;
-+ RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
-+}
-+
-+/* helper macros copied from vfio-pci */
-+#define VFIO_PCI_OFFSET_SHIFT 40
-+#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
-+#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
-+#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
-+static int r8169_get_region(struct net_device *netdev, struct vfio_region_info *info)
-+{
-+ struct rtl8169_private *tp;
-+ struct pci_dev *pdev;
-+
-+ if (!netdev)
-+ return -EINVAL;
-+
-+ tp = netdev_priv(netdev);
-+ if (!tp)
-+ return -EFAULT;
-+
-+ pdev = tp->pci_dev;
-+
-+ switch (info->index) {
-+ case VFIO_PCI_CONFIG_REGION_INDEX:
-+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
-+ info->size = pdev->cfg_size;
-+ info->flags = VFIO_REGION_INFO_FLAG_READ |
-+ VFIO_REGION_INFO_FLAG_WRITE;
-+ break;
-+ case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
-+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
-+ info->size = pci_resource_len(pdev, info->index);
-+ if (!info->size) {
-+ info->flags = 0;
-+ break;
-+ }
-+
-+ info->flags = VFIO_REGION_INFO_FLAG_READ |
-+ VFIO_REGION_INFO_FLAG_WRITE;
-+ /* FIXME not all BARS are/should be mappable */
-+ info->flags |= VFIO_REGION_INFO_FLAG_MMAP;
-+#if 0
-+ if (vdev->bar_mmap_supported[info->index]) {
-+ info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
-+ if (info->index == vdev->msix_bar) {
-+ ret = msix_sparse_mmap_cap(vdev, &caps);
-+ if (ret)
-+ return ret;
-+ }
-+ }
-+#endif
-+ break;
-+ case VFIO_PCI_NUM_REGIONS + 1:
-+ case VFIO_PCI_NUM_REGIONS + 2:
-+ if (info->index == VFIO_PCI_NUM_REGIONS + 1) {
-+ info->offset = (__u64)(tp->RxDescArray);
-+ info->size = R8169_RX_RING_BYTES;
-+ }
-+ else if (info->index == VFIO_PCI_NUM_REGIONS + 2) {
-+ info->offset = (__u64)(tp->TxDescArray);
-+ info->size = R8169_TX_RING_BYTES;
-+ }
-+ else return -EINVAL;
-+
-+ info->flags = VFIO_REGION_INFO_FLAG_MMAP;
-+ break;
-+ default:
-+ return -EINVAL;
-+ }
-+
-+ return 0;
-+
-+}
-+
-+/*
-+ * SYSFS structure for created mdevices
-+ */
-+static ssize_t netdev_show(struct device *dev, struct device_attribute *attr,
-+ char *buf)
-+{
-+ struct mdev_device *mdev;
-+ struct net_device *netdev;
-+
-+ mdev = mdev_from_dev(dev);
-+ if (!mdev)
-+ return scnprintf(buf, PAGE_SIZE, "mdev not found\n");
-+
-+ netdev = netmdev_get_netdev(mdev);
-+ if (!netdev)
-+ return scnprintf(buf, PAGE_SIZE, "ndev-mdev not found\n");
-+
-+ return scnprintf(buf, PAGE_SIZE, "%.16s\n", netdev->name);
-+}
-+
-+static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
-+ const char *buf, size_t count)
-+{
-+ struct mdev_device *mdev;
-+ struct net_device *port;
-+ struct netmdev *netmdev;
-+ char name[IFNAMSIZ+1];
-+
-+ if (count < 2)
-+ return -EINVAL;
-+
-+ mdev = mdev_from_dev(dev);
-+ if (!mdev)
-+ return -ENODEV;
-+
-+ netmdev = mdev_get_drvdata(mdev);
-+ if (netmdev)
-+ return -ENODEV;
-+
-+ netmdev = kzalloc(sizeof(*netmdev), GFP_KERNEL);
-+ if (!netmdev)
-+ return -ENOMEM;
-+ mdev_set_drvdata(mdev, netmdev);
-+
-+ if (count > IFNAMSIZ)
-+ return -ENODEV;
-+
-+ memset(name, 0, sizeof(name));
-+ scnprintf(name, IFNAMSIZ + 1, "%.*s", (int)count - 1, buf);
-+ port = dev_get_by_name(&init_net, name);
-+ if (!port)
-+ return -ENODEV;
-+
-+ /* FIXME find a way to check if this is the parent device */
-+ //if (&port->dev != mdev_parent_dev(mdev)) return -1;
-+
-+ netmdev->netdev = port;
-+
-+ return count;
-+}
-+
-+static DEVICE_ATTR_RW(netdev);
-+static struct attribute *sysfs_mdev_vfnetdev_attributes[] = {
-+ &dev_attr_netdev.attr,
-+ NULL,
-+};
-+
-+static struct attribute_group sysfs_mdev_vfnetdev_group = {
-+ .name = "vfnetdev",
-+ .attrs = sysfs_mdev_vfnetdev_attributes,
-+};
-+
-+static const struct attribute_group *sysfs_mdev_groups[] = {
-+ &sysfs_mdev_vfnetdev_group,
-+ NULL,
-+};
-+
-+
-+static int vf_netdev_create(struct kobject *kobj, struct mdev_device *mdev)
-+{
-+ return 0;
-+}
-+
-+static int vf_netdev_remove(struct mdev_device *mdev)
-+{
-+ struct netmdev *netmdev = mdev_get_drvdata(mdev);
-+ struct net_device *port;
-+
-+ printk(KERN_INFO"%s %d\n", __func__, __LINE__);
-+ port = netmdev_get_netdev(mdev);
-+ dev_put(port);
-+ kfree(netmdev);
-+ mdev_set_drvdata(mdev, NULL);
-+
-+ return 0;
-+}
-+
-+static int vf_netdev_open(struct mdev_device *mdev)
-+{
-+ //struct netmdev *netmdev = mdev_get_drvdata(mdev);
-+ struct net_device *port;
-+
-+ printk(KERN_INFO"%s %d\n", __func__, __LINE__);
-+ /* TODO shadow stats to netmdev */
-+ port = netmdev_get_netdev(mdev);
-+ r8169_pause_datapath(port);
-+ /* barrier required? */
-+ port->priv_flags |= IFF_VFNETDEV;
-+ /* deallocate kernel buffers from ring */
-+ rtl8169_rx_clear(netdev_priv(port));
-+
-+ return 0;
-+}
-+
-+static void vf_netdev_release(struct mdev_device *mdev)
-+{
-+ struct netmdev *nd = mdev_get_drvdata(mdev);
-+ struct net_device *port;
-+ int i;
-+ int maps;
-+ struct rtl8169_private *tp;
-+
-+ if (!nd)
-+ return;
-+
-+
-+
-+ /* TODO export shadow stats to net_device */
-+ port = netmdev_get_netdev(mdev);
-+ if (!port)
-+ return;
-+
-+ tp = netdev_priv(port);
-+ /* replenish the rings with kernel buffers */
-+ /* barrier required? */
-+ //rtl_rx_close(tp);
-+
-+ for (i = 0; i < NUM_RX_DESC; i++) {
-+ rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
-+ }
-+
-+ maps = nd->mappings_count;
-+
-+ for (i = 0; i < maps; i++) {
-+ dma_unmap_single(nd->mappings[i].dev,
-+ nd->mappings[i].iova, nd->mappings[i].size,
-+ nd->mappings[i].direction);
-+ kfree(nd->mappings[i].vaddr);
-+ nd->mappings_count--;
-+ }
-+
-+ rtl8169_rx_fill(tp);
-+ r8169_resume_datapath(port);
-+ rtl_reset_work(tp);
-+ port->priv_flags &= ~IFF_VFNETDEV;
-+
-+ return;
-+}
-+
-+static long vf_netdev_ioctl(struct mdev_device *mdev, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ unsigned long minsz;
-+ struct net_device *netdev;
-+ struct netmdev *netmdev;
-+
-+ if (!mdev)
-+ return -EINVAL;
-+
-+ netdev = netmdev_get_netdev(mdev);
-+ netmdev = mdev_get_drvdata(mdev);
-+
-+ if (!netdev || !netmdev)
-+ return -ENODEV;
-+
-+ switch (cmd) {
-+ case VFIO_DEVICE_GET_INFO:
-+ {
-+ struct vfio_device_info info;
-+
-+ minsz = offsetofend(struct vfio_device_info, num_irqs);
-+ if (copy_from_user(&info, (void __user *)arg, minsz))
-+ return -EFAULT;
-+
-+ if (info.argsz < minsz)
-+ return -EINVAL;
-+
-+ info.flags = VFIO_DEVICE_FLAGS_PCI;
-+ /*
-+ * FIXME - find the number of rx queues when not having
-+ * CONFIG_SYSFS if not possible to do it in a generic way, plan
-+ * for a callback
-+ */
-+ /* rx_ring and tx_ring */
-+ info.num_regions = VFIO_PCI_NUM_REGIONS + netdev->num_tx_queues + 1;
-+ info.num_irqs = 1;
-+
-+ if (copy_to_user((void __user *)arg, &info, minsz))
-+ return -EFAULT;
-+
-+ return 0;
-+ }
-+ case VFIO_DEVICE_GET_REGION_INFO:
-+ {
-+ struct vfio_region_info info;
-+ int ret;
-+
-+ minsz = offsetofend(struct vfio_region_info, offset);
-+
-+ if (copy_from_user(&info, (void __user *)arg, minsz))
-+ return -EFAULT;
-+
-+ if (info.argsz < minsz)
-+ return -EINVAL;
-+
-+ ret = r8169_get_region(netdev, &info);
-+
-+ if (ret < 0) return ret;
-+
-+ if (copy_to_user((void __user *)arg, &info, minsz))
-+ return -EFAULT;
-+
-+ return 0;
-+ }
-+ case VFIO_IOMMU_MAP_DMA:
-+ {
-+ struct vfio_iommu_type1_dma_map map;
-+ struct vm_area_struct *vma;
-+ void *data;
-+ struct device *parent_dev;
-+ int node;
-+ dma_addr_t mapping;
-+ int ret = -EINVAL;
-+
-+ /* allocate DMA area and map it where the userland asks
-+ * userland need to mmap an area WITHOUT allocating pages:
-+ * mmap(vaddr,size, PROT_READ | PROT_WRITE, MAP_SHARED |
-+ * MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, -1, 0
-+ * MAP_NORESERVE ensures only VA space is booked, no pages are
-+ * mapped * the mapping must be the entire area, not partial on
-+ * the vma
-+ */
-+
-+ if (netmdev->mappings_count >= 128)
-+ return -EFAULT;
-+
-+ minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
-+
-+ if (copy_from_user(&map, (void __user *)arg, minsz)) {
-+ ret = -EFAULT;
-+ goto out;
-+ }
-+
-+ if (map.argsz < minsz)
-+ goto out;
-+
-+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: find_vma(%llx)\n", map.vaddr);
-+ /*
-+ * locates the containing vma for the required map.vaddr
-+ * the vma must point to the entire zone allocated by mmap in
-+ * userland
-+ */
-+ vma = find_vma(current->mm, map.vaddr);
-+ if (!vma)
-+ return -EFAULT;
-+ if (map.vaddr >= vma->vm_end)
-+ return -EFAULT;
-+
-+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: found vma(%llx) -> start=%lx end=%lx pg_off=%lx\n",
-+ map.vaddr, vma->vm_start, vma->vm_end, vma->vm_pgoff);
-+ /* the iova will be returned as part of the ioctl to the userland */
-+ //parent_dev = &tp->pci_dev->dev;
-+ parent_dev = mdev_parent_dev(mdev);
-+
-+ node = netdev->dev.parent ? dev_to_node(netdev->dev.parent) : -1;
-+ data = kmalloc_node(map.size, GFP_KERNEL, node);
-+ if (!data)
-+ /* return ret? */
-+ return -ENOMEM;
-+
-+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: about to dma_map_single(%p, %p, %lld, DMA_FROM_DEVICE)\n",
-+ parent_dev, data, map.size);
-+ mapping = dma_map_single(parent_dev, data, map.size,
-+ DMA_BIDIRECTIONAL);
-+ if (unlikely(dma_mapping_error(parent_dev, mapping))) {
-+ if (net_ratelimit())
-+ printk(KERN_ERR"Failed to dma_map_single buffer for userland!\n");
-+ kfree(data);
-+ goto out;
-+ }
-+ map.iova = mapping;
-+ ret = io_remap_pfn_range(vma, map.vaddr,
-+ virt_to_phys(data) >> PAGE_SHIFT,
-+ map.size, vma->vm_page_prot);
-+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: io_remap_pfn_range %llx -> physmem <- @%llx, %lld:%d\n",
-+ map.vaddr, map.iova, map.size, ret);
-+ if (ret != 0) {
-+ dma_unmap_single(parent_dev, mapping, map.size,
-+ DMA_BIDIRECTIONAL);
-+ kfree(data);
-+ printk(KERN_ERR"VFIO_IOMMU_MAP_DMA: io_remap_pfn_range failed\n");
-+ return -EFAULT;
-+ }
-+
-+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: recording the mapping %d\n",
-+ netmdev->mappings_count);
-+ netmdev->mappings[netmdev->mappings_count].dev = parent_dev;
-+ netmdev->mappings[netmdev->mappings_count].vaddr = data;
-+ netmdev->mappings[netmdev->mappings_count].iova = mapping;
-+ netmdev->mappings[netmdev->mappings_count].size = map.size;
-+ netmdev->mappings_count++;
-+
-+ printk(KERN_INFO"VFIO_IOMMU_MAP_DMA: preparing response back to user\n");
-+ if (copy_to_user((void __user *)arg, &map, minsz))
-+ return -EFAULT;
-+
-+ ret = 0;
-+out:
-+ return ret;
-+ }
-+
-+ case 500: {
-+ r8169_resume_datapath(netdev);
-+ return 0;
-+ }
-+
-+ } /* switch */
-+
-+ return -EINVAL;
-+
-+}
-+
-+static int vf_netdev_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
-+{
-+ struct net_device *netdev;
-+ struct rtl8169_private *tp;
-+ u64 req_len;
-+ int ret = 0;
-+ /* userland wants to access ring descrptors that was pre-allocated
-+ * by the kernel
-+ * note: userland need to user IOCTL MAP to CREATE packet buffers
-+ */
-+ netdev = netmdev_get_netdev(mdev);
-+ tp = netdev_priv(netdev);
-+
-+ /* check that we try to map only authorized areas
-+ * FIXME is there a way to check all the transmit and receive rings
-+ * from an abstract netdev?
-+ */
-+ /* Remap Rx/Tx descriptors */
-+ if (vma->vm_pgoff == ((__u64)tp->RxDescArray >> PAGE_SHIFT) ||
-+ vma->vm_pgoff == ((__u64)tp->TxDescArray >> PAGE_SHIFT)) {
-+
-+ req_len = PAGE_ALIGN(vma->vm_end - vma->vm_start);
-+
-+ vma->vm_private_data = NULL;
-+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-+
-+ ret = remap_pfn_range(vma, vma->vm_start,
-+ virt_to_phys((void *)(vma->vm_pgoff << PAGE_SHIFT)) >> PAGE_SHIFT,
-+ req_len, vma->vm_page_prot);
-+
-+ printk(KERN_INFO"vfnetdev_map %lx, @%llx, %lld:%d\n",
-+ vma->vm_start, virt_to_phys((void *)(vma->vm_pgoff << PAGE_SHIFT)),
-+ req_len, ret);
-+ } else {
-+ struct pci_dev *pdev = tp->pci_dev;
-+ unsigned int index;
-+ u64 phys_len, pgoff, req_start;
-+
-+ index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
-+ phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
-+ req_len = vma->vm_end - vma->vm_start;
-+ pgoff = vma->vm_pgoff &
-+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
-+ req_start = pgoff << PAGE_SHIFT;
-+
-+ if (req_start + req_len > phys_len)
-+ return -EINVAL;
-+
-+ vma->vm_private_data = NULL;
-+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-+ vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
-+
-+ ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-+ req_len, vma->vm_page_prot);
-+ }
-+
-+ return ret;
-+}
-+
-+static const struct mdev_parent_ops vf_netdev_ops = {
-+ .supported_type_groups = sysfs_type_list,
-+ .mdev_attr_groups = sysfs_mdev_groups,
-+ .create = vf_netdev_create,
-+ .remove = vf_netdev_remove,
-+
-+ .open = vf_netdev_open,
-+ .release = vf_netdev_release,
-+
-+ .read = NULL,
-+ .write = NULL,
-+ .mmap = vf_netdev_mmap,
-+ .ioctl = vf_netdev_ioctl,
-+};
-+
-+#endif
-+
- static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
- {
- const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
-@@ -8199,6 +8832,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
- MODULENAME, RTL8169_VERSION);
- }
-
-+#if defined(CONFIG_VFIO_MDEV_DEVICE) || defined(CONFIG_VFIO_MDEV_DEVICE_MODULE)
-+ if (mdev_register_device(&pdev->dev, &vf_netdev_ops) < 0)
-+ printk(KERN_ERR"Could not register device\n");
-+ else
-+ printk(KERN_INFO"Successfully registered vf-netdev device\n");
-+#endif
- dev = alloc_etherdev(sizeof (*tp));
- if (!dev) {
- rc = -ENOMEM;
-diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
-index 2791467..f70a743 100644
---- a/include/linux/netdevice.h
-+++ b/include/linux/netdevice.h
-@@ -1399,6 +1399,7 @@ enum netdev_priv_flags {
- IFF_RXFH_CONFIGURED = 1<<25,
- IFF_PHONY_HEADROOM = 1<<26,
- IFF_MACSEC = 1<<27,
-+ IFF_VFNETDEV = 1<<28,
- };
-
- #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
diff --git a/run.sh b/run.sh
index 5d3e2c9..b7ea52e 100755
--- a/run.sh
+++ b/run.sh
@@ -15,7 +15,7 @@ if [ ! -e "/sys/class/net/$intf" ]; then
exit 1
fi
driver="$(basename $(readlink /sys/class/net/$intf/device/driver))"
-sys_drv_name="${driver}-vfnetdev"
+sys_drv_name="${driver}-netmdev"
if [ ! -e "/sys/class/net/$intf/device/mdev_supported_types/$sys_drv_name/create" ]; then
echo "interface $intf has no vfio-mdev support"
exit 1
@@ -26,7 +26,7 @@ vf_create() {
sudo sh -c "echo $dev_uuid > /sys/class/net/$intf/device/mdev_supported_types/$sys_drv_name/create"
#the newly created mdev is not tied to any port of the parent device yet
echo "Bind $intf to the newly created mdevice $dev_uuid"
- sudo sh -c "echo $intf > /sys/bus/mdev/devices/$dev_uuid/vfnetdev/netdev"
+ sudo sh -c "echo $intf > /sys/bus/mdev/devices/$dev_uuid/netmdev/netdev"
#ensure the IOMMU group is readable by non-root programs
vfio_group=$(basename $(readlink /sys/bus/mdev/devices/$dev_uuid/iommu_group))
user=$(whoami)
diff --git a/src/userspace_io.c b/src/userspace_io.c
index b7afb7e..4677409 100644
--- a/src/userspace_io.c
+++ b/src/userspace_io.c
@@ -132,22 +132,22 @@ int main(int argc, char *argv[])
device = vfio_init_dev(group, container, &group_status, &iommu_info,
&device_info, group_uuid);
+ mmio = uio_map_mmio(exec_ops, device);
+ if (!mmio) {
+ printf("Cannot map MMIO\n");
+ goto out;
+ }
/* XXX FIXME check pointers and munmap properly */
- rxring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 1);
+ rxring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 2);
if (!rxring) {
printf("Cannot map RxRing\n");
goto out;
}
- txring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 2);
+ txring = vfio_mmap_region(device, VFIO_PCI_NUM_REGIONS + 3);
if (!txring) {
printf("Cannot map TxRing\n");
goto out;
}
- mmio = uio_map_mmio(exec_ops, device);
- if (!mmio) {
- printf("Cannot map MMIO\n");
- goto out;
- }
/* FIXME decide on allocated areas, instead of getting 2MB per direction */
ret = iomem_alloc(device, 2 * 1024 * 1024, &iocur, &rx_data);