From 655d406a7c80bffc03263d071b6ba1e0fcf548f9 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 18:46:48 -0800 Subject: xen: add /proc/xen/xsd_{kva,port} to xenfs These are used by the userspace xenstore daemon, which runs in dom0. Xenstored is what's behind the xenfs "xenbus" filesystem. [ Impact: provide mapping and port to usermode for xenstore ] Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/Makefile | 3 +- drivers/xen/xenfs/super.c | 54 +++++++++++++++++++++++++++++++++- drivers/xen/xenfs/xenfs.h | 2 ++ drivers/xen/xenfs/xenstored.c | 68 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 drivers/xen/xenfs/xenstored.c (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile index 25275c3bbdf..5d45ff13cc0 100644 --- a/drivers/xen/xenfs/Makefile +++ b/drivers/xen/xenfs/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_XENFS) += xenfs.o -xenfs-objs = super.o xenbus.o \ No newline at end of file +xenfs-y = super.o xenbus.o +xenfs-$(CONFIG_XEN_DOM0) += xenstored.o diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 78bfab0700b..3cf7707217f 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -22,6 +22,46 @@ MODULE_DESCRIPTION("Xen filesystem"); MODULE_LICENSE("GPL"); +static struct inode *xenfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret = new_inode(sb); + + if (ret) { + ret->i_mode = mode; + ret->i_uid = ret->i_gid = 0; + ret->i_blocks = 0; + ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; + } + return ret; +} + +static struct dentry *xenfs_create_file(struct super_block *sb, + struct dentry *parent, + const char *name, + const struct file_operations *fops, + void *data, + int mode) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = xenfs_make_inode(sb, S_IFREG 
| mode); + if (!inode) { + dput(dentry); + return NULL; + } + + inode->i_fop = fops; + inode->i_private = data; + + d_add(dentry, inode); + return dentry; +} + static ssize_t capabilities_read(struct file *file, char __user *buf, size_t size, loff_t *off) { @@ -45,8 +85,20 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { "capabilities", &capabilities_file_ops, S_IRUGO }, {""}, }; + int rc; + + rc = simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files); + if (rc < 0) + return rc; + + if (xen_initial_domain()) { + xenfs_create_file(sb, sb->s_root, "xsd_kva", + &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR); + xenfs_create_file(sb, sb->s_root, "xsd_port", + &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR); + } - return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files); + return rc; } static int xenfs_get_sb(struct file_system_type *fs_type, diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h index 51f08b2d0bf..5056306e7aa 100644 --- a/drivers/xen/xenfs/xenfs.h +++ b/drivers/xen/xenfs/xenfs.h @@ -2,5 +2,7 @@ #define _XENFS_XENBUS_H extern const struct file_operations xenbus_file_ops; +extern const struct file_operations xsd_kva_file_ops; +extern const struct file_operations xsd_port_file_ops; #endif /* _XENFS_XENBUS_H */ diff --git a/drivers/xen/xenfs/xenstored.c b/drivers/xen/xenfs/xenstored.c new file mode 100644 index 00000000000..fef20dbc6a5 --- /dev/null +++ b/drivers/xen/xenfs/xenstored.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include + +#include + +#include "xenfs.h" +#include "../xenbus/xenbus_comms.h" + +static ssize_t xsd_read(struct file *file, char __user *buf, + size_t size, loff_t *off) +{ + const char *str = (const char *)file->private_data; + return simple_read_from_buffer(buf, size, off, str, strlen(str)); +} + +static int xsd_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static int xsd_kva_open(struct inode *inode, struct file *file) +{ + 
file->private_data = (void *)kasprintf(GFP_KERNEL, "0x%p", + xen_store_interface); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_pfn(xen_store_interface), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +const struct file_operations xsd_kva_file_ops = { + .open = xsd_kva_open, + .mmap = xsd_kva_mmap, + .read = xsd_read, + .release = xsd_release, +}; + +static int xsd_port_open(struct inode *inode, struct file *file) +{ + file->private_data = (void *)kasprintf(GFP_KERNEL, "%d", + xen_store_evtchn); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +const struct file_operations xsd_port_file_ops = { + .open = xsd_port_open, + .read = xsd_read, + .release = xsd_release, +}; -- cgit v1.2.3 From 1c5de1939c204bde9cce87f4eb3d26e9f9eb732b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:49 -0800 Subject: xen: add privcmd driver The privcmd interface in xenfs allows the tool stack in the privileged domain to get fairly direct access to the hypervisor in order to do various management things such as domain construction. 
[ Impact: new xenfs interface for privileged operations ] Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/Makefile | 2 +- drivers/xen/xenfs/privcmd.c | 436 ++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/xenfs/super.c | 2 + drivers/xen/xenfs/xenfs.h | 1 + 4 files changed, 440 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/xenfs/privcmd.c (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile index 5d45ff13cc0..4a0be9a82af 100644 --- a/drivers/xen/xenfs/Makefile +++ b/drivers/xen/xenfs/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_XENFS) += xenfs.o xenfs-y = super.o xenbus.o -xenfs-$(CONFIG_XEN_DOM0) += xenstored.o +xenfs-$(CONFIG_XEN_DOM0) += xenstored.o privcmd.o diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c new file mode 100644 index 00000000000..c7192f314f8 --- /dev/null +++ b/drivers/xen/xenfs/privcmd.c @@ -0,0 +1,436 @@ +/****************************************************************************** + * privcmd.c + * + * Interface to privileged domain-0 commands. 
+ * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); +#endif + +struct remap_data { + unsigned long mfn; + unsigned domid; + pgprot_t prot; +}; + +static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_data *rmd = data; + pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); + + xen_set_domain_pte(ptep, pte, rmd->domid); + + return 0; +} + +int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long mfn, unsigned long size, + pgprot_t prot, unsigned domid) +{ + struct remap_data rmd; + int err; + + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); + + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + + rmd.mfn = mfn; + rmd.prot = prot; + rmd.domid = domid; + + err = apply_to_page_range(vma->vm_mm, addr, size, + remap_area_mfn_pte_fn, &rmd); + + return err; +} + +static long privcmd_ioctl_hypercall(void __user *udata) +{ + struct privcmd_hypercall hypercall; + long ret; + + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) + return -EFAULT; + + ret = privcmd_call(hypercall.op, + hypercall.arg[0], hypercall.arg[1], + hypercall.arg[2], hypercall.arg[3], + hypercall.arg[4]); + + return ret; +} + +static void free_page_list(struct list_head *pages) +{ + struct page *p, *n; + + list_for_each_entry_safe(p, n, pages, lru) + __free_page(p); + + INIT_LIST_HEAD(pages); +} + +/* + * Given an array of items in userspace, return a list of pages + * containing the data. 
If copying fails, either because of memory + * allocation failure or a problem reading user memory, return an + * error code; its up to the caller to dispose of any partial list. + */ +static int gather_array(struct list_head *pagelist, + unsigned nelem, size_t size, + void __user *data) +{ + unsigned pageidx; + void *pagedata; + int ret; + + if (size > PAGE_SIZE) + return 0; + + pageidx = PAGE_SIZE; + pagedata = NULL; /* quiet, gcc */ + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page = alloc_page(GFP_KERNEL); + + ret = -ENOMEM; + if (page == NULL) + goto fail; + + pagedata = page_address(page); + + list_add_tail(&page->lru, pagelist); + pageidx = 0; + } + + ret = -EFAULT; + if (copy_from_user(pagedata + pageidx, data, size)) + goto fail; + + data += size; + pageidx += size; + } + + ret = 0; + +fail: + return ret; +} + +/* + * Call function "fn" on each element of the array fragmented + * over a list of pages. + */ +static int traverse_pages(unsigned nelem, size_t size, + struct list_head *pos, + int (*fn)(void *data, void *state), + void *state) +{ + void *pagedata; + unsigned pageidx; + int ret; + + BUG_ON(size > PAGE_SIZE); + + pageidx = PAGE_SIZE; + pagedata = NULL; /* hush, gcc */ + + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page; + pos = pos->next; + page = list_entry(pos, struct page, lru); + pagedata = page_address(page); + pageidx = 0; + } + + ret = (*fn)(pagedata + pageidx, state); + if (ret) + break; + pageidx += size; + } + + return ret; +} + +struct mmap_mfn_state { + unsigned long va; + struct vm_area_struct *vma; + domid_t domain; +}; + +static int mmap_mfn_range(void *data, void *state) +{ + struct privcmd_mmap_entry *msg = data; + struct mmap_mfn_state *st = state; + struct vm_area_struct *vma = st->vma; + int rc; + + /* Do not allow range to wrap the address space. 
*/ + if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || + ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) + return -EINVAL; + + /* Range chunks must be contiguous in va space. */ + if ((msg->va != st->va) || + ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) + return -EINVAL; + + rc = remap_domain_mfn_range(vma, + msg->va & PAGE_MASK, + msg->mfn, + msg->npages << PAGE_SHIFT, + vma->vm_page_prot, + st->domain); + if (rc < 0) + return rc; + + st->va += msg->npages << PAGE_SHIFT; + + return 0; +} + +static long privcmd_ioctl_mmap(void __user *udata) +{ + struct privcmd_mmap mmapcmd; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + LIST_HEAD(pagelist); + struct mmap_mfn_state state; + + if (!xen_initial_domain()) + return -EPERM; + + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) + return -EFAULT; + + rc = gather_array(&pagelist, + mmapcmd.num, sizeof(struct privcmd_mmap_entry), + mmapcmd.entry); + + if (rc || list_empty(&pagelist)) + goto out; + + down_write(&mm->mmap_sem); + + { + struct page *page = list_first_entry(&pagelist, + struct page, lru); + struct privcmd_mmap_entry *msg = page_address(page); + + vma = find_vma(mm, msg->va); + rc = -EINVAL; + + if (!vma || (msg->va != vma->vm_start) || + !privcmd_enforce_singleshot_mapping(vma)) + goto out_up; + } + + state.va = vma->vm_start; + state.vma = vma; + state.domain = mmapcmd.dom; + + rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), + &pagelist, + mmap_mfn_range, &state); + + +out_up: + up_write(&mm->mmap_sem); + +out: + free_page_list(&pagelist); + + return rc; +} + +struct mmap_batch_state { + domid_t domain; + unsigned long va; + struct vm_area_struct *vma; + int err; + + xen_pfn_t __user *user; +}; + +static int mmap_batch_fn(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, + *mfnp, PAGE_SIZE, + st->vma->vm_page_prot, st->domain) < 0) { + *mfnp |= 
0xf0000000U; + st->err++; + } + st->va += PAGE_SIZE; + + return 0; +} + +static int mmap_return_errors(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + put_user(*mfnp, st->user++); + + return 0; +} + +static long privcmd_ioctl_mmap_batch(void __user *udata) +{ + int ret; + struct privcmd_mmapbatch m; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long nr_pages; + LIST_HEAD(pagelist); + struct mmap_batch_state state; + + if (!xen_initial_domain()) + return -EPERM; + + if (copy_from_user(&m, udata, sizeof(m))) + return -EFAULT; + + nr_pages = m.num; + if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) + return -EINVAL; + + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), + m.arr); + + if (ret || list_empty(&pagelist)) + goto out; + + down_write(&mm->mmap_sem); + + vma = find_vma(mm, m.addr); + ret = -EINVAL; + if (!vma || + (m.addr != vma->vm_start) || + ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || + !privcmd_enforce_singleshot_mapping(vma)) { + up_write(&mm->mmap_sem); + goto out; + } + + state.domain = m.dom; + state.vma = vma; + state.va = m.addr; + state.err = 0; + + ret = traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state); + + up_write(&mm->mmap_sem); + + if (state.err > 0) { + ret = state.err; + + state.user = udata; + traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, + mmap_return_errors, &state); + } + +out: + free_page_list(&pagelist); + + return ret; +} + +static long privcmd_ioctl(struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = -ENOSYS; + void __user *udata = (void __user *) data; + + switch (cmd) { + case IOCTL_PRIVCMD_HYPERCALL: + ret = privcmd_ioctl_hypercall(udata); + break; + + case IOCTL_PRIVCMD_MMAP: + ret = privcmd_ioctl_mmap(udata); + break; + + case IOCTL_PRIVCMD_MMAPBATCH: + ret = privcmd_ioctl_mmap_batch(udata); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + 
+#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static struct vm_operations_struct privcmd_vm_ops = { + .fault = privcmd_fault +}; + +static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* Unsupported for auto-translate guests. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -ENOSYS; + + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; + vma->vm_ops = &privcmd_vm_ops; + vma->vm_private_data = NULL; + + return 0; +} + +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +{ + return (xchg(&vma->vm_private_data, (void *)1) == NULL); +} +#endif + +const struct file_operations privcmd_file_ops = { + .unlocked_ioctl = privcmd_ioctl, + .mmap = privcmd_mmap, +}; diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 3cf7707217f..8c7462866e9 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -96,6 +96,8 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR); xenfs_create_file(sb, sb->s_root, "xsd_port", &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR); + xenfs_create_file(sb, sb->s_root, "privcmd", + &privcmd_file_ops, NULL, S_IRUSR|S_IWUSR); } return rc; diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h index 5056306e7aa..b68aa620000 100644 --- a/drivers/xen/xenfs/xenfs.h +++ b/drivers/xen/xenfs/xenfs.h @@ -2,6 +2,7 @@ #define _XENFS_XENBUS_H extern const struct file_operations xenbus_file_ops; +extern const struct file_operations privcmd_file_ops; extern const struct file_operations xsd_kva_file_ops; extern const struct file_operations xsd_port_file_ops; -- cgit v1.2.3 From 24a89b5be4cf2b7f1b49b56b6cb4a7b71fccf241 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:49 -0800 Subject: xen/privcmd: create 
address space to allow writable mmaps These are necessary to allow writeable mmap of the privcmd node to succeed without being marked read-only for writenotify purposes. Which in turn is necessary to allow mappings of foreign guest pages [ Impact: bugfix: allow writable mappings ] Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/super.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 8c7462866e9..23f1cca5a2e 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include @@ -22,12 +24,30 @@ MODULE_DESCRIPTION("Xen filesystem"); MODULE_LICENSE("GPL"); +static int xenfs_set_page_dirty(struct page *page) +{ + if (!PageDirty(page)) + SetPageDirty(page); + return 0; +} + +static const struct address_space_operations xenfs_aops = { + .set_page_dirty = xenfs_set_page_dirty, +}; + +static struct backing_dev_info xenfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, +}; + static struct inode *xenfs_make_inode(struct super_block *sb, int mode) { struct inode *ret = new_inode(sb); if (ret) { ret->i_mode = mode; + ret->i_mapping->a_ops = &xenfs_aops; + ret->i_mapping->backing_dev_info = &xenfs_backing_dev_info; ret->i_uid = ret->i_gid = 0; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; @@ -119,11 +139,25 @@ static struct file_system_type xenfs_type = { static int __init xenfs_init(void) { - if (xen_domain()) - return register_filesystem(&xenfs_type); + int err; + if (!xen_domain()) { + printk(KERN_INFO "xenfs: not registering filesystem on non-xen platform\n"); + return 0; + } - printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n"); - return 0; + err = register_filesystem(&xenfs_type); + if (err) { + 
printk(KERN_ERR "xenfs: Unable to register filesystem!\n"); + goto out; + } + + err = bdi_init(&xenfs_backing_dev_info); + if (err) + unregister_filesystem(&xenfs_type); + + out: + + return err; } static void __exit xenfs_exit(void) -- cgit v1.2.3 From 35f8c1c343f2918ea24f05282d14e711887d8278 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Mar 2009 09:56:26 -0800 Subject: xen/xenfs: set_page_dirty is supposed to return true if it dirties I don't think it matters at all in this case (there's only one caller which checks the return value), but may as well be strictly correct. [ Impact: cleanup ] Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 23f1cca5a2e..afaa6ede016 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -26,9 +26,7 @@ MODULE_LICENSE("GPL"); static int xenfs_set_page_dirty(struct page *page) { - if (!PageDirty(page)) - SetPageDirty(page); - return 0; + return !TestSetPageDirty(page); } static const struct address_space_operations xenfs_aops = { -- cgit v1.2.3 From 441c7416b55d3d48b4aaafc5bdd804092387d877 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Mar 2009 09:56:59 -0800 Subject: xen/privcmd: print SIGBUS faults Print more detail about privcmd mapping faults for debugging. 
[ Impact: debug ] Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/privcmd.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index c7192f314f8..6b602f50536 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -403,6 +403,10 @@ static long privcmd_ioctl(struct file *file, #ifndef HAVE_ARCH_PRIVCMD_MMAP static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { + printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", + vma, vma->vm_start, vma->vm_end, + vmf->pgoff, vmf->virtual_address); + return VM_FAULT_SIGBUS; } -- cgit v1.2.3 From f31fdf510531333dea95f0a92e6eaa1c3a7541e2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Mar 2009 04:10:00 -0700 Subject: xen/privcmd: make sure vma is ours before doing anything to it Test vma->vm_ops is our operations to make sure we created it. We don't want to stomp on other random vmas. 
[ Impact: bugfix; prevent ioctl from affecting other mappings ] Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/privcmd.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 6b602f50536..80526afd306 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -310,6 +310,8 @@ static int mmap_return_errors(void *data, void *state) return 0; } +static struct vm_operations_struct privcmd_vm_ops; + static long privcmd_ioctl_mmap_batch(void __user *udata) { int ret; @@ -341,6 +343,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) vma = find_vma(mm, m.addr); ret = -EINVAL; if (!vma || + vma->vm_ops != &privcmd_vm_ops || (m.addr != vma->vm_start) || ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || !privcmd_enforce_singleshot_mapping(vma)) { -- cgit v1.2.3 From f020e2905166e12f9a8f109fe968cb5a9db887e9 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 20 May 2009 15:42:14 +0100 Subject: privcmd: MMAPBATCH: Fix error handling/reporting On error IOCTL_PRIVCMD_MMAPBATCH is expected to set the top nibble of the affected MFN and return 0. Currently it leaves the MFN unmodified and returns the number of failures. Therefore: - reimplement remap_domain_mfn_range() using direct HYPERVISOR_mmu_update() calls and small batches. The xen_set_domain_pte() interface does not report errors and, since some failures are expected/normal, using the multicall infrastructure is too noisy. - return 0 as expected - write back the updated MFN list to mmapbatch->arr rather than over mmapbatch itself, which smashed the caller's stack. - remap_domain_mfn_range can be static. With this change I am able to start an HVM domain. 
Signed-off-by: Ian Campbell Cc: Jeremy Fitzhardinge Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/privcmd.c | 56 +++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 80526afd306..438223ae0fc 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -32,14 +32,16 @@ #include #include +#define REMAP_BATCH_SIZE 16 + #ifndef HAVE_ARCH_PRIVCMD_MMAP static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); #endif struct remap_data { unsigned long mfn; - unsigned domid; pgprot_t prot; + struct mmu_update *mmu_update; }; static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, @@ -48,17 +50,23 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, struct remap_data *rmd = data; pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); - xen_set_domain_pte(ptep, pte, rmd->domid); + rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; + rmd->mmu_update->val = pte_val_ma(pte); + rmd->mmu_update++; return 0; } -int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, - unsigned long mfn, unsigned long size, - pgprot_t prot, unsigned domid) +static int remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long mfn, int nr, + pgprot_t prot, unsigned domid) { struct remap_data rmd; - int err; + struct mmu_update mmu_update[REMAP_BATCH_SIZE]; + int batch; + unsigned long range; + int err = 0; prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); @@ -66,10 +74,29 @@ int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, rmd.mfn = mfn; rmd.prot = prot; - rmd.domid = domid; - err = apply_to_page_range(vma->vm_mm, addr, size, - remap_area_mfn_pte_fn, &rmd); + while (nr) { + batch = min(REMAP_BATCH_SIZE, nr); + range = (unsigned long)batch << PAGE_SHIFT; + + rmd.mmu_update = mmu_update; + err = 
apply_to_page_range(vma->vm_mm, addr, range, + remap_area_mfn_pte_fn, &rmd); + if (err) + goto out; + + err = -EFAULT; + if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + goto out; + + nr -= batch; + addr += range; + } + + err = 0; +out: + + flush_tlb_all(); return err; } @@ -158,7 +185,7 @@ static int traverse_pages(unsigned nelem, size_t size, { void *pagedata; unsigned pageidx; - int ret; + int ret = 0; BUG_ON(size > PAGE_SIZE); @@ -208,8 +235,7 @@ static int mmap_mfn_range(void *data, void *state) rc = remap_domain_mfn_range(vma, msg->va & PAGE_MASK, - msg->mfn, - msg->npages << PAGE_SHIFT, + msg->mfn, msg->npages, vma->vm_page_prot, st->domain); if (rc < 0) @@ -290,7 +316,7 @@ static int mmap_batch_fn(void *data, void *state) struct mmap_batch_state *st = state; if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, - *mfnp, PAGE_SIZE, + *mfnp, 1, st->vma->vm_page_prot, st->domain) < 0) { *mfnp |= 0xf0000000U; st->err++; @@ -362,9 +388,9 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) up_write(&mm->mmap_sem); if (state.err > 0) { - ret = state.err; + ret = 0; - state.user = udata; + state.user = m.arr; traverse_pages(m.num, sizeof(xen_pfn_t), &pagelist, mmap_return_errors, &state); -- cgit v1.2.3 From de1ef2065c4675ab1062ebc8d1cb6c5f42b61d04 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 21 May 2009 10:09:46 +0100 Subject: xen/privcmd: move remap_domain_mfn_range() to core xen code and export. This allows xenfs to be built as a module, previously it required flush_tlb_all and arbitrary_virt_to_machine to be exported. 
Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/privcmd.c | 81 +++++---------------------------------------- 1 file changed, 8 insertions(+), 73 deletions(-) (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 438223ae0fc..f80be7f6eb9 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -31,76 +31,12 @@ #include #include #include - -#define REMAP_BATCH_SIZE 16 +#include #ifndef HAVE_ARCH_PRIVCMD_MMAP static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); #endif -struct remap_data { - unsigned long mfn; - pgprot_t prot; - struct mmu_update *mmu_update; -}; - -static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) -{ - struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); - - rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; - rmd->mmu_update->val = pte_val_ma(pte); - rmd->mmu_update++; - - return 0; -} - -static int remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - unsigned long mfn, int nr, - pgprot_t prot, unsigned domid) -{ - struct remap_data rmd; - struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - int batch; - unsigned long range; - int err = 0; - - prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; - - rmd.mfn = mfn; - rmd.prot = prot; - - while (nr) { - batch = min(REMAP_BATCH_SIZE, nr); - range = (unsigned long)batch << PAGE_SHIFT; - - rmd.mmu_update = mmu_update; - err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_mfn_pte_fn, &rmd); - if (err) - goto out; - - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) - goto out; - - nr -= batch; - addr += range; - } - - err = 0; -out: - - flush_tlb_all(); - - return err; -} - static long privcmd_ioctl_hypercall(void __user *udata) { struct privcmd_hypercall hypercall; 
@@ -233,11 +169,11 @@ static int mmap_mfn_range(void *data, void *state) ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) return -EINVAL; - rc = remap_domain_mfn_range(vma, - msg->va & PAGE_MASK, - msg->mfn, msg->npages, - vma->vm_page_prot, - st->domain); + rc = xen_remap_domain_mfn_range(vma, + msg->va & PAGE_MASK, + msg->mfn, msg->npages, + vma->vm_page_prot, + st->domain); if (rc < 0) return rc; @@ -315,9 +251,8 @@ static int mmap_batch_fn(void *data, void *state) xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, - *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { + if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain) < 0) { *mfnp |= 0xf0000000U; st->err++; } -- cgit v1.2.3 From 9387377eb79a44f453fd27c3d00a2e5da587e369 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 6 Oct 2010 08:51:32 -0700 Subject: xen/privcmd: make privcmd visible in domU It has its uses in a domU as well as dom0. Xen will prevent an unprivileged domain from doing anything untoward. 
Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/Makefile | 4 ++-- drivers/xen/xenfs/super.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/xen/xenfs') diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile index 4a0be9a82af..4fde9440fe1 100644 --- a/drivers/xen/xenfs/Makefile +++ b/drivers/xen/xenfs/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_XENFS) += xenfs.o -xenfs-y = super.o xenbus.o -xenfs-$(CONFIG_XEN_DOM0) += xenstored.o privcmd.o +xenfs-y = super.o xenbus.o privcmd.o +xenfs-$(CONFIG_XEN_DOM0) += xenstored.o diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index afaa6ede016..984891e9a39 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -101,6 +101,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) [1] = {}, { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, { "capabilities", &capabilities_file_ops, S_IRUGO }, + { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR }, {""}, }; int rc; @@ -114,8 +115,6 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR); xenfs_create_file(sb, sb->s_root, "xsd_port", &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR); - xenfs_create_file(sb, sb->s_root, "privcmd", - &privcmd_file_ops, NULL, S_IRUSR|S_IWUSR); } return rc; -- cgit v1.2.3