aboutsummaryrefslogtreecommitdiff
path: root/drivers/xen
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-04 17:45:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-04 17:45:39 -0700
commitcf39c8e5352b4fb9efedfe7e9acb566a85ed847c (patch)
tree58d9f4b8c2ac48134264f1480cfc35b36462c4f4 /drivers/xen
parent3398d252a4da80c47fe9b802184fa0a792387732 (diff)
parent23b7eaf8220721892975610dd0ae5c846a34dcb4 (diff)
Merge tag 'stable/for-linus-3.12-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from Konrad Rzeszutek Wilk: "A couple of features and a ton of bug-fixes. There is also some maintership changes. Jeremy is enjoying the full-time work at the startup and as much as he would love to help - he can't find the time. I have a bunch of other things that I promised to work on - paravirt diet, get SWIOTLB working everywhere, etc, but haven't been able to find the time. As such both David Vrabel and Boris Ostrovsky have graciously volunteered to help with the maintership role. They will keep the lid on regressions, bug-fixes, etc. I will be in the background to help - but eventually there will be less of me doing the Xen GIT pulls and more of them. Stefano is still doing the ARM/ARM64 and will continue on doing so. Features: - Xen Trusted Platform Module (TPM) frontend driver - with the backend in MiniOS. - Scalability improvements in event channel. - Two extra Xen co-maintainers (David, Boris) and one going away (Jeremy) Bug-fixes: - Make the 1:1 mapping work during early bootup on selective regions. - Add scratch page to balloon driver to deal with unexpected code still holding on stale pages. - Allow NMIs on PV guests (64-bit only) - Remove unnecessary TLB flush in M2P code. - Fixes duplicate callbacks in Xen granttable code. - Fixes in PRIVCMD_MMAPBATCH ioctls to allow retries - Fix for events being lost due to rescheduling on different VCPUs. - More documentation" * tag 'stable/for-linus-3.12-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (23 commits) hvc_xen: Remove unnecessary __GFP_ZERO from kzalloc drivers/xen-tpmfront: Fix compile issue with missing option. xen/balloon: don't set P2M entry for auto translated guest xen/evtchn: double free on error Xen: Fix retry calls into PRIVCMD_MMAPBATCH*. xen/pvhvm: Initialize xen panic handler for PVHVM guests xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping xen: fix ARM build after 6efa20e4 MAINTAINERS: Remove Jeremy from the Xen subsystem. xen/events: document behaviour when scanning the start word for events x86/xen: during early setup, only 1:1 map the ISA region x86/xen: disable premption when enabling local irqs swiotlb-xen: replace dma_length with sg_dma_len() macro swiotlb: replace dma_length with sg_dma_len() macro xen/balloon: set a mapping for ballooned out pages xen/evtchn: improve scalability by using per-user locks xen/p2m: avoid unneccesary TLB flush in m2p_remove_override() MAINTAINERS: Add in two extra co-maintainers of the Xen tree. MAINTAINERS: Update the Xen subsystem's with proper mailing list. xen: replace strict_strtoul() with kstrtoul() ...
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/balloon.c74
-rw-r--r--drivers/xen/events.c30
-rw-r--r--drivers/xen/evtchn.c191
-rw-r--r--drivers/xen/gntdev.c11
-rw-r--r--drivers/xen/grant-table.c13
-rw-r--r--drivers/xen/privcmd.c83
-rw-r--r--drivers/xen/swiotlb-xen.c8
-rw-r--r--drivers/xen/xen-selfballoon.c54
8 files changed, 322 insertions, 142 deletions
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97697b..3101cf6daf5 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -38,6 +38,7 @@
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
@@ -52,6 +53,7 @@
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
+
/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
@@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
if (xen_pv_domain() && !PageHighMem(page)) {
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
+ pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+ PAGE_KERNEL_RO), 0);
BUG_ON(ret);
}
#endif
@@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ unsigned long p;
+ struct page *pg;
+ pg = __get_cpu_var(balloon_scratch_page);
+ p = page_to_pfn(pg);
+ __set_phys_to_machine(pfn, pfn_to_mfn(p));
+ }
balloon_append(pfn_to_page(pfn));
}
@@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work)
mutex_unlock(&balloon_mutex);
}
+struct page *get_balloon_scratch_page(void)
+{
+ struct page *ret = get_cpu_var(balloon_scratch_page);
+ BUG_ON(ret == NULL);
+ return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+ put_cpu_var(balloon_scratch_page);
+}
+
/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
@@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn,
}
}
+static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (per_cpu(balloon_scratch_page, cpu) != NULL)
+ break;
+ per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+ if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+ pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+ return NOTIFY_BAD;
+ }
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
+ .notifier_call = balloon_cpu_notify,
+};
+
static int __init balloon_init(void)
{
- int i;
+ int i, cpu;
if (!xen_domain())
return -ENODEV;
+ for_each_online_cpu(cpu)
+ {
+ per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+ if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+ pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+ return -ENOMEM;
+ }
+ }
+ register_cpu_notifier(&balloon_cpu_notifier);
+
pr_info("Initialising balloon driver\n");
balloon_stats.current_pages = xen_pv_domain()
@@ -616,4 +673,15 @@ static int __init balloon_init(void)
subsys_initcall(balloon_init);
+static int __init balloon_clear(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(balloon_scratch_page, cpu) = NULL;
+
+ return 0;
+}
+early_initcall(balloon_clear);
+
MODULE_LICENSE("GPL");
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 5e8be462aed..4035e833ea2 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,7 @@
#include <xen/interface/hvm/params.h>
#include <xen/interface/physdev.h>
#include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
#include <asm/hw_irq.h>
/*
@@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put);
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
- int irq = per_cpu(ipi_to_irq, cpu)[vector];
+ int irq;
+
+#ifdef CONFIG_X86
+ if (unlikely(vector == XEN_NMI_VECTOR)) {
+ int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+ if (rc < 0)
+ printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
+ return;
+ }
+#endif
+ irq = per_cpu(ipi_to_irq, cpu)[vector];
BUG_ON(irq < 0);
notify_remote_via_irq(irq);
}
@@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void)
pending_bits = active_evtchns(cpu, s, word_idx);
bit_idx = 0; /* usually scan entire word from start */
+ /*
+ * We scan the starting word in two parts.
+ *
+ * 1st time: start in the middle, scanning the
+ * upper bits.
+ *
+ * 2nd time: scan the whole word (not just the
+ * parts skipped in the first pass) -- if an
+ * event in the previously scanned bits is
+ * pending again it would just be scanned on
+ * the next loop anyway.
+ */
if (word_idx == start_word_idx) {
- /* We scan the starting word in two parts */
if (i == 0)
- /* 1st time: start in the middle */
bit_idx = start_bit_idx;
- else
- /* 2nd time: mask bits done already */
- bit_idx &= (1UL << start_bit_idx) - 1;
}
do {
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b6165e047f4..8b3a69a06c3 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -57,6 +57,7 @@
struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */
+ struct rb_root evtchns;
/* Notification ring, accessed via /dev/xen/evtchn. */
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -64,6 +65,7 @@ struct per_user_data {
evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
struct mutex ring_cons_mutex; /* protect against concurrent readers */
+ spinlock_t ring_prod_lock; /* product against concurrent interrupts */
/* Processes wait on this queue when ring is empty. */
wait_queue_head_t evtchn_wait;
@@ -71,54 +73,79 @@ struct per_user_data {
const char *name;
};
-/*
- * Who's bound to each port? This is logically an array of struct
- * per_user_data *, but we encode the current enabled-state in bit 0.
- */
-static unsigned long *port_user;
-static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+struct user_evtchn {
+ struct rb_node node;
+ struct per_user_data *user;
+ unsigned port;
+ bool enabled;
+};
-static inline struct per_user_data *get_port_user(unsigned port)
+static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
- return (struct per_user_data *)(port_user[port] & ~1);
-}
+ struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
-static inline void set_port_user(unsigned port, struct per_user_data *u)
-{
- port_user[port] = (unsigned long)u;
+ while (*new) {
+ struct user_evtchn *this;
+
+ this = container_of(*new, struct user_evtchn, node);
+
+ parent = *new;
+ if (this->port < evtchn->port)
+ new = &((*new)->rb_left);
+ else if (this->port > evtchn->port)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&evtchn->node, parent, new);
+ rb_insert_color(&evtchn->node, &u->evtchns);
+
+ return 0;
}
-static inline bool get_port_enabled(unsigned port)
+static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
- return port_user[port] & 1;
+ rb_erase(&evtchn->node, &u->evtchns);
+ kfree(evtchn);
}
-static inline void set_port_enabled(unsigned port, bool enabled)
+static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
{
- if (enabled)
- port_user[port] |= 1;
- else
- port_user[port] &= ~1;
+ struct rb_node *node = u->evtchns.rb_node;
+
+ while (node) {
+ struct user_evtchn *evtchn;
+
+ evtchn = container_of(node, struct user_evtchn, node);
+
+ if (evtchn->port < port)
+ node = node->rb_left;
+ else if (evtchn->port > port)
+ node = node->rb_right;
+ else
+ return evtchn;
+ }
+ return NULL;
}
static irqreturn_t evtchn_interrupt(int irq, void *data)
{
- unsigned int port = (unsigned long)data;
- struct per_user_data *u;
-
- spin_lock(&port_user_lock);
-
- u = get_port_user(port);
+ struct user_evtchn *evtchn = data;
+ struct per_user_data *u = evtchn->user;
- WARN(!get_port_enabled(port),
+ WARN(!evtchn->enabled,
"Interrupt for port %d, but apparently not enabled; per-user %p\n",
- port, u);
+ evtchn->port, u);
disable_irq_nosync(irq);
- set_port_enabled(port, false);
+ evtchn->enabled = false;
+
+ spin_lock(&u->ring_prod_lock);
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
- u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
} else
u->ring_overflow = 1;
- spin_unlock(&port_user_lock);
+ spin_unlock(&u->ring_prod_lock);
return IRQ_HANDLED;
}
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
if (copy_from_user(kbuf, buf, count) != 0)
goto out;
- spin_lock_irq(&port_user_lock);
+ mutex_lock(&u->bind_mutex);
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
unsigned port = kbuf[i];
+ struct user_evtchn *evtchn;
- if (port < NR_EVENT_CHANNELS &&
- get_port_user(port) == u &&
- !get_port_enabled(port)) {
- set_port_enabled(port, true);
+ evtchn = find_evtchn(u, port);
+ if (evtchn && !evtchn->enabled) {
+ evtchn->enabled = true;
enable_irq(irq_from_evtchn(port));
}
}
- spin_unlock_irq(&port_user_lock);
+ mutex_unlock(&u->bind_mutex);
rc = count;
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
static int evtchn_bind_to_user(struct per_user_data *u, int port)
{
+ struct user_evtchn *evtchn;
+ struct evtchn_close close;
int rc = 0;
/*
@@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
* interrupt handler yet, and our caller has already
* serialized bind operations.)
*/
- BUG_ON(get_port_user(port) != NULL);
- set_port_user(port, u);
- set_port_enabled(port, true); /* start enabled */
+
+ evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
+ if (!evtchn)
+ return -ENOMEM;
+
+ evtchn->user = u;
+ evtchn->port = port;
+ evtchn->enabled = true; /* start enabled */
+
+ rc = add_evtchn(u, evtchn);
+ if (rc < 0)
+ goto err;
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
- u->name, (void *)(unsigned long)port);
- if (rc >= 0)
- rc = evtchn_make_refcounted(port);
- else {
- /* bind failed, should close the port now */
- struct evtchn_close close;
- close.port = port;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
- set_port_user(port, NULL);
- }
+ u->name, evtchn);
+ if (rc < 0)
+ goto err;
+ rc = evtchn_make_refcounted(port);
+ return rc;
+
+err:
+ /* bind failed, should close the port now */
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+ del_evtchn(u, evtchn);
return rc;
}
-static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+static void evtchn_unbind_from_user(struct per_user_data *u,
+ struct user_evtchn *evtchn)
{
- int irq = irq_from_evtchn(port);
+ int irq = irq_from_evtchn(evtchn->port);
BUG_ON(irq < 0);
- unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+ unbind_from_irqhandler(irq, evtchn);
- set_port_user(port, NULL);
+ del_evtchn(u, evtchn);
}
static long evtchn_ioctl(struct file *file,
@@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_UNBIND: {
struct ioctl_evtchn_unbind unbind;
+ struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@@ -380,29 +421,27 @@ static long evtchn_ioctl(struct file *file,
break;
rc = -ENOTCONN;
- if (get_port_user(unbind.port) != u)
+ evtchn = find_evtchn(u, unbind.port);
+ if (!evtchn)
break;
disable_irq(irq_from_evtchn(unbind.port));
-
- evtchn_unbind_from_user(u, unbind.port);
-
+ evtchn_unbind_from_user(u, evtchn);
rc = 0;
break;
}
case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify;
+ struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&notify, uarg, sizeof(notify)))
break;
- if (notify.port >= NR_EVENT_CHANNELS) {
- rc = -EINVAL;
- } else if (get_port_user(notify.port) != u) {
- rc = -ENOTCONN;
- } else {
+ rc = -ENOTCONN;
+ evtchn = find_evtchn(u, notify.port);
+ if (evtchn) {
notify_remote_via_evtchn(notify.port);
rc = 0;
}
@@ -412,9 +451,9 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_RESET: {
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
- spin_lock_irq(&port_user_lock);
+ spin_lock_irq(&u->ring_prod_lock);
u->ring_cons = u->ring_prod = u->ring_overflow = 0;
- spin_unlock_irq(&port_user_lock);
+ spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
break;
@@ -473,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
mutex_init(&u->bind_mutex);
mutex_init(&u->ring_cons_mutex);
+ spin_lock_init(&u->ring_prod_lock);
filp->private_data = u;
@@ -481,15 +521,15 @@ static int evtchn_open(struct inode *inode, struct file *filp)
static int evtchn_release(struct inode *inode, struct file *filp)
{
- int i;
struct per_user_data *u = filp->private_data;
+ struct rb_node *node;
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (get_port_user(i) != u)
- continue;
+ while ((node = u->evtchns.rb_node)) {
+ struct user_evtchn *evtchn;
- disable_irq(irq_from_evtchn(i));
- evtchn_unbind_from_user(get_port_user(i), i);
+ evtchn = rb_entry(node, struct user_evtchn, node);
+ disable_irq(irq_from_evtchn(evtchn->port));
+ evtchn_unbind_from_user(u, evtchn);
}
free_page((unsigned long)u->ring);
@@ -523,12 +563,6 @@ static int __init evtchn_init(void)
if (!xen_domain())
return -ENODEV;
- port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
- if (port_user == NULL)
- return -ENOMEM;
-
- spin_lock_init(&port_user_lock);
-
/* Create '/dev/xen/evtchn'. */
err = misc_register(&evtchn_miscdev);
if (err != 0) {
@@ -543,9 +577,6 @@ static int __init evtchn_init(void)
static void __exit evtchn_cleanup(void)
{
- kfree(port_user);
- port_user = NULL;
-
misc_deregister(&evtchn_miscdev);
}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eab5427c75f..e41c79c986e 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map)
* with find_grant_ptes.
*/
for (i = 0; i < map->count; i++) {
- unsigned level;
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
- pte_t *ptep;
- u64 pte_maddr = 0;
BUG_ON(PageHighMem(map->pages[i]));
- ptep = lookup_address(address, &level);
- pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
- gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
- map->flags |
- GNTMAP_host_map |
- GNTMAP_contains_pte,
+ gnttab_set_map_op(&map->kmap_ops[i], address,
+ map->flags | GNTMAP_host_map,
map->grants[i].ref,
map->grants[i].domid);
}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 04cdeb8e371..c4d2298893b 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, u16 count)
{
unsigned long flags;
+ struct gnttab_free_callback *cb;
+
spin_lock_irqsave(&gnttab_list_lock, flags);
- if (callback->next)
- goto out;
+
+ /* Check if the callback is already on the list */
+ cb = gnttab_free_callback_list;
+ while (cb) {
+ if (cb == callback)
+ goto out;
+ cb = cb->next;
+ }
+
callback->fn = fn;
callback->arg = arg;
callback->count = count;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f8e5dd701ec..8e74590fa1b 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -43,9 +43,10 @@ MODULE_LICENSE("GPL");
#define PRIV_VMA_LOCKED ((void *)1)
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
+static int privcmd_vma_range_is_mapped(
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long nr_pages);
static long privcmd_ioctl_hypercall(void __user *udata)
{
@@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata)
vma = find_vma(mm, msg->va);
rc = -EINVAL;
- if (!vma || (msg->va != vma->vm_start) ||
- !privcmd_enforce_singleshot_mapping(vma))
+ if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
goto out_up;
+ vma->vm_private_data = PRIV_VMA_LOCKED;
}
state.va = vma->vm_start;
@@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
kfree(pages);
return -ENOMEM;
}
- BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+ BUG_ON(vma->vm_private_data != NULL);
vma->vm_private_data = pages;
return 0;
@@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
vma = find_vma(mm, m.addr);
if (!vma ||
- vma->vm_ops != &privcmd_vm_ops ||
- (m.addr != vma->vm_start) ||
- ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
- !privcmd_enforce_singleshot_mapping(vma)) {
- up_write(&mm->mmap_sem);
+ vma->vm_ops != &privcmd_vm_ops) {
ret = -EINVAL;
- goto out;
+ goto out_unlock;
}
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- ret = alloc_empty_pages(vma, m.num);
- if (ret < 0) {
- up_write(&mm->mmap_sem);
- goto out;
+
+ /*
+ * Caller must either:
+ *
+ * Map the whole VMA range, which will also allocate all the
+ * pages required for the auto_translated_physmap case.
+ *
+ * Or
+ *
+ * Map unmapped holes left from a previous map attempt (e.g.,
+ * because those foreign frames were previously paged out).
+ */
+ if (vma->vm_private_data == NULL) {
+ if (m.addr != vma->vm_start ||
+ m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = alloc_empty_pages(vma, m.num);
+ if (ret < 0)
+ goto out_unlock;
+ } else
+ vma->vm_private_data = PRIV_VMA_LOCKED;
+ } else {
+ if (m.addr < vma->vm_start ||
+ m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
+ ret = -EINVAL;
+ goto out_unlock;
}
}
@@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
out:
free_page_list(&pagelist);
-
return ret;
+
+out_unlock:
+ up_write(&mm->mmap_sem);
+ goto out;
}
static long privcmd_ioctl(struct file *file,
@@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+/*
+ * For MMAPBATCH*. This allows asserting the singleshot mapping
+ * on a per pfn/pte basis. Mapping calls that fail with ENOENT
+ * can be then retried until success.
+ */
+static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
+ unsigned long addr, void *data)
+{
+ return pte_none(*pte) ? 0 : -EBUSY;
+}
+
+static int privcmd_vma_range_is_mapped(
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long nr_pages)
{
- return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
+ return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
+ is_mapped_fn, NULL) != 0;
}
const struct file_operations xen_privcmd_fops = {
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index aadffcf7db9..1b2277c311d 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
to do proper error handling. */
xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
attrs);
- sgl[0].dma_length = 0;
+ sg_dma_len(sgl) = 0;
return DMA_ERROR_CODE;
}
sg->dma_address = xen_phys_to_bus(map);
} else
sg->dma_address = dev_addr;
- sg->dma_length = sg->length;
+ sg_dma_len(sg) = sg->length;
}
return nelems;
}
@@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i)
- xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+ xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
for_each_sg(sgl, sg, nelems, i)
xen_swiotlb_sync_single(hwdev, sg->dma_address,
- sg->dma_length, dir, target);
+ sg_dma_len(sg), dir, target);
}
void
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 02817a85f87..21e18c18c7a 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &tmp);
- if (err || ((tmp != 0) && (tmp != 1)))
+ err = kstrtoul(buf, 10, &tmp);
+ if (err)
+ return err;
+ if ((tmp != 0) && (tmp != 1))
return -EINVAL;
xen_selfballooning_enabled = !!tmp;
@@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_interval = val;
return count;
@@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_downhysteresis = val;
return count;
@@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_uphysteresis = val;
return count;
@@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_min_usable_mb = val;
return count;
@@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_reserved_mb = val;
return count;
@@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &tmp);
- if (err || ((tmp != 0) && (tmp != 1)))
+ err = kstrtoul(buf, 10, &tmp);
+ if (err)
+ return err;
+ if ((tmp != 0) && (tmp != 1))
return -EINVAL;
frontswap_selfshrinking = !!tmp;
if (!was_enabled && !xen_selfballooning_enabled &&
@@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
frontswap_inertia = val;
frontswap_inertia_counter = val;
@@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
frontswap_hysteresis = val;
return count;