aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2014-02-26 22:31:10 +0000
committerPeter Maydell <peter.maydell@linaro.org>2014-02-26 22:31:10 +0000
commit6f6831f61a44fde832ee6fab0cc5632de34cf6b7 (patch)
tree47e59106156748bef6ef87537ad230ae7bec6044
parentbc3fbad816961a5b4a7f51a37472c4ac01effb92 (diff)
parent41310c68781d742fa9bbfd5fcb1df9b7f23f5759 (diff)
Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20140225' into staging
migration/next for 20140225 # gpg: Signature made Tue 25 Feb 2014 14:04:31 GMT using RSA key ID 5872D723 # gpg: Can't check signature: public key not found * remotes/juanquintela/tags/migration/20140225: rdma: rename 'x-rdma' => 'rdma' Fix two XBZRLE corruption issues Fix vmstate_info_int32_le comparison/assign qemu_file: use fwrite() correctly Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--arch_init.c64
-rw-r--r--docs/rdma.txt24
-rw-r--r--include/migration/page_cache.h2
-rw-r--r--migration-rdma.c2
-rw-r--r--migration.c6
-rw-r--r--page_cache.c2
-rw-r--r--qapi-schema.json7
-rw-r--r--qemu-file.c9
-rw-r--r--vmstate.c15
9 files changed, 84 insertions, 47 deletions
diff --git a/arch_init.c b/arch_init.c
index 80574a090c..fe1727922c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -122,7 +122,6 @@ static void check_guest_throttling(void);
#define RAM_SAVE_FLAG_XBZRLE 0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
-
static struct defconfig_file {
const char *filename;
/* Indicates it is an user config file (disabled by -no-user-config) */
@@ -133,6 +132,7 @@ static struct defconfig_file {
{ NULL }, /* end of list */
};
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
int qemu_read_default_config_files(bool userconfig)
{
@@ -273,6 +273,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
return size;
}
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+ if (ram_bulk_stage || !migrate_use_xbzrle()) {
+ return;
+ }
+
+ /* We don't care if this fails to allocate a new cache page
+ * as long as it updated an old one */
+ cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+}
+
#define ENCODING_FLAG_XBZRLE 0x1
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -329,18 +357,6 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
return bytes_sent;
}
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
ram_addr_t start)
@@ -512,6 +528,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
} else {
int ret;
uint8_t *p;
+ bool send_async = true;
int cont = (block == last_sent_block) ?
RAM_SAVE_FLAG_CONTINUE : 0;
@@ -522,6 +539,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
ret = ram_control_save_page(f, block->offset,
offset, TARGET_PAGE_SIZE, &bytes_sent);
+ current_addr = block->offset + offset;
if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
if (ret != RAM_SAVE_CONTROL_DELAYED) {
if (bytes_sent > 0) {
@@ -536,19 +554,35 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
RAM_SAVE_FLAG_COMPRESS);
qemu_put_byte(f, 0);
bytes_sent++;
+ /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+ * page would be stale
+ */
+ xbzrle_cache_zero_page(current_addr);
} else if (!ram_bulk_stage && migrate_use_xbzrle()) {
- current_addr = block->offset + offset;
bytes_sent = save_xbzrle_page(f, p, current_addr, block,
offset, cont, last_stage);
if (!last_stage) {
+ /* We must send exactly what's in the xbzrle cache
+ * even if the page wasn't xbzrle compressed, so that
+ * it's right next time.
+ */
p = get_cached_data(XBZRLE.cache, current_addr);
+
+ /* Can't send this cached data async, since the cache page
+ * might get updated before it gets to the wire
+ */
+ send_async = false;
}
}
/* XBZRLE overflow or normal page */
if (bytes_sent == -1) {
bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
- qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+ if (send_async) {
+ qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+ } else {
+ qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+ }
bytes_sent += TARGET_PAGE_SIZE;
acct_info.norm_pages++;
}
diff --git a/docs/rdma.txt b/docs/rdma.txt
index 2aca63bd72..1f5d9e9fe4 100644
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -66,7 +66,7 @@ bulk-phase round of the migration and can be enabled for extremely
high-performance RDMA hardware using the following command:
QEMU Monitor Command:
-$ migrate_set_capability x-rdma-pin-all on # disabled by default
+$ migrate_set_capability rdma-pin-all on # disabled by default
Performing this action will cause all 8GB to be pinned, so if that's
not what you want, then please ignore this step altogether.
@@ -93,12 +93,12 @@ $ migrate_set_speed 40g # or whatever is the MAX of your RDMA device
Next, on the destination machine, add the following to the QEMU command line:
-qemu ..... -incoming x-rdma:host:port
+qemu ..... -incoming rdma:host:port
Finally, perform the actual migration on the source machine:
QEMU Monitor Command:
-$ migrate -d x-rdma:host:port
+$ migrate -d rdma:host:port
PERFORMANCE
===========
@@ -120,8 +120,8 @@ For example, in the same 8GB RAM example with all 8GB of memory in
active use and the VM itself is completely idle using the same 40 gbps
infiniband link:
-1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
+1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
+2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
These numbers would of course scale up to whatever size virtual machine
you have to migrate using RDMA.
@@ -407,18 +407,14 @@ socket is broken during a non-RDMA based migration.
TODO:
=====
-1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
- renamed to 'rdma' after the experimental phase of this work has
- completed upstream.
-2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
+1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
are not compatible with infinband memory pinning and will result in
an aborted migration (but with the source VM left unaffected).
-3. Use of the recent /proc/<pid>/pagemap would likely speed up
+2. Use of the recent /proc/<pid>/pagemap would likely speed up
the use of KSM and ballooning while using RDMA.
-4. Also, some form of balloon-device usage tracking would also
+3. Also, some form of balloon-device usage tracking would also
help alleviate some issues.
-5. Move UNREGISTER requests to a separate thread.
-6. Use LRU to provide more fine-grained direction of UNREGISTER
+4. Use LRU to provide more fine-grained direction of UNREGISTER
requests for unpinning memory in an overcommitted environment.
-7. Expose UNREGISTER support to the user by way of workload-specific
+5. Expose UNREGISTER support to the user by way of workload-specific
hints about application behavior.
diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h
index d156f0d398..2d5ce2dd7a 100644
--- a/include/migration/page_cache.h
+++ b/include/migration/page_cache.h
@@ -66,7 +66,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr);
* @addr: page address
* @pdata: pointer to the page
*/
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata);
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);
/**
* cache_resize: resize the page cache. In case of size reduction the extra
diff --git a/migration-rdma.c b/migration-rdma.c
index f94f3b4e3a..eeb4302215 100644
--- a/migration-rdma.c
+++ b/migration-rdma.c
@@ -3412,7 +3412,7 @@ void rdma_start_outgoing_migration(void *opaque,
}
ret = qemu_rdma_source_init(rdma, &local_err,
- s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL]);
+ s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
if (ret) {
goto err;
diff --git a/migration.c b/migration.c
index 25add6f9e2..14235b280a 100644
--- a/migration.c
+++ b/migration.c
@@ -82,7 +82,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
if (strstart(uri, "tcp:", &p))
tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
- else if (strstart(uri, "x-rdma:", &p))
+ else if (strstart(uri, "rdma:", &p))
rdma_start_incoming_migration(p, errp);
#endif
#if !defined(WIN32)
@@ -438,7 +438,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
if (strstart(uri, "tcp:", &p)) {
tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
- } else if (strstart(uri, "x-rdma:", &p)) {
+ } else if (strstart(uri, "rdma:", &p)) {
rdma_start_outgoing_migration(s, p, &local_err);
#endif
#if !defined(WIN32)
@@ -532,7 +532,7 @@ bool migrate_rdma_pin_all(void)
s = migrate_get_current();
- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
}
bool migrate_auto_converge(void)
diff --git a/page_cache.c b/page_cache.c
index 3ef6ee7ad2..b033681a93 100644
--- a/page_cache.c
+++ b/page_cache.c
@@ -150,7 +150,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr)
return cache_get_by_addr(cache, addr)->it_data;
}
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata)
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
{
CacheItem *it = NULL;
diff --git a/qapi-schema.json b/qapi-schema.json
index fcb2280053..ac8ad24966 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -751,10 +751,9 @@
# This feature allows us to minimize migration traffic for certain work
# loads, by sending compressed difference of the pages
#
-# @x-rdma-pin-all: Controls whether or not the entire VM memory footprint is
+# @rdma-pin-all: Controls whether or not the entire VM memory footprint is
# mlock()'d on demand or all at once. Refer to docs/rdma.txt for usage.
-# Disabled by default. Experimental: may (or may not) be renamed after
-# further testing is complete. (since 1.6)
+# Disabled by default. (since 2.0)
#
# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
# essentially saves 1MB of zeroes per block on the wire. Enabling requires
@@ -768,7 +767,7 @@
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
- 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
+ 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks'] }
##
# @MigrationCapabilityStatus
diff --git a/qemu-file.c b/qemu-file.c
index 9473b674ba..f074af15c3 100644
--- a/qemu-file.c
+++ b/qemu-file.c
@@ -100,7 +100,14 @@ static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos,
int size)
{
QEMUFileStdio *s = opaque;
- return fwrite(buf, 1, size, s->stdio_file);
+ int res;
+
+ res = fwrite(buf, 1, size, s->stdio_file);
+
+ if (res != size) {
+ return -EIO; /* fake errno value */
+ }
+ return res;
}
static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
diff --git a/vmstate.c b/vmstate.c
index 284b080f46..d1f5eb0e6a 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -321,23 +321,24 @@ const VMStateInfo vmstate_info_int32_equal = {
.put = put_int32,
};
-/* 32 bit int. See that the received value is the less or the same
- than the one in the field */
+/* 32 bit int. Check that the received value is less than or equal to
+ the one in the field */
static int get_int32_le(QEMUFile *f, void *pv, size_t size)
{
- int32_t *old = pv;
- int32_t new;
- qemu_get_sbe32s(f, &new);
+ int32_t *cur = pv;
+ int32_t loaded;
+ qemu_get_sbe32s(f, &loaded);
- if (*old <= new) {
+ if (loaded <= *cur) {
+ *cur = loaded;
return 0;
}
return -EINVAL;
}
const VMStateInfo vmstate_info_int32_le = {
- .name = "int32 equal",
+ .name = "int32 le",
.get = get_int32_le,
.put = put_int32,
};