Try not to exceed max downtime on stage3
Move to stage3 only when remaining work can be done below max downtime.
Use qemu_get_clock_ns for measuring read performance.

Signed-off-by: Liran Schour <lirans@il.ibm.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
liranschour authored and Anthony Liguori committed Feb 9, 2010
1 parent aaa0eb7 commit 889ae39
Showing 1 changed file with 70 additions and 9 deletions.
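
The heart of the change: stay in stage 2 until the dirty blocks still
outstanding could be flushed within the allowed downtime, estimated from the
measured read bandwidth. Below is a minimal standalone sketch of that
heuristic, not QEMU code: the 1 MiB chunk size, the 30 ms budget, and the
sample figures in main() are hypothetical stand-ins for what QEMU gets from
bdrv_get_dirty_count() and the timed AIO reads.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE (1 << 20)            /* assume 1 MiB dirty chunks */

static int reads;                       /* completed timed reads */
static long double total_time;          /* sum of read latencies, in ns */

/* bytes moved per nanosecond, averaged over all timed reads */
static long double compute_read_bwidth(void)
{
    assert(total_time != 0);
    return (reads * (long double)BLOCK_SIZE) / total_time;
}

/* Enter stage 3 only when flushing what is still dirty should fit
 * inside the downtime budget (both sides are in nanoseconds). */
static int can_enter_stage3(int64_t remaining_dirty_bytes,
                            int64_t max_downtime_ns)
{
    if (remaining_dirty_bytes == 0) {
        return 1;
    }
    return (remaining_dirty_bytes / compute_read_bwidth()) <= max_downtime_ns;
}

int main(void)
{
    reads = 1000;
    total_time = 2e9L;                  /* 1000 reads took 2 s in total */

    /* 200 MiB dirty at ~0.5 byte/ns needs ~400 ms, over a 30 ms budget,
     * so this prints 0 and migration stays in stage 2 */
    printf("%d\n", can_enter_stage3(200LL << 20, 30000000));
    return 0;
}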
block-migration.c
@@ -15,8 +15,10 @@
 #include "block_int.h"
 #include "hw/hw.h"
 #include "qemu-queue.h"
+#include "qemu-timer.h"
 #include "monitor.h"
 #include "block-migration.h"
+#include "migration.h"
 #include <assert.h>
 
 #define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
@@ -57,6 +59,7 @@ typedef struct BlkMigBlock {
     QEMUIOVector qiov;
     BlockDriverAIOCB *aiocb;
     int ret;
+    int64_t time;
     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
 } BlkMigBlock;
 
@@ -71,7 +74,8 @@ typedef struct BlkMigState {
     int64_t total_sector_sum;
     int prev_progress;
     int bulk_completed;
-    int dirty_iterations;
+    long double total_time;
+    int reads;
 } BlkMigState;
 
 static BlkMigState block_mig_state;
@@ -124,12 +128,28 @@ uint64_t blk_mig_bytes_total(void)
     return sum << BDRV_SECTOR_BITS;
 }
 
+static inline void add_avg_read_time(int64_t time)
+{
+    block_mig_state.reads++;
+    block_mig_state.total_time += time;
+}
+
+static inline long double compute_read_bwidth(void)
+{
+    assert(block_mig_state.total_time != 0);
+    return (block_mig_state.reads * BLOCK_SIZE) / block_mig_state.total_time;
+}
+
 static void blk_mig_read_cb(void *opaque, int ret)
 {
     BlkMigBlock *blk = opaque;
 
     blk->ret = ret;
 
+    blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
+
+    add_avg_read_time(blk->time);
+
     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
 
     block_mig_state.submitted--;
@@ -179,6 +199,8 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
+    blk->time = qemu_get_clock_ns(rt_clock);
+
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
     if (!blk->aiocb) {
@@ -220,6 +242,8 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f)
     block_mig_state.total_sector_sum = 0;
     block_mig_state.prev_progress = -1;
     block_mig_state.bulk_completed = 0;
+    block_mig_state.total_time = 0;
+    block_mig_state.reads = 0;
 
     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
         if (bs->type == BDRV_TYPE_HD) {
@@ -314,11 +338,13 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
             blk->bmds = bmds;
             blk->sector = sector;
 
-            if(is_async) {
+            if (is_async) {
                 blk->iov.iov_base = blk->buf;
                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
+                blk->time = qemu_get_clock_ns(rt_clock);
+
                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                             nr_sectors, blk_mig_read_cb, blk);
                 if (!blk->aiocb) {
@@ -345,7 +371,7 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
 
     return (bmds->cur_dirty >= bmds->total_sectors);
 
-error:
+ error:
     monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
     qemu_file_set_error(f);
     qemu_free(blk->buf);
@@ -359,7 +385,7 @@ static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
     int ret = 0;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        if(mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
+        if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
             ret = 1;
             break;
         }
@@ -400,9 +426,42 @@ static void flush_blks(QEMUFile* f)
                     block_mig_state.transferred);
 }
 
+static int64_t get_remaining_dirty(void)
+{
+    BlkMigDevState *bmds;
+    int64_t dirty = 0;
+
+    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+        dirty += bdrv_get_dirty_count(bmds->bs);
+    }
+
+    return dirty * BLOCK_SIZE;
+}
+
 static int is_stage2_completed(void)
 {
-    return (block_mig_state.submitted == 0 && block_mig_state.bulk_completed);
+    int64_t remaining_dirty;
+    long double bwidth;
+
+    if (block_mig_state.bulk_completed == 1) {
+
+        remaining_dirty = get_remaining_dirty();
+        if (remaining_dirty == 0) {
+            return 1;
+        }
+
+        bwidth = compute_read_bwidth();
+
+        if ((remaining_dirty / bwidth) <=
+            migrate_max_downtime()) {
+            /* finish stage2 because we think that we can finish remaining
+               work below max_downtime */
+
+            return 1;
+        }
+    }
+
+    return 0;
+}
 
 static void blk_mig_cleanup(Monitor *mon)
@@ -458,15 +517,15 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 
     blk_mig_reset_dirty_cursor();
 
-    if(stage == 2) {
+    if (stage == 2) {
         /* control the rate of transfer */
         while ((block_mig_state.submitted +
                 block_mig_state.read_done) * BLOCK_SIZE <
                qemu_file_get_rate_limit(f)) {
             if (block_mig_state.bulk_completed == 0) {
                 /* first finish the bulk phase */
                 if (blk_mig_save_bulked_block(mon, f) == 0) {
-                    /* finish saving bulk on all devices */
+                    /* finished saving bulk on all devices */
                     block_mig_state.bulk_completed = 1;
                 }
             } else {
@@ -486,9 +545,11 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     }
 
     if (stage == 3) {
-        /* we know for sure that save bulk is completed */
+        /* we know for sure that save bulk is completed and
+           all async reads completed */
+        assert(block_mig_state.submitted == 0);
 
-        while(blk_mig_save_dirty_block(mon, f, 0) != 0);
+        while (blk_mig_save_dirty_block(mon, f, 0) != 0);
         blk_mig_cleanup(mon);
 
         /* report completion */
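
For the measurement side, the patch stamps each block with
qemu_get_clock_ns(rt_clock) just before bdrv_aio_readv() and converts the
stamp into a latency when blk_mig_read_cb() fires, so blk->time holds a
timestamp first and a duration afterwards. A distilled sketch of that
pattern, with clock_ns() and a direct callback call standing in for QEMU's
clock and AIO completion path:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

typedef struct Req {
    int64_t time;                   /* timestamp, then completion latency */
} Req;

static int reads;
static long double total_time;      /* accumulated latency, in ns */

static int64_t clock_ns(void)       /* stand-in for qemu_get_clock_ns */
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static void read_cb(Req *req)       /* plays the role of blk_mig_read_cb */
{
    req->time = clock_ns() - req->time;  /* stamp becomes latency */
    reads++;
    total_time += req->time;
}

int main(void)
{
    Req req;

    req.time = clock_ns();          /* stamped just before the aio submit */
    /* ... the asynchronous read would run here ... */
    read_cb(&req);                  /* completion path closes the interval */

    printf("%d read(s), %.0Lf ns total\n", reads, total_time);
    return 0;
}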