Commit 6686ce3f authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging



# gpg: Signature made Thu Jul  2 10:10:39 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request:
  block: remove redundant check before g_slist_find()
  block/nfs: limit maximum readahead size to 1MB
  block/iscsi: restore compatibility with libiscsi 1.9.0
  iotests: Use event_wait in wait_ready
  qemu-iotests: Add test case for mirror with unmap
  qemu-iotests: Make block job methods common
  block: Remove bdrv_reset_dirty
  block: Fix dirty bitmap in bdrv_co_discard
  mirror: Do zero write on target if sectors not allocated
  qmp: Add optional bool "unmap" to drive-mirror
  block: Add bdrv_get_block_status_above
  timer: Use a single definition of NSEC_PER_SEC for the whole codebase
  timer: Move NANOSECONDS_PER_SECONDS to timer.h
  blockdev: no need to drain+flush in hmp_drive_del
  qapi: Rename 'dirty-bitmap' mode to 'incremental'
  qcow2: Handle EAGAIN returned from update_refcount
  block/iscsi: add support for request timeouts

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents d2966f80 764ba3ae
Loading
Loading
Loading
Loading
+0 −12
Original line number Diff line number Diff line
@@ -3528,18 +3528,6 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
    }
}

/**
 * Clear a sector range in every enabled dirty bitmap attached to @bs.
 *
 * @bs:         block device whose dirty_bitmaps list is walked
 * @cur_sector: start of the range handed to hbitmap_reset()
 * @nr_sectors: length of the range handed to hbitmap_reset()
 *
 * Bitmaps for which bdrv_dirty_bitmap_enabled() reports false are left
 * untouched.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    BdrvDirtyBitmap *bm;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        if (bdrv_dirty_bitmap_enabled(bm)) {
            hbitmap_reset(bm->bitmap, cur_sector, nr_sectors);
        }
    }
}

/**
 * Advance an HBitmapIter to an arbitrary offset.
 */
+5 −5
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ typedef struct CowRequest {
typedef struct BackupBlockJob {
    BlockJob common;
    BlockDriverState *target;
    /* bitmap for sync=dirty-bitmap */
    /* bitmap for sync=incremental */
    BdrvDirtyBitmap *sync_bitmap;
    MirrorSyncMode sync_mode;
    RateLimit limit;
@@ -365,7 +365,7 @@ static void coroutine_fn backup_run(void *opaque)
            qemu_coroutine_yield();
            job->common.busy = true;
        }
    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
    } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        ret = backup_run_incremental(job);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
@@ -497,10 +497,10 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        return;
    }

    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        if (!sync_bitmap) {
            error_setg(errp, "must provide a valid bitmap name for "
                             "\"dirty-bitmap\" sync mode");
                             "\"incremental\" sync mode");
            return;
        }

@@ -535,7 +535,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
    job->on_target_error = on_target_error;
    job->target = target;
    job->sync_mode = sync_mode;
    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
                       sync_bitmap : NULL;
    job->common.len = len;
    job->common.co = qemu_coroutine_create(backup_run);
+48 −14
Original line number Diff line number Diff line
@@ -283,7 +283,7 @@ void bdrv_drain_all(void)
        }
        aio_context_release(aio_context);

        if (!aio_ctxs || !g_slist_find(aio_ctxs, aio_context)) {
        if (!g_slist_find(aio_ctxs, aio_context)) {
            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
        }
    }
@@ -1531,28 +1531,54 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    return ret;
}

/* Coroutine wrapper for bdrv_get_block_status() */
static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
/*
 * Query the block status of a sector range across a chain of layers.
 *
 * Starting at @bs and following backing_hd links, each layer is queried with
 * bdrv_co_get_block_status() until @base is reached (@base itself is not
 * queried).  @bs must differ from @base.
 *
 * The walk stops early at the first layer that returns a negative value or
 * a status with BDRV_BLOCK_ALLOCATED set.  The return value is the result of
 * the last layer queried; @pnum is filled in by that same call.
 */
static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
        BlockDriverState *base,
        int64_t sector_num,
        int nb_sectors,
        int *pnum)
{
    BlockDriverState *layer = bs;
    int64_t status = 0;

    assert(bs != base);
    while (layer != base) {
        status = bdrv_co_get_block_status(layer, sector_num, nb_sectors, pnum);
        if (status < 0 || (status & BDRV_BLOCK_ALLOCATED)) {
            break;
        }
        /* This layer reported only the first *pnum sectors as unallocated,
         * which may be less than what was asked for; restrict the query on
         * the next layer to that same prefix.  */
        nb_sectors = MIN(nb_sectors, *pnum);
        layer = layer->backing_hd;
    }
    return status;
}

/* Coroutine wrapper for bdrv_get_block_status_above() */
static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
{
    BdrvCoGetBlockStatusData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
    data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
                                               data->sector_num,
                                               data->nb_sectors,
                                               data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_get_block_status().
 * Synchronous wrapper around bdrv_co_get_block_status_above().
 *
 * See bdrv_co_get_block_status() for details.
 * See bdrv_co_get_block_status_above() for details.
 */
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
int64_t bdrv_get_block_status_above(BlockDriverState *bs,
                                    BlockDriverState *base,
                                    int64_t sector_num,
                                    int nb_sectors, int *pnum)
{
    Coroutine *co;
    BdrvCoGetBlockStatusData data = {
        .bs = bs,
        .base = base,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
@@ -1561,11 +1587,11 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_co_entry(&data);
        bdrv_get_block_status_above_co_entry(&data);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
        co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry);
        qemu_coroutine_enter(co, &data);
        while (!data.done) {
            aio_poll(aio_context, true);
@@ -1574,6 +1600,14 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
    return data.ret;
}

/*
 * Block status query limited to @bs itself.
 *
 * Delegates to bdrv_get_block_status_above() with bs->backing_hd as the
 * base, and returns its result unchanged.
 */
int64_t bdrv_get_block_status(BlockDriverState *bs,
                              int64_t sector_num,
                              int nb_sectors, int *pnum)
{
    BlockDriverState *base = bs->backing_hd;

    return bdrv_get_block_status_above(bs, base, sector_num, nb_sectors, pnum);
}

int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                   int nb_sectors, int *pnum)
{
@@ -2378,8 +2412,6 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
        return -EPERM;
    }

    bdrv_reset_dirty(bs, sector_num, nb_sectors);

    /* Do nothing if disabled.  */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
@@ -2389,6 +2421,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
        return 0;
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
    while (nb_sectors > 0) {
        int ret;
+88 −23
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ typedef struct IscsiLun {
    bool dpofua;
    bool has_write_same;
    bool force_next_flush;
    bool request_timed_out;
} IscsiLun;

typedef struct IscsiTask {
@@ -100,7 +101,8 @@ typedef struct IscsiAIOCB {
#endif
} IscsiAIOCB;

#define EVENT_INTERVAL 250
/* libiscsi uses time_t, so it's enough to process events every second */
#define EVENT_INTERVAL 1000
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
@@ -167,6 +169,19 @@ static inline unsigned exp_random(double mean)
    return -mean * log((double)rand() / RAND_MAX);
}

/* SCSI_STATUS_TASK_SET_FULL and SCSI_STATUS_TIMEOUT were introduced
 * in libiscsi 1.10.0 as members of an enum, so the preprocessor cannot
 * test for them directly. The LIBISCSI_API_VERSION macro was introduced
 * in 1.11.0. So use the presence of LIBISCSI_API_VERSION as a hint that
 * the enum values exist, and otherwise define the values ourselves to
 * keep the required libiscsi version at 1.9.0 */
#if !defined(LIBISCSI_API_VERSION)
/* no version macro: fall back to hardcoded status values */
#define QEMU_SCSI_STATUS_TASK_SET_FULL  0x28
#define QEMU_SCSI_STATUS_TIMEOUT        0x0f000002
#else
/* version macro present: use the values provided by libiscsi */
#define QEMU_SCSI_STATUS_TASK_SET_FULL  SCSI_STATUS_TASK_SET_FULL
#define QEMU_SCSI_STATUS_TIMEOUT        SCSI_STATUS_TIMEOUT
#endif

static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
@@ -187,13 +202,19 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                iTask->do_retry = 1;
                goto out;
            }
            /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced
             * in libiscsi 1.10.0. Hardcode this value here to avoid
             * the need to bump the libiscsi requirement to 1.10.0 */
            if (status == SCSI_STATUS_BUSY || status == 0x28) {
            if (status == SCSI_STATUS_BUSY ||
                status == QEMU_SCSI_STATUS_TIMEOUT ||
                status == QEMU_SCSI_STATUS_TASK_SET_FULL) {
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
                error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s",
                if (status == QEMU_SCSI_STATUS_TIMEOUT) {
                    /* make sure the request is rescheduled AFTER the
                     * reconnect is initiated */
                    retry_time = EVENT_INTERVAL * 2;
                    iTask->iscsilun->request_timed_out = true;
                }
                error_report("iSCSI Busy/TaskSetFull/TimeOut"
                             " (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
@@ -277,20 +298,26 @@ iscsi_set_events(IscsiLun *iscsilun)
                           iscsilun);
        iscsilun->events = ev;
    }

    /* newer versions of libiscsi may return zero events. In this
     * case start a timer to ensure we are able to return to service
     * once this situation changes. */
    if (!ev) {
        timer_mod(iscsilun->event_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
    }
}

static void iscsi_timed_set_events(void *opaque)
/*
 * Periodic timer callback for an IscsiLun (passed as @opaque).
 *
 * On each tick it services the iSCSI context, initiates a reconnect if a
 * request was flagged as timed out, refreshes the event registration and
 * re-arms itself EVENT_INTERVAL ms into the future.
 */
static void iscsi_timed_check_events(void *opaque)
{
    IscsiLun *lun = opaque;

    /* let libiscsi look for requests that have timed out */
    iscsi_service(lun->iscsi, 0);

    /* consume the flag before reconnecting so it triggers only once */
    if (lun->request_timed_out) {
        lun->request_timed_out = false;
        iscsi_reconnect(lun->iscsi);
    }

    /* newer libiscsi versions may report zero events; re-evaluating them on
     * every tick ensures we return to service once that changes. */
    iscsi_set_events(lun);

    timer_mod(lun->event_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
}

static void
@@ -1093,16 +1120,37 @@ static char *parse_initiator_name(const char *target)
    return iscsi_name;
}

/*
 * Look up the "timeout" option for @target in the "iscsi" option group.
 *
 * The entry returned by qemu_opts_find() for @target is preferred; if there
 * is none, the first entry on the group's list is used instead.
 *
 * Returns the timeout as a non-negative int.  Returns 0 when the option
 * group, the options entry or the "timeout" key is missing, and also when
 * the value is not a plain non-negative decimal number that fits in an int
 * (in which case an error is reported).
 */
static int parse_timeout(const char *target)
{
    QemuOptsList *list;
    QemuOpts *opts;
    const char *timeout;
    char *end;
    long val;

    list = qemu_find_opts("iscsi");
    if (!list) {
        return 0;
    }

    opts = qemu_opts_find(list, target);
    if (!opts) {
        opts = QTAILQ_FIRST(&list->head);
    }
    if (!opts) {
        return 0;
    }

    timeout = qemu_opt_get(opts, "timeout");
    if (!timeout) {
        return 0;
    }

    /* atoi() cannot signal malformed input and is undefined for values that
     * do not fit in an int, so parse with strtol() and validate explicitly.
     * The round trip through int rejects values wider than int where long
     * is wider; where it is not, strtol() has already clamped the value. */
    val = strtol(timeout, &end, 10);
    if (end == timeout || *end != '\0' || val < 0 || val != (int)val) {
        error_report("iSCSI: ignoring invalid timeout value '%s'", timeout);
        return 0;
    }

    return (int)val;
}

static void iscsi_nop_timed_event(void *opaque)
{
    IscsiLun *iscsilun = opaque;

    if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
        error_report("iSCSI: NOP timeout. Reconnecting...");
        iscsi_reconnect(iscsilun->iscsi);
    }

    if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
        iscsilun->request_timed_out = true;
    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
        return;
    }
@@ -1260,10 +1308,13 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
    timer_mod(iscsilun->nop_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);

    /* Prepare a timer for a delayed call to iscsi_set_events */
    /* Set up a timer for periodic calls to iscsi_set_events and to
     * scan for command timeout */
    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
                                          QEMU_CLOCK_REALTIME, SCALE_MS,
                                          iscsi_timed_set_events, iscsilun);
                                          iscsi_timed_check_events, iscsilun);
    timer_mod(iscsilun->event_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
}

static void iscsi_modesense_sync(IscsiLun *iscsilun)
@@ -1318,7 +1369,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
    int i, ret = 0;
    int i, ret = 0, timeout = 0;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1388,6 +1439,16 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

    /* timeout handling is broken in libiscsi before 1.15.0 */
    timeout = parse_timeout(iscsi_url->target);
#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
    iscsi_set_timeout(iscsi, timeout);
#else
    if (timeout) {
        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
    }
#endif

    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
            iscsi_get_error(iscsi));
@@ -1736,6 +1797,10 @@ static QemuOptsList qemu_iscsi_opts = {
            .name = "initiator-name",
            .type = QEMU_OPT_STRING,
            .help = "Initiator iqn name to use when connecting",
        },{
            .name = "timeout",
            .type = QEMU_OPT_NUMBER,
            .help = "Request timeout in seconds (default 0 = no timeout)",
        },
        { /* end of list */ }
    },
+26 −6
Original line number Diff line number Diff line
@@ -58,6 +58,7 @@ typedef struct MirrorBlockJob {
    int in_flight;
    int sectors_in_flight;
    int ret;
    bool unmap;
} MirrorBlockJob;

typedef struct MirrorOp {
@@ -164,6 +165,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    uint64_t delay_ns = 0;
    MirrorOp *op;
    int pnum;
    int64_t ret;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
@@ -290,8 +293,22 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    s->in_flight++;
    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);

    ret = bdrv_get_block_status_above(source, NULL, sector_num,
                                      nb_sectors, &pnum);
    if (ret < 0 || pnum < nb_sectors ||
            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                       mirror_read_complete, op);
    } else if (ret & BDRV_BLOCK_ZERO) {
        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                              mirror_write_complete, op);
    } else {
        assert(!(ret & BDRV_BLOCK_DATA));
        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
                         mirror_write_complete, op);
    }
    return delay_ns;
}

@@ -652,6 +669,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                             int64_t buf_size,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
                             bool unmap,
                             BlockCompletionFunc *cb,
                             void *opaque, Error **errp,
                             const BlockJobDriver *driver,
@@ -686,6 +704,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
    s->base = base;
    s->granularity = granularity;
    s->buf_size = MAX(buf_size, granularity);
    s->unmap = unmap;

    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
    if (!s->dirty_bitmap) {
@@ -704,21 +723,22 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, uint32_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  bool unmap,
                  BlockCompletionFunc *cb,
                  void *opaque, Error **errp)
{
    bool is_none_mode;
    BlockDriverState *base;

    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
    if (mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        error_setg(errp, "Sync mode 'incremental' not supported");
        return;
    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
    mirror_start_job(bs, target, replaces,
                     speed, granularity, buf_size,
                     on_source_error, on_target_error, cb, opaque, errp,
                     on_source_error, on_target_error, unmap, cb, opaque, errp,
                     &mirror_job_driver, is_none_mode, base);
}

@@ -766,7 +786,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,

    bdrv_ref(base);
    mirror_start_job(bs, base, NULL, speed, 0, 0,
                     on_error, on_error, cb, opaque, &local_err,
                     on_error, on_error, false, cb, opaque, &local_err,
                     &commit_active_job_driver, false, base);
    if (local_err) {
        error_propagate(errp, local_err);
Loading