Commit c5e4e492 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/xanclic/tags/pull-block-2018-09-25' into staging



Block layer patches:
- Drain fixes
- node-name parameters for block-commit
- Refactor block jobs to use transactional callbacks for exiting

# gpg: Signature made Tue 25 Sep 2018 16:12:44 BST
# gpg:                using RSA key F407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/xanclic/tags/pull-block-2018-09-25: (42 commits)
  test-bdrv-drain: Test draining job source child and parent
  block: Use a single global AioWait
  test-bdrv-drain: Fix outdated comments
  test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort
  job: Avoid deadlocks in job_completed_txn_abort()
  test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level()
  block: Remove aio_poll() in bdrv_drain_poll variants
  blockjob: Lie better in child_job_drained_poll()
  block-backend: Decrease in_flight only after callback
  block-backend: Fix potential double blk_delete()
  block-backend: Add .drained_poll callback
  block: Add missing locking in bdrv_co_drain_bh_cb()
  test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback
  job: Use AIO_WAIT_WHILE() in job_finish_sync()
  test-blockjob: Acquire AioContext around job_cancel_sync()
  test-bdrv-drain: Drain with block jobs in an I/O thread
  aio-wait: Increase num_waiters even in home thread
  blockjob: Wake up BDS when job becomes idle
  job: Fix missing locking due to mismerge
  job: Fix nested aio_poll() hanging in job_txn_apply
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 0a736f7a 9c76ff9c
Loading
Loading
Loading
Loading
+1 −5
Original line number Diff line number Diff line
@@ -2792,6 +2792,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
    bdrv_parent_cb_change_media(bs, true);

    qobject_unref(options);
    options = NULL;

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
@@ -4885,11 +4886,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
    return bs ? bs->aio_context : qemu_get_aio_context();
}

AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
{
    return bs ? &bs->wait : NULL;
}

void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
{
    aio_co_enter(bdrv_get_aio_context(bs), co);
+22 −9
Original line number Diff line number Diff line
@@ -88,7 +88,6 @@ struct BlockBackend {
     * Accessed with atomic ops.
     */
    unsigned int in_flight;
    AioWait wait;
};

typedef struct BlockBackendAIOCB {
@@ -121,6 +120,7 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
    abort();
}
static void blk_root_drained_begin(BdrvChild *child);
static bool blk_root_drained_poll(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);

static void blk_root_change_media(BdrvChild *child, bool load);
@@ -294,6 +294,7 @@ static const BdrvChildRole child_root = {
    .get_parent_desc    = blk_root_get_parent_desc,

    .drained_begin      = blk_root_drained_begin,
    .drained_poll       = blk_root_drained_poll,
    .drained_end        = blk_root_drained_end,

    .activate           = blk_root_activate,
@@ -433,6 +434,7 @@ int blk_get_refcnt(BlockBackend *blk)
 */
void blk_ref(BlockBackend *blk)
{
    assert(blk->refcnt > 0);
    blk->refcnt++;
}

@@ -445,7 +447,13 @@ void blk_unref(BlockBackend *blk)
{
    if (blk) {
        assert(blk->refcnt > 0);
        if (!--blk->refcnt) {
        if (blk->refcnt > 1) {
            blk->refcnt--;
        } else {
            blk_drain(blk);
            /* blk_drain() cannot resurrect blk, nobody held a reference */
            assert(blk->refcnt == 1);
            blk->refcnt = 0;
            blk_delete(blk);
        }
    }
@@ -1289,7 +1297,7 @@ static void blk_inc_in_flight(BlockBackend *blk)
static void blk_dec_in_flight(BlockBackend *blk)
{
    atomic_dec(&blk->in_flight);
    aio_wait_kick(&blk->wait);
    aio_wait_kick();
}

static void error_callback_bh(void *opaque)
@@ -1330,8 +1338,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
    if (acb->has_returned) {
        blk_dec_in_flight(acb->rwco.blk);
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        blk_dec_in_flight(acb->rwco.blk);
        qemu_aio_unref(acb);
    }
}
@@ -1590,8 +1598,7 @@ void blk_drain(BlockBackend *blk)
    }

    /* We may have -ENOMEDIUM completions in flight */
    AIO_WAIT_WHILE(&blk->wait,
            blk_get_aio_context(blk),
    AIO_WAIT_WHILE(blk_get_aio_context(blk),
                   atomic_mb_read(&blk->in_flight) > 0);

    if (bs) {
@@ -1611,8 +1618,7 @@ void blk_drain_all(void)
        aio_context_acquire(ctx);

        /* We may have -ENOMEDIUM completions in flight */
        AIO_WAIT_WHILE(&blk->wait, ctx,
                atomic_mb_read(&blk->in_flight) > 0);
        AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);

        aio_context_release(ctx);
    }
@@ -2189,6 +2195,13 @@ static void blk_root_drained_begin(BdrvChild *child)
    }
}

static bool blk_root_drained_poll(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    assert(blk->quiesce_counter);
    return !!blk->in_flight;
}

static void blk_root_drained_end(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
+54 −43
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
    BlockDriverState *commit_top_bs;
    BlockBackend *top;
    BlockBackend *base;
    BlockDriverState *base_bs;
    BlockdevOnError on_error;
    int base_flags;
    char *backing_file_str;
@@ -68,61 +69,67 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
    return 0;
}

static void commit_exit(Job *job)
static int commit_prepare(Job *job)
{
    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
    BlockJob *bjob = &s->common;
    BlockDriverState *top = blk_bs(s->top);
    BlockDriverState *base = blk_bs(s->base);
    BlockDriverState *commit_top_bs = s->commit_top_bs;
    bool remove_commit_top_bs = false;

    /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
    bdrv_ref(top);
    bdrv_ref(commit_top_bs);

    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
     * the normal backing chain can be restored. */
    blk_unref(s->base);
    s->base = NULL;

    if (!job_is_cancelled(job) && job->ret == 0) {
        /* success */
        job->ret = bdrv_drop_intermediate(s->commit_top_bs, base,
    /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
     * identically. Further work is needed to disambiguate these cases. */
    return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
                                  s->backing_file_str);
    } else {
        /* XXX Can (or should) we somehow keep 'consistent read' blocked even
}

static void commit_abort(Job *job)
{
    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
    BlockDriverState *top_bs = blk_bs(s->top);

    /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
    bdrv_ref(top_bs);
    bdrv_ref(s->commit_top_bs);

    if (s->base) {
        blk_unref(s->base);
    }

    /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
     * can succeed */
    block_job_remove_all_bdrv(&s->common);

    /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
     * commit filter driver from the backing chain now. Do this as the final
     * step so that the 'consistent read' permission can be granted.
     *
     * XXX Can (or should) we somehow keep 'consistent read' blocked even
     * after the failed/cancelled commit job is gone? If we already wrote
     * something to base, the intermediate images aren't valid any more. */
        remove_commit_top_bs = true;
    bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL,
                            &error_abort);
    bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
                      &error_abort);

    bdrv_unref(s->commit_top_bs);
    bdrv_unref(top_bs);
}

static void commit_clean(Job *job)
{
    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);

    /* restore base open flags here if appropriate (e.g., change the base back
     * to r/o). These reopens do not need to be atomic, since we won't abort
     * even on failure here */
    if (s->base_flags != bdrv_get_flags(base)) {
        bdrv_reopen(base, s->base_flags, NULL);
    if (s->base_flags != bdrv_get_flags(s->base_bs)) {
        bdrv_reopen(s->base_bs, s->base_flags, NULL);
    }

    g_free(s->backing_file_str);
    blk_unref(s->top);

    /* If there is more than one reference to the job (e.g. if called from
     * job_finish_sync()), job_completed() won't free it and therefore the
     * blockers on the intermediate nodes remain. This would cause
     * bdrv_set_backing_hd() to fail. */
    block_job_remove_all_bdrv(bjob);

    /* If bdrv_drop_intermediate() didn't already do that, remove the commit
     * filter driver from the backing chain. Do this as the final step so that
     * the 'consistent read' permission can be granted.  */
    if (remove_commit_top_bs) {
        bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
                                &error_abort);
        bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
                          &error_abort);
    }

    bdrv_unref(commit_top_bs);
    bdrv_unref(top);
}

static int coroutine_fn commit_run(Job *job, Error **errp)
@@ -211,7 +218,9 @@ static const BlockJobDriver commit_job_driver = {
        .user_resume   = block_job_user_resume,
        .drain         = block_job_drain,
        .run           = commit_run,
        .exit          = commit_exit,
        .prepare       = commit_prepare,
        .abort         = commit_abort,
        .clean         = commit_clean
    },
};

@@ -249,7 +258,8 @@ static BlockDriver bdrv_commit_top = {
};

void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
                  BlockDriverState *base, BlockDriverState *top,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
                  const char *filter_node_name, Error **errp)
{
@@ -267,7 +277,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    }

    s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
                         speed, JOB_DEFAULT, NULL, NULL, errp);
                         speed, creation_flags, NULL, NULL, errp);
    if (!s) {
        return;
    }
@@ -344,6 +354,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    if (ret < 0) {
        goto fail;
    }
    s->base_bs = base;

    /* Required permissions are already taken with block_job_add_bdrv() */
    s->top = blk_new(0, BLK_PERM_ALL);
+17 −13
Original line number Diff line number Diff line
@@ -38,8 +38,6 @@
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

static AioWait drain_all_aio_wait;

static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags);
@@ -268,10 +266,6 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                      BdrvChild *ignore_parent)
{
    /* Execute pending BHs first and check everything else only after the BHs
     * have executed. */
    while (aio_poll(bs->aio_context, false));

    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}

@@ -288,6 +282,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
    BlockDriverState *bs = data->bs;

    if (bs) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);

        /*
         * When the coroutine yielded, the lock for its home context was
         * released, so we need to re-acquire it here. If it explicitly
         * acquired a different context, the lock is still held and we don't
         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
         */
        if (ctx == co_ctx) {
            aio_context_acquire(ctx);
        }
        bdrv_dec_in_flight(bs);
        if (data->begin) {
            bdrv_do_drained_begin(bs, data->recursive, data->parent,
@@ -296,6 +302,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
            bdrv_do_drained_end(bs, data->recursive, data->parent,
                                data->ignore_bds_parents);
        }
        if (ctx == co_ctx) {
            aio_context_release(ctx);
        }
    } else {
        assert(data->begin);
        bdrv_drain_all_begin();
@@ -496,10 +505,6 @@ static bool bdrv_drain_all_poll(void)
    BlockDriverState *bs = NULL;
    bool result = false;

    /* Execute pending BHs first (may modify the graph) and check everything
     * else only after the BHs have executed. */
    while (aio_poll(qemu_get_aio_context(), false));

    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
    while ((bs = bdrv_next_all_states(bs))) {
@@ -550,7 +555,7 @@ void bdrv_drain_all_begin(void)
    }

    /* Now poll the in-flight requests */
    AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());

    while ((bs = bdrv_next_all_states(bs))) {
        bdrv_drain_assert_idle(bs);
@@ -706,8 +711,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)

void bdrv_wakeup(BlockDriverState *bs)
{
    aio_wait_kick(bdrv_get_aio_wait(bs));
    aio_wait_kick(&drain_all_aio_wait);
    aio_wait_kick();
}

void bdrv_dec_in_flight(BlockDriverState *bs)
+1 −1
Original line number Diff line number Diff line
@@ -234,9 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s)

static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
    aio_context_acquire(s->aio_context);
    qemu_laio_process_completions(s);

    aio_context_acquire(s->aio_context);
    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
        ioq_submit(s);
    }
Loading