Commit 088d6709 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-10-10' into staging



Block patches:
- Parallelized request handling for qcow2
- Backup job refactoring to use a filter node instead of before-write
  notifiers
- Add discard accounting information to file-posix nodes
- Allow trivial reopening of nbd nodes
- Some iotest fixes

# gpg: Signature made Thu 10 Oct 2019 12:40:34 BST
# gpg:                using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg:                issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2019-10-10: (36 commits)
  iotests/162: Fix for newer Linux 5.3+
  tests: fix I/O test for hosts defaulting to LUKSv2
  nbd: add empty .bdrv_reopen_prepare
  block/backup: use backup-top instead of write notifiers
  block: introduce backup-top filter driver
  block/block-copy: split block_copy_set_callbacks function
  block/backup: move write_flags calculation inside backup_job_create
  block/backup: move in-flight requests handling from backup to block-copy
  iotests: Use stat -c %b in 125
  iotests: Disable 125 on broken XFS versions
  iotests: Fix 125 for growth_mode = metadata
  qapi: query-blockstat: add driver specific file-posix stats
  file-posix: account discard operations
  scsi: account unmap operations
  scsi: move unmap error checking to the complete callback
  scsi: store unmap offset and nb_sectors in request struct
  ide: account UNMAP (TRIM) operations
  block: add empty account cookie type
  qapi: add unmap to BlockDeviceStats
  qapi: group BlockDeviceStats fields
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents cdfc44ac 35f05b2e
Loading
Loading
Loading
Loading
+35 −8
Original line number Diff line number Diff line
@@ -5155,6 +5155,15 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
    return NULL;
}

/*
 * Return the driver-specific statistics of @bs.
 *
 * The request is delegated to the driver's .bdrv_get_specific_stats
 * callback.  NULL is returned when @bs has no driver or the driver does
 * not implement the callback.
 */
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_get_specific_stats) {
        return drv->bdrv_get_specific_stats(bs);
    }

    return NULL;
}

void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
@@ -5164,14 +5173,35 @@ void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
    bs->drv->bdrv_debug_event(bs, event);
}

int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
/*
 * Starting at @bs, walk down the graph until a node whose driver
 * implements .bdrv_debug_breakpoint is found.
 *
 * At every node without the callback, the file child is followed if there
 * is one; otherwise, for filter drivers, the backing child is followed.
 * The walk stops when neither applies.
 *
 * Returns the node that implements the breakpoint callbacks, or NULL if
 * there is none on the walked path.
 */
static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
        /*
         * Descend exactly one level per iteration.  bs must stay non-NULL
         * here because it is dereferenced right below.
         */
        if (bs->file) {
            bs = bs->file->bs;
            continue;
        }

        if (bs->drv->is_filter && bs->backing) {
            bs = bs->backing->bs;
            continue;
        }

        break;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
        /* A driver with breakpoints must also be able to remove them */
        assert(bs->drv->bdrv_debug_remove_breakpoint);
        return bs;
    }

    return NULL;
}

int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
{
    bs = bdrv_find_debug_node(bs);
    if (bs) {
        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
    }

@@ -5180,11 +5210,8 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,

int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
        bs = bs->file ? bs->file->bs : NULL;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
    bs = bdrv_find_debug_node(bs);
    if (bs) {
        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
    }

+4 −0
Original line number Diff line number Diff line
@@ -37,9 +37,13 @@ block-obj-y += write-threshold.o
block-obj-y += backup.o
block-obj-$(CONFIG_REPLICATION) += replication.o
block-obj-y += throttle.o copy-on-read.o
block-obj-y += block-copy.o

block-obj-y += crypto.o

block-obj-y += aio_task.o
block-obj-y += backup-top.o

common-obj-y += stream.o

nfs.o-libs         := $(LIBNFS_LIBS)
+6 −0
Original line number Diff line number Diff line
@@ -195,6 +195,10 @@ static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,

    assert(cookie->type < BLOCK_MAX_IOTYPE);

    if (cookie->type == BLOCK_ACCT_NONE) {
        return;
    }

    qemu_mutex_lock(&stats->lock);

    if (failed) {
@@ -217,6 +221,8 @@ static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
    }

    qemu_mutex_unlock(&stats->lock);

    cookie->type = BLOCK_ACCT_NONE;
}

void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)

block/aio_task.c

0 → 100644
+124 −0
Original line number Diff line number Diff line
/*
 * Aio tasks loops
 *
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "block/aio.h"
#include "block/aio_task.h"

/*
 * Bookkeeping for a set of task coroutines started by one "main" coroutine.
 */
struct AioTaskPool {
    /* Coroutine that created the pool; woken by a finishing task if waiting */
    Coroutine *main_co;
    /* 0, or the first negative task->ret reported by a finished task */
    int status;
    /* Upper bound for busy_tasks, set at pool creation */
    int max_busy_tasks;
    /* Number of tasks whose task->func is currently running */
    int busy_tasks;
    /* Set while main_co has yielded in aio_task_pool_wait_one() */
    bool waiting;
};

/*
 * Coroutine entry point for a single task.
 *
 * Runs task->func with the pool's busy counter raised, records the first
 * failure in the pool status, frees the task and finally wakes the main
 * coroutine if it is waiting for a task to finish.
 */
static void coroutine_fn aio_task_co(void *opaque)
{
    AioTask *t = opaque;
    AioTaskPool *p = t->pool;
    int ret;

    assert(p->busy_tasks < p->max_busy_tasks);
    p->busy_tasks++;

    ret = t->func(t);
    t->ret = ret;

    p->busy_tasks--;

    /* Only the first error is kept */
    if (p->status == 0 && ret < 0) {
        p->status = ret;
    }

    /* The task is owned by this coroutine from here on and is released */
    g_free(t);

    if (!p->waiting) {
        return;
    }

    p->waiting = false;
    aio_co_wake(p->main_co);
}

/*
 * Yield the main coroutine until one running task finishes.
 *
 * Must be called from the coroutine that created @pool, and only while at
 * least one task is busy (both are asserted).
 */
void coroutine_fn aio_task_pool_wait_one(AioTaskPool *pool)
{
    assert(pool->busy_tasks > 0);
    assert(qemu_coroutine_self() == pool->main_co);

    /* aio_task_co() clears the flag and wakes us when a task completes */
    pool->waiting = true;
    qemu_coroutine_yield();

    assert(!pool->waiting);
    assert(pool->busy_tasks < pool->max_busy_tasks);
}

/*
 * Make sure the pool has room for one more task: if the number of busy
 * tasks has reached the limit, wait for one of them to finish.
 */
void coroutine_fn aio_task_pool_wait_slot(AioTaskPool *pool)
{
    if (pool->busy_tasks >= pool->max_busy_tasks) {
        aio_task_pool_wait_one(pool);
    }
}

/*
 * Wait until no task of @pool is busy any more.
 */
void coroutine_fn aio_task_pool_wait_all(AioTaskPool *pool)
{
    for (;;) {
        if (pool->busy_tasks <= 0) {
            return;
        }

        aio_task_pool_wait_one(pool);
    }
}

/*
 * Start @task in a new coroutine belonging to @pool.
 *
 * Waits for a free slot first if the pool is already running its maximum
 * number of tasks.  The task is freed by the task coroutine once its
 * function has returned.
 */
void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task)
{
    Coroutine *co;

    aio_task_pool_wait_slot(pool);

    task->pool = pool;

    co = qemu_coroutine_create(aio_task_co, task);
    qemu_coroutine_enter(co);
}

/*
 * Allocate a zero-initialized pool that allows up to @max_busy_tasks tasks
 * to run at once.  The calling coroutine is remembered as the pool's main
 * coroutine.  Release the pool with aio_task_pool_free().
 */
AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks)
{
    AioTaskPool *pool;

    pool = g_new0(AioTaskPool, 1);
    pool->max_busy_tasks = max_busy_tasks;
    pool->main_co = qemu_coroutine_self();

    return pool;
}

/*
 * Release the memory of @pool.  Only the pool structure itself is freed
 * here.
 */
void aio_task_pool_free(AioTaskPool *pool)
{
    g_free(pool);
}

/*
 * Return the status recorded in @pool: 0, or the first negative return
 * value of a finished task.
 *
 * A NULL @pool is accepted and reported as 0 (sugar for lazy allocation of
 * the aio pool).
 */
int aio_task_pool_status(AioTaskPool *pool)
{
    return pool ? pool->status : 0;
}

/*
 * Return true if no task of @pool is currently busy.
 */
bool aio_task_pool_empty(AioTaskPool *pool)
{
    return !pool->busy_tasks;
}

block/backup-top.c

0 → 100644
+276 −0
Original line number Diff line number Diff line
/*
 * backup-top filter driver
 *
 * The driver performs Copy-Before-Write (CBW) operation: it is injected above
 * some node, and before each write it copies _old_ data to the target node.
 *
 * Copyright (c) 2018-2019 Virtuozzo International GmbH.
 *
 * Author:
 *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#include "sysemu/block-backend.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "block/block_int.h"
#include "block/qdict.h"
#include "block/block-copy.h"

#include "block/backup-top.h"

/*
 * Per-node state of the backup-top filter.
 */
typedef struct BDRVBackupTopState {
    /* Block-copy state used for the copy-before-write operation */
    BlockCopyState *bcs;
    /* Child named "target" (role child_file) that receives the old data */
    BdrvChild *target;
    /*
     * False until the filter has been appended above the source; while
     * false, backup_top_child_perm() requests no permissions at all.
     */
    bool active;
} BDRVBackupTopState;

/*
 * Reads need no copy-before-write: forward the request unchanged to the
 * backing child and return its result.
 */
static coroutine_fn int backup_top_co_preadv(
        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
        QEMUIOVector *qiov, int flags)
{
    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}

/*
 * Copy-before-write step.
 *
 * Widens [@offset, @offset + @bytes) to block-copy cluster boundaries and
 * passes the resulting range to block_copy().  Returns block_copy()'s
 * result.
 */
static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
                                       uint64_t bytes)
{
    BDRVBackupTopState *state = bs->opaque;
    BlockCopyState *copy_state = state->bcs;
    uint64_t range_start = QEMU_ALIGN_DOWN(offset, copy_state->cluster_size);
    uint64_t range_end = QEMU_ALIGN_UP(offset + bytes,
                                       copy_state->cluster_size);

    return block_copy(copy_state, range_start, range_end - range_start, NULL);
}

/*
 * Discard handler: run copy-before-write for the range first, and only if
 * that succeeded forward the discard to the backing child.
 *
 * Returns the negative error of the copy step, or else the result of the
 * forwarded discard.
 */
static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs,
                                               int64_t offset, int bytes)
{
    int ret;

    ret = backup_top_cbw(bs, offset, bytes);
    if (ret >= 0) {
        ret = bdrv_co_pdiscard(bs->backing, offset, bytes);
    }

    return ret;
}

/*
 * Write-zeroes handler: run copy-before-write for the range first, and
 * only if that succeeded forward the request to the backing child.
 *
 * Returns the negative error of the copy step, or else the result of the
 * forwarded request.
 */
static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs,
        int64_t offset, int bytes, BdrvRequestFlags flags)
{
    int ret;

    ret = backup_top_cbw(bs, offset, bytes);
    if (ret >= 0) {
        ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
    }

    return ret;
}

/*
 * Write handler.
 *
 * Unless the request carries BDRV_REQ_WRITE_UNCHANGED, the affected range
 * goes through copy-before-write first; a failure there is returned
 * without writing.  Afterwards the write is forwarded to the backing
 * child.
 */
static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs,
                                              uint64_t offset,
                                              uint64_t bytes,
                                              QEMUIOVector *qiov, int flags)
{
    if ((flags & BDRV_REQ_WRITE_UNCHANGED) == 0) {
        int cbw_ret = backup_top_cbw(bs, offset, bytes);

        if (cbw_ret < 0) {
            return cbw_ret;
        }
    }

    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
}

/*
 * Flush handler: flush the backing node, or report success (0) when the
 * filter has no backing child.
 */
static int coroutine_fn backup_top_co_flush(BlockDriverState *bs)
{
    return bs->backing ? bdrv_co_flush(bs->backing->bs) : 0;
}

/*
 * Make the filter report the filename of its backing node as its own
 * exact filename.
 */
static void backup_top_refresh_filename(BlockDriverState *bs)
{
    /*
     * There may be no backing child: we can get here after a failed
     * bdrv_attach_child in bdrv_set_backing_hd.  Nothing to copy then.
     */
    if (bs->backing != NULL) {
        pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
                bs->backing->bs->filename);
    }
}

/*
 * Compute the permissions the filter needs on (@nperm) and shares for
 * (@nshared) child @c.
 *
 * - inactive filter: nothing required, everything shared;
 * - target child (role child_file): WRITE required, everything shared;
 * - source child: filter defaults, plus CONSISTENT_READ if the parents
 *   want to write, and WRITE is never shared.
 */
static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
                                  const BdrvChildRole *role,
                                  BlockReopenQueue *reopen_queue,
                                  uint64_t perm, uint64_t shared,
                                  uint64_t *nperm, uint64_t *nshared)
{
    BDRVBackupTopState *state = bs->opaque;

    if (!state->active) {
        /*
         * The filter node may be in the middle of bdrv_append(), which
         * first does bdrv_set_backing_hd() and then bdrv_replace_node().
         * We therefore cannot unshare BLK_PERM_WRITE during bdrv_append().
         * Require nothing for now; permissions are refreshed afterwards
         * (see bdrv_backup_top_append()).
         */
        *nshared = BLK_PERM_ALL;
        *nperm = 0;
        return;
    }

    if (role == &child_file) {
        /*
         * Target child.
         *
         * Share write to target (child_file), to not interfere with guest
         * writes to its disk, which may be in the target's backing chain.
         */
        *nperm = BLK_PERM_WRITE;
        *nshared = BLK_PERM_ALL;
        return;
    }

    /* Source child */
    bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
                              nperm, nshared);

    if (perm & BLK_PERM_WRITE) {
        *nperm |= BLK_PERM_CONSISTENT_READ;
    }
    *nshared &= ~BLK_PERM_WRITE;
}

/*
 * Driver table of the backup-top filter.  The node is created internally
 * through bdrv_backup_top_append().
 */
BlockDriver bdrv_backup_top_filter = {
    .format_name = "backup-top",
    .instance_size = sizeof(BDRVBackupTopState),

    /* Reads are forwarded; the write-type handlers copy old data first */
    .bdrv_co_preadv             = backup_top_co_preadv,
    .bdrv_co_pwritev            = backup_top_co_pwritev,
    .bdrv_co_pwrite_zeroes      = backup_top_co_pwrite_zeroes,
    .bdrv_co_pdiscard           = backup_top_co_pdiscard,
    .bdrv_co_flush              = backup_top_co_flush,

    .bdrv_co_block_status       = bdrv_co_block_status_from_backing,

    .bdrv_refresh_filename      = backup_top_refresh_filename,

    .bdrv_child_perm            = backup_top_child_perm,

    .is_filter = true,
};

/*
 * Create a backup-top filter node and insert it above @source.
 *
 * @source:           node to protect with copy-before-write
 * @target:           node that receives the old data; attached to the
 *                    filter as child "target"
 * @filter_node_name: node name for the new filter node
 * @cluster_size:     passed to block_copy_state_new()
 * @write_flags:      passed to block_copy_state_new()
 * @bcs:              on success, set to the block-copy state owned by the
 *                    filter
 * @errp:             error object
 *
 * Returns the new filter node on success.  On failure NULL is returned,
 * @errp is set and the filter node is removed again.
 */
BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
                                         BlockDriverState *target,
                                         const char *filter_node_name,
                                         uint64_t cluster_size,
                                         BdrvRequestFlags write_flags,
                                         BlockCopyState **bcs,
                                         Error **errp)
{
    Error *local_err = NULL;
    BDRVBackupTopState *state;
    bool appended = false;
    BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter,
                                                 filter_node_name,
                                                 BDRV_O_RDWR, errp);

    if (!top) {
        return NULL;
    }

    /*
     * Opening the driver has already allocated zero-filled opaque state of
     * .instance_size bytes.  Use it instead of allocating a second
     * structure, which would leak the first one.
     */
    state = top->opaque;
    top->total_sectors = source->total_sectors;

    bdrv_ref(target);
    state->target = bdrv_attach_child(top, target, "target", &child_file, errp);
    if (!state->target) {
        /*
         * NOTE(review): bdrv_attach_child() is believed to drop the
         * reference to the child on failure already; if so, this unref is
         * one too many -- confirm against block.c.
         */
        bdrv_unref(target);
        bdrv_unref(top);
        return NULL;
    }

    bdrv_drained_begin(source);

    /* Keep our own reference; bdrv_append() consumes one */
    bdrv_ref(top);
    bdrv_append(top, source, &local_err);
    if (local_err) {
        error_prepend(&local_err, "Cannot append backup-top filter: ");
        goto fail;
    }
    appended = true;

    /*
     * bdrv_append() finished successfully, now we can require permissions
     * we want.
     */
    state->active = true;
    bdrv_child_refresh_perms(top, top->backing, &local_err);
    if (local_err) {
        error_prepend(&local_err,
                      "Cannot set permissions for backup-top filter: ");
        goto fail;
    }

    state->bcs = block_copy_state_new(top->backing, state->target,
                                      cluster_size, write_flags, &local_err);
    if (local_err) {
        error_prepend(&local_err, "Cannot create block-copy-state: ");
        goto fail;
    }
    *bcs = state->bcs;

    bdrv_drained_end(source);

    return top;

fail:
    if (appended) {
        /*
         * bdrv_backup_top_drop() removes the filter from the graph and
         * drops our reference.  top and state must not be touched
         * afterwards.
         */
        state->active = false;
        bdrv_backup_top_drop(top);
    } else {
        /*
         * Dropping our reference is enough: the target child goes away
         * together with the node, no explicit bdrv_unref_child() needed.
         */
        bdrv_unref(top);
    }

    bdrv_drained_end(source);
    error_propagate(errp, local_err);

    return NULL;
}

/*
 * Remove the backup-top filter @bs from the graph and drop one reference
 * to it.
 *
 * The block-copy state is freed first.  Then, inside a drained section and
 * with the node's AioContext acquired, the filter is deactivated, its
 * permissions are refreshed, its parents are redirected to its backing
 * node and the backing link is cut.
 */
void bdrv_backup_top_drop(BlockDriverState *bs)
{
    BDRVBackupTopState *state = bs->opaque;
    AioContext *ctx = bdrv_get_aio_context(bs);

    block_copy_state_free(state->bcs);

    aio_context_acquire(ctx);
    bdrv_drained_begin(bs);

    /* Give up all permissions before the node is replaced */
    state->active = false;
    bdrv_child_refresh_perms(bs, bs->backing, &error_abort);

    bdrv_replace_node(bs, backing_bs(bs), &error_abort);
    bdrv_set_backing_hd(bs, NULL, &error_abort);

    bdrv_drained_end(bs);
    bdrv_unref(bs);

    aio_context_release(ctx);
}
Loading