Commit b2df6a79 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging



Block layer patches (rebased Stefan's pull request)

# gpg: Signature made Thu 12 Nov 2015 15:34:16 GMT using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (43 commits)
  block: Update copyright of the accounting code
  scsi-disk: Account for failed operations
  macio: Account for failed operations
  ide: Account for failed and invalid operations
  atapi: Account for failed and invalid operations
  xen_disk: Account for failed and invalid operations
  virtio-blk: Account for failed and invalid operations
  nvme: Account for failed and invalid operations
  iotests: Add test for the block device statistics
  block: Use QEMU_CLOCK_VIRTUAL for the accounting code in qtest mode
  qemu-io: Account for failed, invalid and flush operations
  block: New option to define the intervals for collecting I/O statistics
  block: Add average I/O queue depth to BlockDeviceTimedStats
  block: Compute minimum, maximum and average I/O latencies
  block: Allow configuring whether to account failed and invalid ops
  block: Add statistics for failed and invalid I/O operations
  block: Add idle_time_ns to BlockDeviceStats
  util: Infrastructure for computing recent averages
  block: define 'clock_type' for the accounting code
  ide: Account for write operations correctly
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents cfcc7c14 aece5edc
Loading
Loading
Loading
Loading
+17 −2
Original line number Diff line number Diff line
@@ -3404,10 +3404,25 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}

void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    if (!out) {
        hbitmap_reset_all(bitmap->bitmap);
    } else {
        HBitmap *backup = bitmap->bitmap;
        bitmap->bitmap = hbitmap_alloc(bitmap->size,
                                       hbitmap_granularity(backup));
        *out = backup;
    }
}

/*
 * Undo a previous bdrv_clear_dirty_bitmap(): reinstall the HBitmap that
 * was handed out through its @out parameter and free the replacement
 * bitmap that was allocated in the meantime.
 */
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
{
    HBitmap *discarded = bitmap->bitmap;

    assert(bdrv_dirty_bitmap_enabled(bitmap));
    bitmap->bitmap = in;
    hbitmap_free(discarded);
}

void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+120 −3
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
 * QEMU System Emulator block accounting
 *
 * Copyright (c) 2011 Christoph Hellwig
 * Copyright (c) 2015 Igalia, S.L.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -25,6 +26,54 @@
#include "block/accounting.h"
#include "block/block_int.h"
#include "qemu/timer.h"
#include "sysemu/qtest.h"

static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;

/*
 * Initialise @stats and record whether failed and/or invalid operations
 * should be accounted.  Under qtest the file-wide accounting clock is
 * switched to QEMU_CLOCK_VIRTUAL so that timing-based statistics are
 * deterministic in tests.
 */
void block_acct_init(BlockAcctStats *stats, bool account_invalid,
                     bool account_failed)
{
    if (qtest_enabled()) {
        clock_type = QEMU_CLOCK_VIRTUAL;
    }

    stats->account_invalid = account_invalid;
    stats->account_failed = account_failed;
}

/* Free every timed-stats interval attached to @stats.  Each node is
 * detached before it is freed, so the traversal stays safe. */
void block_acct_cleanup(BlockAcctStats *stats)
{
    BlockAcctTimedStats *cur = QSLIST_FIRST(&stats->intervals);

    while (cur != NULL) {
        BlockAcctTimedStats *next = QSLIST_NEXT(cur, entries);
        g_free(cur);
        cur = next;
    }
}

/*
 * Register a new collection interval of @interval_length seconds on
 * @stats.  A timed-average accumulator is created for every I/O type so
 * latencies can be tracked per interval.
 */
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
{
    BlockAcctTimedStats *timed = g_new0(BlockAcctTimedStats, 1);
    /* Window length for the averages, converted to nanoseconds. */
    uint64_t window_ns = (uint64_t) interval_length * NANOSECONDS_PER_SECOND;
    unsigned type;

    timed->interval_length = interval_length;
    QSLIST_INSERT_HEAD(&stats->intervals, timed, entries);

    for (type = 0; type < BLOCK_MAX_IOTYPE; type++) {
        timed_average_init(&timed->latency[type], clock_type, window_ns);
    }
}

/*
 * Iterator over the intervals registered on @stats: pass NULL to get the
 * first interval, or a previous result to get the one after it.  Returns
 * NULL when the list is exhausted.
 */
BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
                                              BlockAcctTimedStats *s)
{
    return s ? QSLIST_NEXT(s, entries) : QSLIST_FIRST(&stats->intervals);
}

/*
 * Begin accounting for one I/O operation of @type covering @bytes bytes.
 * The cookie records the start timestamp (taken from the file-wide
 * accounting clock) and is later passed to block_acct_done() or
 * block_acct_failed().
 *
 * NOTE(review): the pasted diff contained a stray "@@ ... @@" hunk header
 * and both the old QEMU_CLOCK_REALTIME and the new clock_type assignment;
 * only the new version is kept here.
 */
void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
                      int64_t bytes, enum BlockAcctType type)
{
    assert(type < BLOCK_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = qemu_clock_get_ns(clock_type);
    cookie->type = type;
}

/*
 * Account a successfully completed I/O operation started with
 * block_acct_start().  Updates byte/op counters, the total and
 * per-interval latency statistics, and the last-access timestamp.
 *
 * NOTE(review): the pasted diff retained the old
 * "total_time_ns += qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start" lines
 * next to the new "+= latency_ns" line, which would have double-counted
 * the latency; only the new statement is kept.
 */
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
    BlockAcctTimedStats *s;
    int64_t time_ns = qemu_clock_get_ns(clock_type);
    int64_t latency_ns = time_ns - cookie->start_time_ns;

    /* Under qtest use a fixed latency so results are deterministic. */
    if (qtest_enabled()) {
        latency_ns = qtest_latency_ns;
    }

    assert(cookie->type < BLOCK_MAX_IOTYPE);

    stats->nr_bytes[cookie->type] += cookie->bytes;
    stats->nr_ops[cookie->type]++;
    stats->total_time_ns[cookie->type] += latency_ns;
    stats->last_access_time_ns = time_ns;

    QSLIST_FOREACH(s, &stats->intervals, entries) {
        timed_average_account(&s->latency[cookie->type], latency_ns);
    }
}

/*
 * Account a failed I/O operation started with block_acct_start().  The
 * failure counter is always bumped; latency and last-access time are only
 * recorded when the stats were initialised with account_failed == true.
 */
void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
    BlockAcctTimedStats *interval;
    int64_t now_ns;
    int64_t latency_ns;

    assert(cookie->type < BLOCK_MAX_IOTYPE);

    stats->failed_ops[cookie->type]++;

    if (!stats->account_failed) {
        return;
    }

    now_ns = qemu_clock_get_ns(clock_type);
    latency_ns = now_ns - cookie->start_time_ns;

    /* Under qtest use a fixed latency so results are deterministic. */
    if (qtest_enabled()) {
        latency_ns = qtest_latency_ns;
    }

    stats->total_time_ns[cookie->type] += latency_ns;
    stats->last_access_time_ns = now_ns;

    QSLIST_FOREACH(interval, &stats->intervals, entries) {
        timed_average_account(&interval->latency[cookie->type], latency_ns);
    }
}

/*
 * Account an invalid request of @type.  Unlike block_acct_done() and
 * block_acct_failed(), total_time_ns[] is NOT updated: invalid requests
 * are accounted at submission time, so no actual I/O is involved.
 */
void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
{
    assert(type < BLOCK_MAX_IOTYPE);

    stats->invalid_ops[type]++;

    if (!stats->account_invalid) {
        return;
    }
    stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
}

/*
 * Account @num_requests requests of @type that were merged into another
 * request rather than issued individually.
 *
 * NOTE(review): the pasted diff swallowed the opening brace into a hunk
 * header; it is restored here.
 */
void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                           int num_requests)
{
    assert(type < BLOCK_MAX_IOTYPE);
    stats->merged[type] += num_requests;
}

/* Nanoseconds elapsed since the last accounted access on @stats,
 * measured with the file-wide accounting clock. */
int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
{
    int64_t now_ns = qemu_clock_get_ns(clock_type);

    return now_ns - stats->last_access_time_ns;
}

/*
 * Average I/O queue depth for @type over the interval @stats: the summed
 * request latency divided by the elapsed wall time of the window (both
 * reported by timed_average_sum()).
 */
double block_acct_queue_depth(BlockAcctTimedStats *stats,
                              enum BlockAcctType type)
{
    uint64_t elapsed_ns;
    uint64_t busy_ns;

    assert(type < BLOCK_MAX_IOTYPE);

    busy_ns = timed_average_sum(&stats->latency[type], &elapsed_ns);
    return (double) busy_ns / elapsed_ns;
}
+36 −14
Original line number Diff line number Diff line
@@ -221,11 +221,45 @@ static void backup_iostatus_reset(BlockJob *job)
    }
}

/*
 * Decide the fate of the job's sync dirty bitmap at job end.  On success
 * (ret >= 0 and not cancelled) the bitmap is deleted and its backup
 * installed; on error or cancellation the successor is merged back into
 * the parent and nothing is deleted.
 */
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
    BlockDriverState *bs = job->common.bs;
    BdrvDirtyBitmap *bm;

    if (ret >= 0 && !block_job_is_cancelled(&job->common)) {
        /* Everything is fine, delete this bitmap and install the backup. */
        bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
    } else {
        /* Merge the successor back into the parent, delete nothing. */
        bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
    }
    assert(bm);
}

/* BlockJobDriver .commit hook: finalise the sync bitmap (success path). */
static void backup_commit(BlockJob *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);

    if (!s->sync_bitmap) {
        return;
    }
    backup_cleanup_sync_bitmap(s, 0);
}

/* BlockJobDriver .abort hook: finalise the sync bitmap (failure path). */
static void backup_abort(BlockJob *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);

    if (!s->sync_bitmap) {
        return;
    }
    backup_cleanup_sync_bitmap(s, -1);
}

/* Driver table for the backup block job.  The commit/abort hooks decide,
 * via backup_cleanup_sync_bitmap(), whether the dirty-bitmap successor is
 * kept or merged back when the job finishes. */
static const BlockJobDriver backup_job_driver = {
    .instance_size  = sizeof(BackupBlockJob),
    .job_type       = BLOCK_JOB_TYPE_BACKUP,
    .set_speed      = backup_set_speed,
    .iostatus_reset = backup_iostatus_reset,
    .commit         = backup_commit,
    .abort          = backup_abort,
};

static BlockErrorAction backup_error_action(BackupBlockJob *job,
@@ -441,19 +475,6 @@ static void coroutine_fn backup_run(void *opaque)
    /* wait until pending backup_do_cow() calls have completed */
    qemu_co_rwlock_wrlock(&job->flush_rwlock);
    qemu_co_rwlock_unlock(&job->flush_rwlock);

    if (job->sync_bitmap) {
        BdrvDirtyBitmap *bm;
        if (ret < 0 || block_job_is_cancelled(&job->common)) {
            /* Merge the successor back into the parent, delete nothing. */
            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
            assert(bm);
        } else {
            /* Everything is fine, delete this bitmap and install the backup. */
            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
            assert(bm);
        }
    }
    hbitmap_free(job->bitmap);

    if (target->blk) {
@@ -472,7 +493,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockCompletionFunc *cb, void *opaque,
                  Error **errp)
                  BlockJobTxn *txn, Error **errp)
{
    int64_t len;

@@ -554,6 +575,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
                       sync_bitmap : NULL;
    job->common.len = len;
    job->common.co = qemu_coroutine_create(backup_run);
    block_job_txn_add_job(txn, &job->common);
    qemu_coroutine_enter(job->common.co, job);
    return;

+1 −0
Original line number Diff line number Diff line
@@ -176,6 +176,7 @@ static void blk_delete(BlockBackend *blk)
    }
    g_free(blk->name);
    drive_info_del(blk->legacy_dinfo);
    block_acct_cleanup(&blk->stats);
    g_free(blk);
}

+130 −20
Original line number Diff line number Diff line
@@ -237,8 +237,21 @@ bool bdrv_requests_pending(BlockDriverState *bs)
    return false;
}

/* Invoke the driver's optional .bdrv_drain callback on @bs, then recurse
 * into every child node.  Per the comment on bdrv_drain() below, this
 * lets drivers suspend their internal I/O until the next request. */
static void bdrv_drain_recurse(BlockDriverState *bs)
{
    BdrvChild *child;

    if (bs->drv && bs->drv->bdrv_drain) {
        bs->drv->bdrv_drain(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_drain_recurse(child->bs);
    }
}

/*
 * Wait for pending requests to complete on a single BlockDriverState subtree
 * Wait for pending requests to complete on a single BlockDriverState subtree,
 * and suspend block driver's internal I/O until next request arrives.
 *
 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
 * AioContext.
@@ -251,6 +264,7 @@ void bdrv_drain(BlockDriverState *bs)
{
    bool busy = true;

    bdrv_drain_recurse(bs);
    while (busy) {
        /* Keep iterating */
         bdrv_flush_io_queue(bs);
@@ -348,13 +362,14 @@ static void tracked_request_end(BdrvTrackedRequest *req)
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes, bool is_write)
                                  unsigned int bytes,
                                  enum BdrvTrackedRequestType type)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset         = offset,
        .bytes          = bytes,
        .is_write       = is_write,
        .type           = type,
        .co             = qemu_coroutine_self(),
        .serialising    = false,
        .overlap_offset = offset,
@@ -971,7 +986,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, false);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
@@ -1292,7 +1307,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (!qiov) {
        ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
@@ -2317,18 +2332,20 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;
    BdrvTrackedRequest req;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        return 0;
    }

    tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
            goto out;
        }
    }

@@ -2368,14 +2385,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
        ret = 0;
    }
    if (ret < 0) {
        return ret;
        goto out;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bs->file ? bdrv_co_flush(bs->file->bs) : 0;
    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
out:
    tracked_request_end(&req);
    return ret;
}

int bdrv_flush(BlockDriverState *bs)
@@ -2418,6 +2438,7 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    BdrvTrackedRequest req;
    int max_discard, ret;

    if (!bs->drv) {
@@ -2440,6 +2461,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
        return 0;
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors,
                          BDRV_TRACKED_DISCARD);
    bdrv_set_dirty(bs, sector_num, nb_sectors);

    max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
@@ -2473,20 +2496,24 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
            acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                            bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                return -EIO;
                ret = -EIO;
                goto out;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            return ret;
            goto out;
        }

        sector_num += num;
        nb_sectors -= num;
    }
    return 0;
    ret = 0;
out:
    tracked_request_end(&req);
    return ret;
}

int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
@@ -2515,26 +2542,109 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
    return rwco.ret;
}

/* needed for generic scsi interface */
typedef struct {
    CoroutineIOCompletion *co;
    QEMUBH *bh;
} BdrvIoctlCompletionData;

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
static void bdrv_ioctl_bh_cb(void *opaque)
{
    BdrvIoctlCompletionData *data = opaque;

    bdrv_co_io_em_complete(data->co, -ENOTSUP);
    qemu_bh_delete(data->bh);
}

/*
 * Coroutine implementation of an ioctl on @bs, wrapped in a tracked
 * request of type BDRV_TRACKED_IOCTL so bdrv_drain() waits for it.
 * Drivers without a .bdrv_aio_ioctl callback yield -ENOTSUP; when the
 * driver returns no AIOCB, a bottom half (bdrv_ioctl_bh_cb) completes the
 * request with -ENOTSUP instead.  Returns the value set in co.ret.
 *
 * NOTE(review): the pasted diff left the body of the removed legacy
 * bdrv_ioctl() ("if (drv && drv->bdrv_ioctl) ...") in the middle of this
 * function; those stale lines are dropped.
 */
static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest tracked_req;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
    if (!drv || !drv->bdrv_aio_ioctl) {
        co.ret = -ENOTSUP;
        goto out;
    }

    acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
    if (!acb) {
        /* No AIOCB: complete with -ENOTSUP from a bottom half so the
         * yield below is still woken up. */
        BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
        data->bh = aio_bh_new(bdrv_get_aio_context(bs),
                              bdrv_ioctl_bh_cb, data);
        data->co = &co;
        qemu_bh_schedule(data->bh);
    }
    qemu_coroutine_yield();
out:
    tracked_request_end(&tracked_req);
    return co.ret;
}

/* Argument/result bundle passed from bdrv_ioctl() into the coroutine. */
typedef struct {
    BlockDriverState *bs;
    int req;
    void *buf;
    int ret;
} BdrvIoctlCoData;

/* Coroutine entry point: run the ioctl and stash the result in data->ret
 * (which bdrv_ioctl() polls until it leaves -EINPROGRESS). */
static void coroutine_fn bdrv_co_ioctl_entry(void *opaque)
{
    BdrvIoctlCoData *data = opaque;
    data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf);
}

/* needed for generic scsi interface */
/*
 * Synchronous ioctl wrapper around bdrv_co_do_ioctl().  Runs the
 * coroutine entry directly when already in coroutine context, otherwise
 * spawns a coroutine and drives the AioContext until the result is no
 * longer the -EINPROGRESS sentinel.
 */
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BdrvIoctlCoData data = {
        .bs = bs,
        .req = req,
        .buf = buf,
        .ret = -EINPROGRESS,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_co_ioctl_entry(&data);
    } else {
        Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry);
        qemu_coroutine_enter(co, &data);
    }
    /* Pump events until bdrv_co_ioctl_entry() has stored the result. */
    while (data.ret == -EINPROGRESS) {
        aio_poll(bdrv_get_aio_context(bs), true);
    }
    return data.ret;
}

/* Coroutine entry point for the asynchronous ioctl path: run the ioctl,
 * record the error in the AIOCB and signal completion. */
static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;
    acb->req.error = bdrv_co_do_ioctl(acb->common.bs,
                                      acb->req.req, acb->req.buf);
    bdrv_co_complete(acb);
}

/*
 * Asynchronous ioctl on @bs: allocate a coroutine-backed AIOCB, run
 * bdrv_co_do_ioctl() in a coroutine, and deliver the result through
 * @cb/@opaque.  Always returns the AIOCB.
 *
 * NOTE(review): the pasted diff left the removed legacy implementation
 * ("if (drv && drv->bdrv_aio_ioctl) return ...; return NULL;") after the
 * coroutine launch, along with its now-unused 'drv' local; both dropped.
 */
BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockCompletionFunc *cb, void *opaque)
{
    BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info,
                                            bs, cb, opaque);
    Coroutine *co;

    acb->need_bh = true;
    acb->req.error = -EINPROGRESS;
    acb->req.req = req;
    acb->req.buf = buf;
    co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry);
    qemu_coroutine_enter(co, acb);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
Loading