Commit f141eafe authored by Anthony Liguori
Browse files

push down vector linearization to posix-aio-compat.c (Christoph Hellwig)



Make all AIO requests vectored and defer linearization until the actual
I/O thread.  This prepares for using native preadv/pwritev.

Also enables asynchronous direct I/O by handling that case in the I/O thread.

Qcow and qcow2 probably want to be adapted to directly deal with multi-segment
requests, but that can be implemented later.


Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7020 c046a42c-6fe2-441c-8c8c-71466251a162
parent c87c0672
Loading
Loading
Loading
Loading
+51 −33
Original line number Diff line number Diff line
@@ -525,7 +525,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
typedef struct QCowAIOCB {
    BlockDriverAIOCB common;
    int64_t sector_num;
    QEMUIOVector *qiov;
    uint8_t *buf;
    void *orig_buf;
    int nb_sectors;
    int n;
    uint64_t cluster_offset;
@@ -543,12 +545,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
    int index_in_cluster;

    acb->hd_aiocb = NULL;
    if (ret < 0) {
    fail:
        acb->common.cb(acb->common.opaque, ret);
        qemu_aio_release(acb);
        return;
    }
    if (ret < 0)
        goto done;

 redo:
    /* post process the read buffer */
@@ -570,9 +568,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)

    if (acb->nb_sectors == 0) {
        /* request completed */
        acb->common.cb(acb->common.opaque, 0);
        qemu_aio_release(acb);
        return;
        ret = 0;
        goto done;
    }

    /* prepare next AIO request */
@@ -592,7 +589,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
            acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
                &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
            if (acb->hd_aiocb == NULL)
                goto fail;
                goto done;
        } else {
            /* Note: in this case, no need to wait */
            memset(acb->buf, 0, 512 * acb->n);
@@ -601,14 +598,14 @@ static void qcow_aio_read_cb(void *opaque, int ret)
    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
        /* add AIO support for compressed blocks ? */
        if (decompress_cluster(s, acb->cluster_offset) < 0)
            goto fail;
            goto done;
        memcpy(acb->buf,
               s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
        goto redo;
    } else {
        if ((acb->cluster_offset & 511) != 0) {
            ret = -EIO;
            goto fail;
            goto done;
        }
        acb->hd_iov.iov_base = acb->buf;
        acb->hd_iov.iov_len = acb->n * 512;
@@ -617,12 +614,22 @@ static void qcow_aio_read_cb(void *opaque, int ret)
                            (acb->cluster_offset >> 9) + index_in_cluster,
                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
        if (acb->hd_aiocb == NULL)
            goto fail;
            goto done;
    }

    return;

done:
    if (acb->qiov->niov > 1) {
        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
        qemu_vfree(acb->orig_buf);
    }
    acb->common.cb(acb->common.opaque, ret);
    qemu_aio_release(acb);
}

static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    QCowAIOCB *acb;
@@ -632,7 +639,11 @@ static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
        return NULL;
    acb->hd_aiocb = NULL;
    acb->sector_num = sector_num;
    acb->buf = buf;
    acb->qiov = qiov;
    if (qiov->niov > 1)
        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
    else
        acb->buf = qiov->iov->iov_base;
    acb->nb_sectors = nb_sectors;
    acb->n = 0;
    acb->cluster_offset = 0;
@@ -652,12 +663,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)

    acb->hd_aiocb = NULL;

    if (ret < 0) {
    fail:
        acb->common.cb(acb->common.opaque, ret);
        qemu_aio_release(acb);
        return;
    }
    if (ret < 0)
        goto done;

    acb->nb_sectors -= acb->n;
    acb->sector_num += acb->n;
@@ -665,9 +672,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)

    if (acb->nb_sectors == 0) {
        /* request completed */
        acb->common.cb(acb->common.opaque, 0);
        qemu_aio_release(acb);
        return;
        ret = 0;
        goto done;
    }

    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -679,14 +685,14 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                        index_in_cluster + acb->n);
    if (!cluster_offset || (cluster_offset & 511) != 0) {
        ret = -EIO;
        goto fail;
        goto done;
    }
    if (s->crypt_method) {
        if (!acb->cluster_data) {
            acb->cluster_data = qemu_mallocz(s->cluster_size);
            if (!acb->cluster_data) {
                ret = -ENOMEM;
                goto fail;
                goto done;
            }
        }
        encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
@@ -704,11 +710,18 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                    &acb->hd_qiov, acb->n,
                                    qcow_aio_write_cb, acb);
    if (acb->hd_aiocb == NULL)
        goto fail;
        goto done;
    return;

done:
    if (acb->qiov->niov > 1)
        qemu_vfree(acb->orig_buf);
    acb->common.cb(acb->common.opaque, ret);
    qemu_aio_release(acb);
}

static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BDRVQcowState *s = bs->opaque;
@@ -721,7 +734,12 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
        return NULL;
    acb->hd_aiocb = NULL;
    acb->sector_num = sector_num;
    acb->buf = (uint8_t *)buf;
    acb->qiov = qiov;
    if (qiov->niov > 1) {
        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
        qemu_iovec_to_buffer(qiov, acb->buf);
    } else
        acb->buf = qiov->iov->iov_base;
    acb->nb_sectors = nb_sectors;
    acb->n = 0;

@@ -909,8 +927,8 @@ BlockDriver bdrv_qcow = {
    .bdrv_is_allocated	= qcow_is_allocated,
    .bdrv_set_key	= qcow_set_key,
    .bdrv_make_empty	= qcow_make_empty,
    .bdrv_aio_read	= qcow_aio_read,
    .bdrv_aio_write	= qcow_aio_write,
    .bdrv_aio_readv	= qcow_aio_readv,
    .bdrv_aio_writev	= qcow_aio_writev,
    .bdrv_aio_cancel	= qcow_aio_cancel,
    .aiocb_size		= sizeof(QCowAIOCB),
    .bdrv_write_compressed = qcow_write_compressed,
+54 −39
Original line number Diff line number Diff line
@@ -1264,7 +1264,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
typedef struct QCowAIOCB {
    BlockDriverAIOCB common;
    int64_t sector_num;
    QEMUIOVector *qiov;
    uint8_t *buf;
    void *orig_buf;
    int nb_sectors;
    int n;
    uint64_t cluster_offset;
@@ -1307,12 +1309,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
    int index_in_cluster, n1;

    acb->hd_aiocb = NULL;
    if (ret < 0) {
fail:
        acb->common.cb(acb->common.opaque, ret);
        qemu_aio_release(acb);
        return;
    }
    if (ret < 0)
        goto done;

    /* post process the read buffer */
    if (!acb->cluster_offset) {
@@ -1333,9 +1331,8 @@ fail:

    if (acb->nb_sectors == 0) {
        /* request completed */
        acb->common.cb(acb->common.opaque, 0);
        qemu_aio_release(acb);
        return;
        ret = 0;
        goto done;
    }

    /* prepare next AIO request */
@@ -1356,32 +1353,32 @@ fail:
                                    &acb->hd_qiov, acb->n,
				    qcow_aio_read_cb, acb);
                if (acb->hd_aiocb == NULL)
                    goto fail;
                    goto done;
            } else {
                ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                if (ret < 0)
                    goto fail;
                    goto done;
            }
        } else {
            /* Note: in this case, no need to wait */
            memset(acb->buf, 0, 512 * acb->n);
            ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
            if (ret < 0)
                goto fail;
                goto done;
        }
    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
        /* add AIO support for compressed blocks ? */
        if (decompress_cluster(s, acb->cluster_offset) < 0)
            goto fail;
            goto done;
        memcpy(acb->buf,
               s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
        ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
        if (ret < 0)
            goto fail;
            goto done;
    } else {
        if ((acb->cluster_offset & 511) != 0) {
            ret = -EIO;
            goto fail;
            goto done;
        }

        acb->hd_iov.iov_base = acb->buf;
@@ -1391,13 +1388,22 @@ fail:
                            (acb->cluster_offset >> 9) + index_in_cluster,
                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
        if (acb->hd_aiocb == NULL)
            goto fail;
            goto done;
    }

    return;
done:
    if (acb->qiov->niov > 1) {
        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
        qemu_vfree(acb->orig_buf);
    }
    acb->common.cb(acb->common.opaque, ret);
    qemu_aio_release(acb);
}

static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
{
    QCowAIOCB *acb;

@@ -1406,7 +1412,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
        return NULL;
    acb->hd_aiocb = NULL;
    acb->sector_num = sector_num;
    acb->buf = buf;
    acb->qiov = qiov;
    if (qiov->niov > 1) {
        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
        if (is_write)
            qemu_iovec_to_buffer(qiov, acb->buf);
    } else
        acb->buf = qiov->iov->iov_base;
    acb->nb_sectors = nb_sectors;
    acb->n = 0;
    acb->cluster_offset = 0;
@@ -1414,13 +1426,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
    return acb;
}

static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    QCowAIOCB *acb;

    acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
    if (!acb)
        return NULL;

@@ -1439,16 +1451,12 @@ static void qcow_aio_write_cb(void *opaque, int ret)

    acb->hd_aiocb = NULL;

    if (ret < 0) {
    fail:
        acb->common.cb(acb->common.opaque, ret);
        qemu_aio_release(acb);
        return;
    }
    if (ret < 0)
        goto done;

    if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
        free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
        goto fail;
        goto done;
    }

    acb->nb_sectors -= acb->n;
@@ -1457,9 +1465,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)

    if (acb->nb_sectors == 0) {
        /* request completed */
        acb->common.cb(acb->common.opaque, 0);
        qemu_aio_release(acb);
        return;
        ret = 0;
        goto done;
    }

    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -1473,7 +1480,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                          n_end, &acb->n, &acb->l2meta);
    if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
        ret = -EIO;
        goto fail;
        goto done;
    }
    if (s->crypt_method) {
        if (!acb->cluster_data) {
@@ -1494,11 +1501,19 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                    &acb->hd_qiov, acb->n,
                                    qcow_aio_write_cb, acb);
    if (acb->hd_aiocb == NULL)
        goto fail;
        goto done;

    return;

done:
    if (acb->qiov->niov > 1)
        qemu_vfree(acb->orig_buf);
    acb->common.cb(acb->common.opaque, ret);
    qemu_aio_release(acb);
}

static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BDRVQcowState *s = bs->opaque;
@@ -1506,7 +1521,7 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,

    s->cluster_cache_offset = -1; /* disable compressed cache */

    acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
    if (!acb)
        return NULL;

@@ -2771,8 +2786,8 @@ BlockDriver bdrv_qcow2 = {
    .bdrv_set_key	= qcow_set_key,
    .bdrv_make_empty	= qcow_make_empty,

    .bdrv_aio_read	= qcow_aio_read,
    .bdrv_aio_write	= qcow_aio_write,
    .bdrv_aio_readv	= qcow_aio_readv,
    .bdrv_aio_writev	= qcow_aio_writev,
    .bdrv_aio_cancel	= qcow_aio_cancel,
    .aiocb_size		= sizeof(QCowAIOCB),
    .bdrv_write_compressed = qcow_write_compressed,
+38 −55
Original line number Diff line number Diff line
@@ -599,8 +599,8 @@ static int posix_aio_init(void)
    return 0;
}

static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
        QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BDRVRawState *s = bs->opaque;
@@ -614,24 +614,25 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
        return NULL;
    acb->aiocb.aio_fildes = s->fd;
    acb->aiocb.ev_signo = SIGUSR2;
    acb->aiocb.aio_buf = buf;
    if (nb_sectors < 0)
        acb->aiocb.aio_nbytes = -nb_sectors;
    else
    acb->aiocb.aio_iov = qiov->iov;
    acb->aiocb.aio_niov = qiov->niov;
    acb->aiocb.aio_nbytes = nb_sectors * 512;
    acb->aiocb.aio_offset = sector_num * 512;
    acb->aiocb.aio_flags = 0;

    /*
     * If O_DIRECT is used the buffer needs to be aligned on a sector
     * boundary. Tell the low level code to ensure that in case it's
     * not done yet.
     */
    if (s->aligned_buf)
        acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;

    acb->next = posix_aio_state->first_aio;
    posix_aio_state->first_aio = acb;
    return acb;
}

/*
 * Completion callback for an emulated (synchronously executed) AIO request.
 *
 * opaque is treated as the RawAIOCB of the request.  The result previously
 * stored in acb->ret is passed to the request's completion function, and
 * the RawAIOCB is then released.
 */
static void raw_aio_em_cb(void* opaque)
{
    RawAIOCB *acb = opaque;
    /* Report the stored result to the caller-supplied completion function. */
    acb->common.cb(acb->common.opaque, acb->ret);
    /* The request is finished; release the control block. */
    qemu_aio_release(acb);
}

static void raw_aio_remove(RawAIOCB *acb)
{
    RawAIOCB **pacb;
@@ -651,28 +652,13 @@ static void raw_aio_remove(RawAIOCB *acb)
    }
}

static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    RawAIOCB *acb;

    /*
     * If O_DIRECT is used and the buffer is not aligned fall back
     * to synchronous IO.
     */
    BDRVRawState *s = bs->opaque;

    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
        QEMUBH *bh;
        acb = qemu_aio_get(bs, cb, opaque);
        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
        bh = qemu_bh_new(raw_aio_em_cb, acb);
        qemu_bh_schedule(bh);
        return &acb->common;
    }

    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
    if (!acb)
        return NULL;
    if (qemu_paio_read(&acb->aiocb) < 0) {
@@ -682,28 +668,13 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
    return &acb->common;
}

static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    RawAIOCB *acb;

    /*
     * If O_DIRECT is used and the buffer is not aligned fall back
     * to synchronous IO.
     */
    BDRVRawState *s = bs->opaque;

    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
        QEMUBH *bh;
        acb = qemu_aio_get(bs, cb, opaque);
        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
        bh = qemu_bh_new(raw_aio_em_cb, acb);
        qemu_bh_schedule(bh);
        return &acb->common;
    }

    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
    if (!acb)
        return NULL;
    if (qemu_paio_write(&acb->aiocb) < 0) {
@@ -887,8 +858,8 @@ BlockDriver bdrv_raw = {
    .bdrv_flush = raw_flush,

#ifdef CONFIG_AIO
    .bdrv_aio_read = raw_aio_read,
    .bdrv_aio_write = raw_aio_write,
    .bdrv_aio_readv = raw_aio_readv,
    .bdrv_aio_writev = raw_aio_writev,
    .bdrv_aio_cancel = raw_aio_cancel,
    .aiocb_size = sizeof(RawAIOCB),
#endif
@@ -1215,12 +1186,24 @@ static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BDRVRawState *s = bs->opaque;
    RawAIOCB *acb;

    acb = raw_aio_setup(bs, 0, buf, 0, cb, opaque);
    if (fd_open(bs) < 0)
        return NULL;

    acb = qemu_aio_get(bs, cb, opaque);
    if (!acb)
        return NULL;
    acb->aiocb.aio_fildes = s->fd;
    acb->aiocb.ev_signo = SIGUSR2;
    acb->aiocb.aio_offset = 0;
    acb->aiocb.aio_flags = 0;

    acb->next = posix_aio_state->first_aio;
    posix_aio_state->first_aio = acb;

    acb->aiocb.aio_ioctl_buf = buf;
    acb->aiocb.aio_ioctl_cmd = req;
    if (qemu_paio_ioctl(&acb->aiocb) < 0) {
        raw_aio_remove(acb);
@@ -1424,8 +1407,8 @@ BlockDriver bdrv_host_device = {
    .bdrv_flush		= raw_flush,

#ifdef CONFIG_AIO
    .bdrv_aio_read	= raw_aio_read,
    .bdrv_aio_write	= raw_aio_write,
    .bdrv_aio_readv	= raw_aio_readv,
    .bdrv_aio_writev	= raw_aio_writev,
    .bdrv_aio_cancel	= raw_aio_cancel,
    .aiocb_size		= sizeof(RawAIOCB),
#endif
+70 −126

File changed.

Preview size limit exceeded, changes collapsed.

+4 −4
Original line number Diff line number Diff line
@@ -54,11 +54,11 @@ struct BlockDriver {
    int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
    int (*bdrv_make_empty)(BlockDriverState *bs);
    /* aio */
    BlockDriverAIOCB *(*bdrv_aio_read)(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
    BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
    BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs,
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
    BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
    void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
    int aiocb_size;
Loading