Commit 54b37623 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging



# gpg: Signature made Fri 25 Sep 2015 16:47:31 BST using RSA key ID C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"

* remotes/cody/tags/block-pull-request:
  sheepdog: refine discard support
  sheepdog: use per AIOCB dirty indexes for non overlapping requests
  Backup: don't do copy-on-read in before_write_notifier
  block: Introduce a new API bdrv_co_no_copy_on_readv()
  sheepdog: add reopen support
  block/nfs: cache allocated filesize for read-only files
  block/nfs: fix calculation of allocated file size

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 690b286f e6fd57ea
Loading
Loading
Loading
Loading
+14 −6
Original line number Diff line number Diff line
@@ -89,7 +89,8 @@ static void cow_request_end(CowRequest *req)

static int coroutine_fn backup_do_cow(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors,
                                      bool *error_is_read)
                                      bool *error_is_read,
                                      bool is_write_notifier)
{
    BackupBlockJob *job = (BackupBlockJob *)bs->job;
    CowRequest cow_request;
@@ -129,8 +130,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
        iov.iov_len = n * BDRV_SECTOR_SIZE;
        qemu_iovec_init_external(&bounce_qiov, &iov, 1);

        if (is_write_notifier) {
            ret = bdrv_co_no_copy_on_readv(bs,
                                           start * BACKUP_SECTORS_PER_CLUSTER,
                                           n, &bounce_qiov);
        } else {
            ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
                                &bounce_qiov);
        }
        if (ret < 0) {
            trace_backup_do_cow_read_fail(job, start, ret);
            if (error_is_read) {
@@ -190,7 +197,7 @@ static int coroutine_fn backup_before_write_notify(
    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);

    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
}

static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -303,7 +310,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                    return ret;
                }
                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
                                    false);
                if ((ret < 0) &&
                    backup_error_action(job, error_is_read, -ret) ==
                    BLOCK_ERROR_ACTION_REPORT) {
@@ -408,7 +416,7 @@ static void coroutine_fn backup_run(void *opaque)
            }
            /* FULL sync mode we copy the whole drive. */
            ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
+11 −1
Original line number Diff line number Diff line
@@ -932,7 +932,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
        return ret;
    }

    if (bs->copy_on_read) {
    /* Don't do copy-on-read if we read data before write operation */
    if (bs->copy_on_read && !(flags & BDRV_REQ_NO_COPY_ON_READ)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

@@ -1001,6 +1002,15 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}

int coroutine_fn bdrv_co_no_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_no_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_NO_COPY_ON_READ);
}

int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
+37 −1
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@ typedef struct NFSClient {
    int events;
    bool has_zero_init;
    AioContext *aio_context;
    blkcnt_t st_blocks;
} NFSClient;

typedef struct NFSRPC {
@@ -374,6 +375,7 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
    }

    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
    client->st_blocks = st.st_blocks;
    client->has_zero_init = S_ISREG(st.st_mode);
    goto out;
fail:
@@ -464,6 +466,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    NFSRPC task = {0};
    struct stat st;

    if (bdrv_is_read_only(bs) &&
        !(bs->open_flags & BDRV_O_NOCACHE)) {
        return client->st_blocks * 512;
    }

    task.st = &st;
    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
                        &task) != 0) {
@@ -475,7 +482,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
        aio_poll(client->aio_context, true);
    }

    return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
}

static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
@@ -484,6 +491,34 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
    return nfs_ftruncate(client->context, client->fh, offset);
}

/* Note that this will not re-establish a connection with the NFS server
 * - it is effectively a NOP.  */
static int nfs_reopen_prepare(BDRVReopenState *state,
                              BlockReopenQueue *queue, Error **errp)
{
    NFSClient *client = state->bs->opaque;
    struct stat st;
    int ret = 0;

    if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
        error_setg(errp, "Cannot open a read-only mount as read-write");
        return -EACCES;
    }

    /* Update cache for read-only reopens */
    if (!(state->flags & BDRV_O_RDWR)) {
        ret = nfs_fstat(client->context, client->fh, &st);
        if (ret < 0) {
            error_setg(errp, "Failed to fstat file: %s",
                       nfs_get_error(client->context));
            return ret;
        }
        client->st_blocks = st.st_blocks;
    }

    return 0;
}

static BlockDriver bdrv_nfs = {
    .format_name                    = "nfs",
    .protocol_name                  = "nfs",
@@ -499,6 +534,7 @@ static BlockDriver bdrv_nfs = {
    .bdrv_file_open                 = nfs_file_open,
    .bdrv_close                     = nfs_file_close,
    .bdrv_create                    = nfs_file_create,
    .bdrv_reopen_prepare            = nfs_reopen_prepare,

    .bdrv_co_readv                  = nfs_co_readv,
    .bdrv_co_writev                 = nfs_co_writev,
+131 −37
Original line number Diff line number Diff line
@@ -28,7 +28,6 @@
#define SD_OP_READ_OBJ       0x02
#define SD_OP_WRITE_OBJ      0x03
/* 0x04 is used internally by Sheepdog */
#define SD_OP_DISCARD_OBJ    0x05

#define SD_OP_NEW_VDI        0x11
#define SD_OP_LOCK_VDI       0x12
@@ -318,7 +317,7 @@ enum AIOCBState {
    AIOCB_DISCARD_OBJ,
};

#define AIOCBOverwrapping(x, y)                                 \
#define AIOCBOverlapping(x, y)                                 \
    (!(x->max_affect_data_idx < y->min_affect_data_idx          \
       || y->max_affect_data_idx < x->min_affect_data_idx))

@@ -342,6 +341,15 @@ struct SheepdogAIOCB {
    uint32_t min_affect_data_idx;
    uint32_t max_affect_data_idx;

    /*
     * The difference between affect_data_idx and dirty_data_idx:
     * affect_data_idx represents range of index of all request types.
     * dirty_data_idx represents range of index updated by COW requests.
     * dirty_data_idx is used for updating an inode object.
     */
    uint32_t min_dirty_data_idx;
    uint32_t max_dirty_data_idx;

    QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
};

@@ -351,9 +359,6 @@ typedef struct BDRVSheepdogState {

    SheepdogInode inode;

    uint32_t min_dirty_data_idx;
    uint32_t max_dirty_data_idx;

    char name[SD_MAX_VDI_LEN];
    bool is_snapshot;
    uint32_t cache_flags;
@@ -373,10 +378,15 @@ typedef struct BDRVSheepdogState {
    QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
    QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;

    CoQueue overwrapping_queue;
    CoQueue overlapping_queue;
    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
} BDRVSheepdogState;

typedef struct BDRVSheepdogReopenState {
    int fd;
    int cache_flags;
} BDRVSheepdogReopenState;

static const char * sd_strerror(int err)
{
    int i;
@@ -556,6 +566,9 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
    acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
                              acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;

    acb->min_dirty_data_idx = UINT32_MAX;
    acb->max_dirty_data_idx = 0;

    return acb;
}

@@ -819,8 +832,8 @@ static void coroutine_fn aio_read_response(void *opaque)
             */
            if (rsp.result == SD_RES_SUCCESS) {
                s->inode.data_vdi_id[idx] = s->inode.vdi_id;
                s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
                s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
                acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
                acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
            }
        }
        break;
@@ -847,10 +860,6 @@ static void coroutine_fn aio_read_response(void *opaque)
            rsp.result = SD_RES_SUCCESS;
            s->discard_supported = false;
            break;
        case SD_RES_SUCCESS:
            idx = data_oid_to_idx(aio_req->oid);
            s->inode.data_vdi_id[idx] = 0;
            break;
        default:
            break;
        }
@@ -1165,7 +1174,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
        hdr.flags = SD_FLAG_CMD_WRITE | flags;
        break;
    case AIOCB_DISCARD_OBJ:
        hdr.opcode = SD_OP_DISCARD_OBJ;
        hdr.opcode = SD_OP_WRITE_OBJ;
        hdr.flags = SD_FLAG_CMD_WRITE | flags;
        s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
        offset = offsetof(SheepdogInode,
                          data_vdi_id[data_oid_to_idx(oid)]);
        oid = vid_to_vdi_oid(s->inode.vdi_id);
        wlen = datalen = sizeof(uint32_t);
        break;
    }

@@ -1466,13 +1481,11 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    }

    memcpy(&s->inode, buf, sizeof(s->inode));
    s->min_dirty_data_idx = UINT32_MAX;
    s->max_dirty_data_idx = 0;

    bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
    pstrcpy(s->name, sizeof(s->name), vdi);
    qemu_co_mutex_init(&s->lock);
    qemu_co_queue_init(&s->overwrapping_queue);
    qemu_co_queue_init(&s->overlapping_queue);
    qemu_opts_del(opts);
    g_free(buf);
    return 0;
@@ -1486,6 +1499,68 @@ out:
    return ret;
}

static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
                             Error **errp)
{
    BDRVSheepdogState *s = state->bs->opaque;
    BDRVSheepdogReopenState *re_s;
    int ret = 0;

    re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1);

    re_s->cache_flags = SD_FLAG_CMD_CACHE;
    if (state->flags & BDRV_O_NOCACHE) {
        re_s->cache_flags = SD_FLAG_CMD_DIRECT;
    }

    re_s->fd = get_sheep_fd(s, errp);
    if (re_s->fd < 0) {
        ret = re_s->fd;
        return ret;
    }

    return ret;
}

static void sd_reopen_commit(BDRVReopenState *state)
{
    BDRVSheepdogReopenState *re_s = state->opaque;
    BDRVSheepdogState *s = state->bs->opaque;

    if (s->fd) {
        aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
        closesocket(s->fd);
    }

    s->fd = re_s->fd;
    s->cache_flags = re_s->cache_flags;

    g_free(state->opaque);
    state->opaque = NULL;

    return;
}

static void sd_reopen_abort(BDRVReopenState *state)
{
    BDRVSheepdogReopenState *re_s = state->opaque;
    BDRVSheepdogState *s = state->bs->opaque;

    if (re_s == NULL) {
        return;
    }

    if (re_s->fd) {
        aio_set_fd_handler(s->aio_context, re_s->fd, NULL, NULL, NULL);
        closesocket(re_s->fd);
    }

    g_free(state->opaque);
    state->opaque = NULL;

    return;
}

static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
                        Error **errp)
{
@@ -1922,16 +1997,16 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
    AIOReq *aio_req;
    uint32_t offset, data_len, mn, mx;

    mn = s->min_dirty_data_idx;
    mx = s->max_dirty_data_idx;
    mn = acb->min_dirty_data_idx;
    mx = acb->max_dirty_data_idx;
    if (mn <= mx) {
        /* we need to update the vdi object. */
        offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
            mn * sizeof(s->inode.data_vdi_id[0]);
        data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);

        s->min_dirty_data_idx = UINT32_MAX;
        s->max_dirty_data_idx = 0;
        acb->min_dirty_data_idx = UINT32_MAX;
        acb->max_dirty_data_idx = 0;

        iov.iov_base = &s->inode;
        iov.iov_len = sizeof(s->inode);
@@ -2140,7 +2215,9 @@ static int coroutine_fn sd_co_rw_vector(void *p)
        }

        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
                                old_oid, done);
                                old_oid,
                                acb->aiocb_type == AIOCB_DISCARD_OBJ ?
                                0 : done);
        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);

        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
@@ -2157,12 +2234,12 @@ out:
    return 1;
}

static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
{
    SheepdogAIOCB *cb;

    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
        if (AIOCBOverwrapping(aiocb, cb)) {
        if (AIOCBOverlapping(aiocb, cb)) {
            return true;
        }
    }
@@ -2191,15 +2268,15 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
    acb->aiocb_type = AIOCB_WRITE_UDATA;

retry:
    if (check_overwrapping_aiocb(s, acb)) {
        qemu_co_queue_wait(&s->overwrapping_queue);
    if (check_overlapping_aiocb(s, acb)) {
        qemu_co_queue_wait(&s->overlapping_queue);
        goto retry;
    }

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
        QLIST_REMOVE(acb, aiocb_siblings);
        qemu_co_queue_restart_all(&s->overwrapping_queue);
        qemu_co_queue_restart_all(&s->overlapping_queue);
        qemu_aio_unref(acb);
        return ret;
    }
@@ -2207,7 +2284,7 @@ retry:
    qemu_coroutine_yield();

    QLIST_REMOVE(acb, aiocb_siblings);
    qemu_co_queue_restart_all(&s->overwrapping_queue);
    qemu_co_queue_restart_all(&s->overlapping_queue);

    return acb->ret;
}
@@ -2224,15 +2301,15 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
    acb->aio_done_func = sd_finish_aiocb;

retry:
    if (check_overwrapping_aiocb(s, acb)) {
        qemu_co_queue_wait(&s->overwrapping_queue);
    if (check_overlapping_aiocb(s, acb)) {
        qemu_co_queue_wait(&s->overlapping_queue);
        goto retry;
    }

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
        QLIST_REMOVE(acb, aiocb_siblings);
        qemu_co_queue_restart_all(&s->overwrapping_queue);
        qemu_co_queue_restart_all(&s->overlapping_queue);
        qemu_aio_unref(acb);
        return ret;
    }
@@ -2240,7 +2317,7 @@ retry:
    qemu_coroutine_yield();

    QLIST_REMOVE(acb, aiocb_siblings);
    qemu_co_queue_restart_all(&s->overwrapping_queue);
    qemu_co_queue_restart_all(&s->overlapping_queue);
    return acb->ret;
}

@@ -2576,28 +2653,36 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors)
{
    SheepdogAIOCB *acb;
    QEMUIOVector dummy;
    BDRVSheepdogState *s = bs->opaque;
    int ret;
    QEMUIOVector discard_iov;
    struct iovec iov;
    uint32_t zero = 0;

    if (!s->discard_supported) {
            return 0;
    }

    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
    memset(&discard_iov, 0, sizeof(discard_iov));
    memset(&iov, 0, sizeof(iov));
    iov.iov_base = &zero;
    iov.iov_len = sizeof(zero);
    discard_iov.iov = &iov;
    discard_iov.niov = 1;
    acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
    acb->aiocb_type = AIOCB_DISCARD_OBJ;
    acb->aio_done_func = sd_finish_aiocb;

retry:
    if (check_overwrapping_aiocb(s, acb)) {
        qemu_co_queue_wait(&s->overwrapping_queue);
    if (check_overlapping_aiocb(s, acb)) {
        qemu_co_queue_wait(&s->overlapping_queue);
        goto retry;
    }

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
        QLIST_REMOVE(acb, aiocb_siblings);
        qemu_co_queue_restart_all(&s->overwrapping_queue);
        qemu_co_queue_restart_all(&s->overlapping_queue);
        qemu_aio_unref(acb);
        return ret;
    }
@@ -2605,7 +2690,7 @@ retry:
    qemu_coroutine_yield();

    QLIST_REMOVE(acb, aiocb_siblings);
    qemu_co_queue_restart_all(&s->overwrapping_queue);
    qemu_co_queue_restart_all(&s->overlapping_queue);

    return acb->ret;
}
@@ -2702,6 +2787,9 @@ static BlockDriver bdrv_sheepdog = {
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_needs_filename = true,
    .bdrv_file_open = sd_open,
    .bdrv_reopen_prepare    = sd_reopen_prepare,
    .bdrv_reopen_commit     = sd_reopen_commit,
    .bdrv_reopen_abort      = sd_reopen_abort,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2735,6 +2823,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_needs_filename = true,
    .bdrv_file_open = sd_open,
    .bdrv_reopen_prepare    = sd_reopen_prepare,
    .bdrv_reopen_commit     = sd_reopen_commit,
    .bdrv_reopen_abort      = sd_reopen_abort,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2768,6 +2859,9 @@ static BlockDriver bdrv_sheepdog_unix = {
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_needs_filename = true,
    .bdrv_file_open = sd_open,
    .bdrv_reopen_prepare    = sd_reopen_prepare,
    .bdrv_reopen_commit     = sd_reopen_commit,
    .bdrv_reopen_abort      = sd_reopen_abort,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
+6 −3
Original line number Diff line number Diff line
@@ -60,6 +60,7 @@ typedef enum {
     * opened with BDRV_O_UNMAP.
     */
    BDRV_REQ_MAY_UNMAP          = 0x4,
    BDRV_REQ_NO_COPY_ON_READ    = 0x8,
} BdrvRequestFlags;

typedef struct BlockSizes {
@@ -252,6 +253,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_no_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov);
/*
Loading