Commit f3947986 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging



Block layer patches for 2.4.0-rc1

# gpg: Signature made Tue Jul 14 16:15:35 2015 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream:
  rbd: fix ceph settings precedence
  rbd: make qemu's cache setting override any ceph setting
  MAINTAINERS: update email address
  rbd: remove unused constants and fields
  block: Fix backing file child when modifying graph
  block: Reorder cleanups in bdrv_close()
  block: Introduce bdrv_unref_child()
  block: Introduce bdrv_open_child()
  block: Move bdrv_attach_child() calls up the call chain
  nvme: properly report volatile write caches
  nvme: implement the Flush command

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 0030ff40 e34d8f29
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1169,7 +1169,7 @@ S: Supported
F: block/vmdk.c

RBD
M: Josh Durgin <josh.durgin@inktank.com>
M: Josh Durgin <jdurgin@redhat.com>
M: Jeff Cody <jcody@redhat.com>
L: qemu-block@nongnu.org
S: Supported
+100 −44
Original line number Diff line number Diff line
@@ -1102,12 +1102,46 @@ static int bdrv_fill_options(QDict **options, const char **pfilename,
    return 0;
}

/*
 * Record child_bs as a child of parent_bs with the given role.
 *
 * A new BdrvChild is allocated, inserted at the head of parent_bs->children
 * and returned to the caller. No reference on child_bs is taken here.
 */
static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                                    BlockDriverState *child_bs,
                                    const BdrvChildRole *child_role)
{
    BdrvChild *link = g_new(BdrvChild, 1);

    /* Members not named in the compound literal are zero-initialised */
    *link = (BdrvChild) { .bs = child_bs, .role = child_role };
    QLIST_INSERT_HEAD(&parent_bs->children, link, next);

    return link;
}

/*
 * Unlink child from the children list it is on and free the BdrvChild
 * object. The BlockDriverState it points to is not unreferenced here.
 */
static void bdrv_detach_child(BdrvChild *child)
{
    QLIST_REMOVE(child, next);
    g_free(child);
}

/*
 * Detach child from its parent and call bdrv_unref() on the node it
 * pointed to.
 *
 * If that node's inherits_from refers to parent, it is reset to NULL first.
 */
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{
    /* Remember the node now: child is freed by bdrv_detach_child() */
    BlockDriverState *node = child->bs;

    if (node->inherits_from == parent) {
        node->inherits_from = NULL;
    }

    bdrv_detach_child(child);
    bdrv_unref(node);
}

void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
        bdrv_detach_child(bs->backing_child);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
@@ -1118,8 +1152,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        bs->backing_child = NULL;
        goto out;
    }
    bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
@@ -1202,6 +1238,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
        error_free(local_err);
        goto free_exit;
    }

    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
@@ -1214,7 +1251,7 @@ free_exit:
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 * BlockdevRef is given. NULL will be returned, but errp remains unset.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
@@ -1222,21 +1259,21 @@ free_exit:
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
BdrvChild *bdrv_open_child(const char *filename,
                           QDict *options, const char *bdref_key,
                    BlockDriverState* parent, const BdrvChildRole *child_role,
                           BlockDriverState* parent,
                           const BdrvChildRole *child_role,
                           bool allow_none, Error **errp)
{
    BdrvChild *c = NULL;
    BlockDriverState *bs;
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    assert(pbs);
    assert(*pbs == NULL);
    assert(child_role != NULL);

    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
@@ -1244,23 +1281,60 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (allow_none) {
            ret = 0;
        } else {
        if (!allow_none) {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        QDECREF(image_options);
        goto done;
    }

    ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0,
    bs = NULL;
    ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
                            parent, child_role, NULL, errp);
    if (ret < 0) {
        goto done;
    }

    c = bdrv_attach_child(parent, bs, child_role);

done:
    qdict_del(options, bdref_key);
    return ret;
    return c;
}

/*
 * Wrapper around bdrv_open_child() for callers that want an int status and
 * the opened BlockDriverState rather than a BdrvChild object.
 *
 * Returns 0 on success; if a child was opened, *pbs is set to its
 * BlockDriverState. If bdrv_open_child() reports an error, it is propagated
 * to errp and -EINVAL is returned.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key,
                    BlockDriverState* parent, const BdrvChildRole *child_role,
                    bool allow_none, Error **errp)
{
    BdrvChild *child;
    Error *err = NULL;

    assert(pbs);
    assert(*pbs == NULL);

    child = bdrv_open_child(filename, options, bdref_key, parent, child_role,
                            allow_none, &err);

    /* Failure is detected through the error object, not the return value */
    if (err != NULL) {
        error_propagate(errp, err);
        return -EINVAL;
    }

    /* A NULL child without an error means nothing was opened */
    if (child) {
        *pbs = child->bs;
    }

    return 0;
}

int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
@@ -1328,19 +1402,6 @@ out:
    return ret;
}

/*
 * Record child_bs as a child of parent_bs with the given role by inserting
 * a newly allocated BdrvChild at the head of parent_bs->children.
 */
static void bdrv_attach_child(BlockDriverState *parent_bs,
                              BlockDriverState *child_bs,
                              const BdrvChildRole *child_role)
{
    BdrvChild *link = g_new(BdrvChild, 1);

    /* Members not named in the compound literal are zero-initialised */
    *link = (BdrvChild) { .bs = child_bs, .role = child_role };
    QLIST_INSERT_HEAD(&parent_bs->children, link, next);
}

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
@@ -1393,9 +1454,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
            return -ENODEV;
        }
        bdrv_ref(bs);
        if (child_role) {
            bdrv_attach_child(parent, bs, child_role);
        }
        *pbs = bs;
        return 0;
    }
@@ -1540,10 +1598,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
        goto close_and_fail;
    }

    if (child_role) {
        bdrv_attach_child(parent, bs, child_role);
    }

    QDECREF(options);
    *pbs = bs;
    return 0;
@@ -1849,20 +1903,23 @@ void bdrv_close(BlockDriverState *bs)
    if (bs->drv) {
        BdrvChild *child, *next;

        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            if (child->bs->inherits_from == bs) {
                child->bs->inherits_from = NULL;
            }
            QLIST_REMOVE(child, next);
            g_free(child);
        }
        bs->drv->bdrv_close(bs);

        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);

        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            /* TODO Remove bdrv_unref() from drivers' close function and use
             * bdrv_unref_child() here */
            if (child->bs->inherits_from == bs) {
                child->bs->inherits_from = NULL;
            }
            bdrv_detach_child(child);
        }

        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
@@ -2116,7 +2173,6 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    bdrv_set_backing_hd(bs_top, bs_new);
    bdrv_attach_child(bs_top, bs_new, &child_backing);
}

static void bdrv_delete(BlockDriverState *bs)
+35 −29
Original line number Diff line number Diff line
@@ -74,25 +74,18 @@ typedef struct RBDAIOCB {
    QEMUIOVector *qiov;
    char *bounce;
    RBDAIOCmd cmd;
    int64_t sector_num;
    int error;
    struct BDRVRBDState *s;
    int status;
} RBDAIOCB;

/*
 * Per-request context object. rbd_start_aio() allocates one with g_new()
 * and fills in acb, buf and s.
 * NOTE(review): the remaining users of these fields are outside this view.
 */
typedef struct RADOSCB {
    int rcbid;
    RBDAIOCB *acb;              /* the RBDAIOCB this context was created for */
    struct BDRVRBDState *s;     /* copied from acb->s */
    int done;
    int64_t size;
    char *buf;
    int64_t ret;
} RADOSCB;

#define RBD_FD_READ 0
#define RBD_FD_WRITE 1

typedef struct BDRVRBDState {
    rados_t cluster;
    rados_ioctx_t io_ctx;
@@ -235,7 +228,9 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
    return NULL;
}

static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
                             bool only_read_conf_file,
                             Error **errp)
{
    char *p, *buf;
    char name[RBD_MAX_CONF_NAME_SIZE];
@@ -267,14 +262,18 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
        qemu_rbd_unescape(value);

        if (strcmp(name, "conf") == 0) {
            /* read the conf file alone, so it doesn't override more
               specific settings for a particular device */
            if (only_read_conf_file) {
                ret = rados_conf_read_file(cluster, value);
                if (ret < 0) {
                    error_setg(errp, "error reading conf file %s", value);
                    break;
                }
            }
        } else if (strcmp(name, "id") == 0) {
            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
        } else {
        } else if (!only_read_conf_file) {
            ret = rados_conf_set(cluster, name, value);
            if (ret < 0) {
                error_setg(errp, "invalid conf option %s", name);
@@ -337,10 +336,15 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    if (strstr(conf, "conf=") == NULL) {
        /* try default location, but ignore failure */
        rados_conf_read_file(cluster, NULL);
    } else if (conf[0] != '\0' &&
               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
        rados_shutdown(cluster);
        error_propagate(errp, local_err);
        return -EIO;
    }

    if (conf[0] != '\0' &&
        qemu_rbd_set_conf(cluster, conf, &local_err) < 0) {
        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
        rados_shutdown(cluster);
        error_propagate(errp, local_err);
        return -EIO;
@@ -405,7 +409,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
    acb->status = 0;

    qemu_aio_unref(acb);
}
@@ -468,6 +471,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        s->snap = g_strdup(snap_buf);
    }

    if (strstr(conf, "conf=") == NULL) {
        /* try default location, but ignore failure */
        rados_conf_read_file(s->cluster, NULL);
    } else if (conf[0] != '\0') {
        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
        if (r < 0) {
            goto failed_shutdown;
        }
    }

    if (conf[0] != '\0') {
        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
        if (r < 0) {
            goto failed_shutdown;
        }
    }

    /*
     * Fallback to more conservative semantics if setting cache
     * options fails. Ignore errors from setting rbd_cache because the
@@ -481,18 +501,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        rados_conf_set(s->cluster, "rbd_cache", "true");
    }

    if (strstr(conf, "conf=") == NULL) {
        /* try default location, but ignore failure */
        rados_conf_read_file(s->cluster, NULL);
    }

    if (conf[0] != '\0') {
        r = qemu_rbd_set_conf(s->cluster, conf, errp);
        if (r < 0) {
            goto failed_shutdown;
        }
    }

    r = rados_connect(s->cluster);
    if (r < 0) {
        error_setg(errp, "error connecting");
@@ -621,7 +629,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    acb->error = 0;
    acb->s = s;
    acb->bh = NULL;
    acb->status = -EINPROGRESS;

    if (cmd == RBD_AIO_WRITE) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -633,7 +640,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    size = nb_sectors * BDRV_SECTOR_SIZE;

    rcb = g_new(RADOSCB, 1);
    rcb->done = 0;
    rcb->acb = acb;
    rcb->buf = buf;
    rcb->s = acb->s;
+30 −8
Original line number Diff line number Diff line
@@ -207,11 +207,23 @@ static void nvme_rw_cb(void *opaque, int ret)
    } else {
        req->status = NVME_INTERNAL_DEV_ERROR;
    }

    if (req->has_sg) {
        qemu_sglist_destroy(&req->qsg);
    }
    nvme_enqueue_req_completion(cq, req);
}

/*
 * Handle an NVMe Flush command by submitting an asynchronous flush to the
 * controller's block backend, with nvme_rw_cb() as the completion callback.
 *
 * Returns NVME_NO_COMPLETE.
 */
static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
    NvmeRequest *req)
{
    /* No scatter/gather list is set up here; nvme_rw_cb() only destroys
     * req->qsg when has_sg is set */
    req->has_sg = false;
    block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
         BLOCK_ACCT_FLUSH);
    req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);

    return NVME_NO_COMPLETE;
}

static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
    NvmeRequest *req)
{
@@ -235,6 +247,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
    }
    assert((nlb << data_shift) == req->qsg.size);

    req->has_sg = true;
    dma_acct_start(n->conf.blk, &req->acct, &req->qsg,
                   is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
    req->aiocb = is_write ?
@@ -256,7 +269,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
    ns = &n->namespaces[nsid - 1];
    switch (cmd->opcode) {
    case NVME_CMD_FLUSH:
        return NVME_SUCCESS;
        return nvme_flush(n, ns, cmd, req);
    case NVME_CMD_WRITE:
    case NVME_CMD_READ:
        return nvme_rw(n, ns, cmd, req);
@@ -474,26 +487,32 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
/*
 * Handle the Get Features admin command.
 *
 * The feature is selected by command dword 10:
 *  - NVME_VOLATILE_WRITE_CACHE: reports whether the block backend's write
 *    cache is enabled, as returned by blk_enable_write_cache().
 *  - NVME_NUMBER_OF_QUEUES: reports (num_queues - 1) in both the low and
 *    the high 16 bits of the result.
 *
 * The value is stored in req->cqe.result in little-endian byte order.
 *
 * Returns NVME_SUCCESS, or NVME_INVALID_FIELD | NVME_DNR for any other
 * feature value.
 */
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
    uint32_t result;

    /* Each feature is handled exactly once; the result is computed in host
     * byte order so that a single conversion below covers every case. */
    switch (dw10) {
    case NVME_VOLATILE_WRITE_CACHE:
        result = blk_enable_write_cache(n->conf.blk);
        break;
    case NVME_NUMBER_OF_QUEUES:
        result = (n->num_queues - 1) | ((n->num_queues - 1) << 16);
        break;
    default:
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    req->cqe.result = cpu_to_le32(result);
    return NVME_SUCCESS;
}

static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
    uint32_t dw11 = le32_to_cpu(cmd->cdw11);

    switch (dw10) {
    case NVME_VOLATILE_WRITE_CACHE:
        blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
        break;
    case NVME_NUMBER_OF_QUEUES:
        req->cqe.result =
            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
@@ -818,6 +837,9 @@ static int nvme_init(PCIDevice *pci_dev)
    id->psd[0].mp = cpu_to_le16(0x9c4);
    id->psd[0].enlat = cpu_to_le32(0x10);
    id->psd[0].exlat = cpu_to_le32(0x4);
    if (blk_enable_write_cache(n->conf.blk)) {
        id->vwc = 1;
    }

    n->bar.cap = 0;
    NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
+1 −0
Original line number Diff line number Diff line
@@ -638,6 +638,7 @@ typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;
    BlockAIOCB              *aiocb;
    uint16_t                status;
    bool                    has_sg;
    NvmeCqe                 cqe;
    BlockAcctCookie         acct;
    QEMUSGList              qsg;
Loading