Commit 5e72179b authored by Anthony Liguori's avatar Anthony Liguori
Browse files

Merge remote-tracking branch 'sstabellini/xen-2013-01-14' into staging



* sstabellini/xen-2013-01-14:
  xen_disk: implement BLKIF_OP_FLUSH_DISKCACHE, remove BLKIF_OP_WRITE_BARRIER
  xen_disk: add persistent grant support to xen_disk backend
  xen_disk: fix memory leak

Signed-off-by: default avatarAnthony Liguori <aliguori@us.ibm.com>
parents cf7c3f0c 7e7b7cba
Loading
Loading
Loading
Loading
+184 −24
Original line number Diff line number Diff line
@@ -51,6 +51,13 @@ static int max_requests = 32;
#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

struct PersistentGrant {
    void *page;
    struct XenBlkDev *blkdev;
};

typedef struct PersistentGrant PersistentGrant;

struct ioreq {
    blkif_request_t     req;
    int16_t             status;
@@ -68,6 +75,7 @@ struct ioreq {
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;
    int                 num_unmap;

    /* aio status */
    int                 aio_inflight;
@@ -104,6 +112,12 @@ struct XenBlkDev {
    int                 requests_inflight;
    int                 requests_finished;

    /* Persistent grants extension */
    gboolean            feature_persistent;
    GTree               *persistent_gnts;
    unsigned int        persistent_gnt_count;
    unsigned int        max_grants;

    /* qemu block driver */
    DriveInfo           *dinfo;
    BlockDriverState    *bs;
@@ -112,6 +126,54 @@ struct XenBlkDev {

/* ------------------------------------------------------------- */

static void ioreq_reset(struct ioreq *ioreq)
{
    memset(&ioreq->req, 0, sizeof(ioreq->req));
    ioreq->status = 0;
    ioreq->start = 0;
    ioreq->presync = 0;
    ioreq->postsync = 0;
    ioreq->mapped = 0;

    memset(ioreq->domids, 0, sizeof(ioreq->domids));
    memset(ioreq->refs, 0, sizeof(ioreq->refs));
    ioreq->prot = 0;
    memset(ioreq->page, 0, sizeof(ioreq->page));
    ioreq->pages = NULL;

    ioreq->aio_inflight = 0;
    ioreq->aio_errors = 0;

    ioreq->blkdev = NULL;
    memset(&ioreq->list, 0, sizeof(ioreq->list));
    memset(&ioreq->acct, 0, sizeof(ioreq->acct));

    qemu_iovec_reset(&ioreq->v);
}

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    uint ua = GPOINTER_TO_UINT(a);
    uint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}

static void destroy_grant(gpointer pgnt)
{
    PersistentGrant *grant = pgnt;
    XenGnttab gnt = grant->blkdev->xendev.gnttabdev;

    if (xc_gnttab_munmap(gnt, grant->page, 1) != 0) {
        xen_be_printf(&grant->blkdev->xendev, 0,
                      "xc_gnttab_munmap failed: %s\n",
                      strerror(errno));
    }
    grant->blkdev->persistent_gnt_count--;
    xen_be_printf(&grant->blkdev->xendev, 3,
                  "unmapped grant %p\n", grant->page);
    g_free(grant);
}

static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;
@@ -129,7 +191,6 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;
@@ -153,7 +214,7 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    memset(ioreq, 0, sizeof(*ioreq));
    ioreq_reset(ioreq);
    ioreq->blkdev = blkdev;
    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    if (finish) {
@@ -182,12 +243,11 @@ static int ioreq_parse(struct ioreq *ioreq)
    case BLKIF_OP_READ:
        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
    case BLKIF_OP_FLUSH_DISKCACHE:
        ioreq->presync = 1;
        if (!ioreq->req.nr_segments) {
            return 0;
        }
        ioreq->presync = ioreq->postsync = 1;
        /* fall through */
    case BLKIF_OP_WRITE:
        ioreq->prot = PROT_READ; /* from memory */
@@ -241,21 +301,21 @@ static void ioreq_unmap(struct ioreq *ioreq)
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0 || ioreq->mapped == 0) {
    if (ioreq->num_unmap == 0 || ioreq->mapped == 0) {
        return;
    }
    if (batch_maps) {
        if (!ioreq->pages) {
            return;
        }
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) {
            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                          strerror(errno));
        }
        ioreq->blkdev->cnt_map -= ioreq->v.niov;
        ioreq->blkdev->cnt_map -= ioreq->num_unmap;
        ioreq->pages = NULL;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
        for (i = 0; i < ioreq->num_unmap; i++) {
            if (!ioreq->page[i]) {
                continue;
            }
@@ -273,41 +333,120 @@ static void ioreq_unmap(struct ioreq *ioreq)
static int ioreq_map(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;
    uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int i, j, new_maps = 0;
    PersistentGrant *grant;
    /* domids and refs variables will contain the information necessary
     * to map the grants that are needed to fulfill this request.
     *
     * After mapping the needed grants, the page array will contain the
     * memory address of each granted page in the order specified in ioreq
     * (disregarding if it's a persistent grant or not).
     */

    if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
        return 0;
    }
    if (batch_maps) {
    if (ioreq->blkdev->feature_persistent) {
        for (i = 0; i < ioreq->v.niov; i++) {
            grant = g_tree_lookup(ioreq->blkdev->persistent_gnts,
                                    GUINT_TO_POINTER(ioreq->refs[i]));

            if (grant != NULL) {
                page[i] = grant->page;
                xen_be_printf(&ioreq->blkdev->xendev, 3,
                              "using persistent-grant %" PRIu32 "\n",
                              ioreq->refs[i]);
            } else {
                    /* Add the grant to the list of grants that
                     * should be mapped
                     */
                    domids[new_maps] = ioreq->domids[i];
                    refs[new_maps] = ioreq->refs[i];
                    page[i] = NULL;
                    new_maps++;
            }
        }
        /* Set the protection to RW, since grants may be reused later
         * with a different protection than the one needed for this request
         */
        ioreq->prot = PROT_WRITE | PROT_READ;
    } else {
        /* All grants in the request should be mapped */
        memcpy(refs, ioreq->refs, sizeof(refs));
        memcpy(domids, ioreq->domids, sizeof(domids));
        memset(page, 0, sizeof(page));
        new_maps = ioreq->v.niov;
    }

    if (batch_maps && new_maps) {
        ioreq->pages = xc_gnttab_map_grant_refs
            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
            (gnt, new_maps, domids, refs, ioreq->prot);
        if (ioreq->pages == NULL) {
            xen_be_printf(&ioreq->blkdev->xendev, 0,
                          "can't map %d grant refs (%s, %d maps)\n",
                          ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
                          new_maps, strerror(errno), ioreq->blkdev->cnt_map);
            return -1;
        }
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
                (uintptr_t)ioreq->v.iov[i].iov_base;
        for (i = 0, j = 0; i < ioreq->v.niov; i++) {
            if (page[i] == NULL) {
                page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE;
            }
        ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else  {
        for (i = 0; i < ioreq->v.niov; i++) {
        }
        ioreq->blkdev->cnt_map += new_maps;
    } else if (new_maps)  {
        for (i = 0; i < new_maps; i++) {
            ioreq->page[i] = xc_gnttab_map_grant_ref
                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
                (gnt, domids[i], refs[i], ioreq->prot);
            if (ioreq->page[i] == NULL) {
                xen_be_printf(&ioreq->blkdev->xendev, 0,
                              "can't map grant ref %d (%s, %d maps)\n",
                              ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                              refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                ioreq_unmap(ioreq);
                return -1;
            }
            ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
            ioreq->blkdev->cnt_map++;
        }
        for (i = 0, j = 0; i < ioreq->v.niov; i++) {
            if (page[i] == NULL) {
                page[i] = ioreq->page[j++];
            }
        }
    }
    if (ioreq->blkdev->feature_persistent) {
        while ((ioreq->blkdev->persistent_gnt_count < ioreq->blkdev->max_grants)
              && new_maps) {
            /* Go through the list of newly mapped grants and add as many
             * as possible to the list of persistently mapped grants.
             *
             * Since we start at the end of ioreq->page(s), we only need
             * to decrease new_maps to prevent this granted pages from
             * being unmapped in ioreq_unmap.
             */
            grant = g_malloc0(sizeof(*grant));
            new_maps--;
            if (batch_maps) {
                grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE;
            } else {
                grant->page = ioreq->page[new_maps];
            }
            grant->blkdev = ioreq->blkdev;
            xen_be_printf(&ioreq->blkdev->xendev, 3,
                          "adding grant %" PRIu32 " page: %p\n",
                          refs[new_maps], grant->page);
            g_tree_insert(ioreq->blkdev->persistent_gnts,
                          GUINT_TO_POINTER(refs[new_maps]),
                          grant);
            ioreq->blkdev->persistent_gnt_count++;
        }
    }
    for (i = 0; i < ioreq->v.niov; i++) {
        ioreq->v.iov[i].iov_base += (uintptr_t)page[i];
    }
    ioreq->mapped = 1;
    ioreq->num_unmap = new_maps;
    return 0;
}

@@ -369,7 +508,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!ioreq->req.nr_segments) {
            break;
        }
@@ -654,7 +793,8 @@ static int blk_init(struct XenDevice *xendev)
                  blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
    xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
    xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
    xenstore_write_be_int(&blkdev->xendev, "info",            info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size",     blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
@@ -678,6 +818,7 @@ out_error:
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int pers;

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
        return -1;
@@ -686,6 +827,11 @@ static int blk_connect(struct XenDevice *xendev)
                             &blkdev->xendev.remote_port) == -1) {
        return -1;
    }
    if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) {
        blkdev->feature_persistent = FALSE;
    } else {
        blkdev->feature_persistent = !!pers;
    }

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
@@ -729,6 +875,15 @@ static int blk_connect(struct XenDevice *xendev)
    }
    }

    if (blkdev->feature_persistent) {
        /* Init persistent grants */
        blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST;
        blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
                                             NULL, NULL,
                                             (GDestroyNotify)destroy_grant);
        blkdev->persistent_gnt_count = 0;
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
@@ -769,6 +924,11 @@ static int blk_free(struct XenDevice *xendev)
        blk_disconnect(xendev);
    }

    /* Free persistent grants */
    if (blkdev->feature_persistent) {
        g_tree_destroy(blkdev->persistent_gnts);
    }

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);