Commit 51b19ebe authored by Paolo Bonzini's avatar Paolo Bonzini Committed by Michael S. Tsirkin
Browse files

virtio: move allocation to virtqueue_pop/vring_pop



The return code of virtqueue_pop/vring_pop is unused except to check for
errors or 0.  We can thus easily move allocation inside the functions
and just return a pointer to the VirtQueueElement.

The advantage is that we will be able to allocate only the space that
is needed for the actual size of the s/g list instead of the full
VIRTQUEUE_MAX_SIZE items.  Currently VirtQueueElement takes about 48K
of memory, and this kind of allocation puts a lot of stress on malloc.
By cutting the size by two or three orders of magnitude, malloc can
use much more efficient algorithms.

The patch is pretty large, but changes to each device are testable
more or less independently.  Splitting it would mostly add churn.

Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
Reviewed-by: default avatarMichael S. Tsirkin <mst@redhat.com>
Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>
Reviewed-by: default avatarCornelia Huck <cornelia.huck@de.ibm.com>
parent 6aa46d8f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1587,7 +1587,7 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
    int read_count;
    int64_t xattr_len;
    V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
    VirtQueueElement *elem = &v->elems[pdu->idx];
    VirtQueueElement *elem = v->elems[pdu->idx];

    xattr_len = fidp->fs.xattr.len;
    read_count = xattr_len - off;
+10 −7
Original line number Diff line number Diff line
@@ -26,10 +26,12 @@ void virtio_9p_push_and_notify(V9fsPDU *pdu)
{
    V9fsState *s = pdu->s;
    V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
    VirtQueueElement *elem = &v->elems[pdu->idx];
    VirtQueueElement *elem = v->elems[pdu->idx];

    /* push onto queue and notify */
    virtqueue_push(v->vq, elem, pdu->size);
    g_free(elem);
    v->elems[pdu->idx] = NULL;

    /* FIXME: we should batch these completions */
    virtio_notify(VIRTIO_DEVICE(v), v->vq);
@@ -48,10 +50,10 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
            uint8_t id;
            uint16_t tag_le;
        } QEMU_PACKED out;
        VirtQueueElement *elem = &v->elems[pdu->idx];
        VirtQueueElement *elem;

        len = virtqueue_pop(vq, elem);
        if (!len) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            pdu_free(pdu);
            break;
        }
@@ -59,6 +61,7 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
        BUG_ON(elem->out_num == 0 || elem->in_num == 0);
        QEMU_BUILD_BUG_ON(sizeof out != 7);

        v->elems[pdu->idx] = elem;
        len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                         &out, sizeof out);
        BUG_ON(len != sizeof out);
@@ -141,7 +144,7 @@ ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset,
{
    V9fsState *s = pdu->s;
    V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
    VirtQueueElement *elem = &v->elems[pdu->idx];
    VirtQueueElement *elem = v->elems[pdu->idx];

    return v9fs_iov_vmarshal(elem->in_sg, elem->in_num, offset, 1, fmt, ap);
}
@@ -151,7 +154,7 @@ ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset,
{
    V9fsState *s = pdu->s;
    V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
    VirtQueueElement *elem = &v->elems[pdu->idx];
    VirtQueueElement *elem = v->elems[pdu->idx];

    return v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, 1, fmt, ap);
}
@@ -161,7 +164,7 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov,
{
    V9fsState *s = pdu->s;
    V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
    VirtQueueElement *elem = &v->elems[pdu->idx];
    VirtQueueElement *elem = v->elems[pdu->idx];

    if (is_write) {
        *piov = elem->out_sg;
+1 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ typedef struct V9fsVirtioState
    VirtQueue *vq;
    size_t config_size;
    V9fsPDU pdus[MAX_REQ];
    VirtQueueElement elems[MAX_REQ];
    VirtQueueElement *elems[MAX_REQ];
    V9fsState state;
} V9fsVirtioState;

+5 −6
Original line number Diff line number Diff line
@@ -100,20 +100,19 @@ static void handle_notify(EventNotifier *e)
    blk_io_plug(s->conf->conf.blk);
    for (;;) {
        MultiReqBuffer mrb = {};
        int ret;

        /* Disable guest->host notifies to avoid unnecessary vmexits */
        vring_disable_notification(s->vdev, &s->vring);

        for (;;) {
            VirtIOBlockReq *req = virtio_blk_alloc_request(vblk);
            VirtIOBlockReq *req = vring_pop(s->vdev, &s->vring,
                                            sizeof(VirtIOBlockReq));

            ret = vring_pop(s->vdev, &s->vring, &req->elem);
            if (ret < 0) {
                virtio_blk_free_request(req);
            if (req == NULL) {
                break; /* no more requests */
            }

            virtio_blk_init_request(vblk, req);
            trace_virtio_blk_data_plane_process_request(s, req->elem.out_num,
                                                        req->elem.in_num,
                                                        req->elem.index);
@@ -125,7 +124,7 @@ static void handle_notify(EventNotifier *e)
            virtio_blk_submit_multireq(s->conf->conf.blk, &mrb);
        }

        if (likely(ret == -EAGAIN)) { /* vring emptied */
        if (likely(!vring_more_avail(s->vdev, &s->vring))) { /* vring emptied */
            /* Re-enable guest->host notifies and stop processing the vring.
             * But if the guest has snuck in more descriptors, keep processing.
             */
+6 −9
Original line number Diff line number Diff line
@@ -29,15 +29,13 @@
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"

VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
void virtio_blk_init_request(VirtIOBlock *s, VirtIOBlockReq *req)
{
    VirtIOBlockReq *req = g_new(VirtIOBlockReq, 1);
    req->dev = s;
    req->qiov.size = 0;
    req->in_len = 0;
    req->next = NULL;
    req->mr_next = NULL;
    return req;
}

void virtio_blk_free_request(VirtIOBlockReq *req)
@@ -193,13 +191,11 @@ out:

static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);
    VirtIOBlockReq *req = virtqueue_pop(s->vq, sizeof(VirtIOBlockReq));

    if (!virtqueue_pop(s->vq, &req->elem)) {
        virtio_blk_free_request(req);
        return NULL;
    if (req) {
        virtio_blk_init_request(s, req);
    }

    return req;
}

@@ -836,7 +832,8 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
    VirtIOBlock *s = VIRTIO_BLK(vdev);

    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);
        VirtIOBlockReq *req = g_new(VirtIOBlockReq, 1);
        virtio_blk_init_request(s, req);
        qemu_get_buffer(f, (unsigned char *)&req->elem,
                        sizeof(VirtQueueElement));
        req->next = s->rq;
Loading