Commit 71df81af authored by Anthony Liguori's avatar Anthony Liguori
Browse files

Merge remote-tracking branch 'stefanha/block' into staging

# By MORITA Kazutaka (5) and others
# Via Stefan Hajnoczi
* stefanha/block:
  block: for HMP commit() operations on 'all', skip non-COW drives
  sheepdog: add support for connecting to unix domain socket
  sheepdog: use inet_connect to simplify connect code
  sheepdog: accept URIs
  move socket_set_nodelay to osdep.c
  slirp/tcp_subr.c: fix coding style in tcp_connect
  dataplane: remove EventPoll in favor of AioContext
  virtio-blk: fix unplug + virsh reboot
  ide/macio: Fix macio DMA initialisation.
parents a6900601 272d2d8e
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -1640,11 +1640,13 @@ int bdrv_commit_all(void)
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (bs->drv && bs->backing_hd) {
            int ret = bdrv_commit(bs);
            if (ret < 0) {
                return ret;
            }
        }
    }
    return 0;
}

+192 −123
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
 */

#include "qemu-common.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
@@ -21,7 +22,7 @@
#define SD_PROTO_VER 0x01

#define SD_DEFAULT_ADDR "localhost"
#define SD_DEFAULT_PORT "7000"
#define SD_DEFAULT_PORT 7000

#define SD_OP_CREATE_AND_WRITE_OBJ  0x01
#define SD_OP_READ_OBJ       0x02
@@ -297,8 +298,8 @@ typedef struct BDRVSheepdogState {
    bool is_snapshot;
    uint32_t cache_flags;

    char *addr;
    char *port;
    char *host_spec;
    bool is_unix;
    int fd;

    CoMutex lock;
@@ -446,56 +447,29 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
    return acb;
}

static int connect_to_sdog(const char *addr, const char *port)
static int connect_to_sdog(BDRVSheepdogState *s)
{
    char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
    int fd, ret;
    struct addrinfo hints, *res, *res0;

    if (!addr) {
        addr = SD_DEFAULT_ADDR;
        port = SD_DEFAULT_PORT;
    }
    int fd;
    Error *err = NULL;

    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;
    if (s->is_unix) {
        fd = unix_connect(s->host_spec, &err);
    } else {
        fd = inet_connect(s->host_spec, &err);

    ret = getaddrinfo(addr, port, &hints, &res0);
    if (ret) {
        error_report("unable to get address info %s, %s",
                     addr, strerror(errno));
        return -errno;
        if (err == NULL) {
            int ret = socket_set_nodelay(fd);
            if (ret < 0) {
                error_report("%s", strerror(errno));
            }

    for (res = res0; res; res = res->ai_next) {
        ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf),
                          sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
        if (ret) {
            continue;
        }

        fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
        if (fd < 0) {
            continue;
    }

    reconnect:
        ret = connect(fd, res->ai_addr, res->ai_addrlen);
        if (ret < 0) {
            if (errno == EINTR) {
                goto reconnect;
            }
            close(fd);
            break;
    if (err != NULL) {
        qerror_report_err(err);
        error_free(err);
    }

        dprintf("connected to %s:%s\n", addr, port);
        goto success;
    }
    fd = -errno;
    error_report("failed connect to %s:%s", addr, port);
success:
    freeaddrinfo(res0);
    return fd;
}

@@ -787,15 +761,6 @@ static int aio_flush_request(void *opaque)
        !QLIST_EMPTY(&s->pending_aio_head);
}

/*
 * Turn on TCP_NODELAY for the socket @fd.
 *
 * Returns whatever setsockopt() returns.
 */
static int set_nodelay(int fd)
{
    int enable = 1;

    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
                      (char *)&enable, sizeof(enable));
}

/*
 * Return a socket descriptor to read/write objects.
 *
@@ -804,29 +769,88 @@ static int set_nodelay(int fd)
 */
static int get_sheep_fd(BDRVSheepdogState *s)
{
    int ret, fd;
    int fd;

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        error_report("%s", strerror(errno));
        return fd;
    }

    socket_set_nonblock(fd);

    ret = set_nodelay(fd);
    if (ret) {
        error_report("%s", strerror(errno));
        closesocket(fd);
        return -errno;
    }

    qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
    return fd;
}

/*
 * Parse a sheepdog URI into connection parameters and VDI identity.
 *
 * Accepted forms:
 *   sheepdog[+tcp]://[host:port]/vdiname[#fragment]
 *   sheepdog+unix:///vdiname?socket=path[#fragment]
 *
 * On success:
 *   - s->is_unix selects the transport and s->host_spec holds a newly
 *     allocated "host:port" string or socket path;
 *   - the VDI name (URI path without the leading '/') is copied into @vdi,
 *     bounded by SD_MAX_VDI_LEN;
 *   - with a fragment, *snapid is its strtoul() value; when that value is 0
 *     the fragment is also copied into @tag (bounded by SD_MAX_VDI_TAG_LEN);
 *   - without a fragment, *snapid is CURRENT_VDI_ID.
 *
 * Returns 0 on success and -EINVAL for a URI that cannot be parsed or does
 * not match one of the forms above.  s->is_unix may already have been
 * updated when an error is returned.
 */
static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
                        char *vdi, uint32_t *snapid, char *tag)
{
    URI *uri;
    QueryParams *qp = NULL;
    int ret = 0;

    uri = uri_parse(filename);
    if (!uri) {
        return -EINVAL;
    }

    /* transport: a URI without a scheme cannot select one */
    if (uri->scheme == NULL) {
        ret = -EINVAL;
        goto out;
    }
    if (!strcmp(uri->scheme, "sheepdog")) {
        s->is_unix = false;
    } else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
        s->is_unix = false;
    } else if (!strcmp(uri->scheme, "sheepdog+unix")) {
        s->is_unix = true;
    } else {
        ret = -EINVAL;
        goto out;
    }

    /*
     * The VDI name is the path minus its first character, so the path must
     * hold at least one character after it.  Rejecting an empty path keeps
     * "uri->path + 1" from pointing past the terminating NUL.
     */
    if (uri->path == NULL || uri->path[0] == '\0' || !strcmp(uri->path, "/")) {
        ret = -EINVAL;
        goto out;
    }
    pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);

    /* unix transport takes exactly one query parameter, tcp takes none */
    qp = query_params_parse(uri->query);
    if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
        ret = -EINVAL;
        goto out;
    }

    if (s->is_unix) {
        /* sheepdog+unix:///vdiname?socket=path */
        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
            ret = -EINVAL;
            goto out;
        }
        /*
         * NOTE(review): a bare "?socket" presumably yields a NULL value from
         * the query parser; refuse it (and an empty path) here instead of
         * handing an unusable host_spec to the connect code.
         */
        if (qp->p[0].value == NULL || qp->p[0].value[0] == '\0') {
            ret = -EINVAL;
            goto out;
        }
        s->host_spec = g_strdup(qp->p[0].value);
    } else {
        /* sheepdog[+tcp]://[host:port]/vdiname */
        s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
                                       uri->port ?: SD_DEFAULT_PORT);
    }

    /* snapshot tag */
    if (uri->fragment) {
        *snapid = strtoul(uri->fragment, NULL, 10);
        if (*snapid == 0) {
            /* not a usable numeric id: keep the fragment as a tag name */
            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment);
        }
    } else {
        *snapid = CURRENT_VDI_ID; /* search current vdi */
    }

out:
    if (qp) {
        query_params_free(qp);
    }
    uri_free(uri);
    return ret;
}

/*
 * Parse a filename
 * Parse a filename (old syntax)
 *
 * filename must be one of the following formats:
 *   1. [vdiname]
@@ -845,9 +869,11 @@ static int get_sheep_fd(BDRVSheepdogState *s)
static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
                         char *vdi, uint32_t *snapid, char *tag)
{
    char *p, *q;
    int nr_sep;
    char *p, *q, *uri;
    const char *host_spec, *vdi_spec;
    int nr_sep, ret;

    strstart(filename, "sheepdog:", (const char **)&filename);
    p = q = g_strdup(filename);

    /* count the number of separators */
@@ -860,38 +886,32 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
    }
    p = q;

    /* use the first two tokens as hostname and port number. */
    /* use the first two tokens as host_spec. */
    if (nr_sep >= 2) {
        s->addr = p;
        host_spec = p;
        p = strchr(p, ':');
        *p++ = '\0';

        s->port = p;
        p++;
        p = strchr(p, ':');
        *p++ = '\0';
    } else {
        s->addr = NULL;
        s->port = 0;
        host_spec = "";
    }

    pstrcpy(vdi, SD_MAX_VDI_LEN, p);
    vdi_spec = p;

    p = strchr(vdi, ':');
    p = strchr(vdi_spec, ':');
    if (p) {
        *p++ = '\0';
        *snapid = strtoul(p, NULL, 10);
        if (*snapid == 0) {
            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
        }
    } else {
        *snapid = CURRENT_VDI_ID; /* search current vdi */
        *p++ = '#';
    }

    if (s->addr == NULL) {
    uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);

    ret = sd_parse_uri(s, uri, vdi, snapid, tag);

    g_free(q);
    }
    g_free(uri);

    return 0;
    return ret;
}

static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
@@ -903,7 +923,7 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
    unsigned int wlen, rlen = 0;
    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        return fd;
    }
@@ -1106,16 +1126,19 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
    uint32_t snapid;
    char *buf = NULL;

    strstart(filename, "sheepdog:", (const char **)&filename);

    QLIST_INIT(&s->inflight_aio_head);
    QLIST_INIT(&s->pending_aio_head);
    s->fd = -1;

    memset(vdi, 0, sizeof(vdi));
    memset(tag, 0, sizeof(tag));
    if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
        ret = -EINVAL;

    if (strstr(filename, "://")) {
        ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
    } else {
        ret = parse_vdiname(s, filename, vdi, &snapid, tag);
    }
    if (ret < 0) {
        goto out;
    }
    s->fd = get_sheep_fd(s);
@@ -1143,9 +1166,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
        s->is_snapshot = true;
    }

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        error_report("failed to connect");
        ret = fd;
        goto out;
    }
@@ -1178,9 +1200,8 @@ out:
    return ret;
}

static int do_sd_create(char *filename, int64_t vdi_size,
                        uint32_t base_vid, uint32_t *vdi_id, int snapshot,
                        const char *addr, const char *port)
static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
                        uint32_t base_vid, uint32_t *vdi_id, int snapshot)
{
    SheepdogVdiReq hdr;
    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@@ -1188,7 +1209,7 @@ static int do_sd_create(char *filename, int64_t vdi_size,
    unsigned int wlen, rlen = 0;
    char buf[SD_MAX_VDI_LEN];

    fd = connect_to_sdog(addr, port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        return fd;
    }
@@ -1284,17 +1305,17 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
    char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
    uint32_t snapid;
    bool prealloc = false;
    const char *vdiname;

    s = g_malloc0(sizeof(BDRVSheepdogState));

    strstart(filename, "sheepdog:", &vdiname);

    memset(vdi, 0, sizeof(vdi));
    memset(tag, 0, sizeof(tag));
    if (parse_vdiname(s, vdiname, vdi, &snapid, tag) < 0) {
        error_report("invalid filename");
        ret = -EINVAL;
    if (strstr(filename, "://")) {
        ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
    } else {
        ret = parse_vdiname(s, filename, vdi, &snapid, tag);
    }
    if (ret < 0) {
        goto out;
    }

@@ -1355,7 +1376,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
        bdrv_delete(bs);
    }

    ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port);
    ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
    if (!prealloc || ret) {
        goto out;
    }
@@ -1376,7 +1397,7 @@ static void sd_close(BlockDriverState *bs)

    dprintf("%s\n", s->name);

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        return;
    }
@@ -1400,7 +1421,7 @@ static void sd_close(BlockDriverState *bs)

    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
    closesocket(s->fd);
    g_free(s->addr);
    g_free(s->host_spec);
}

static int64_t sd_getlength(BlockDriverState *bs)
@@ -1424,7 +1445,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
        return -EINVAL;
    }

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        return fd;
    }
@@ -1500,17 +1521,15 @@ static int sd_create_branch(BDRVSheepdogState *s)

    buf = g_malloc(SD_INODE_SIZE);

    ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1,
                       s->addr, s->port);
    ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1);
    if (ret) {
        goto out;
    }

    dprintf("%" PRIx32 " is created.\n", vid);

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        error_report("failed to connect");
        ret = fd;
        goto out;
    }
@@ -1769,7 +1788,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);

    /* refresh inode. */
    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        ret = fd;
        goto cleanup;
@@ -1782,8 +1801,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto cleanup;
    }

    ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1,
                       s->addr, s->port);
    ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid,
                       1);
    if (ret < 0) {
        error_report("failed to create inode for snapshot. %s",
                     strerror(errno));
@@ -1838,9 +1857,8 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto out;
    }

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        error_report("failed to connect");
        ret = fd;
        goto out;
    }
@@ -1902,7 +1920,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)

    vdi_inuse = g_malloc(max);

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        ret = fd;
        goto out;
@@ -1929,9 +1947,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
    hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
    start_nr = hval & (SD_NR_VDIS - 1);

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        error_report("failed to connect");
        ret = fd;
        goto out;
    }
@@ -1988,7 +2005,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
    uint32_t vdi_index;
    uint64_t offset;

    fd = connect_to_sdog(s->addr, s->port);
    fd = connect_to_sdog(s);
    if (fd < 0) {
        return fd;
    }
@@ -2063,7 +2080,7 @@ static QEMUOptionParameter sd_create_options[] = {
    { NULL }
};

BlockDriver bdrv_sheepdog = {
static BlockDriver bdrv_sheepdog = {
    .format_name    = "sheepdog",
    .protocol_name  = "sheepdog",
    .instance_size  = sizeof(BDRVSheepdogState),
@@ -2088,8 +2105,60 @@ BlockDriver bdrv_sheepdog = {
    .create_options = sd_create_options,
};

static BlockDriver bdrv_sheepdog_tcp = {
    .format_name    = "sheepdog",
    .protocol_name  = "sheepdog+tcp",
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_file_open = sd_open,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_getlength = sd_getlength,
    .bdrv_truncate  = sd_truncate,

    .bdrv_co_readv  = sd_co_readv,
    .bdrv_co_writev = sd_co_writev,
    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,

    .bdrv_snapshot_create   = sd_snapshot_create,
    .bdrv_snapshot_goto     = sd_snapshot_goto,
    .bdrv_snapshot_delete   = sd_snapshot_delete,
    .bdrv_snapshot_list     = sd_snapshot_list,

    .bdrv_save_vmstate  = sd_save_vmstate,
    .bdrv_load_vmstate  = sd_load_vmstate,

    .create_options = sd_create_options,
};

static BlockDriver bdrv_sheepdog_unix = {
    .format_name    = "sheepdog",
    .protocol_name  = "sheepdog+unix",
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_file_open = sd_open,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_getlength = sd_getlength,
    .bdrv_truncate  = sd_truncate,

    .bdrv_co_readv  = sd_co_readv,
    .bdrv_co_writev = sd_co_writev,
    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,

    .bdrv_snapshot_create   = sd_snapshot_create,
    .bdrv_snapshot_goto     = sd_snapshot_goto,
    .bdrv_snapshot_delete   = sd_snapshot_delete,
    .bdrv_snapshot_list     = sd_snapshot_list,

    .bdrv_save_vmstate  = sd_save_vmstate,
    .bdrv_load_vmstate  = sd_load_vmstate,

    .create_options = sd_create_options,
};

static void bdrv_sheepdog_init(void)
{
    bdrv_register(&bdrv_sheepdog);
    bdrv_register(&bdrv_sheepdog_tcp);
    bdrv_register(&bdrv_sheepdog_unix);
}
block_init(bdrv_sheepdog_init);
+2 −3
Original line number Diff line number Diff line
@@ -2841,7 +2841,7 @@ static void gdb_accept(void)
    GDBState *s;
    struct sockaddr_in sockaddr;
    socklen_t len;
    int val, fd;
    int fd;

    for(;;) {
        len = sizeof(sockaddr);
@@ -2858,8 +2858,7 @@ static void gdb_accept(void)
    }

    /* set short latency */
    val = 1;
    setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
    socket_set_nodelay(fd);

    s = g_malloc0(sizeof(GDBState));
    s->c_cpu = first_cpu;
+1 −1
Original line number Diff line number Diff line
obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o virtio-blk.o
obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o ioq.o virtio-blk.o

hw/dataplane/event-poll.c

deleted100644 → 0
+0 −100
Original line number Diff line number Diff line
/*
 * Event loop with file descriptor polling
 *
 * Copyright 2012 IBM, Corp.
 * Copyright 2012 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *   Stefan Hajnoczi <stefanha@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/epoll.h>
#include "hw/dataplane/event-poll.h"

/*
 * Register @notifier with @poll so that @callback is associated with it.
 *
 * @handler is filled in with the notifier/callback pair and stored as the
 * epoll user data for the notifier's file descriptor (watched for EPOLLIN).
 * Exits the process if the descriptor cannot be added.
 */
void event_poll_add(EventPoll *poll, EventHandler *handler,
                    EventNotifier *notifier, EventCallback *callback)
{
    struct epoll_event ev = {
        .events = EPOLLIN,
        .data.ptr = handler,
    };
    int rc;

    handler->notifier = notifier;
    handler->callback = callback;

    rc = epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD,
                   event_notifier_get_fd(notifier), &ev);
    if (rc == 0) {
        return;
    }

    fprintf(stderr, "failed to add event handler to epoll: %m\n");
    exit(1);
}

/*
 * Callback attached to the stop notifier.  It is intentionally empty: the
 * notifier only needs to make event_poll() return.
 */
static void handle_stop(EventHandler *handler)
{
    (void)handler; /* nothing to do */
}

/*
 * Initialise @poll: create its epoll instance and install the stop notifier.
 * Any failure is reported on stderr and terminates the process.
 */
void event_poll_init(EventPoll *poll)
{
    int rc;

    /* epoll instance, created with EPOLL_CLOEXEC */
    poll->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
    if (poll->epoll_fd < 0) {
        fprintf(stderr, "epoll_create1 failed: %m\n");
        exit(1);
    }

    /* stop notifier, wired to the no-op handle_stop() callback */
    rc = event_notifier_init(&poll->stop_notifier, 0);
    if (rc < 0) {
        fprintf(stderr, "failed to init stop notifier\n");
        exit(1);
    }

    event_poll_add(poll, &poll->stop_handler,
                   &poll->stop_notifier, handle_stop);
}

/*
 * Release the resources held by @poll: the stop notifier is cleaned up, the
 * epoll descriptor is closed and the stored descriptor is reset to -1.
 */
void event_poll_cleanup(EventPoll *poll)
{
    EventNotifier *stop = &poll->stop_notifier;

    event_notifier_cleanup(stop);

    close(poll->epoll_fd);
    poll->epoll_fd = -1; /* mark as closed */
}

/*
 * Wait (without timeout) for one event on @poll and run its callback.
 *
 * Exactly one event is consumed per call.  epoll_wait() is retried while it
 * fails with EINTR; any other outcome than a single event is reported and
 * terminates the process.
 */
void event_poll(EventPoll *poll)
{
    struct epoll_event ev;
    EventHandler *active;
    int count;

    for (;;) {
        count = epoll_wait(poll->epoll_fd, &ev, 1, -1);
        if (count >= 0 || errno != EINTR) {
            break;
        }
        /* interrupted by a signal: wait again */
    }

    if (unlikely(count != 1)) {
        fprintf(stderr, "epoll_wait failed: %m\n");
        exit(1); /* should never happen */
    }

    /* The handler was stored as epoll user data by event_poll_add() */
    active = ev.data.ptr;

    /* Clear the notifier before dispatching */
    event_notifier_test_and_clear(active->notifier);

    active->callback(active);
}

/*
 * Stop event_poll() by signalling the stop notifier of @poll.
 *
 * This function can be used from another thread.
 */
void event_poll_notify(EventPoll *poll)
{
    EventNotifier *stop = &poll->stop_notifier;

    event_notifier_set(stop);
}
Loading