Commit dabc50e4 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/berrange/tags/pull-qio-2017-10-16-1' into staging



Merge QIO 2017/10/16 v1

# gpg: Signature made Mon 16 Oct 2017 17:10:54 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qio-2017-10-16-1:
  io: fix mem leak in websock error path
  io: add trace points for websocket HTTP protocol headers
  io: cope with websock 'Connection' header having multiple values
  io: get rid of bounce buffering in websock write path
  io: pass a struct iovec into qio_channel_websock_encode
  io: get rid of qio_channel_websock_encode helper method
  io: simplify websocket ping reply handling
  io: monitor encoutput buffer size from websocket GSource
  sockets: Handle race condition between binds to the same port
  sockets: factor out create_fast_reuse_socket
  sockets: factor out a new try_bind() function

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 9f99c85c 7fc3fcef
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -59,9 +59,8 @@ struct QIOChannelWebsock {
    Buffer encinput;
    Buffer encoutput;
    Buffer rawinput;
    Buffer rawoutput;
    Buffer ping_reply;
    size_t payload_remain;
    size_t pong_remain;
    QIOChannelWebsockMask mask;
    guint io_tag;
    Error *io_err;
+94 −74
Original line number Diff line number Diff line
@@ -24,11 +24,12 @@
#include "io/channel-websock.h"
#include "crypto/hash.h"
#include "trace.h"
#include "qemu/iov.h"

#include <time.h>


/* Max amount to allow in rawinput/rawoutput buffers */
/* Max amount to allow in rawinput/encoutput buffers */
#define QIO_CHANNEL_WEBSOCK_MAX_BUFFER 8192

#define QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN 24
@@ -223,6 +224,7 @@ qio_channel_websock_extract_headers(QIOChannelWebsock *ioc,
        goto bad_request;
    }
    *nl = '\0';
    trace_qio_channel_websock_http_greeting(ioc, buffer);

    tmp = strchr(buffer, ' ');
    if (!tmp) {
@@ -339,7 +341,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc,
    char combined_key[QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN +
                      QIO_CHANNEL_WEBSOCK_GUID_LEN + 1];
    char *accept = NULL;
    char *date = qio_channel_websock_date_str();
    char *date = NULL;

    g_strlcpy(combined_key, key, QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + 1);
    g_strlcat(combined_key, QIO_CHANNEL_WEBSOCK_GUID,
@@ -358,6 +360,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc,
        return;
    }

    date = qio_channel_websock_date_str();
    qio_channel_websock_handshake_send_res(
        ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_OK, date, accept);

@@ -373,6 +376,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc,
    size_t nhdrs = G_N_ELEMENTS(hdrs);
    const char *protocols = NULL, *version = NULL, *key = NULL,
        *host = NULL, *connection = NULL, *upgrade = NULL;
    char **connectionv;
    bool upgraded = false;
    size_t i;

    nhdrs = qio_channel_websock_extract_headers(ioc, buffer, hdrs, nhdrs, errp);
    if (!nhdrs) {
@@ -421,6 +427,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc,
        goto bad_request;
    }

    trace_qio_channel_websock_http_request(ioc, protocols, version,
                                           host, connection, upgrade, key);

    if (!g_strrstr(protocols, QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY)) {
        error_setg(errp, "No '%s' protocol is supported by client '%s'",
                   QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY, protocols);
@@ -439,7 +448,16 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc,
        goto bad_request;
    }

    if (strcasecmp(connection, QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) != 0) {
    connectionv = g_strsplit(connection, ",", 0);
    for (i = 0; connectionv != NULL && connectionv[i] != NULL; i++) {
        g_strstrip(connectionv[i]);
        if (strcasecmp(connectionv[i],
                       QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) == 0) {
            upgraded = true;
        }
    }
    g_strfreev(connectionv);
    if (!upgraded) {
        error_setg(errp, "No connection upgrade requested '%s'", connection);
        goto bad_request;
    }
@@ -582,49 +600,48 @@ static gboolean qio_channel_websock_handshake_io(QIOChannel *ioc,
}


static void qio_channel_websock_encode_buffer(QIOChannelWebsock *ioc,
                                              Buffer *output,
                                              uint8_t opcode, Buffer *buffer)
static void qio_channel_websock_encode(QIOChannelWebsock *ioc,
                                       uint8_t opcode,
                                       const struct iovec *iov,
                                       size_t niov,
                                       size_t size)
{
    size_t header_size;
    size_t i;
    union {
        char buf[QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT];
        QIOChannelWebsockHeader ws;
    } header;

    assert(size <= iov_size(iov, niov));

    header.ws.b0 = QIO_CHANNEL_WEBSOCK_HEADER_FIELD_FIN |
        (opcode & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_OPCODE);
    if (buffer->offset < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) {
        header.ws.b1 = (uint8_t)buffer->offset;
    if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) {
        header.ws.b1 = (uint8_t)size;
        header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT;
    } else if (buffer->offset <
               QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) {
    } else if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) {
        header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT;
        header.ws.u.s16.l16 = cpu_to_be16((uint16_t)buffer->offset);
        header.ws.u.s16.l16 = cpu_to_be16((uint16_t)size);
        header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT;
    } else {
        header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_64_BIT;
        header.ws.u.s64.l64 = cpu_to_be64(buffer->offset);
        header.ws.u.s64.l64 = cpu_to_be64(size);
        header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT;
    }
    header_size -= QIO_CHANNEL_WEBSOCK_HEADER_LEN_MASK;

    trace_qio_channel_websock_encode(ioc, opcode, header_size, buffer->offset);
    buffer_reserve(output, header_size + buffer->offset);
    buffer_append(output, header.buf, header_size);
    buffer_append(output, buffer->buffer, buffer->offset);
    trace_qio_channel_websock_encode(ioc, opcode, header_size, size);
    buffer_reserve(&ioc->encoutput, header_size + size);
    buffer_append(&ioc->encoutput, header.buf, header_size);
    for (i = 0; i < niov && size != 0; i++) {
        size_t want = iov[i].iov_len;
        if (want > size) {
            want = size;
        }


static void qio_channel_websock_encode(QIOChannelWebsock *ioc)
{
    if (!ioc->rawoutput.offset) {
        return;
        buffer_append(&ioc->encoutput, iov[i].iov_base, want);
        size -= want;
    }
    qio_channel_websock_encode_buffer(
        ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME,
        &ioc->rawoutput);
    buffer_reset(&ioc->rawoutput);
}


@@ -634,17 +651,22 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *, Error **);
static void qio_channel_websock_write_close(QIOChannelWebsock *ioc,
                                            uint16_t code, const char *reason)
{
    buffer_reserve(&ioc->rawoutput, 2 + (reason ? strlen(reason) : 0));
    *(uint16_t *)(ioc->rawoutput.buffer + ioc->rawoutput.offset) =
        cpu_to_be16(code);
    ioc->rawoutput.offset += 2;
    struct iovec iov[2] = {
        { .iov_base = &code, .iov_len = sizeof(code) },
    };
    size_t niov = 1;
    size_t size = iov[0].iov_len;

    cpu_to_be16s(&code);

    if (reason) {
        buffer_append(&ioc->rawoutput, reason, strlen(reason));
        iov[1].iov_base = (void *)reason;
        iov[1].iov_len = strlen(reason);
        size += iov[1].iov_len;
        niov++;
    }
    qio_channel_websock_encode_buffer(
        ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
        &ioc->rawoutput);
    buffer_reset(&ioc->rawoutput);
    qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
                               iov, niov, size);
    qio_channel_websock_write_wire(ioc, NULL);
    qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
}
@@ -813,9 +835,10 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc,
        error_setg(errp, "websocket closed by peer");
        if (payload_len) {
            /* echo client status */
            qio_channel_websock_encode_buffer(
                ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
                &ioc->encinput);
            struct iovec iov = { .iov_base = ioc->encinput.buffer,
                                 .iov_len = ioc->encinput.offset };
            qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
                                       &iov, 1, iov.iov_len);
            qio_channel_websock_write_wire(ioc, NULL);
            qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        } else {
@@ -825,11 +848,15 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc,
        }
        return -1;
    } else if (ioc->opcode == QIO_CHANNEL_WEBSOCK_OPCODE_PING) {
        /* ping frames produce an immediate reply */
        buffer_reset(&ioc->ping_reply);
        qio_channel_websock_encode_buffer(
            ioc, &ioc->ping_reply, QIO_CHANNEL_WEBSOCK_OPCODE_PONG,
            &ioc->encinput);
        /* ping frames produce an immediate reply, as long as we've not still
         * got a previous pong queued, in which case we drop the new pong */
        if (ioc->pong_remain == 0) {
            struct iovec iov = { .iov_base = ioc->encinput.buffer,
                                 .iov_len = ioc->encinput.offset };
            qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_PONG,
                                       &iov, 1, iov.iov_len);
            ioc->pong_remain = ioc->encoutput.offset;
        }
    }   /* pong frames are ignored */

    if (payload_len) {
@@ -887,8 +914,6 @@ static void qio_channel_websock_finalize(Object *obj)
    buffer_free(&ioc->encinput);
    buffer_free(&ioc->encoutput);
    buffer_free(&ioc->rawinput);
    buffer_free(&ioc->rawoutput);
    buffer_free(&ioc->ping_reply);
    object_unref(OBJECT(ioc->master));
    if (ioc->io_tag) {
        g_source_remove(ioc->io_tag);
@@ -946,13 +971,6 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc,
    ssize_t ret;
    ssize_t done = 0;

    /* ping replies take priority over binary data */
    if (!ioc->ping_reply.offset) {
        qio_channel_websock_encode(ioc);
    } else if (!ioc->encoutput.offset) {
        buffer_move_empty(&ioc->encoutput, &ioc->ping_reply);
    }

    while (ioc->encoutput.offset > 0) {
        ret = qio_channel_write(ioc->master,
                                (char *)ioc->encoutput.buffer,
@@ -968,6 +986,11 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc,
        }
        buffer_advance(&ioc->encoutput, ret);
        done += ret;
        if (ioc->pong_remain < ret) {
            ioc->pong_remain = 0;
        } else {
            ioc->pong_remain -= ret;
        }
    }
    return done;
}
@@ -1026,7 +1049,7 @@ static void qio_channel_websock_set_watch(QIOChannelWebsock *ioc)
        return;
    }

    if (ioc->encoutput.offset || ioc->ping_reply.offset) {
    if (ioc->encoutput.offset) {
        cond |= G_IO_OUT;
    }
    if (ioc->encinput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER &&
@@ -1100,8 +1123,8 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
                                          Error **errp)
{
    QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
    size_t i;
    ssize_t done = 0;
    ssize_t want = iov_size(iov, niov);
    ssize_t avail;
    ssize_t ret;

    if (wioc->io_err) {
@@ -1114,24 +1137,21 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
        return -1;
    }

    for (i = 0; i < niov; i++) {
        size_t want = iov[i].iov_len;
        if ((want + wioc->rawoutput.offset) > QIO_CHANNEL_WEBSOCK_MAX_BUFFER) {
            want = (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->rawoutput.offset);
        }
        if (want == 0) {
            goto done;
    avail = wioc->encoutput.offset >= QIO_CHANNEL_WEBSOCK_MAX_BUFFER ?
        0 : (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->encoutput.offset);
    if (want > avail) {
        want = avail;
    }

        buffer_reserve(&wioc->rawoutput, want);
        buffer_append(&wioc->rawoutput, iov[i].iov_base, want);
        done += want;
        if (want < iov[i].iov_len) {
            break;
        }
    if (want) {
        qio_channel_websock_encode(wioc,
                                   QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME,
                                   iov, niov, want);
    }

 done:
    /* Even if want == 0, we'll try write_wire in case there's
     * pending data we could usefully flush out
     */
    ret = qio_channel_websock_write_wire(wioc, errp);
    if (ret < 0 &&
        ret != QIO_CHANNEL_ERR_BLOCK) {
@@ -1141,11 +1161,11 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,

    qio_channel_websock_set_watch(wioc);

    if (done == 0) {
    if (want == 0) {
        return QIO_CHANNEL_ERR_BLOCK;
    }

    return done;
    return want;
}

static int qio_channel_websock_set_blocking(QIOChannel *ioc,
@@ -1208,7 +1228,7 @@ qio_channel_websock_source_check(GSource *source)
    if (wsource->wioc->rawinput.offset || wsource->wioc->io_eof) {
        cond |= G_IO_IN;
    }
    if (wsource->wioc->rawoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) {
    if (wsource->wioc->encoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) {
        cond |= G_IO_OUT;
    }

+2 −0
Original line number Diff line number Diff line
@@ -48,6 +48,8 @@ qio_channel_websock_handshake_pending(void *ioc, int status) "Websock handshake
qio_channel_websock_handshake_reply(void *ioc) "Websock handshake reply ioc=%p"
qio_channel_websock_handshake_fail(void *ioc, const char *msg) "Websock handshake fail ioc=%p err=%s"
qio_channel_websock_handshake_complete(void *ioc) "Websock handshake complete ioc=%p"
qio_channel_websock_http_greeting(void *ioc, const char *greeting) "Websocket HTTP request ioc=%p greeting='%s'"
qio_channel_websock_http_request(void *ioc, const char *protocols, const char *version, const char *host, const char *connection, const char *upgrade, const char *key) "Websocket HTTP request ioc=%p protocols='%s' version='%s' host='%s' connection='%s' upgrade='%s' key='%s'"
qio_channel_websock_header_partial_decode(void *ioc, size_t payloadlen, unsigned char fin, unsigned char opcode, unsigned char has_mask) "Websocket header decoded ioc=%p payload-len=%zu fin=0x%x opcode=0x%x has_mask=0x%x"
qio_channel_websock_header_full_decode(void *ioc, size_t headerlen, size_t payloadlen, uint32_t mask) "Websocket header decoded ioc=%p header-len=%zu payload-len=%zu mask=0x%x"
qio_channel_websock_payload_decode(void *ioc, uint8_t opcode, size_t payload_remain) "Websocket header decoded ioc=%p opcode=0x%x payload-remain=%zu"
+90 −48
Original line number Diff line number Diff line
@@ -149,6 +149,54 @@ int inet_ai_family_from_address(InetSocketAddress *addr,
    return PF_UNSPEC;
}

static int create_fast_reuse_socket(struct addrinfo *e)
{
    int slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
    if (slisten < 0) {
        return -1;
    }
    socket_set_fast_reuse(slisten);
    return slisten;
}

static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e)
{
#ifndef IPV6_V6ONLY
    return bind(socket, e->ai_addr, e->ai_addrlen);
#else
    /*
     * Deals with first & last cases in matrix in comment
     * for inet_ai_family_from_address().
     */
    int v6only =
        ((!saddr->has_ipv4 && !saddr->has_ipv6) ||
         (saddr->has_ipv4 && saddr->ipv4 &&
          saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1;
    int stat;

 rebind:
    if (e->ai_family == PF_INET6) {
        qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
                        sizeof(v6only));
    }

    stat = bind(socket, e->ai_addr, e->ai_addrlen);
    if (!stat) {
        return 0;
    }

    /* If we got EADDRINUSE from an IPv6 bind & v6only is unset,
     * it could be that the IPv4 port is already claimed, so retry
     * with v6only set
     */
    if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) {
        v6only = 1;
        goto rebind;
    }
    return stat;
#endif
}

static int inet_listen_saddr(InetSocketAddress *saddr,
                             int port_offset,
                             bool update_addr,
@@ -158,7 +206,10 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
    char port[33];
    char uaddr[INET6_ADDRSTRLEN+1];
    char uport[33];
    int slisten, rc, port_min, port_max, p;
    int rc, port_min, port_max, p;
    int slisten = 0;
    int saved_errno = 0;
    bool socket_created = false;
    Error *err = NULL;

    memset(&ai,0, sizeof(ai));
@@ -210,75 +261,66 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
        return -1;
    }

    /* create socket + bind */
    /* create socket + bind/listen */
    for (e = res; e != NULL; e = e->ai_next) {
        getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
		        uaddr,INET6_ADDRSTRLEN,uport,32,
		        NI_NUMERICHOST | NI_NUMERICSERV);
        slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);

        slisten = create_fast_reuse_socket(e);
        if (slisten < 0) {
            if (!e->ai_next) {
                error_setg_errno(errp, errno, "Failed to create socket");
            }
            continue;
        }

        socket_set_fast_reuse(slisten);

        socket_created = true;
        port_min = inet_getport(e);
        port_max = saddr->has_to ? saddr->to + port_offset : port_min;
        for (p = port_min; p <= port_max; p++) {
#ifdef IPV6_V6ONLY
            /*
             * Deals with first & last cases in matrix in comment
             * for inet_ai_family_from_address().
             */
            int v6only =
                ((!saddr->has_ipv4 && !saddr->has_ipv6) ||
                 (saddr->has_ipv4 && saddr->ipv4 &&
                  saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1;
#endif
            inet_setport(e, p);
#ifdef IPV6_V6ONLY
        rebind:
            if (e->ai_family == PF_INET6) {
                qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
                                sizeof(v6only));
            rc = try_bind(slisten, saddr, e);
            if (rc) {
                if (errno == EADDRINUSE) {
                    continue;
                } else {
                    error_setg_errno(errp, errno, "Failed to bind socket");
                    goto listen_failed;
                }
#endif
            if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) {
                goto listen;
            }

#ifdef IPV6_V6ONLY
            /* If we got EADDRINUSE from an IPv6 bind & V6ONLY is unset,
             * it could be that the IPv4 port is already claimed, so retry
             * with V6ONLY set
             */
            if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) {
                v6only = 1;
                goto rebind;
            if (!listen(slisten, 1)) {
                goto listen_ok;
            }
#endif

            if (p == port_max) {
                if (!e->ai_next) {
                    error_setg_errno(errp, errno, "Failed to bind socket");
            if (errno != EADDRINUSE) {
                error_setg_errno(errp, errno, "Failed to listen on socket");
                goto listen_failed;
            }
            /* Someone else managed to bind to the same port and beat us
             * to listen on it! Socket semantics does not allow us to
             * recover from this situation, so we need to recreate the
             * socket to allow bind attempts for subsequent ports:
             */
            closesocket(slisten);
            slisten = create_fast_reuse_socket(e);
            if (slisten < 0) {
                error_setg_errno(errp, errno,
                                 "Failed to recreate failed listening socket");
                goto listen_failed;
            }
        }
    }
    error_setg_errno(errp, errno,
                     socket_created ?
                     "Failed to find an available port" :
                     "Failed to create a socket");
listen_failed:
    saved_errno = errno;
    if (slisten >= 0) {
        closesocket(slisten);
    }
    freeaddrinfo(res);
    errno = saved_errno;
    return -1;

listen:
    if (listen(slisten,1) != 0) {
        error_setg_errno(errp, errno, "Failed to listen on socket");
        closesocket(slisten);
        freeaddrinfo(res);
        return -1;
    }
listen_ok:
    if (update_addr) {
        g_free(saddr->host);
        saddr->host = g_strdup(uaddr);