Commit 20781f9c authored by Anthony Liguori
Browse files

Merge remote-tracking branch 'bonzini/migration-writev' into staging



# By Paolo Bonzini
# Via Paolo Bonzini
* bonzini/migration-writev:
  qemu-file: do not use stdio for qemu_fdopen
  iov: handle partial writes from sendmsg and recvmsg
  iov: reorganize iov_send_recv, part 3
  iov: reorganize iov_send_recv, part 2
  iov: reorganize iov_send_recv, part 1
  qemu-file: drop socket_put_buffer

Message-id: 1366192012-14872-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
parents e0a83fc2 e9d8fbf5
Loading
Loading
Loading
Loading
+90 −24
Original line number Diff line number Diff line
@@ -219,18 +219,6 @@ static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
    return len;
}

/*
 * Send `size' bytes from `buf' on the descriptor stored in the
 * QEMUFileSocket passed as `opaque'.  `pos' is not used.
 *
 * Returns the value reported by qemu_send_full() when it is at least
 * `size'; otherwise returns the negated socket_error() value.
 */
static int socket_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
{
    QEMUFileSocket *sock = opaque;
    ssize_t sent = qemu_send_full(sock->fd, buf, size, 0);

    if (sent < size) {
        /* Anything short of the full request is reported as an error. */
        sent = -socket_error();
    }

    return sent;
}

static int socket_close(void *opaque)
{
    QEMUFileSocket *s = opaque;
@@ -368,9 +356,94 @@ static const QEMUFileOps stdio_file_write_ops = {
    .close =      stdio_fclose
};

/*
 * Write a whole I/O vector to the descriptor stored in the QEMUFileSocket
 * passed as `opaque', resuming after short writes until `size' bytes
 * (the value returned by iov_size() for the vector) have been written.
 *
 * opaque: QEMUFileSocket whose `fd' member is written to
 * iov:    vector to write; its first pending element is adjusted while a
 *         partial write is being resumed and is restored before this
 *         function returns, on success and on error alike
 * iovcnt: number of elements in `iov'; must be greater than zero
 * pos:    not used
 *
 * Returns the total number of bytes written, or -errno when writev()
 * fails with an error other than EINTR (EINTR is retried).
 */
static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                  int64_t pos)
{
    QEMUFileSocket *s = opaque;
    ssize_t len, offset;
    ssize_t size = iov_size(iov, iovcnt);
    ssize_t total = 0;

    assert(iovcnt > 0);
    offset = 0;
    while (size > 0) {
        int saved_errno;

        /* Find the next start position; skip all full-sized vector elements */
        while ((size_t)offset >= iov[0].iov_len) {
            offset -= iov[0].iov_len;
            iov++, iovcnt--;
        }

        /* skip `offset' bytes from the (now) first element, undone below */
        assert(iovcnt > 0);
        iov[0].iov_base += offset;
        iov[0].iov_len -= offset;

        do {
            len = writev(s->fd, iov, iovcnt);
        } while (len == -1 && errno == EINTR);
        /* Capture errno right away so nothing later can overwrite it. */
        saved_errno = errno;

        /*
         * Undo the changes above before checking for errors, so that the
         * caller's vector is never left modified on the failure path.
         */
        iov[0].iov_base -= offset;
        iov[0].iov_len += offset;

        if (len == -1) {
            return -saved_errno;
        }

        /* Prepare for the next iteration */
        offset += len;
        total += len;
        size -= len;
    }

    return total;
}

/*
 * Read up to `size' bytes into `buf' from the descriptor stored in the
 * QEMUFileSocket passed as `opaque'.  `pos' is not used.
 *
 * EINTR causes an immediate retry; EAGAIN calls yield_until_fd_readable()
 * on the descriptor and then retries.  Any other read() failure is
 * returned as -errno.  Otherwise the read() result is returned.
 */
static int unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
{
    QEMUFileSocket *sock = opaque;
    ssize_t nread;

    while ((nread = read(sock->fd, buf, size)) == -1) {
        if (errno == EINTR) {
            /* Interrupted before any data arrived: just try again. */
            continue;
        }
        if (errno != EAGAIN) {
            /* Hard failure: hand the error code back to the caller. */
            nread = -errno;
            break;
        }
        /* Nothing available yet; wait on the descriptor, then retry. */
        yield_until_fd_readable(sock->fd);
    }

    return nread;
}

/*
 * Close the descriptor stored in the QEMUFileSocket passed as `opaque'
 * and release the structure with g_free().
 *
 * Always returns 0; the result of close() is not inspected.
 */
static int unix_close(void *opaque)
{
    QEMUFileSocket *sock = opaque;

    close(sock->fd);
    g_free(sock);

    return 0;
}

/* Callback table that qemu_fdopen() installs for mode 'r'. */
static const QEMUFileOps unix_read_ops = {
    .get_fd     = socket_get_fd,
    .get_buffer = unix_get_buffer,
    .close      = unix_close,
};

/* Callback table that qemu_fdopen() installs for mode 'w'. */
static const QEMUFileOps unix_write_ops = {
    .get_fd        = socket_get_fd,
    .writev_buffer = unix_writev_buffer,
    .close         = unix_close,
};

QEMUFile *qemu_fdopen(int fd, const char *mode)
{
    QEMUFileStdio *s;
    QEMUFileSocket *s;

    if (mode == NULL ||
	(mode[0] != 'r' && mode[0] != 'w') ||
@@ -379,21 +452,15 @@ QEMUFile *qemu_fdopen(int fd, const char *mode)
        return NULL;
    }

    s = g_malloc0(sizeof(QEMUFileStdio));
    s->stdio_file = fdopen(fd, mode);
    if (!s->stdio_file)
        goto fail;
    s = g_malloc0(sizeof(QEMUFileSocket));
    s->fd = fd;

    if(mode[0] == 'r') {
        s->file = qemu_fopen_ops(s, &stdio_file_read_ops);
        s->file = qemu_fopen_ops(s, &unix_read_ops);
    } else {
        s->file = qemu_fopen_ops(s, &stdio_file_write_ops);
        s->file = qemu_fopen_ops(s, &unix_write_ops);
    }
    return s->file;

fail:
    g_free(s);
    return NULL;
}

static const QEMUFileOps socket_read_ops = {
@@ -404,7 +471,6 @@ static const QEMUFileOps socket_read_ops = {

/*
 * Callback table socket_write_ops: descriptor lookup, flat-buffer write,
 * vectored write and close.
 */
static const QEMUFileOps socket_write_ops = {
    .get_fd        = socket_get_fd,
    .put_buffer    = socket_put_buffer,
    .writev_buffer = socket_writev_buffer,
    .close         = socket_close,
};
+59 −45
Original line number Diff line number Diff line
@@ -144,57 +144,71 @@ ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
                      size_t offset, size_t bytes,
                      bool do_send)
{
    ssize_t total = 0;
    ssize_t ret;
    unsigned si, ei;            /* start and end indexes */
    if (bytes == 0) {
        /* Catch the do-nothing case early, as otherwise we will pass an
         * empty iovec to sendmsg/recvmsg(), and not all implementations
         * accept this.
         */
        return 0;
    }
    size_t orig_len, tail;
    unsigned niov;

    while (bytes > 0) {
        /* Find the start position, skipping `offset' bytes:
         * first, skip all full-sized vector elements, */
    for (si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) {
        offset -= iov[si].iov_len;
        for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) {
            offset -= iov[niov].iov_len;
        }

        /* niov == iov_cnt would only be valid if bytes == 0, which
         * we already ruled out in the loop condition.  */
        assert(niov < iov_cnt);
        iov += niov;
        iov_cnt -= niov;

        if (offset) {
        assert(si < iov_cnt);
            /* second, skip `offset' bytes from the (now) first element,
             * undo it on exit */
        iov[si].iov_base += offset;
        iov[si].iov_len -= offset;
            iov[0].iov_base += offset;
            iov[0].iov_len -= offset;
        }
        /* Find the end position skipping `bytes' bytes: */
        /* first, skip all full-sized elements */
    for (ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) {
        bytes -= iov[ei].iov_len;
        tail = bytes;
        for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) {
            tail -= iov[niov].iov_len;
        }
    if (bytes) {
        /* second, fixup the last element, and remember
         * the length we've cut from the end of it in `bytes' */
        size_t tail;
        assert(ei < iov_cnt);
        assert(iov[ei].iov_len > bytes);
        tail = iov[ei].iov_len - bytes;
        iov[ei].iov_len = bytes;
        bytes = tail;  /* bytes is now equal to the tail size */
        ++ei;
        if (tail) {
            /* second, fixup the last element, and remember the original
             * length */
            assert(niov < iov_cnt);
            assert(iov[niov].iov_len > tail);
            orig_len = iov[niov].iov_len;
            iov[niov++].iov_len = tail;
        }

    ret = do_send_recv(sockfd, iov + si, ei - si, do_send);
        ret = do_send_recv(sockfd, iov, niov, do_send);

    /* Undo the changes above */
        /* Undo the changes above before checking for errors */
        if (tail) {
            iov[niov-1].iov_len = orig_len;
        }
        if (offset) {
        iov[si].iov_base -= offset;
        iov[si].iov_len += offset;
            iov[0].iov_base -= offset;
            iov[0].iov_len += offset;
        }
    if (bytes) {
        iov[ei-1].iov_len += bytes;

        if (ret < 0) {
            assert(errno != EINTR);
            if (errno == EAGAIN && total > 0) {
                return total;
            }
            return -1;
        }

    return ret;
        /* Prepare for the next iteration */
        offset += ret;
        total += ret;
        bytes -= ret;
    }

    return total;
}