Commit 199a5bde authored by Stefan Hajnoczi's avatar Stefan Hajnoczi
Browse files

Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging



* NBD bugfix (Changlong)
* NBD write zeroes support (Eric)
* Memory backend fixes (Haozhong)
* Atomics fix (Alex)
* New AVX512 features (Luwei)
* "make check" logging fix (Paolo)
* Chardev refactoring fallout (Paolo)
* Small checkpatch improvements (Paolo, Jeff)

# gpg: Signature made Wed 02 Nov 2016 08:31:11 AM GMT
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (30 commits)
  main-loop: Suppress I/O thread warning under qtest
  docs/rcu.txt: Fix minor typo
  vl: exit qemu on guest panic if -no-shutdown is not set
  checkpatch: allow spaces before parenthesis for 'coroutine_fn'
  x86: add AVX512_4VNNIW and AVX512_4FMAPS features
  slirp: fix CharDriver breakage
  qemu-char: do not forward events through the mux until QEMU has started
  nbd: Implement NBD_CMD_WRITE_ZEROES on client
  nbd: Implement NBD_CMD_WRITE_ZEROES on server
  nbd: Improve server handling of shutdown requests
  nbd: Refactor conversion to errno to silence checkpatch
  nbd: Support shorter handshake
  nbd: Less allocation during NBD_OPT_LIST
  nbd: Let client skip portions of server reply
  nbd: Let server know when client gives up negotiation
  nbd: Share common option-sending code in client
  nbd: Send message along with server NBD_REP_ERR errors
  nbd: Share common reply-sending code in server
  nbd: Rename struct nbd_request and nbd_reply
  nbd: Rename NbdClientSession to NBDClientSession
  ...

Signed-off-by: default avatarStefan Hajnoczi <stefanha@redhat.com>
parents c2a4b384 7d175d29
Loading
Loading
Loading
Loading
+68 −36
Original line number Diff line number Diff line
/*
 * QEMU Block driver for  NBD
 *
 * Copyright (C) 2016 Red Hat, Inc.
 * Copyright (C) 2008 Bull S.A.S.
 *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
@@ -32,7 +33,7 @@
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))

static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
{
    int i;

@@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)

static void nbd_teardown_connection(BlockDriverState *bs)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    NBDClientSession *client = nbd_get_client_session(bs);

    if (!client->ioc) { /* Already closed */
        return;
@@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
static void nbd_reply_ready(void *opaque)
{
    BlockDriverState *bs = opaque;
    NbdClientSession *s = nbd_get_client_session(bs);
    NBDClientSession *s = nbd_get_client_session(bs);
    uint64_t i;
    int ret;

@@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque)
}

static int nbd_co_send_request(BlockDriverState *bs,
                               struct nbd_request *request,
                               NBDRequest *request,
                               QEMUIOVector *qiov)
{
    NbdClientSession *s = nbd_get_client_session(bs);
    NBDClientSession *s = nbd_get_client_session(bs);
    AioContext *aio_context;
    int rc, ret, i;

@@ -166,9 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
    return rc;
}

static void nbd_co_receive_reply(NbdClientSession *s,
                                 struct nbd_request *request,
                                 struct nbd_reply *reply,
static void nbd_co_receive_reply(NBDClientSession *s,
                                 NBDRequest *request,
                                 NBDReply *reply,
                                 QEMUIOVector *qiov)
{
    int ret;
@@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s,
    }
}

static void nbd_coroutine_start(NbdClientSession *s,
   struct nbd_request *request)
static void nbd_coroutine_start(NBDClientSession *s,
                                NBDRequest *request)
{
    /* Poor man semaphore.  The free_sema is locked when no other request
     * can be accepted, and unlocked after receiving one reply.  */
    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
        qemu_co_mutex_lock(&s->free_sema);
    if (s->in_flight == MAX_NBD_REQUESTS) {
        qemu_co_queue_wait(&s->free_sema);
        assert(s->in_flight < MAX_NBD_REQUESTS);
    }
    s->in_flight++;
@@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s,
    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
}

static void nbd_coroutine_end(NbdClientSession *s,
    struct nbd_request *request)
static void nbd_coroutine_end(NBDClientSession *s,
                              NBDRequest *request)
{
    int i = HANDLE_TO_INDEX(s, request->handle);
    s->recv_coroutine[i] = NULL;
    if (s->in_flight-- == MAX_NBD_REQUESTS) {
        qemu_co_mutex_unlock(&s->free_sema);
        qemu_co_queue_next(&s->free_sema);
    }
}

int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = {
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };
    struct nbd_reply reply;
    NBDReply reply;
    ssize_t ret;

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = {
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_WRITE,
        .from = offset,
        .len = bytes,
    };
    struct nbd_reply reply;
    NBDReply reply;
    ssize_t ret;

    if (flags & BDRV_REQ_FUA) {
        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
        request.type |= NBD_CMD_FLAG_FUA;
        request.flags |= NBD_CMD_FLAG_FUA;
    }

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
    return -reply.error;
}

/*
 * Ask the NBD server to write zeroes over @count bytes starting at
 * @offset, via NBD_CMD_WRITE_ZEROES.
 *
 * Returns 0 on success, or a negative errno value:
 *   -ENOTSUP if the server did not advertise NBD_FLAG_SEND_WRITE_ZEROES,
 *   otherwise the negated error reported by send/receive of the request.
 */
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                int count, BdrvRequestFlags flags)
{
    ssize_t ret;
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
        .len = count,
    };
    NBDReply reply;

    /* Server must have advertised write-zeroes support during handshake. */
    if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
        return -ENOTSUP;
    }

    /* Map block-layer request flags onto NBD per-command flags. */
    if (flags & BDRV_REQ_FUA) {
        /* Caller may only request FUA when the server supports it. */
        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
        /* Caller forbids punching holes; tell the server to allocate. */
        request.flags |= NBD_CMD_FLAG_NO_HOLE;
    }

    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        /* Send failed: synthesize a reply so the common exit path works. */
        reply.error = -ret;
    } else {
        /* No payload for write-zeroes replies, hence the NULL qiov. */
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
}

int nbd_client_co_flush(BlockDriverState *bs)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_FLUSH };
    struct nbd_reply reply;
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = { .type = NBD_CMD_FLUSH };
    NBDReply reply;
    ssize_t ret;

    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
@@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs)

int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = {
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_TRIM,
        .from = offset,
        .len = count,
    };
    struct nbd_reply reply;
    NBDReply reply;
    ssize_t ret;

    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
@@ -342,12 +378,8 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,

void nbd_client_close(BlockDriverState *bs)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = {
        .type = NBD_CMD_DISC,
        .from = 0,
        .len = 0
    };
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = { .type = NBD_CMD_DISC };

    if (client->ioc == NULL) {
        return;
@@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs,
                    const char *hostname,
                    Error **errp)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    NBDClientSession *client = nbd_get_client_session(bs);
    int ret;

    /* NBD handshake */
@@ -386,7 +418,7 @@ int nbd_client_init(BlockDriverState *bs,
    }

    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_mutex_init(&client->free_sema);
    qemu_co_queue_init(&client->free_sema);
    client->sioc = sioc;
    object_ref(OBJECT(client->sioc));

+7 −5
Original line number Diff line number Diff line
@@ -17,24 +17,24 @@

#define MAX_NBD_REQUESTS    16

typedef struct NbdClientSession {
typedef struct NBDClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
    uint16_t nbdflags;
    off_t size;

    CoMutex send_mutex;
    CoMutex free_sema;
    CoQueue free_sema;
    Coroutine *send_coroutine;
    int in_flight;

    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
    struct nbd_reply reply;
    NBDReply reply;

    bool is_unix;
} NbdClientSession;
} NBDClientSession;

NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
NBDClientSession *nbd_get_client_session(BlockDriverState *bs);

int nbd_client_init(BlockDriverState *bs,
                    QIOChannelSocket *sock,
@@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags);
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                int count, BdrvRequestFlags flags);
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags);

+6 −2
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@
#define EN_OPTSTR ":exportname="

typedef struct BDRVNBDState {
    NbdClientSession client;
    NBDClientSession client;

    /* For nbd_refresh_filename() */
    SocketAddress *saddr;
@@ -294,7 +294,7 @@ done:
    return saddr;
}

NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
{
    BDRVNBDState *s = bs->opaque;
    return &s->client;
@@ -466,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
}

@@ -558,6 +559,7 @@ static BlockDriver bdrv_nbd = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -576,6 +578,7 @@ static BlockDriver bdrv_nbd_tcp = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -594,6 +597,7 @@ static BlockDriver bdrv_nbd_unix = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
+1 −1
Original line number Diff line number Diff line
@@ -145,7 +145,7 @@ The core RCU API is small:
        and then read from there.

        RCU read-side critical sections must use atomic_rcu_read() to
        read data, unless concurrent writes are presented by another
        read data, unless concurrent writes are prevented by another
        synchronization mechanism.

        Furthermore, RCU read-side critical sections should traverse the
+30 −3
Original line number Diff line number Diff line
@@ -493,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

    section = address_space_translate_internal(d, addr, xlat, plen, false);

@@ -1231,6 +1231,15 @@ void qemu_mutex_unlock_ramlist(void)
}

#ifdef __linux__
/*
 * Return the size in bytes of the file behind @fd, or a negative errno
 * value on failure.  Note: leaves the file offset at the end of file.
 */
static int64_t get_file_size(int fd)
{
    int64_t end_offset = lseek(fd, 0, SEEK_END);

    return end_offset < 0 ? -errno : end_offset;
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
@@ -1242,6 +1251,7 @@ static void *file_ram_alloc(RAMBlock *block,
    char *c;
    void *area = MAP_FAILED;
    int fd = -1;
    int64_t file_size;

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
@@ -1304,6 +1314,8 @@ static void *file_ram_alloc(RAMBlock *block,
    }
#endif

    file_size = get_file_size(fd);

    if (memory < block->page_size) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than page size 0x%zx",
@@ -1311,6 +1323,13 @@ static void *file_ram_alloc(RAMBlock *block,
        goto error;
    }

    if (file_size > 0 && file_size < memory) {
        error_setg(errp, "backing store %s size 0x%" PRIx64
                   " does not match 'size' option 0x" RAM_ADDR_FMT,
                   path, file_size, memory);
        goto error;
    }

    memory = ROUND_UP(memory, block->page_size);

    /*
@@ -1318,8 +1337,16 @@ static void *file_ram_alloc(RAMBlock *block,
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     *
     * Do not truncate the non-empty backend file to avoid corrupting
     * the existing data in the file. Disabling shrinking is not
     * enough. For example, the current vNVDIMM implementation stores
     * the guest NVDIMM labels at the end of the backend file. If the
     * backend file is later extended, QEMU will not be able to find
     * those labels. Therefore, extending the non-empty backend file
     * is disabled as well.
     */
    if (ftruncate(fd, memory)) {
    if (!file_size && ftruncate(fd, memory)) {
        perror("ftruncate");
    }

@@ -2378,7 +2405,7 @@ static void tcg_commit(MemoryListener *listener)
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    cpuas->memory_dispatch = d;
    atomic_rcu_set(&cpuas->memory_dispatch, d);
    tlb_flush(cpuas->cpu, 1);
}

Loading