Commit 9bb38019 authored by Dr. David Alan Gilbert, committed by Michael S. Tsirkin
Browse files

vhost+postcopy: Send address back to qemu



We need a better way, but at the moment we need the address of the
mappings sent back to qemu so it can interpret the messages on the
userfaultfd it reads.

This is done as a 3 stage set:
   QEMU -> client
      set_mem_table

   mmap stuff, get addresses

   client -> qemu
       here are the addresses

   qemu -> client
       OK - now you can use them

That ensures that qemu has registered the new addresses in its
userfault code before the client starts accessing them.

Note: We don't ask for the default 'ack' reply since we've got our own.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent 51a5d6e5
Loading
Loading
Loading
Loading
+23 −1
Original line number Diff line number Diff line
@@ -491,10 +491,32 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
                   dev_region->mmap_addr);
        }

        /* Return the address to QEMU so that it can translate the ufd
         * fault addresses back.
         */
        msg_region->userspace_addr = (uintptr_t)(mmap_addr +
                                                 dev_region->mmap_offset);
        close(vmsg->fds[i]);
    }

    /* TODO: Get address back to QEMU */
    /* Send the message back to qemu with the addresses filled in */
    vmsg->fd_num = 0;
    if (!vu_message_write(dev, dev->sock, vmsg)) {
        vu_panic(dev, "failed to respond to set-mem-table for postcopy");
        return false;
    }

    /* Wait for QEMU to confirm that it's registered the handler for the
     * faults.
     */
    if (!vu_message_read(dev, dev->sock, vmsg) ||
        vmsg->size != sizeof(vmsg->payload.u64) ||
        vmsg->payload.u64 != 0) {
        vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
        return false;
    }

    /* OK, now we can go and register the memory and generate faults */
    for (i = 0; i < dev->nregions; i++) {
        VuDevRegion *dev_region = &dev->regions[i];
#ifdef UFFDIO_REGISTER
+9 −0
Original line number Diff line number Diff line
@@ -455,12 +455,21 @@ Master message types
      Id: 5
      Equivalent ioctl: VHOST_SET_MEM_TABLE
      Master payload: memory regions description
      Slave payload: (postcopy only) memory regions description

      Sets the memory map regions on the slave so it can translate the vring
      addresses. In the ancillary data there is an array of file descriptors
      for each memory mapped region. The size and ordering of the fds matches
      the number and ordering of memory regions.

      When VHOST_USER_POSTCOPY_LISTEN has been received, the slave replies to
      SET_MEM_TABLE by sending the bases of the memory mapped regions back to
      the master.  The slave must have mmap'd the regions but not yet accessed
      them, and should not yet generate a userfault event. Note that
      NEED_REPLY_MASK is not set in this case.
      QEMU will then respond to the list of mappings with an empty
      VHOST_USER_SET_MEM_TABLE as an acknowledgment; only upon reception of this
      message may the guest start accessing the memory and generating faults.

 * VHOST_USER_SET_LOG_BASE

      Id: 6
+1 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ vhost_section(const char *name, int r) "%s:%d"

# hw/virtio/vhost-user.c
vhost_user_postcopy_listen(void) ""
vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d"

# hw/virtio/virtio.c
virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
+65 −2
Original line number Diff line number Diff line
@@ -174,6 +174,7 @@ struct vhost_user {
    int slave_fd;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD  postcopy_fd;
    uint64_t           postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
    /* True once we've entered postcopy_listen */
    bool               postcopy_listen;
};
@@ -343,12 +344,15 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int i, fd;
    size_t fd_num = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    /* TODO: Add actual postcopy differences */
    VhostUserMsg msg_reply;
    int region_i, msg_i;

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_MEM_TABLE,
        .hdr.flags = VHOST_USER_VERSION,
@@ -395,6 +399,64 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
        return -1;
    }

    if (vhost_user_read(dev, &msg_reply) < 0) {
        return -1;
    }

    if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
        error_report("%s: Received unexpected msg type."
                     "Expected %d received %d", __func__,
                     VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
        return -1;
    }
    /* We're using the same structure, just reusing one of the
     * fields, so it should be the same size.
     */
    if (msg_reply.hdr.size != msg.hdr.size) {
        error_report("%s: Unexpected size for postcopy reply "
                     "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
        return -1;
    }

    memset(u->postcopy_client_bases, 0,
           sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);

    /* They're in the same order as the regions that were sent
     * but some of the regions were skipped (above) if they
     * didn't have fd's
    */
    for (msg_i = 0, region_i = 0;
         region_i < dev->mem->nregions;
        region_i++) {
        if (msg_i < fd_num &&
            msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
            dev->mem->regions[region_i].guest_phys_addr) {
            u->postcopy_client_bases[region_i] =
                msg_reply.payload.memory.regions[msg_i].userspace_addr;
            trace_vhost_user_set_mem_table_postcopy(
                msg_reply.payload.memory.regions[msg_i].userspace_addr,
                msg.payload.memory.regions[msg_i].userspace_addr,
                msg_i, region_i);
            msg_i++;
        }
    }
    if (msg_i != fd_num) {
        error_report("%s: postcopy reply not fully consumed "
                     "%d vs %zd",
                     __func__, msg_i, fd_num);
        return -1;
    }
    /* Now we've registered this with the postcopy code, we ack to the client,
     * because now we're in the position to be able to deal with any faults
     * it generates.
     */
    /* TODO: Use this for failure cases as well with a bad value */
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.payload.u64 = 0; /* OK */
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }
@@ -411,7 +473,8 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
                                          VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
                                          !do_postcopy;

    if (do_postcopy) {
        /* Postcopy has enough differences that it's best done in it's own