Commit 47175951 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2019-04-01' into staging



nbd patches for 2019-04-01

- Better behavior of qemu-img map on NBD images
- Fixes for NBD protocol alignment corner cases:
 - the server has fewer places where it sends reads or block status
   not aligned to its advertised block size
 - the client has more cases where it can work around server
   non-compliance present in qemu 3.1
 - the client now avoids non-compliant requests when interoperating
   with nbdkit or other servers not advertising block size

# gpg: Signature made Mon 01 Apr 2019 15:06:54 BST
# gpg:                using RSA key A7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full]
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full]
# gpg:                 aka "[jpeg image of size 6874]" [full]
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2019-04-01:
  nbd/client: Trace server noncompliance on structured reads
  nbd/server: Advertise actual minimum block size
  block: Add bdrv_get_request_alignment()
  nbd/client: Support qemu-img convert from unaligned size
  nbd/client: Reject inaccessible tail of inconsistent server
  nbd/client: Report offsets in bdrv_block_status
  nbd/client: Lower min_block for block-status, unaligned size
  iotests: Add 241 to test NBD on unaligned images
  nbd-client: Work around server BLOCK_STATUS misalignment at EOF
  qemu-img: Gracefully shutdown when map can't finish
  nbd: Permit simple error to NBD_CMD_BLOCK_STATUS
  nbd: Don't lose server's error to NBD_CMD_BLOCK_STATUS
  nbd: Tolerate some server non-compliance in NBD_CMD_BLOCK_STATUS
  qemu-img: Report bdrv_block_status failures

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 230ce198 75d34eb9
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -1764,6 +1764,13 @@ int blk_get_flags(BlockBackend *blk)
    }
}

/* Returns the minimum request alignment, in bytes; guaranteed nonzero */
uint32_t blk_get_request_alignment(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
}

/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
+101 −23
Original line number Diff line number Diff line
@@ -211,7 +211,8 @@ static inline uint64_t payload_advance64(uint8_t **payload)
    return ldq_be_p(*payload - 8);
}

static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
static int nbd_parse_offset_hole_payload(NBDClientSession *client,
                                         NBDStructuredReplyChunk *chunk,
                                         uint8_t *payload, uint64_t orig_offset,
                                         QEMUIOVector *qiov, Error **errp)
{
@@ -233,6 +234,10 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
                         " region");
        return -EINVAL;
    }
    if (client->info.min_block &&
        !QEMU_IS_ALIGNED(hole_size, client->info.min_block)) {
        trace_nbd_structured_read_compliance("hole");
    }

    qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);

@@ -240,8 +245,8 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
}

/* nbd_parse_blockstatus_payload
 * support only one extent in reply and only for
 * base:allocation context
 * Based on our request, we expect only one extent in reply, for the
 * base:allocation context.
 */
static int nbd_parse_blockstatus_payload(NBDClientSession *client,
                                         NBDStructuredReplyChunk *chunk,
@@ -250,7 +255,8 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
{
    uint32_t context_id;

    if (chunk->length != sizeof(context_id) + sizeof(*extent)) {
    /* The server succeeded, so it must have sent [at least] one extent */
    if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
        error_setg(errp, "Protocol error: invalid payload for "
                         "NBD_REPLY_TYPE_BLOCK_STATUS");
        return -EINVAL;
@@ -268,18 +274,50 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
    extent->length = payload_advance32(&payload);
    extent->flags = payload_advance32(&payload);

    if (extent->length == 0 ||
        (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
                                                    client->info.min_block))) {
    if (extent->length == 0) {
        error_setg(errp, "Protocol error: server sent status chunk with "
                   "invalid length");
                   "zero length");
        return -EINVAL;
    }

    /* The server is allowed to send us extra information on the final
     * extent; just clamp it to the length we requested. */
    /*
     * A server sending unaligned block status is in violation of the
     * protocol, but as qemu-nbd 3.1 is such a server (at least for
     * POSIX files that are not a multiple of 512 bytes, since qemu
     * rounds files up to 512-byte multiples but lseek(SEEK_HOLE)
     * still sees an implicit hole beyond the real EOF), it's nicer to
     * work around the misbehaving server. If the request included
     * more than the final unaligned block, truncate it back to an
     * aligned result; if the request was only the final block, round
     * up to the full block and change the status to fully-allocated
     * (always a safe status, even if it loses information).
     */
    if (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
                                                   client->info.min_block)) {
        trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
        if (extent->length > client->info.min_block) {
            extent->length = QEMU_ALIGN_DOWN(extent->length,
                                             client->info.min_block);
        } else {
            extent->length = client->info.min_block;
            extent->flags = 0;
        }
    }

    /*
     * We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
     * sent us any more than one extent, nor should it have included
     * status beyond our request in that extent. However, it's easy
     * enough to ignore the server's noncompliance without killing the
     * connection; just ignore trailing extents, and clamp things to
     * the length of our request.
     */
    if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
        trace_nbd_parse_blockstatus_compliance("more than one extent");
    }
    if (extent->length > orig_length) {
        extent->length = orig_length;
        trace_nbd_parse_blockstatus_compliance("extent length too large");
    }

    return 0;
@@ -357,6 +395,9 @@ static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
                         " region");
        return -EINVAL;
    }
    if (s->info.min_block && !QEMU_IS_ALIGNED(data_size, s->info.min_block)) {
        trace_nbd_structured_read_compliance("data");
    }

    qemu_iovec_init(&sub_qiov, qiov->niov);
    qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
@@ -679,7 +720,7 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
             * in qiov */
            break;
        case NBD_REPLY_TYPE_OFFSET_HOLE:
            ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
            ret = nbd_parse_offset_hole_payload(s, &reply.structured, payload,
                                                offset, qiov, &local_err);
            if (ret < 0) {
                s->quit = true;
@@ -718,9 +759,7 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
    bool received = false;

    assert(!extent->length);
    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
                            NULL, &reply, &payload)
    {
    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, &reply, &payload) {
        int ret;
        NBDStructuredReplyChunk *chunk = &reply.structured;

@@ -758,12 +797,9 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
        payload = NULL;
    }

    if (!extent->length && !iter.err) {
        error_setg(&iter.err,
                   "Server did not reply with any status extents");
        if (!iter.ret) {
            iter.ret = -EIO;
        }
    if (!extent->length && !iter.request_ret) {
        error_setg(&local_err, "Server did not reply with any status extents");
        nbd_iter_channel_error(&iter, -EIO, &local_err);
    }

    error_propagate(errp, iter.err);
@@ -820,6 +856,25 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
    if (!bytes) {
        return 0;
    }
    /*
     * Work around the fact that the block layer doesn't do
     * byte-accurate sizing yet - if the read exceeds the server's
     * advertised size because the block layer rounded size up, then
     * truncate the request to the server and tail-pad with zero.
     */
    if (offset >= client->info.size) {
        assert(bytes < BDRV_SECTOR_SIZE);
        qemu_iovec_memset(qiov, 0, 0, bytes);
        return 0;
    }
    if (offset + bytes > client->info.size) {
        uint64_t slop = offset + bytes - client->info.size;

        assert(slop < BDRV_SECTOR_SIZE);
        qemu_iovec_memset(qiov, bytes - slop, 0, slop);
        request.len -= slop;
    }

    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        return ret;
@@ -938,15 +993,35 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
        .from = offset,
        .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
                                                bs->bl.request_alignment),
                                client->info.max_block), bytes),
                                client->info.max_block),
                   MIN(bytes, client->info.size - offset)),
        .flags = NBD_CMD_FLAG_REQ_ONE,
    };

    if (!client->info.base_allocation) {
        *pnum = bytes;
        return BDRV_BLOCK_DATA;
        *map = offset;
        *file = bs;
        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
    }

    /*
     * Work around the fact that the block layer doesn't do
     * byte-accurate sizing yet - if the status request exceeds the
     * server's advertised size because the block layer rounded size
     * up, we truncated the request to the server (above), or are
     * called on just the hole.
     */
    if (offset >= client->info.size) {
        *pnum = bytes;
        assert(bytes < BDRV_SECTOR_SIZE);
        /* Intentionally don't report offset_valid for the hole */
        return BDRV_BLOCK_ZERO;
    }

    if (client->info.min_block) {
        assert(QEMU_IS_ALIGNED(request.len, client->info.min_block));
    }
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        return ret;
@@ -967,8 +1042,11 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,

    assert(extent.length);
    *pnum = extent.length;
    *map = offset;
    *file = bs;
    return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
           (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0);
        (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0) |
        BDRV_BLOCK_OFFSET_VALID;
}

void nbd_client_detach_aio_context(BlockDriverState *bs)
+18 −1
Original line number Diff line number Diff line
@@ -437,7 +437,24 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
    uint32_t min = s->info.min_block;
    uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);

    bs->bl.request_alignment = min ? min : BDRV_SECTOR_SIZE;
    /*
     * If the server did not advertise an alignment:
     * - a size that is not sector-aligned implies that an alignment
     *   of 1 can be used to access those tail bytes
     * - advertisement of block status requires an alignment of 1, so
     *   that we don't violate block layer constraints that block
     *   status is always aligned (as we can't control whether the
     *   server will report sub-sector extents, such as a hole at EOF
     *   on an unaligned POSIX file)
     * - otherwise, assume the server is so old that we are safer avoiding
     *   sub-sector requests
     */
    if (!min) {
        min = (!QEMU_IS_ALIGNED(s->info.size, BDRV_SECTOR_SIZE) ||
               s->info.base_allocation) ? 1 : BDRV_SECTOR_SIZE;
    }

    bs->bl.request_alignment = min;
    bs->bl.max_pdiscard = max;
    bs->bl.max_pwrite_zeroes = max;
    bs->bl.max_transfer = max;
+2 −0
Original line number Diff line number Diff line
@@ -157,6 +157,8 @@ nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pa
iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, uint64_t bytes, int ret) "src_lun %p offset %"PRIu64" dst_lun %p offset %"PRIu64" bytes %"PRIu64" ret %d"

# nbd-client.c
nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s"
nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"
nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"

+1 −0
Original line number Diff line number Diff line
@@ -177,6 +177,7 @@ bool blk_is_available(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
int blk_get_flags(BlockBackend *blk);
uint32_t blk_get_request_alignment(BlockBackend *blk);
uint32_t blk_get_max_transfer(BlockBackend *blk);
int blk_get_max_iov(BlockBackend *blk);
void blk_set_guest_block_size(BlockBackend *blk, int align);
Loading