Commit e9102eb8 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-11-04' into staging



Block patches for 4.2-rc0:
- Work around XFS write-zeroes bug in file-posix block driver
- Fix backup job with compression
- Fix to the NVMe block driver header

# gpg: Signature made Mon 04 Nov 2019 09:01:16 GMT
# gpg:                using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg:                issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2019-11-04:
  block/file-posix: Let post-EOF fallocate serialize
  block: Add bdrv_co_get_self_request()
  block: Make wait/mark serialising requests public
  block/block-copy: fix s->copy_size for compressed cluster
  nvme: fix NSSRS offset in CAP register

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 412fbef3 292d06b9
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -109,9 +109,9 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
        /* Compression is not supported for copy_range */
        /* Compression supports only cluster-size writes and no copy-range. */
        s->use_copy_range = false;
        s->copy_size = MAX(cluster_size, BLOCK_COPY_MAX_BUFFER);
        s->copy_size = cluster_size;
    } else {
        /*
         * copy_range does not respect max_transfer (it's a TODO), so we factor
+36 −0
Original line number Diff line number Diff line
@@ -2721,6 +2721,42 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
    RawPosixAIOData acb;
    ThreadPoolFunc *handler;

#ifdef CONFIG_FALLOCATE
    if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
        BdrvTrackedRequest *req;
        uint64_t end;

        /*
         * This is a workaround for a bug in the Linux XFS driver,
         * where writes submitted through the AIO interface will be
         * discarded if they happen beyond a concurrently running
         * fallocate() that increases the file length (i.e., both the
         * write and the fallocate() happen beyond the EOF).
         *
         * To work around it, we extend the tracked request for this
         * zero write until INT64_MAX (effectively infinity), and mark
         * it as serializing.
         *
         * We have to enable this workaround for all filesystems and
         * AIO modes (not just XFS with aio=native), because for
         * remote filesystems we do not know the host configuration.
         */

        req = bdrv_co_get_self_request(bs);
        assert(req);
        assert(req->type == BDRV_TRACKED_WRITE);
        assert(req->offset <= offset);
        assert(req->offset + req->bytes >= offset + bytes);

        end = INT64_MAX & -(uint64_t)bs->bl.request_alignment;
        req->bytes = end - req->offset;
        req->overlap_bytes = req->bytes;

        bdrv_mark_request_serialising(req, bs->bl.request_alignment);
        bdrv_wait_serialising_requests(req);
    }
#endif

    acb = (RawPosixAIOData) {
        .bs             = bs,
        .aio_fildes     = s->fd,
+30 −12
Original line number Diff line number Diff line
@@ -715,7 +715,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
    qemu_co_mutex_unlock(&bs->reqs_lock);
}

static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
@@ -742,6 +742,24 @@ static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
           (req->bytes == req->overlap_bytes);
}

/**
 * Look up the tracked request that belongs to the calling coroutine.
 *
 * Walks @bs->tracked_requests and compares each entry's owning coroutine
 * with the coroutine that is currently running.
 *
 * Returns the first matching tracked request on @bs, or NULL if the
 * current coroutine has no tracked request on this node.
 */
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
    Coroutine *current = qemu_coroutine_self();
    BdrvTrackedRequest *candidate;

    QLIST_FOREACH(candidate, &bs->tracked_requests, list) {
        if (candidate->co != current) {
            /* Owned by some other coroutine; keep searching. */
            continue;
        }
        return candidate;
    }

    /* No request on @bs is owned by the calling coroutine. */
    return NULL;
}

/**
 * Round a region to cluster boundaries
 */
@@ -805,7 +823,7 @@ void bdrv_dec_in_flight(BlockDriverState *bs)
    bdrv_wakeup(bs);
}

static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
@@ -1437,14 +1455,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
        bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    /* BDRV_REQ_SERIALISING is only for write operation */
    assert(!(flags & BDRV_REQ_SERIALISING));

    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
        wait_serialising_requests(req);
        bdrv_wait_serialising_requests(req);
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -1841,10 +1859,10 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
    assert(!(flags & ~BDRV_REQ_MASK));

    if (flags & BDRV_REQ_SERIALISING) {
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
        bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    waited = wait_serialising_requests(req);
    waited = bdrv_wait_serialising_requests(req);

    assert(!waited || !req->serialising ||
           is_request_serialising_and_aligned(req));
@@ -2008,8 +2026,8 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,

    padding = bdrv_init_padding(bs, offset, bytes, &pad);
    if (padding) {
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_mark_request_serialising(req, align);
        bdrv_wait_serialising_requests(req);

        bdrv_padding_rmw_read(child, req, &pad, true);

@@ -2111,8 +2129,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
    }

    if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);
        bdrv_mark_request_serialising(&req, align);
        bdrv_wait_serialising_requests(&req);
        bdrv_padding_rmw_read(child, &req, &pad, false);
    }

@@ -3205,7 +3223,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
        /* BDRV_REQ_SERIALISING is only for write operation */
        assert(!(read_flags & BDRV_REQ_SERIALISING));
        if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
            wait_serialising_requests(&req);
            bdrv_wait_serialising_requests(&req);
        }

        ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
@@ -3336,7 +3354,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
     * new area, we need to make sure that no write requests are made to it
     * concurrently or they might be overwritten by preallocation. */
    if (new_bytes) {
        mark_request_serialising(&req, 1);
        bdrv_mark_request_serialising(&req, 1);
    }
    if (bs->read_only) {
        error_setg(errp, "Image is read-only");
+4 −0
Original line number Diff line number Diff line
@@ -999,6 +999,10 @@ extern unsigned int bdrv_drain_all_count;
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);

/*
 * Request-serialisation helpers exported from the generic block layer so
 * that block drivers can use them on their own tracked requests.
 */

/*
 * NOTE(review): callers store the result in a variable named "waited", so
 * this presumably returns true when the coroutine had to wait for another
 * request -- confirm against the implementation.
 */
bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);

/*
 * Mark @req as serialising. @align is used to widen the request's range to
 * aligned boundaries when computing its overlap region; the implementation
 * masks with ~(align - 1), so @align is expected to be a power of two.
 */
void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);

/*
 * Return the tracked request on @bs that belongs to the current coroutine,
 * or NULL if there is none.
 */
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);

int get_tmp_filename(char *filename, int size);
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename);
+1 −1
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@ enum NvmeCapShift {
    CAP_AMS_SHIFT      = 17,
    CAP_TO_SHIFT       = 24,
    CAP_DSTRD_SHIFT    = 32,
    CAP_NSSRS_SHIFT    = 33,
    CAP_NSSRS_SHIFT    = 36,
    CAP_CSS_SHIFT      = 37,
    CAP_MPSMIN_SHIFT   = 48,
    CAP_MPSMAX_SHIFT   = 52,