Commit 1eed3399 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2017-08-23' into staging



nbd patches for 2017-08-23

- Fam Zheng: 0/4 block: Fix non-shared storage migration
- Stefan Hajnoczi: qemu-iotests: add 194 non-shared storage migration test
- Stefan Hajnoczi: nbd-client: avoid spurious qio_channel_yield() re-entry

# gpg: Signature made Wed 23 Aug 2017 17:22:53 BST
# gpg:                using RSA key 0xA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>"
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>"
# gpg:                 aka "[jpeg image of size 6874]"
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2017-08-23:
  nbd-client: avoid spurious qio_channel_yield() re-entry
  qemu-iotests: add 194 non-shared storage migration test
  block: Update open_flags after ->inactivate() callback
  mirror: Mark target BB as "force allow inactivate"
  block-backend: Allow more "can inactivate" cases
  block-backend: Refactor inactivate check

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 3da2bd8c 40f4a218
Loading
Loading
Loading
Loading
+3 −4
Original line number Diff line number Diff line
@@ -4085,21 +4085,20 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
        }
    }

    if (setting_flag) {
    if (setting_flag && !(bs->open_flags & BDRV_O_INACTIVE)) {
        uint64_t perm, shared_perm;

        bs->open_flags |= BDRV_O_INACTIVE;

        QLIST_FOREACH(parent, &bs->parents, next_parent) {
            if (parent->role->inactivate) {
                ret = parent->role->inactivate(parent);
                if (ret < 0) {
                    bs->open_flags &= ~BDRV_O_INACTIVE;
                    return ret;
                }
            }
        }

        bs->open_flags |= BDRV_O_INACTIVE;

        /* Update permissions, they may differ for inactive nodes */
        bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
        bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort);
+26 −5
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ struct BlockBackend {

    int quiesce_counter;
    VMChangeStateEntry *vmsh;
    bool force_allow_inactivate;
};

typedef struct BlockBackendAIOCB {
@@ -192,6 +193,30 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
    }
}

/* Mark @blk as inactivatable even though it holds write permissions.
 *
 * Used by block jobs (e.g. the mirror target during non-shared storage
 * migration) that are known to be drained and idle by the time
 * inactivation happens, so no further writes can occur.  Checked by
 * blk_can_inactivate(); the flag is one-way — there is no "unset". */
void blk_set_force_allow_inactivate(BlockBackend *blk)
{
    blk->force_allow_inactivate = true;
}

/* Decide whether it is safe to inactivate @blk's root node.
 *
 * Returns true for guest devices and named (monitor-owned) backends,
 * for backends that hold no write permission at all, and for backends
 * explicitly whitelisted via blk_set_force_allow_inactivate(). */
static bool blk_can_inactivate(BlockBackend *blk)
{
    /* Guest devices and monitor-owned (named) BlockBackends are fine:
     * the VM is stopped at this point, so they are already quiescent. */
    if (blk->dev != NULL || blk_name(blk)[0] != '\0') {
        return true;
    }

    /* Inactivating means no more writes to the image can be done, even
     * writes that would be invisible to the guest.  A BB without any
     * write permission trivially satisfies that — this covers the
     * mirror job source, which libvirt non-shared block migration
     * relies on. */
    if ((blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) == 0) {
        return true;
    }

    /* Otherwise, allow it only if a job opted in explicitly. */
    return blk->force_allow_inactivate;
}

static int blk_root_inactivate(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
@@ -200,11 +225,7 @@ static int blk_root_inactivate(BdrvChild *child)
        return 0;
    }

    /* Only inactivate BlockBackends for guest devices (which are inactive at
     * this point because the VM is stopped) and unattached monitor-owned
     * BlockBackends. If there is still any other user like a block job, then
     * we simply can't inactivate the image. */
    if (!blk->dev && !blk_name(blk)[0]) {
    if (!blk_can_inactivate(blk)) {
        return -EPERM;
    }

+12 −2
Original line number Diff line number Diff line
@@ -1134,6 +1134,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base,
                             bool auto_complete, const char *filter_node_name,
                             bool is_mirror,
                             Error **errp)
{
    MirrorBlockJob *s;
@@ -1222,6 +1223,15 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
    if (ret < 0) {
        goto fail;
    }
    if (is_mirror) {
        /* XXX: Mirror target could be a NBD server of target QEMU in the case
         * of non-shared block migration. To allow migration completion, we
         * have to allow "inactivate" of the target BB.  When that happens, we
         * know the job is drained, and the vcpus are stopped, so no write
         * operation will be performed. Block layer already has assertions to
         * ensure that. */
        blk_set_force_allow_inactivate(s->target);
    }

    s->replaces = g_strdup(replaces);
    s->on_source_error = on_source_error;
@@ -1306,7 +1316,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                     speed, granularity, buf_size, backing_mode,
                     on_source_error, on_target_error, unmap, NULL, NULL,
                     &mirror_job_driver, is_none_mode, base, false,
                     filter_node_name, errp);
                     filter_node_name, true, errp);
}

void commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1329,7 +1339,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
                     MIRROR_LEAVE_BACKING_CHAIN,
                     on_error, on_error, true, cb, opaque,
                     &commit_active_job_driver, false, base, auto_complete,
                     filter_node_name, &local_err);
                     filter_node_name, false, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error_restore_flags;
+22 −13
Original line number Diff line number Diff line
@@ -39,8 +39,10 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
    int i;

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i]) {
            aio_co_wake(s->recv_coroutine[i]);
        NBDClientRequest *req = &s->requests[i];

        if (req->coroutine && req->receiving) {
            aio_co_wake(req->coroutine);
        }
    }
}
@@ -88,28 +90,28 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
         * one coroutine is called until the reply finishes.
         */
        i = HANDLE_TO_INDEX(s, s->reply.handle);
        if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
        if (i >= MAX_NBD_REQUESTS ||
            !s->requests[i].coroutine ||
            !s->requests[i].receiving) {
            break;
        }

        /* We're woken up by the recv_coroutine itself.  Note that there
        /* We're woken up again by the request itself.  Note that there
         * is no race between yielding and reentering read_reply_co.  This
         * is because:
         *
         * - if recv_coroutine[i] runs on the same AioContext, it is only
         * - if the request runs on the same AioContext, it is only
         *   entered after we yield
         *
         * - if recv_coroutine[i] runs on a different AioContext, reentering
         * - if the request runs on a different AioContext, reentering
         *   read_reply_co happens through a bottom half, which can only
         *   run after we yield.
         */
        aio_co_wake(s->recv_coroutine[i]);
        aio_co_wake(s->requests[i].coroutine);
        qemu_coroutine_yield();
    }

    if (ret < 0) {
    s->quit = true;
    }
    nbd_recv_coroutines_enter_all(s);
    s->read_reply_co = NULL;
}
@@ -128,14 +130,17 @@ static int nbd_co_send_request(BlockDriverState *bs,
    s->in_flight++;

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i] == NULL) {
            s->recv_coroutine[i] = qemu_coroutine_self();
        if (s->requests[i].coroutine == NULL) {
            break;
        }
    }

    g_assert(qemu_in_coroutine());
    assert(i < MAX_NBD_REQUESTS);

    s->requests[i].coroutine = qemu_coroutine_self();
    s->requests[i].receiving = false;

    request->handle = INDEX_TO_HANDLE(s, i);

    if (s->quit) {
@@ -173,10 +178,13 @@ static void nbd_co_receive_reply(NBDClientSession *s,
                                 NBDReply *reply,
                                 QEMUIOVector *qiov)
{
    int i = HANDLE_TO_INDEX(s, request->handle);
    int ret;

    /* Wait until we're woken up by nbd_read_reply_entry.  */
    s->requests[i].receiving = true;
    qemu_coroutine_yield();
    s->requests[i].receiving = false;
    *reply = s->reply;
    if (reply->handle != request->handle || !s->ioc || s->quit) {
        reply->error = EIO;
@@ -186,6 +194,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
                          NULL);
            if (ret != request->len) {
                reply->error = EIO;
                s->quit = true;
            }
        }

@@ -200,7 +209,7 @@ static void nbd_coroutine_end(BlockDriverState *bs,
    NBDClientSession *s = nbd_get_client_session(bs);
    int i = HANDLE_TO_INDEX(s, request->handle);

    s->recv_coroutine[i] = NULL;
    s->requests[i].coroutine = NULL;

    /* Kick the read_reply_co to get the next reply.  */
    if (s->read_reply_co) {
+6 −1
Original line number Diff line number Diff line
@@ -17,6 +17,11 @@

#define MAX_NBD_REQUESTS    16

/* Per-slot state for one in-flight NBD request (one slot per handle,
 * MAX_NBD_REQUESTS slots total in NBDClientSession.requests[]). */
typedef struct {
    Coroutine *coroutine;   /* request coroutine owning this slot; NULL = free */
    bool receiving;         /* waiting for read_reply_co? */
} NBDClientRequest;

typedef struct NBDClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
@@ -27,7 +32,7 @@ typedef struct NBDClientSession {
    Coroutine *read_reply_co;
    int in_flight;

    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
    NBDClientRequest requests[MAX_NBD_REQUESTS];
    NBDReply reply;
    bool quit;
} NBDClientSession;
Loading