Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (e018ccb3) · Commits · SUMMER2020 / students / proj-2021291

block.c

+37 −26

Original line number	Original line	Diff line number	Diff line
	@@ -2169,16 +2169,8 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
	uint64_t perm, uint64_t shared,		uint64_t perm, uint64_t shared,
	uint64_t *nperm, uint64_t *nshared)		uint64_t *nperm, uint64_t *nshared)
	{		{
	if (c == NULL) {
	*nperm = perm & DEFAULT_PERM_PASSTHROUGH;		*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
	*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;		*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
	return;
	}

	*nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
	(c->perm & DEFAULT_PERM_UNCHANGED);
	*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
	(c->shared_perm & DEFAULT_PERM_UNCHANGED);
	}		}

	void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,		void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
	@@ -2239,13 +2231,27 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
	BlockDriverState *new_bs)		BlockDriverState *new_bs)
	{		{
	BlockDriverState *old_bs = child->bs;		BlockDriverState *old_bs = child->bs;
	int i;		int new_bs_quiesce_counter;
			int drain_saldo;

	assert(!child->frozen);		assert(!child->frozen);

	if (old_bs && new_bs) {		if (old_bs && new_bs) {
	assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));		assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
	}		}

			new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
			drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;

			/*
			* If the new child node is drained but the old one was not, flush
			* all outstanding requests to the old child node.
			*/
			while (drain_saldo > 0 && child->role->drained_begin) {
			bdrv_parent_drained_begin_single(child, true);
			drain_saldo--;
			}

	if (old_bs) {		if (old_bs) {
	/* Detach first so that the recursive drain sections coming from @child		/* Detach first so that the recursive drain sections coming from @child
	* are already gone and we only end the drain sections that came from		* are already gone and we only end the drain sections that came from
	@@ -2253,28 +2259,22 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
	if (child->role->detach) {		if (child->role->detach) {
	child->role->detach(child);		child->role->detach(child);
	}		}
	while (child->parent_quiesce_counter) {
	bdrv_parent_drained_end_single(child);
	}
	QLIST_REMOVE(child, next_parent);		QLIST_REMOVE(child, next_parent);
	} else {
	assert(child->parent_quiesce_counter == 0);
	}		}

	child->bs = new_bs;		child->bs = new_bs;

	if (new_bs) {		if (new_bs) {
	QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);		QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
	if (new_bs->quiesce_counter) {
	int num = new_bs->quiesce_counter;		/*
	if (child->role->parent_is_bds) {		* Detaching the old node may have led to the new node's
	num -= bdrv_drain_all_count;		* quiesce_counter having been decreased. Not a problem, we
	}		* just need to recognize this here and then invoke
	assert(num >= 0);		* drained_end appropriately more often.
	for (i = 0; i < num; i++) {		*/
	bdrv_parent_drained_begin_single(child, true);		assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
	}		drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
	}

	/* Attach only after starting new drained sections, so that recursive		/* Attach only after starting new drained sections, so that recursive
	* drain sections coming from @child don't get an extra .drained_begin		* drain sections coming from @child don't get an extra .drained_begin
	@@ -2283,6 +2283,15 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
	child->role->attach(child);		child->role->attach(child);
	}		}
	}		}

			/*
			* If the old child node was drained but the new one is not, allow
			* requests to come in only after the new node has been attached.
			*/
			while (drain_saldo < 0 && child->role->drained_end) {
			bdrv_parent_drained_end_single(child);
			drain_saldo++;
			}
	}		}

	/*		/*
	@@ -4500,6 +4509,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
	int ret = -EIO;		int ret = -EIO;

	bdrv_ref(top);		bdrv_ref(top);
			bdrv_subtree_drained_begin(top);

	if (!top->drv || !base->drv) {		if (!top->drv || !base->drv) {
	goto exit;		goto exit;
	@@ -4571,6 +4581,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,

	ret = 0;		ret = 0;
	exit:		exit:
			bdrv_subtree_drained_end(top);
	bdrv_unref(top);		bdrv_unref(top);
	return ret;		return ret;
	}		}

block/backup.c

+1 −0

Original line number	Original line	Diff line number	Diff line
	@@ -644,6 +644,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
	if (ret < 0) {		if (ret < 0) {
	goto error;		goto error;
	}		}
			blk_set_disable_request_queuing(job->target, true);

	job->on_source_error = on_source_error;		job->on_source_error = on_source_error;
	job->on_target_error = on_target_error;		job->on_target_error = on_target_error;

block/block-backend.c

+50 −19

Original line number	Original line	Diff line number	Diff line
	@@ -81,6 +81,9 @@ struct BlockBackend {
	QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;		QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;

	int quiesce_counter;		int quiesce_counter;
			CoQueue queued_requests;
			bool disable_request_queuing;

	VMChangeStateEntry *vmsh;		VMChangeStateEntry *vmsh;
	bool force_allow_inactivate;		bool force_allow_inactivate;

	@@ -341,6 +344,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)

	block_acct_init(&blk->stats);		block_acct_init(&blk->stats);

			qemu_co_queue_init(&blk->queued_requests);
	notifier_list_init(&blk->remove_bs_notifiers);		notifier_list_init(&blk->remove_bs_notifiers);
	notifier_list_init(&blk->insert_bs_notifiers);		notifier_list_init(&blk->insert_bs_notifiers);
	QLIST_INIT(&blk->aio_notifiers);		QLIST_INIT(&blk->aio_notifiers);
	@@ -1098,6 +1102,11 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
	blk->allow_aio_context_change = allow;		blk->allow_aio_context_change = allow;
	}		}

			void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
			{
			blk->disable_request_queuing = disable;
			}

	static int blk_check_byte_request(BlockBackend *blk, int64_t offset,		static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
	size_t size)		size_t size)
	{		{
	@@ -1129,13 +1138,24 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
	return 0;		return 0;
	}		}

			static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
			{
			if (blk->quiesce_counter && !blk->disable_request_queuing) {
			qemu_co_queue_wait(&blk->queued_requests, NULL);
			}
			}

	int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,		int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
	unsigned int bytes, QEMUIOVector *qiov,		unsigned int bytes, QEMUIOVector *qiov,
	BdrvRequestFlags flags)		BdrvRequestFlags flags)
	{		{
	int ret;		int ret;
	BlockDriverState *bs = blk_bs(blk);		BlockDriverState *bs;

			blk_wait_while_drained(blk);

			/* Call blk_bs() only after waiting, the graph may have changed */
			bs = blk_bs(blk);
	trace_blk_co_preadv(blk, bs, offset, bytes, flags);		trace_blk_co_preadv(blk, bs, offset, bytes, flags);

	ret = blk_check_byte_request(blk, offset, bytes);		ret = blk_check_byte_request(blk, offset, bytes);
	@@ -1161,8 +1181,12 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
	BdrvRequestFlags flags)		BdrvRequestFlags flags)
	{		{
	int ret;		int ret;
	BlockDriverState *bs = blk_bs(blk);		BlockDriverState *bs;

			blk_wait_while_drained(blk);

			/* Call blk_bs() only after waiting, the graph may have changed */
			bs = blk_bs(blk);
	trace_blk_co_pwritev(blk, bs, offset, bytes, flags);		trace_blk_co_pwritev(blk, bs, offset, bytes, flags);

	ret = blk_check_byte_request(blk, offset, bytes);		ret = blk_check_byte_request(blk, offset, bytes);
	@@ -1239,22 +1263,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
	return rwco.ret;		return rwco.ret;
	}		}

	int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
	int count)
	{
	int ret;

	ret = blk_check_byte_request(blk, offset, count);
	if (ret < 0) {
	return ret;
	}

	blk_root_drained_begin(blk->root);
	ret = blk_pread(blk, offset, buf, count);
	blk_root_drained_end(blk->root, NULL);
	return ret;
	}

	int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,		int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
	int bytes, BdrvRequestFlags flags)		int bytes, BdrvRequestFlags flags)
	{		{
	@@ -1367,6 +1375,12 @@ static void blk_aio_read_entry(void *opaque)
	BlkRwCo *rwco = &acb->rwco;		BlkRwCo *rwco = &acb->rwco;
	QEMUIOVector *qiov = rwco->iobuf;		QEMUIOVector *qiov = rwco->iobuf;

			if (rwco->blk->quiesce_counter) {
			blk_dec_in_flight(rwco->blk);
			blk_wait_while_drained(rwco->blk);
			blk_inc_in_flight(rwco->blk);
			}

	assert(qiov->size == acb->bytes);		assert(qiov->size == acb->bytes);
	rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,		rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
	qiov, rwco->flags);		qiov, rwco->flags);
	@@ -1379,6 +1393,12 @@ static void blk_aio_write_entry(void *opaque)
	BlkRwCo *rwco = &acb->rwco;		BlkRwCo *rwco = &acb->rwco;
	QEMUIOVector *qiov = rwco->iobuf;		QEMUIOVector *qiov = rwco->iobuf;

			if (rwco->blk->quiesce_counter) {
			blk_dec_in_flight(rwco->blk);
			blk_wait_while_drained(rwco->blk);
			blk_inc_in_flight(rwco->blk);
			}

	assert(!qiov || qiov->size == acb->bytes);		assert(!qiov || qiov->size == acb->bytes);
	rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,		rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
	qiov, rwco->flags);		qiov, rwco->flags);
	@@ -1500,6 +1520,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb)

	int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)		int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
	{		{
			blk_wait_while_drained(blk);

	if (!blk_is_available(blk)) {		if (!blk_is_available(blk)) {
	return -ENOMEDIUM;		return -ENOMEDIUM;
	}		}
	@@ -1540,7 +1562,11 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,

	int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)		int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
	{		{
	int ret = blk_check_byte_request(blk, offset, bytes);		int ret;

			blk_wait_while_drained(blk);

			ret = blk_check_byte_request(blk, offset, bytes);
	if (ret < 0) {		if (ret < 0) {
	return ret;		return ret;
	}		}
	@@ -1550,6 +1576,8 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)

	int blk_co_flush(BlockBackend *blk)		int blk_co_flush(BlockBackend *blk)
	{		{
			blk_wait_while_drained(blk);

	if (!blk_is_available(blk)) {		if (!blk_is_available(blk)) {
	return -ENOMEDIUM;		return -ENOMEDIUM;
	}		}
	@@ -2250,6 +2278,9 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
	if (blk->dev_ops && blk->dev_ops->drained_end) {		if (blk->dev_ops && blk->dev_ops->drained_end) {
	blk->dev_ops->drained_end(blk->dev_opaque);		blk->dev_ops->drained_end(blk->dev_opaque);
	}		}
			while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
			/* Resume all queued requests */
			}
	}		}
	}		}

block/commit.c

+2 −0

Original line number	Original line	Diff line number	Diff line
	@@ -350,6 +350,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
	if (ret < 0) {		if (ret < 0) {
	goto fail;		goto fail;
	}		}
			blk_set_disable_request_queuing(s->base, true);
	s->base_bs = base;		s->base_bs = base;

	/* Required permissions are already taken with block_job_add_bdrv() */		/* Required permissions are already taken with block_job_add_bdrv() */
	@@ -358,6 +359,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
	if (ret < 0) {		if (ret < 0) {
	goto fail;		goto fail;
	}		}
			blk_set_disable_request_queuing(s->top, true);

	s->backing_file_str = g_strdup(backing_file_str);		s->backing_file_str = g_strdup(backing_file_str);
	s->on_error = on_error;		s->on_error = on_error;

block/file-posix.c

+25 −11

Original line number	Original line	Diff line number	Diff line
	@@ -323,6 +323,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
	BDRVRawState *s = bs->opaque;		BDRVRawState *s = bs->opaque;
	char *buf;		char *buf;
	size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());		size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
			size_t alignments[] = {1, 512, 1024, 2048, 4096};

	/* For SCSI generic devices the alignment is not really used.		/* For SCSI generic devices the alignment is not really used.
	With buffered I/O, we don't have any restrictions. */		With buffered I/O, we don't have any restrictions. */
	@@ -349,25 +350,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
	}		}
	#endif		#endif

	/* If we could not get the sizes so far, we can only guess them */		/*
	if (!s->buf_align) {		* If we could not get the sizes so far, we can only guess them. First try
			* to detect request alignment, since it is more likely to succeed. Then
			* try to detect buf_align, which cannot be detected in some cases (e.g.
			* Gluster). If buf_align cannot be detected, we fallback to the value of
			* request_alignment.
			*/

			if (!bs->bl.request_alignment) {
			int i;
	size_t align;		size_t align;
	buf = qemu_memalign(max_align, 2 * max_align);		buf = qemu_memalign(max_align, max_align);
	for (align = 512; align <= max_align; align <<= 1) {		for (i = 0; i < ARRAY_SIZE(alignments); i++) {
	if (raw_is_io_aligned(fd, buf + align, max_align)) {		align = alignments[i];
	s->buf_align = align;		if (raw_is_io_aligned(fd, buf, align)) {
			/* Fallback to safe value. */
			bs->bl.request_alignment = (align != 1) ? align : max_align;
	break;		break;
	}		}
	}		}
	qemu_vfree(buf);		qemu_vfree(buf);
	}		}

	if (!bs->bl.request_alignment) {		if (!s->buf_align) {
			int i;
	size_t align;		size_t align;
	buf = qemu_memalign(s->buf_align, max_align);		buf = qemu_memalign(max_align, 2 * max_align);
	for (align = 512; align <= max_align; align <<= 1) {		for (i = 0; i < ARRAY_SIZE(alignments); i++) {
	if (raw_is_io_aligned(fd, buf, align)) {		align = alignments[i];
	bs->bl.request_alignment = align;		if (raw_is_io_aligned(fd, buf + align, max_align)) {
			/* Fallback to request_alignment. */
			s->buf_align = (align != 1) ? align : bs->bl.request_alignment;
	break;		break;
	}		}
	}		}