Commit 71217df3 authored by Paolo Valente's avatar Paolo Valente Committed by Jens Axboe
Browse files

block, bfq: make waker-queue detection more robust



In the presence of many parallel I/O flows, the detection of waker
bfq_queues suffers from false positives. This commits addresses this
issue by making the filtering of actual wakers more selective. In more
detail, a candidate waker must be found to meet waker requirements
three times before being promoted to actual waker.

Tested-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarPaolo Valente <paolo.valente@linaro.org>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 5a5436b9
Loading
Loading
Loading
Loading
+103 −108
Original line number Diff line number Diff line
@@ -158,7 +158,6 @@ BFQ_BFQQ_FNS(in_large_burst);
BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update);
BFQ_BFQQ_FNS(has_waker);
#undef BFQ_BFQQ_FNS						\

/* Expiration time of sync (0) and async (1) requests, in ns. */
@@ -1905,88 +1904,79 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
	}
}

static void bfq_add_request(struct request *rq)
{
	struct bfq_queue *bfqq = RQ_BFQQ(rq);
	struct bfq_data *bfqd = bfqq->bfqd;
	struct request *next_rq, *prev;
	unsigned int old_wr_coeff = bfqq->wr_coeff;
	bool interactive = false;
	u64 now_ns = ktime_get_ns();

	bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
	bfqq->queued[rq_is_sync(rq)]++;
	bfqd->queued++;

	if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
/*
		 * Detect whether bfqq's I/O seems synchronized with
		 * that of some other queue, i.e., whether bfqq, after
		 * remaining empty, happens to receive new I/O only
		 * right after some I/O request of the other queue has
		 * been completed. We call waker queue the other
		 * queue, and we assume, for simplicity, that bfqq may
		 * have at most one waker queue.
		 *
		 * A remarkable throughput boost can be reached by
		 * unconditionally injecting the I/O of the waker
		 * queue, every time a new bfq_dispatch_request
		 * happens to be invoked while I/O is being plugged
		 * for bfqq.  In addition to boosting throughput, this
		 * unblocks bfqq's I/O, thereby improving bandwidth
		 * and latency for bfqq. Note that these same results
		 * may be achieved with the general injection
		 * mechanism, but less effectively. For details on
		 * this aspect, see the comments on the choice of the
		 * queue for injection in bfq_select_queue().
		 *
		 * Turning back to the detection of a waker queue, a
		 * queue Q is deemed as a waker queue for bfqq if, for
		 * two consecutive times, bfqq happens to become non
		 * empty right after a request of Q has been
		 * completed. In particular, on the first time, Q is
		 * tentatively set as a candidate waker queue, while
		 * on the second time, the flag
		 * bfq_bfqq_has_waker(bfqq) is set to confirm that Q
		 * is a waker queue for bfqq. These detection steps
		 * are performed only if bfqq has a long think time,
		 * so as to make it more likely that bfqq's I/O is
		 * actually being blocked by a synchronization. This
		 * last filter, plus the above two-times requirement,
		 * make false positives less likely.
 * Detect whether bfqq's I/O seems synchronized with that of some
 * other queue, i.e., whether bfqq, after remaining empty, happens to
 * receive new I/O only right after some I/O request of the other
 * queue has been completed. We call waker queue the other queue, and
 * we assume, for simplicity, that bfqq may have at most one waker
 * queue.
 *
 * A remarkable throughput boost can be reached by unconditionally
 * injecting the I/O of the waker queue, every time a new
 * bfq_dispatch_request happens to be invoked while I/O is being
 * plugged for bfqq.  In addition to boosting throughput, this
 * unblocks bfqq's I/O, thereby improving bandwidth and latency for
 * bfqq. Note that these same results may be achieved with the general
 * injection mechanism, but less effectively. For details on this
 * aspect, see the comments on the choice of the queue for injection
 * in bfq_select_queue().
 *
 * Turning back to the detection of a waker queue, a queue Q is deemed
 * as a waker queue for bfqq if, for three consecutive times, bfqq
 * happens to become non empty right after a request of Q has been
 * completed. In particular, on the first time, Q is tentatively set
 * as a candidate waker queue, while on the third consecutive time
 * that Q is detected, the field waker_bfqq is set to Q, to confirm
 * that Q is a waker queue for bfqq. These detection steps are
 * performed only if bfqq has a long think time, so as to make it more
 * likely that bfqq's I/O is actually being blocked by a
 * synchronization. This last filter, plus the above three-times
 * requirement, make false positives less likely.
 *
 * NOTE
 *
		 * The sooner a waker queue is detected, the sooner
		 * throughput can be boosted by injecting I/O from the
		 * waker queue. Fortunately, detection is likely to be
		 * actually fast, for the following reasons. While
		 * blocked by synchronization, bfqq has a long think
		 * time. This implies that bfqq's inject limit is at
		 * least equal to 1 (see the comments in
		 * bfq_update_inject_limit()). So, thanks to
		 * injection, the waker queue is likely to be served
		 * during the very first I/O-plugging time interval
		 * for bfqq. This triggers the first step of the
		 * detection mechanism. Thanks again to injection, the
		 * candidate waker queue is then likely to be
		 * confirmed no later than during the next
		 * I/O-plugging interval for bfqq.
		 */
		if (bfqd->last_completed_rq_bfqq &&
		    !bfq_bfqq_has_short_ttime(bfqq) &&
		    now_ns - bfqd->last_completion <
		    4 * NSEC_PER_MSEC) {
			if (bfqd->last_completed_rq_bfqq != bfqq &&
			    bfqd->last_completed_rq_bfqq !=
			    bfqq->waker_bfqq) {
				/*
				 * First synchronization detected with
				 * a candidate waker queue, or with a
				 * different candidate waker queue
				 * from the current one.
 * The sooner a waker queue is detected, the sooner throughput can be
 * boosted by injecting I/O from the waker queue. Fortunately,
 * detection is likely to be actually fast, for the following
 * reasons. While blocked by synchronization, bfqq has a long think
 * time. This implies that bfqq's inject limit is at least equal to 1
 * (see the comments in bfq_update_inject_limit()). So, thanks to
 * injection, the waker queue is likely to be served during the very
 * first I/O-plugging time interval for bfqq. This triggers the first
 * step of the detection mechanism. Thanks again to injection, the
 * candidate waker queue is then likely to be confirmed no later than
 * during the next I/O-plugging interval for bfqq.
 *
 * ISSUE
 *
 * On queue merging all waker information is lost.
 */
void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, u64 now_ns)
{
	if (!bfqd->last_completed_rq_bfqq ||
	    bfqd->last_completed_rq_bfqq == bfqq ||
	    bfq_bfqq_has_short_ttime(bfqq) ||
	    now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
	    bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
		return;

	if (bfqd->last_completed_rq_bfqq !=
	    bfqq->tentative_waker_bfqq) {
		/*
		 * First synchronization detected with a
		 * candidate waker queue, or with a different
		 * candidate waker queue from the current one.
		 */
		bfqq->tentative_waker_bfqq =
			bfqd->last_completed_rq_bfqq;
		bfqq->num_waker_detections = 1;
	} else /* Same tentative waker queue detected again */
		bfqq->num_waker_detections++;

	if (bfqq->num_waker_detections == 3) {
		bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
		bfqq->tentative_waker_bfqq = NULL;

		/*
		 * If the waker queue disappears, then
@@ -2012,19 +2002,25 @@ static void bfq_add_request(struct request *rq)
			hlist_del_init(&bfqq->woken_list_node);
		hlist_add_head(&bfqq->woken_list_node,
			       &bfqd->last_completed_rq_bfqq->woken_list);

				bfq_clear_bfqq_has_waker(bfqq);
			} else if (bfqd->last_completed_rq_bfqq ==
				   bfqq->waker_bfqq &&
				   !bfq_bfqq_has_waker(bfqq)) {
				/*
				 * synchronization with waker_bfqq
				 * seen for the second time
				 */
				bfq_mark_bfqq_has_waker(bfqq);
	}
}

static void bfq_add_request(struct request *rq)
{
	struct bfq_queue *bfqq = RQ_BFQQ(rq);
	struct bfq_data *bfqd = bfqq->bfqd;
	struct request *next_rq, *prev;
	unsigned int old_wr_coeff = bfqq->wr_coeff;
	bool interactive = false;
	u64 now_ns = ktime_get_ns();

	bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
	bfqq->queued[rq_is_sync(rq)]++;
	bfqd->queued++;

	if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
		bfq_check_waker(bfqd, bfqq, now_ns);

		/*
		 * Periodically reset inject limit, to make sure that
		 * the latter eventually drops in case workload
@@ -4569,7 +4565,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
		    bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
		    bfq_bfqq_budget_left(async_bfqq))
			bfqq = bfqq->bic->bfqq[0];
		else if (bfq_bfqq_has_waker(bfqq) &&
		else if (bfqq->waker_bfqq &&
			   bfq_bfqq_busy(bfqq->waker_bfqq) &&
			   bfqq->waker_bfqq->next_rq &&
			   bfq_serv_to_charge(bfqq->waker_bfqq->next_rq,
@@ -4973,7 +4969,6 @@ void bfq_put_queue(struct bfq_queue *bfqq)
	hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
				  woken_list_node) {
		item->waker_bfqq = NULL;
		bfq_clear_bfqq_has_waker(item);
		hlist_del_init(&item->woken_list_node);
	}

+5 −2
Original line number Diff line number Diff line
@@ -376,6 +376,11 @@ struct bfq_queue {
	 * bfq_select_queue().
	 */
	struct bfq_queue *waker_bfqq;
	/* pointer to the curr. tentative waker queue, see bfq_check_waker() */
	struct bfq_queue *tentative_waker_bfqq;
	/* number of times the same tentative waker has been detected */
	unsigned int num_waker_detections;

	/* node for woken_list, see below */
	struct hlist_node woken_list_node;
	/*
@@ -776,7 +781,6 @@ enum bfqq_state_flags {
				 */
	BFQQF_coop,		/* bfqq is shared */
	BFQQF_split_coop,	/* shared bfqq will be split */
	BFQQF_has_waker		/* bfqq has a waker queue */
};

#define BFQ_BFQQ_FNS(name)						\
@@ -796,7 +800,6 @@ BFQ_BFQQ_FNS(in_large_burst);
BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update);
BFQ_BFQQ_FNS(has_waker);
#undef BFQ_BFQQ_FNS

/* Expiration reasons. */