Unverified Commit 6b33e607 authored by openeuler-ci-bot, committed by Gitee
Browse files

!1441 workqueue: fix sanity check warning when invoke destroy_workqueue()

Merge Pull Request from: @henryze 
 
https://gitee.com/openeuler/kernel/issues/I7LRJF?from=project-issue

The warning logs are listed below:

WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
  pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
      in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****

It shows that even after drain_workqueue() returns, the barrier work item
is still in flight and the pwq (and a worker) is still busy on it.

The problem is caused by drain_workqueue() not watching flush_work():
~~~
Thread A				Worker
					/* normal work item with linked */
					process_scheduled_works()
destroy_workqueue()			  process_one_work()
  drain_workqueue()			    /* run normal work item */
				 /--	    pwq_dec_nr_in_flight()
    flush_workqueue()	    <---/
		/* the last normal work item is done */
  sanity_check				  process_one_work()
				       /--  raw_spin_unlock_irq(&pool->lock)
    raw_spin_lock_irq(&pool->lock)  <-/     /* maybe preempt */
    *WARNING*				    wq_barrier_func()
					    /* maybe preempt by cond_resched() */
~~~
So the solution is to make drain_workqueue() watch for flush_work() which
means making flush_workqueue() watch for flush_work().

Due to historical convenience, we used WORK_NO_COLOR for barrier work items
queued by flush_work().  The color serves two purposes:
	- excluding the work item from flushing
	- excluding the work item from nr_active accounting

Only the second purpose is obligatory.  So the plan is to mark barrier
work items inactive without using WORK_NO_COLOR in patch4 so that we can
assign a flushing color to them in patch5.

Patch1-3 are preparation, and patch6 is a cleanup.

Test steps:
insmod wq_issue.ko
rmmod wq_issue

~~~
# insmod wq_issue.ko
[   14.061088] wq_issue: loading out-of-tree module taints kernel.
[   14.070509] wq_test_init
[   14.072112] wq_test_init done
[   14.074035] insmod (92) used greatest stack depth: 13840 bytes left
/tmp # rmmod wq_issue.ko
[   24.489421] wq_test_exit done
/tmp # uname -a
Linux (none) 5.10.0+ #10 SMP Wed Jul 26 15:48:31 CST 2023 x86_64 GNU/Linux
~~~ 
 
Link: https://gitee.com/openeuler/kernel/pulls/1441

 

Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents 0c40bf67 13b0c50f
Loading
Loading
Loading
Loading
+4 −9
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ void delayed_work_timer_fn(struct timer_list *t);

enum {
	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
	WORK_STRUCT_INACTIVE_BIT= 1,	/* work item is inactive */
	WORK_STRUCT_PWQ_BIT	= 2,	/* data points to pwq */
	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -43,7 +43,7 @@ enum {
	WORK_STRUCT_COLOR_BITS	= 4,

	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
	WORK_STRUCT_INACTIVE	= 1 << WORK_STRUCT_INACTIVE_BIT,
	WORK_STRUCT_PWQ		= 1 << WORK_STRUCT_PWQ_BIT,
	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -52,19 +52,14 @@ enum {
	WORK_STRUCT_STATIC	= 0,
#endif

	/*
	 * The last color is no color used for works which don't
	 * participate in workqueue flushing.
	 */
	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
	WORK_NO_COLOR		= WORK_NR_COLORS,
	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS),

	/* not bound to any CPU, prefer the local CPU */
	WORK_CPU_UNBOUND	= NR_CPUS,

	/*
	 * Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
	 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
	 * This makes pwqs aligned to 256 bytes and allows 16 workqueue
	 * flush colors.
	 */
	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
+82 −51
Original line number Diff line number Diff line
@@ -206,9 +206,26 @@ struct pool_workqueue {
	int			refcnt;		/* L: reference count */
	int			nr_in_flight[WORK_NR_COLORS];
						/* L: nr of in_flight works */

	/*
	 * nr_active management and WORK_STRUCT_INACTIVE:
	 *
	 * When pwq->nr_active >= max_active, new work item is queued to
	 * pwq->inactive_works instead of pool->worklist and marked with
	 * WORK_STRUCT_INACTIVE.
	 *
	 * All work items marked with WORK_STRUCT_INACTIVE do not participate
	 * in pwq->nr_active and all work items in pwq->inactive_works are
	 * marked with WORK_STRUCT_INACTIVE.  But not all WORK_STRUCT_INACTIVE
	 * work items are in pwq->inactive_works.  Some of them are ready to
	 * run in pool->worklist or worker->scheduled.  Those work itmes are
	 * only struct wq_barrier which is used for flush_work() and should
	 * not participate in pwq->nr_active.  For non-barrier work item, it
	 * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
	 */
	int			nr_active;	/* L: nr of active works */
	int			max_active;	/* L: max active works */
	struct list_head	delayed_works;	/* L: delayed works */
	struct list_head	inactive_works;	/* L: inactive works */
	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
	struct list_head	mayday_node;	/* MD: node on wq->maydays */

@@ -580,9 +597,9 @@ static unsigned int work_color_to_flags(int color)
	return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(struct work_struct *work)
static int get_work_color(unsigned long work_data)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
	return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

@@ -1146,7 +1163,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
	}
}

static void pwq_activate_delayed_work(struct work_struct *work)
static void pwq_activate_inactive_work(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);

@@ -1154,22 +1171,22 @@ static void pwq_activate_delayed_work(struct work_struct *work)
	if (list_empty(&pwq->pool->worklist))
		pwq->pool->watchdog_ts = jiffies;
	move_linked_works(work, &pwq->pool->worklist, NULL);
	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
	__clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
	pwq->nr_active++;
}

static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
{
	struct work_struct *work = list_first_entry(&pwq->delayed_works,
	struct work_struct *work = list_first_entry(&pwq->inactive_works,
						    struct work_struct, entry);

	pwq_activate_delayed_work(work);
	pwq_activate_inactive_work(work);
}

/**
 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
 * @pwq: pwq of interest
 * @color: color of work which left the queue
 * @work_data: work_data of work which left the queue
 *
 * A work either has completed or is removed from pending queue,
 * decrement nr_in_flight of its pwq and handle workqueue flushing.
@@ -1177,20 +1194,20 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
{
	/* uncolored work items don't participate in flushing or nr_active */
	if (color == WORK_NO_COLOR)
		goto out_put;

	pwq->nr_in_flight[color]--;
	int color = get_work_color(work_data);

	if (!(work_data & WORK_STRUCT_INACTIVE)) {
		pwq->nr_active--;
	if (!list_empty(&pwq->delayed_works)) {
		/* one down, submit a delayed one */
		if (!list_empty(&pwq->inactive_works)) {
			/* one down, submit an inactive one */
			if (pwq->nr_active < pwq->max_active)
			pwq_activate_first_delayed(pwq);
				pwq_activate_first_inactive(pwq);
		}
	}

	pwq->nr_in_flight[color]--;

	/* is flush in progress and are we at the flushing tip? */
	if (likely(pwq->flush_color != color))
@@ -1291,17 +1308,21 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
		debug_work_deactivate(work);

		/*
		 * A delayed work item cannot be grabbed directly because
		 * it might have linked NO_COLOR work items which, if left
		 * on the delayed_list, will confuse pwq->nr_active
		 * A cancelable inactive work item must be in the
		 * pwq->inactive_works since a queued barrier can't be
		 * canceled (see the comments in insert_wq_barrier()).
		 *
		 * An inactive work item cannot be grabbed directly because
		 * it might have linked barrier work items which, if left
		 * on the inactive_works list, will confuse pwq->nr_active
		 * management later on and cause stall.  Make sure the work
		 * item is activated before grabbing.
		 */
		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
			pwq_activate_delayed_work(work);
		if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
			pwq_activate_inactive_work(work);

		list_del_init(&work->entry);
		pwq_dec_nr_in_flight(pwq, get_work_color(work));
		pwq_dec_nr_in_flight(pwq, *work_data_bits(work));

		/* work->data points to pwq iff queued, point to pool */
		set_work_pool_and_keep_pending(work, pool->id);
@@ -1497,8 +1518,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
		if (list_empty(worklist))
			pwq->pool->watchdog_ts = jiffies;
	} else {
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &pwq->delayed_works;
		work_flags |= WORK_STRUCT_INACTIVE;
		worklist = &pwq->inactive_works;
	}

	debug_work_activate(work);
@@ -2177,7 +2198,7 @@ __acquires(&pool->lock)
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct worker_pool *pool = worker->pool;
	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
	int work_color;
	unsigned long work_data;
	struct worker *collision;
#ifdef CONFIG_LOCKDEP
	/*
@@ -2213,7 +2234,8 @@ __acquires(&pool->lock)
	worker->current_work = work;
	worker->current_func = work->func;
	worker->current_pwq = pwq;
	work_color = get_work_color(work);
	work_data = *work_data_bits(work);
	worker->current_color = get_work_color(work_data);

	/*
	 * Record wq name for cmdline and debug reporting, may get
@@ -2319,7 +2341,8 @@ __acquires(&pool->lock)
	worker->current_work = NULL;
	worker->current_func = NULL;
	worker->current_pwq = NULL;
	pwq_dec_nr_in_flight(pwq, work_color);
	worker->current_color = INT_MAX;
	pwq_dec_nr_in_flight(pwq, work_data);
}

/**
@@ -2535,7 +2558,7 @@ static int rescuer_thread(void *__rescuer)
			/*
			 * The above execution of rescued work items could
			 * have created more to rescue through
			 * pwq_activate_first_delayed() or chained
			 * pwq_activate_first_inactive() or chained
			 * queueing.  Let's put @pwq back on mayday list so
			 * that such back-to-back work items, which may be
			 * being used to relieve memory pressure, don't
@@ -2662,8 +2685,9 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
			      struct wq_barrier *barr,
			      struct work_struct *target, struct worker *worker)
{
	unsigned int work_flags = 0;
	unsigned int work_color;
	struct list_head *head;
	unsigned int linked = 0;

	/*
	 * debugobject calls are safe here even with pool->lock locked
@@ -2678,24 +2702,31 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,

	barr->task = current;

	/* The barrier work item does not participate in pwq->nr_active. */
	work_flags |= WORK_STRUCT_INACTIVE;

	/*
	 * If @target is currently being executed, schedule the
	 * barrier to the worker; otherwise, put it after @target.
	 */
	if (worker)
	if (worker) {
		head = worker->scheduled.next;
	else {
		work_color = worker->current_color;
	} else {
		unsigned long *bits = work_data_bits(target);

		head = target->entry.next;
		/* there can already be other linked works, inherit and set */
		linked = *bits & WORK_STRUCT_LINKED;
		work_flags |= *bits & WORK_STRUCT_LINKED;
		work_color = get_work_color(*bits);
		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
	}

	pwq->nr_in_flight[work_color]++;
	work_flags |= work_color_to_flags(work_color);

	debug_work_activate(&barr->work);
	insert_work(pwq, &barr->work, head,
		    work_color_to_flags(WORK_NO_COLOR) | linked);
	insert_work(pwq, &barr->work, head, work_flags);
}

/**
@@ -2961,7 +2992,7 @@ void drain_workqueue(struct workqueue_struct *wq)
		bool drained;

		raw_spin_lock_irq(&pwq->pool->lock);
		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
		drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
		raw_spin_unlock_irq(&pwq->pool->lock);

		if (drained)
@@ -3717,7 +3748,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * workqueue's saved_max_active and activate inactive work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
static void pwq_adjust_max_active(struct pool_workqueue *pwq)
@@ -3746,9 +3777,9 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)

		pwq->max_active = wq->saved_max_active;

		while (!list_empty(&pwq->delayed_works) &&
		while (!list_empty(&pwq->inactive_works) &&
		       pwq->nr_active < pwq->max_active) {
			pwq_activate_first_delayed(pwq);
			pwq_activate_first_inactive(pwq);
			kick = true;
		}

@@ -3779,7 +3810,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
	pwq->wq = wq;
	pwq->flush_color = -1;
	pwq->refcnt = 1;
	INIT_LIST_HEAD(&pwq->delayed_works);
	INIT_LIST_HEAD(&pwq->inactive_works);
	INIT_LIST_HEAD(&pwq->pwqs_node);
	INIT_LIST_HEAD(&pwq->mayday_node);
	INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
@@ -4372,7 +4403,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)

	if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
		return true;
	if (pwq->nr_active || !list_empty(&pwq->delayed_works))
	if (pwq->nr_active || !list_empty(&pwq->inactive_works))
		return true;

	return false;
@@ -4568,7 +4599,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
	else
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

	ret = !list_empty(&pwq->delayed_works);
	ret = !list_empty(&pwq->inactive_works);
	preempt_enable();
	rcu_read_unlock();

@@ -4764,11 +4795,11 @@ static void show_pwq(struct pool_workqueue *pwq)
		pr_cont("\n");
	}

	if (!list_empty(&pwq->delayed_works)) {
	if (!list_empty(&pwq->inactive_works)) {
		bool comma = false;

		pr_info("    delayed:");
		list_for_each_entry(work, &pwq->delayed_works, entry) {
		pr_info("    inactive:");
		list_for_each_entry(work, &pwq->inactive_works, entry) {
			pr_cont_work(comma, work);
			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
		}
@@ -4798,7 +4829,7 @@ void show_workqueue_state(void)
		bool idle = true;

		for_each_pwq(pwq, wq) {
			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
				idle = false;
				break;
			}
@@ -4810,7 +4841,7 @@ void show_workqueue_state(void)

		for_each_pwq(pwq, wq) {
			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
				/*
				 * Defer printing to avoid deadlocks in console
				 * drivers that queue work while holding locks
@@ -5206,7 +5237,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe);
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  After this function returns, all freezable
 * workqueues will queue new works to their delayed_works list instead of
 * workqueues will queue new works to their inactive_works list instead of
 * pool->worklist.
 *
 * CONTEXT:
+2 −1
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@ struct worker {
	struct work_struct	*current_work;	/* L: work being processed */
	work_func_t		current_func;	/* L: current_work's fn */
	struct pool_workqueue	*current_pwq;	/* L: current_work's pwq */
	unsigned int		current_color;	/* L: current_work's color */
	struct list_head	scheduled;	/* L: scheduled works */

	/* 64 bytes boundary on 64bit, 32 on 32bit */