Commit e4d77700 authored by Roman Gushchin, committed by Dennis Zhou

percpu: optimize locking in pcpu_balance_workfn()



pcpu_balance_workfn() unconditionally calls pcpu_balance_free(),
pcpu_reclaim_populated(), pcpu_balance_populated() and
pcpu_balance_free() again.

Each call to pcpu_balance_free() and pcpu_reclaim_populated() causes
at least one acquisition of the pcpu_lock. So even if the balancing
was scheduled because of a failed atomic allocation, the pcpu_lock
will be acquired at least four times. This obviously increases
contention on the pcpu_lock.

To optimize the scheme, grab the pcpu_lock at the upper level
(in pcpu_balance_workfn()) and keep it held for the whole duration
of the scheduled work, dropping it only to perform slow operations
such as chunk (de)population and the creation of new chunks.
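
As a rough illustration (not the actual mm/percpu.c code), the
resulting scheme looks like the sketch below. balance_helper() and
balance_workfn() are hypothetical stand-ins for pcpu_balance_free(),
pcpu_reclaim_populated(), pcpu_balance_populated() and
pcpu_balance_workfn(); pcpu_lock, pcpu_alloc_mutex,
lockdep_assert_held(), spin_lock_irq()/spin_unlock_irq() and
cond_resched() are the real primitives used by the patch.

	static void balance_helper(void)
	{
		/* the caller is expected to hold pcpu_lock */
		lockdep_assert_held(&pcpu_lock);

		/* fast bookkeeping and list scanning stay under the lock */

		/* drop the lock only around slow, possibly sleeping work,
		 * e.g. chunk (de)population or creating a new chunk */
		spin_unlock_irq(&pcpu_lock);
		/* ... slow, possibly sleeping work happens here ... */
		cond_resched();
		spin_lock_irq(&pcpu_lock);

		/* return with pcpu_lock held again, as the caller expects */
	}

	static void balance_workfn(struct work_struct *work)
	{
		/* take the lock once for the whole balancing pass */
		mutex_lock(&pcpu_alloc_mutex);
		spin_lock_irq(&pcpu_lock);

		balance_helper();
		/* ... the remaining helpers run under the same contract ... */

		spin_unlock_irq(&pcpu_lock);
		mutex_unlock(&pcpu_alloc_mutex);
	}

When there is nothing to free or populate, a balancing pass now
touches the pcpu_lock only once instead of paying one acquisition
per helper.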

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
parent 4829c791
mm/percpu.c +29 −12
@@ -1980,6 +1980,9 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 * If empty_only is %false, reclaim all fully free chunks regardless of the
 * number of populated pages.  Otherwise, only reclaim chunks that have no
 * populated pages.
 *
 * CONTEXT:
 * pcpu_lock (can be dropped temporarily)
 */
static void pcpu_balance_free(bool empty_only)
{
@@ -1987,12 +1990,12 @@ static void pcpu_balance_free(bool empty_only)
	struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot];
	struct pcpu_chunk *chunk, *next;

	lockdep_assert_held(&pcpu_lock);

	/*
	 * There's no reason to keep around multiple unused chunks and VM
	 * areas can be scarce.  Destroy all free chunks except for one.
	 */
	spin_lock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, free_head, list) {
		WARN_ON(chunk->immutable);

@@ -2004,8 +2007,10 @@ static void pcpu_balance_free(bool empty_only)
			list_move(&chunk->list, &to_free);
	}

	spin_unlock_irq(&pcpu_lock);
	if (list_empty(&to_free))
		return;

	spin_unlock_irq(&pcpu_lock);
	list_for_each_entry_safe(chunk, next, &to_free, list) {
		unsigned int rs, re;

@@ -2019,6 +2024,7 @@ static void pcpu_balance_free(bool empty_only)
		pcpu_destroy_chunk(chunk);
		cond_resched();
	}
	spin_lock_irq(&pcpu_lock);
}

/**
@@ -2029,6 +2035,9 @@ static void pcpu_balance_free(bool empty_only)
 * OOM killer to be triggered.  We should avoid doing so until an actual
 * allocation causes the failure as it is possible that requests can be
 * serviced from already backed regions.
 *
 * CONTEXT:
 * pcpu_lock (can be dropped temporarily)
 */
static void pcpu_balance_populated(void)
{
@@ -2037,6 +2046,8 @@ static void pcpu_balance_populated(void)
	struct pcpu_chunk *chunk;
	int slot, nr_to_pop, ret;

	lockdep_assert_held(&pcpu_lock);

	/*
	 * Ensure there are certain number of free populated pages for
	 * atomic allocs.  Fill up from the most packed so that atomic
@@ -2064,13 +2075,11 @@ static void pcpu_balance_populated(void)
		if (!nr_to_pop)
			break;

		spin_lock_irq(&pcpu_lock);
		list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
			nr_unpop = chunk->nr_pages - chunk->nr_populated;
			if (nr_unpop)
				break;
		}
		spin_unlock_irq(&pcpu_lock);

		if (!nr_unpop)
			continue;
@@ -2080,12 +2089,13 @@ static void pcpu_balance_populated(void)
					     chunk->nr_pages) {
			int nr = min_t(int, re - rs, nr_to_pop);

			spin_unlock_irq(&pcpu_lock);
			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
			cond_resched();
			spin_lock_irq(&pcpu_lock);
			if (!ret) {
				nr_to_pop -= nr;
				spin_lock_irq(&pcpu_lock);
				pcpu_chunk_populated(chunk, rs, rs + nr);
				spin_unlock_irq(&pcpu_lock);
			} else {
				nr_to_pop = 0;
			}
@@ -2097,11 +2107,12 @@ static void pcpu_balance_populated(void)

	if (nr_to_pop) {
		/* ran out of chunks to populate, create a new one and retry */
		spin_unlock_irq(&pcpu_lock);
		chunk = pcpu_create_chunk(gfp);
		if (chunk) {
		cond_resched();
		spin_lock_irq(&pcpu_lock);
		if (chunk) {
			pcpu_chunk_relocate(chunk, -1);
			spin_unlock_irq(&pcpu_lock);
			goto retry_pop;
		}
	}
@@ -2117,6 +2128,10 @@ static void pcpu_balance_populated(void)
 * populated pages threshold, reintegrate the chunk if it has empty free pages.
 * Each chunk is scanned in the reverse order to keep populated pages close to
 * the beginning of the chunk.
 *
 * CONTEXT:
 * pcpu_lock (can be dropped temporarily)
 *
 */
static void pcpu_reclaim_populated(void)
{
@@ -2124,7 +2139,7 @@ static void pcpu_reclaim_populated(void)
	struct pcpu_block_md *block;
	int i, end;

	spin_lock_irq(&pcpu_lock);
	lockdep_assert_held(&pcpu_lock);

restart:
	/*
@@ -2190,8 +2205,6 @@ static void pcpu_reclaim_populated(void)
			list_move(&chunk->list,
				  &pcpu_chunk_lists[pcpu_sidelined_slot]);
	}

	spin_unlock_irq(&pcpu_lock);
}

/**
@@ -2212,10 +2225,14 @@ static void pcpu_balance_workfn(struct work_struct *work)
	 * appropriate.
	 */
	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irq(&pcpu_lock);

	pcpu_balance_free(false);
	pcpu_reclaim_populated();
	pcpu_balance_populated();
	pcpu_balance_free(true);

	spin_unlock_irq(&pcpu_lock);
	mutex_unlock(&pcpu_alloc_mutex);
}
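
A side effect of dropping pcpu_lock inside the helpers is that the
chunk lists may change while the lock is not held, so the helpers
re-scan rather than reuse state cached before the unlock (see the
retry_pop and restart labels in the context above). The unlock
window around a sleeping operation, as used in
pcpu_balance_populated(), follows this shape (sketch only, variables
as in the hunk above):

	spin_unlock_irq(&pcpu_lock);
	ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);	/* may sleep */
	cond_resched();
	spin_lock_irq(&pcpu_lock);
	/* state observed before the unlock may be stale here */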