Commit df1acc85 authored by Mel Gorman's avatar Mel Gorman Committed by Linus Torvalds

mm/page_alloc: avoid conflating IRQs disabled with zone->lock

Historically when freeing pages, free_one_page() assumed that callers had
IRQs disabled and that zone->lock could be acquired with a plain spin_lock().
This blurs the scope of what local_lock_irq is protecting versus what
zone->lock is protecting, in free_unref_page_list() in particular.

This patch uses spin_lock_irqsave() for the zone->lock in free_one_page()
instead of relying on callers to have disabled IRQs.
free_unref_page_commit() is changed to deal only with PCP pages protected
by the local lock.  free_unref_page_list() then first frees isolated pages
to the buddy lists with free_one_page() and frees the rest of the pages to
the PCP via free_unref_page_commit().  The end result is that
free_one_page() no longer depends on side effects of local_lock to be
correct.
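
As a minimal sketch of the two locking idioms (illustrative only, not part
of the patch; the function names and the bare spinlock_t parameter are
hypothetical stand-ins for zone->lock):

  #include <linux/spinlock.h>

  /*
   * Old idiom: a plain spin_lock() on zone->lock is only safe if the
   * caller has already disabled IRQs (e.g. as a side effect of
   * local_lock_irqsave() elsewhere).  An IRQ arriving while the lock
   * is held could otherwise re-enter the allocator and deadlock on
   * the same lock.
   */
  static void buddy_free_old_sketch(spinlock_t *lock)
  {
  	spin_lock(lock);	/* caller must have IRQs off */
  	/* ... free page to the buddy lists ... */
  	spin_unlock(lock);
  }

  /*
   * New idiom: IRQ state is saved and restored around the lock
   * itself, so the function is correct regardless of what its
   * callers did with IRQs or the local lock.
   */
  static void buddy_free_new_sketch(spinlock_t *lock)
  {
  	unsigned long flags;

  	spin_lock_irqsave(lock, flags);
  	/* ... free page to the buddy lists ... */
  	spin_unlock_irqrestore(lock, flags);
  }

With the self-contained form, the local lock in free_unref_page_list()
only needs to cover the PCP manipulation, which is the split the hunks
below implement.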

Note that this may incur a performance penalty while memory hot-remove is
running, but that is not a common operation.

[lkp@intel.com: Ensure CMA pages get added to correct pcp list]

Link: https://lkml.kernel.org/r/20210512095458.30632-9-mgorman@techsingularity.net


Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 56f0e661
+49 −26
@@ -1501,13 +1501,15 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype, fpi_t fpi_flags)
 {
-	spin_lock(&zone->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&zone->lock, flags);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
-	spin_unlock(&zone->lock);
+	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -3285,31 +3287,13 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   int migratetype)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
-	int migratetype;
 
-	migratetype = get_pcppage_migratetype(page);
 	__count_vm_event(PGFREE);
-
-	/*
-	 * We only track unmovable, reclaimable and movable on pcp lists.
-	 * Free ISOLATE pages back to the allocator because they are being
-	 * offlined but treat HIGHATOMIC as movable pages so we can get those
-	 * areas back if necessary. Otherwise, we may have to free
-	 * excessively into the page allocator
-	 */
-	if (migratetype >= MIGRATE_PCPTYPES) {
-		if (unlikely(is_migrate_isolate(migratetype))) {
-			free_one_page(zone, page, pfn, 0, migratetype,
-				      FPI_NONE);
-			return;
-		}
-		migratetype = MIGRATE_MOVABLE;
-	}
-
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
@@ -3324,12 +3308,29 @@ void free_unref_page(struct page *page)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	int migratetype;
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
+	/*
+	 * We only track unmovable, reclaimable and movable on pcp lists.
+	 * Place ISOLATE pages on the isolated list because they are being
+	 * offlined but treat HIGHATOMIC as movable pages so we can get those
+	 * areas back if necessary. Otherwise, we may have to free
+	 * excessively into the page allocator
+	 */
+	migratetype = get_pcppage_migratetype(page);
+	if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+		if (unlikely(is_migrate_isolate(migratetype))) {
+			free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+			return;
+		}
+		migratetype = MIGRATE_MOVABLE;
+	}
+
 	local_lock_irqsave(&pagesets.lock, flags);
-	free_unref_page_commit(page, pfn);
+	free_unref_page_commit(page, pfn, migratetype);
 	local_unlock_irqrestore(&pagesets.lock, flags);
 }

@@ -3341,22 +3342,44 @@ void free_unref_page_list(struct list_head *list)
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
+	int migratetype;
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_to_pfn(page);
 		if (!free_unref_page_prepare(page, pfn))
 			list_del(&page->lru);
+
+		/*
+		 * Free isolated pages directly to the allocator, see
+		 * comment in free_unref_page.
+		 */
+		migratetype = get_pcppage_migratetype(page);
+		if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+			if (unlikely(is_migrate_isolate(migratetype))) {
+				list_del(&page->lru);
+				free_one_page(page_zone(page), page, pfn, 0,
+							migratetype, FPI_NONE);
+				continue;
+			}
+
+			/*
+			 * Non-isolated types over MIGRATE_PCPTYPES get added
+			 * to the MIGRATE_MOVABLE pcp list.
+			 */
+			set_pcppage_migratetype(page, MIGRATE_MOVABLE);
+		}
+
 		set_page_private(page, pfn);
 	}
 
 	local_lock_irqsave(&pagesets.lock, flags);
 	list_for_each_entry_safe(page, next, list, lru) {
-		unsigned long pfn = page_private(page);
-
+		pfn = page_private(page);
 		set_page_private(page, 0);
+		migratetype = get_pcppage_migratetype(page);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		free_unref_page_commit(page, pfn, migratetype);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get