Commit b7795074 authored by Helge Deller

parisc: Optimize per-pagetable spinlocks



On parisc, a spinlock is stored in the page that follows the pgd and
protects against parallel accesses to the pgd. That is why one additional
page (PGD_ALLOC_ORDER) is allocated for the pgd.

Matthew Wilcox suggested that we should instead use a pointer in
struct page for this spinlock, and he noted that the comments for the
PGD_ORDER and PMD_ORDER defines were wrong.

Both suggestions are addressed with this patch. Instead of using its own
spinlock to protect the pgd, the code now switches to the existing
page_table_lock.  Additionally, besides loading the pgd into cr25 in
switch_mm_irqs_off(), the physical address of this lock is loaded into
cr28 (tr4), which avoids implementing a complicated lookup in assembly
for this lock in the TLB fault handlers.
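
To make the mechanism concrete, here is a rough, hypothetical C rendition
of what the fault-handler locking amounts to when CONFIG_TLB_PTLOCK is
enabled. The real code is hand-written assembly in the TLB handlers; the
function name below is made up for illustration:

#include <asm/ldcw.h>		/* __ldcw() */
#include <asm/special_insns.h>	/* mfctl() */
#include <asm/processor.h>	/* cpu_relax() */

/* Sketch only: cr28 holds the *physical* address of the lock word,
 * since the fault handler runs with address translation disabled. */
static void tlb_fault_lock_sketch(void)
{
	volatile unsigned int *lock_word =
		(volatile unsigned int *)mfctl(28);

	/* ldcw returns 0 while another CPU holds the lock */
	while (__ldcw(lock_word) == 0)
		while (*lock_word == 0)
			cpu_relax();

	/* ... update the page table and insert the TLB entry ... */

	*lock_word = 1;	/* 1 is the unlocked value on parisc */
}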

The existing hybrid L2/L3 page table scheme (where the pmd was allocated
adjacent to the pgd) has been dropped with this patch. As the pgtable.h
hunk below shows, this also shrinks the pgd allocation considerably: on
3-level kernels from eight pages (PGD_ALLOC_ORDER = 2 + 1) down to a
single page (PGD_ORDER = 0), and on 2-level kernels from four pages to two.

Remove the locking in set_pte() and the huge-page pte functions too;
it triggered a spinlock recursion on 32-bit machines and seems unnecessary.

Suggested-by: Matthew Wilcox <willy@infradead.org>
Fixes: b37d1c18 ("parisc: Use per-pagetable spinlock")
Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
parent ae3c4761
arch/parisc/Kconfig  +10 −0
@@ -312,6 +312,16 @@ config IRQSTACKS
 	  for handling hard and soft interrupts.  This can help avoid
 	  overflowing the process kernel stacks.
 
+config TLB_PTLOCK
+	bool "Use page table locks in TLB fault handler"
+	depends on SMP
+	default n
+	help
+	  Select this option to enable page table locking in the TLB
+	  fault handler. This ensures that page table entries are
+	  updated consistently on SMP machines at the expense of some
+	  loss in performance.
+
 config HOTPLUG_CPU
 	bool
 	default y if SMP
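
As a usage note (not part of the patch): since the option depends on SMP
and defaults to n, testing it requires a config fragment along the lines of:

CONFIG_SMP=y
CONFIG_TLB_PTLOCK=y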
arch/parisc/include/asm/mmu_context.h  +7 −0
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/atomic.h>
+#include <linux/spinlock.h>
 #include <asm-generic/mm_hooks.h>
 
 /* on PA-RISC, we actually have enough contexts to justify an allocator
@@ -50,6 +51,12 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
 		struct mm_struct *next, struct task_struct *tsk)
 {
 	if (prev != next) {
+#ifdef CONFIG_TLB_PTLOCK
+		/* put physical address of page_table_lock in cr28 (tr4)
+		   for TLB faults */
+		spinlock_t *pgd_lock = &next->page_table_lock;
+		mtctl(__pa(__ldcw_align(&pgd_lock->rlock.raw_lock)), 28);
+#endif
 		mtctl(__pa(next->pgd), 25);
 		load_context(next->context);
 	}
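
A side note on the mtctl() line above: the value loaded into cr28 is not
the address of the spinlock_t itself. PA-RISC's ldcw instruction
traditionally requires a 16-byte-aligned operand (relaxed on PA 2.0 with
ldcw,co), so arch_spinlock_t embeds several words and __ldcw_align()
selects the aligned one. A hypothetical helper (not in the patch) showing
the computation:

#include <linux/mm_types.h>	/* struct mm_struct */
#include <linux/spinlock.h>
#include <asm/ldcw.h>		/* __ldcw_align() */
#include <asm/page.h>		/* __pa() */

/* Illustration only: physical address of the aligned lock word,
 * i.e. what switch_mm_irqs_off() puts into cr28 (tr4). */
static inline unsigned long pgd_lock_paddr(struct mm_struct *mm)
{
	spinlock_t *pgd_lock = &mm->page_table_lock;

	return __pa(__ldcw_align(&pgd_lock->rlock.raw_lock));
}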
arch/parisc/include/asm/page.h  +1 −1
@@ -112,7 +112,7 @@ extern int npmem_ranges;
 #else
 #define BITS_PER_PTE_ENTRY	2
 #define BITS_PER_PMD_ENTRY	2
-#define BITS_PER_PGD_ENTRY	BITS_PER_PMD_ENTRY
+#define BITS_PER_PGD_ENTRY	2
 #endif
 #define PGD_ENTRY_SIZE	(1UL << BITS_PER_PGD_ENTRY)
 #define PMD_ENTRY_SIZE	(1UL << BITS_PER_PMD_ENTRY)
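
For the record, this hunk is a cleanup rather than a behavioral change:
BITS_PER_PMD_ENTRY is also 2 in this branch, so PGD_ENTRY_SIZE stays
1UL << 2 = 4 bytes; the new form merely stops implying that the pgd
entry size depends on the pmd entry size.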
arch/parisc/include/asm/pgalloc.h  +18 −58
@@ -15,47 +15,23 @@
 #define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
 
-/* Allocate the top level pgd (page directory)
- *
- * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
- * allocate the first pmd adjacent to the pgd.  This means that we can
- * subtract a constant offset to get to it.  The pmd and pgd sizes are
- * arranged so that a single pmd covers 4GB (giving a full 64-bit
- * process access to 8TB) so our lookups are effectively L2 for the
- * first 4GB of the kernel (i.e. for all ILP32 processes and all the
- * kernel for machines with under 4GB of memory) */
+/* Allocate the top level pgd (page directory) */
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL,
-					       PGD_ALLOC_ORDER);
-	pgd_t *actual_pgd = pgd;
+	pgd_t *pgd;
 
-	if (likely(pgd != NULL)) {
-		memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
-#if CONFIG_PGTABLE_LEVELS == 3
-		actual_pgd += PTRS_PER_PGD;
-		/* Populate first pmd with allocated memory.  We mark it
-		 * with PxD_FLAG_ATTACHED as a signal to the system that this
-		 * pmd entry may not be cleared. */
-		set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
-				        PxD_FLAG_VALID |
-					PxD_FLAG_ATTACHED)
-			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
-		/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
-		 * a signal that this pmd may not be freed */
-		set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
-#endif
-	}
-	spin_lock_init(pgd_spinlock(actual_pgd));
-	return actual_pgd;
+	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
+	if (unlikely(pgd == NULL))
+		return NULL;
+
+	memset(pgd, 0, PAGE_SIZE << PGD_ORDER);
+
+	return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-#if CONFIG_PGTABLE_LEVELS == 3
-	pgd -= PTRS_PER_PGD;
-#endif
-	free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
+	free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
 #if CONFIG_PGTABLE_LEVELS == 3
@@ -70,39 +46,23 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)

 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	return (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
+	pmd_t *pmd;
+
+	pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
+	if (likely(pmd))
+		memset ((void *)pmd, 0, PAGE_SIZE << PMD_ORDER);
+	return pmd;
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
-		/*
-		 * This is the permanent pmd attached to the pgd;
-		 * cannot free it.
-		 * Increment the counter to compensate for the decrement
-		 * done by generic mm code.
-		 */
-		mm_inc_nr_pmds(mm);
-		return;
-	}
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
-
 #endif
 
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 {
-#if CONFIG_PGTABLE_LEVELS == 3
-	/* preserve the gateway marker if this is the beginning of
-	 * the permanent pmd */
-	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		set_pmd(pmd, __pmd((PxD_FLAG_PRESENT |
-				PxD_FLAG_VALID |
-				PxD_FLAG_ATTACHED)
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
-	else
-#endif
 	set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
 		+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 }
arch/parisc/include/asm/pgtable.h  +18 −71
@@ -23,8 +23,6 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-static inline spinlock_t *pgd_spinlock(pgd_t *);
-
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
  * memory.  For the return value to be meaningful, ADDR must be >=
@@ -42,12 +40,8 @@ static inline spinlock_t *pgd_spinlock(pgd_t *);

 /* This is for the serialization of PxTLB broadcasts. At least on the N class
  * systems, only one PxTLB inter processor broadcast can be active at any one
- * time on the Merced bus.
-
- * PTE updates are protected by locks in the PMD.
- */
+ * time on the Merced bus. */
 extern spinlock_t pa_tlb_flush_lock;
-extern spinlock_t pa_swapper_pg_lock;
 #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
 extern int pa_serialize_tlb_flushes;
 #else
@@ -89,15 +83,13 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define set_pte(pteptr, pteval)			\
 	do {					\
 		*(pteptr) = (pteval);		\
+		barrier();			\
 	} while(0)
 
-#define set_pte_at(mm, addr, ptep, pteval)			\
+#define set_pte_at(mm, addr, pteptr, pteval)	\
 	do {					\
-		unsigned long flags;				\
-		spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
-		set_pte(ptep, pteval);				\
+		*(pteptr) = (pteval);		\
 		purge_tlb_entries(mm, addr);	\
-		spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
 	} while (0)
 
 #endif /* !__ASSEMBLY__ */
@@ -120,12 +112,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)
 
 #if CONFIG_PGTABLE_LEVELS == 3
-#define PGD_ORDER	1 /* Number of pages per pgd */
-#define PMD_ORDER	1 /* Number of pages per pmd */
-#define PGD_ALLOC_ORDER	(2 + 1) /* first pgd contains pmd */
+#define PMD_ORDER	1
+#define PGD_ORDER	0
 #else
-#define PGD_ORDER	1 /* Number of pages per pgd */
-#define PGD_ALLOC_ORDER	(PGD_ORDER + 1)
+#define PGD_ORDER	1
 #endif
 
 /* Definitions for 3rd level (we use PLD here for Page Lower directory
@@ -240,11 +230,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
  * able to effectively address 40/42/44-bits of physical address space
  * depending on 4k/16k/64k PAGE_SIZE */
 #define _PxD_PRESENT_BIT   31
-#define _PxD_ATTACHED_BIT  30
-#define _PxD_VALID_BIT     29
+#define _PxD_VALID_BIT     30
 
 #define PxD_FLAG_PRESENT  (1 << xlate_pabit(_PxD_PRESENT_BIT))
-#define PxD_FLAG_ATTACHED (1 << xlate_pabit(_PxD_ATTACHED_BIT))
 #define PxD_FLAG_VALID    (1 << xlate_pabit(_PxD_VALID_BIT))
 #define PxD_FLAG_MASK     (0xf)
 #define PxD_FLAG_SHIFT    (4)
@@ -326,23 +314,10 @@ extern unsigned long *empty_zero_page;
 #define pgd_flag(x)	(pgd_val(x) & PxD_FLAG_MASK)
 #define pgd_address(x)	((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
 
-#if CONFIG_PGTABLE_LEVELS == 3
-/* The first entry of the permanent pmd is not there if it contains
- * the gateway marker */
-#define pmd_none(x)	(!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED)
-#else
 #define pmd_none(x)	(!pmd_val(x))
-#endif
 #define pmd_bad(x)	(!(pmd_flag(x) & PxD_FLAG_VALID))
 #define pmd_present(x)	(pmd_flag(x) & PxD_FLAG_PRESENT)
 static inline void pmd_clear(pmd_t *pmd) {
-#if CONFIG_PGTABLE_LEVELS == 3
-	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		/* This is the entry pointing to the permanent pmd
-		 * attached to the pgd; cannot clear it */
-		set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED));
-	else
-#endif
 		set_pmd(pmd,  __pmd(0));
 }

@@ -358,12 +333,6 @@ static inline void pmd_clear(pmd_t *pmd) {
 #define pud_bad(x)      (!(pud_flag(x) & PxD_FLAG_VALID))
 #define pud_present(x)  (pud_flag(x) & PxD_FLAG_PRESENT)
 static inline void pud_clear(pud_t *pud) {
-#if CONFIG_PGTABLE_LEVELS == 3
-	if(pud_flag(*pud) & PxD_FLAG_ATTACHED)
-		/* This is the permanent pmd attached to the pud; cannot
-		 * free it */
-		return;
-#endif
 	set_pud(pud, __pud(0));
 }
 #endif
@@ -456,32 +425,18 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-
-static inline spinlock_t *pgd_spinlock(pgd_t *pgd)
-{
-	if (unlikely(pgd == swapper_pg_dir))
-		return &pa_swapper_pg_lock;
-	return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1)));
-}
-
-
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
 	pte_t pte;
-	unsigned long flags;
 
 	if (!pte_young(*ptep))
 		return 0;
 
-	spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
 		return 0;
 	}
-	set_pte(ptep, pte_mkold(pte));
-	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
+	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
 	return 1;
 }

@@ -489,24 +444,16 @@ struct mm_struct;
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_t old_pte;
-	unsigned long flags;
 
-	spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
 	old_pte = *ptep;
-	set_pte(ptep, __pte(0));
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
+	set_pte_at(mm, addr, ptep, __pte(0));
 
 	return old_pte;
 }
 
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	unsigned long flags;
-	spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
-	set_pte(ptep, pte_wrprotect(*ptep));
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
+	set_pte_at(mm, addr, ptep, pte_wrprotect(*ptep));
 }
 
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))