Commit bb5f33c0 authored by Michael Ellerman's avatar Michael Ellerman
Browse files

Merge "Use hugepages to map kernel mem on 8xx" into next

Merge Christophe's large series to use huge pages for the linear
mapping on 8xx.

From his cover letter:

The main purpose of this big series is to:
- reorganise huge page handling to avoid using mm_slices.
- use huge pages to map kernel memory on the 8xx.

The 8xx supports 4 page sizes: 4k, 16k, 512k and 8M.
It uses 2 Level page tables, PGD having 1024 entries, each entry
covering 4M address space. Then each page table has 1024 entries.

Currently, page sizes are managed in PGD entries, implying
the use of mm_slices as pages of different sizes cannot be mixed
in one page table.

The first purpose of this series is to reorganise things so that
standard page tables can also handle 512k pages. This is done by
adding a new _PAGE_HUGE flag which will be copied into the Level 1
entry in the TLB miss handler. That done, we have 2 types of pages:
- PGD entries to regular page tables handling 4k/16k and 512k pages
- PGD entries to hugepd tables handling 8M pages.

There is no need to mix 8M pages with other sizes, because an 8M page
will use more than what a single PGD entry covers.

Then comes the second purpose of this series. Currently, the
8xx has implemented special handling in the TLB miss handlers in order
to transparently map kernel linear address space and the IMMR using
huge pages by building the TLB entries in assembly at the time of the
exception.

As mm_slices is only for user space pages, and also because it would
anyway not be convenient to slice kernel address space, it was not
possible to use huge pages for kernel address space. But after step
one of the series, it is now more flexible to use huge pages.

This series drops all assembly 'just in time' handling of huge pages
and uses huge pages in page tables instead.

Once the above is done, then comes icing on the cake:
- Use huge pages for KASAN shadow mapping
- Allow pinned TLBs with strict kernel rwx
- Allow pinned TLBs with debug pagealloc

Then, last but not least, those modifications for the 8xx allow the
following improvements on book3s/32:
- Mapping KASAN shadow with BATs
- Allowing BATs with debug pagealloc

All this considerably simplifies the TLB miss handlers and associated
initialisation. The overhead of reading page tables is negligible
compared to the reduction of the miss handlers.

While we were touching pte_update(), some cleanup was done
there too.

Tested widely on 8xx and 832x. Boot tested on QEMU MAC99.
parents 82a1b8ed 7974c473
Loading
Loading
Loading
Loading
+11 −51
Original line number Diff line number Diff line
@@ -778,36 +778,12 @@ config THREAD_SHIFT
	  Used to define the stack size. The default is almost always what you
	  want. Only change this if you know what you are doing.

config ETEXT_SHIFT_BOOL
	bool "Set custom etext alignment" if STRICT_KERNEL_RWX && \
					     (PPC_BOOK3S_32 || PPC_8xx)
	depends on ADVANCED_OPTIONS
	help
	  This option allows you to set the kernel end of text alignment. When
	  RAM is mapped by blocks, the alignment needs to fit the size and
	  number of possible blocks. The default should be OK for most configs.

	  Say N here unless you know what you are doing.

config ETEXT_SHIFT
	int "_etext shift" if ETEXT_SHIFT_BOOL
	range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
	range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
	default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
	default 19 if STRICT_KERNEL_RWX && PPC_8xx
	default PPC_PAGE_SHIFT
	help
	  On Book3S 32 (603+), IBATs are used to map kernel text.
	  Smaller is the alignment, greater is the number of necessary IBATs.

	  On 8xx, large pages (512kb or 8M) are used to map kernel linear
	  memory. Aligning to 8M reduces TLB misses as only 8M pages are used
	  in that case.

config DATA_SHIFT_BOOL
	bool "Set custom data alignment" if STRICT_KERNEL_RWX && \
					    (PPC_BOOK3S_32 || PPC_8xx)
	bool "Set custom data alignment"
	depends on ADVANCED_OPTIONS
	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC
	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \
				     (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX))
	help
	  This option allows you to set the kernel data alignment. When
	  RAM is mapped by blocks, the alignment needs to fit the size and
@@ -818,10 +794,13 @@ config DATA_SHIFT_BOOL
config DATA_SHIFT
	int "Data shift" if DATA_SHIFT_BOOL
	default 24 if STRICT_KERNEL_RWX && PPC64
	range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
	range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
	range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32
	range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx
	default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
	default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32
	default 23 if STRICT_KERNEL_RWX && PPC_8xx
	default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA
	default 19 if DEBUG_PAGEALLOC && PPC_8xx
	default PPC_PAGE_SHIFT
	help
	  On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
@@ -829,7 +808,8 @@ config DATA_SHIFT

	  On 8xx, large pages (512kb or 8M) are used to map kernel linear
	  memory. Aligning to 8M reduces TLB misses as only 8M pages are used
	  in that case.
	  in that case. If PIN_TLB is selected, it must be aligned to 8M as
	  8M pages will be pinned.

config FORCE_MAX_ZONEORDER
	int "Maximum zone order"
@@ -1227,26 +1207,6 @@ config TASK_SIZE
	hex "Size of user task space" if TASK_SIZE_BOOL
	default "0x80000000" if PPC_8xx
	default "0xc0000000"

config PIN_TLB
	bool "Pinned Kernel TLBs (860 ONLY)"
	depends on ADVANCED_OPTIONS && PPC_8xx && \
		   !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX

config PIN_TLB_DATA
	bool "Pinned TLB for DATA"
	depends on PIN_TLB
	default y

config PIN_TLB_IMMR
	bool "Pinned TLB for IMMR"
	depends on PIN_TLB || PPC_EARLY_DEBUG_CPM
	default y

config PIN_TLB_TEXT
	bool "Pinned TLB for TEXT"
	depends on PIN_TLB
	default y
endmenu

if PPC64
+0 −1
Original line number Diff line number Diff line
@@ -10,7 +10,6 @@ CONFIG_EXPERT=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_PPC_ADDER875=y
CONFIG_8xx_COPYBACK=y
CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
# CONFIG_SECCOMP is not set
+0 −1
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@ CONFIG_EXPERT=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_PPC_EP88XC=y
CONFIG_8xx_COPYBACK=y
CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
+0 −1
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@ CONFIG_EXPERT=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_MPC86XADS=y
CONFIG_8xx_COPYBACK=y
CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
CONFIG_MATH_EMULATION=y
+0 −1
Original line number Diff line number Diff line
@@ -11,7 +11,6 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_8xx_COPYBACK=y
CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
Loading