Commit f913a660 authored by Vasily Gorbik, committed by Heiko Carstens
Browse files

s390/boot: rework decompressor reserved tracking



Currently several approaches for finding unused memory in the decompressor
are utilized. While "safe_addr" grows towards higher addresses, the vmem
code allocates paging structures top down. The former requires careful
ordering. In addition to that, the ipl report handling code verifies potential
intersections with secure boot certificates on its own. Neither of the two
approaches is aware of memory holes, and they are not consistent with each
other in low memory conditions.

To solve that, existing approaches are generalized and combined
together, as well as online memory ranges are now taken into
consideration.

physmem_info has been extended to contain reserved memory ranges. A new
set of functions allows handling reserves and finding unused memory.
All reserves and memory allocations are "typed". In case of an out of
memory condition the decompressor fails with detailed info on the current
reserved ranges and usable online memory.

Linux version 6.2.0 ...
Kernel command line: ... mem=100M
Out of memory allocating 100000 bytes 100000 aligned in range 0:5800000
Reserved memory ranges:
0000000000000000 0000000003e33000 DECOMPRESSOR
0000000003f00000 00000000057648a3 INITRD
00000000063e0000 00000000063e8000 VMEM
00000000063eb000 00000000063f4000 VMEM
00000000063f7800 0000000006400000 VMEM
0000000005800000 0000000006300000 KASAN
Usable online memory ranges (info source: sclp read info [3]):
0000000000000000 0000000006400000
Usable online memory total: 6400000 Reserved: 61b10a3 Free: 24ef5d
Call Trace:
(sp:000000000002bd58 [<0000000000012a70>] physmem_alloc_top_down+0x60/0x14c)
 sp:000000000002bdc8 [<0000000000013756>] _pa+0x56/0x6a
 sp:000000000002bdf0 [<0000000000013bcc>] pgtable_populate+0x45c/0x65e
 sp:000000000002be90 [<00000000000140aa>] setup_vmem+0x2da/0x424
 sp:000000000002bec8 [<0000000000011c20>] startup_kernel+0x428/0x8b4
 sp:000000000002bf60 [<00000000000100f4>] startup_normal+0xd4/0xd4

physmem_alloc_range finds free memory in the specified range. It
should be used for one-time allocations only, such as finding a position for
amode31 and vmlinux.
physmem_alloc_top_down can be used just like physmem_alloc_range, but
it also allows multiple allocations per type and tries to merge sequential
allocations together, which is useful for paging structure allocations.
If sequential allocations cannot be merged together they are "chained",
allowing easy per-type reserved range enumeration and migration to
memblock later. The extra "struct reserved_range" entries allocated for
chaining are not tracked or reserved; they rely on the fact that both
physmem_alloc_range and physmem_alloc_top_down search for free memory
only below the current top-down allocator position. All reserved ranges
should be transferred to memblock before memblock allocations are
enabled.

The startup code has been reordered to delay any memory allocations until
online memory ranges are detected and occupied memory ranges are marked as
reserved to be excluded from follow-up allocations.
Ipl report certificates are a special case, ipl report certificates list
is checked together with other memory reserves until certificates are
saved elsewhere.
KASAN required memory for shadow memory allocation and mapping is reserved
as 1 large chunk which is later passed to KASAN early initialization code.

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
parent 8c37cb7d
Loading
Loading
Loading
Loading
+25 −5
Original line number Diff line number Diff line
@@ -8,6 +8,8 @@

#ifndef __ASSEMBLY__

#include <asm/physmem_info.h>

struct machine_info {
	unsigned char has_edat1 : 1;
	unsigned char has_edat2 : 1;
@@ -33,21 +35,34 @@ struct vmlinux_info {
};

void startup_kernel(void);
/* physical memory detection and reserved-range tracking */
unsigned long detect_max_physmem_end(void);
void detect_physmem_online_ranges(unsigned long max_physmem_end);
void physmem_set_usable_limit(unsigned long limit);
void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
void physmem_free(enum reserved_range_type type);
/* for continuous/multiple allocations per type */
unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
				     unsigned long align);
/* for single allocations, 1 per type */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
				  unsigned long align, unsigned long min, unsigned long max,
				  bool die_on_oom);
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
				 unsigned long *intersection_start);
bool is_ipl_block_dump(void);
void store_ipl_parmblock(void);
int read_ipl_report(void);
void save_ipl_cert_comp_list(void);
void setup_boot_command_line(void);
void parse_boot_command_line(void);
void verify_facilities(void);
void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
unsigned long get_random_base(void);
void setup_vmem(unsigned long asce_limit);
unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m);

extern struct machine_info machine;
@@ -62,7 +77,7 @@ extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
extern char _decompressor_syms_start[], _decompressor_syms_end[];
extern char _stack_start[], _stack_end[];
extern char _end[];
extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info;
@@ -70,5 +85,10 @@ extern struct vmlinux_info _vmlinux_info;

#define __abs_lowcore_pa(x)	(((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))

/*
 * Report whether the two regions [addr0, addr0 + size0) and
 * [addr1, addr1 + size1) overlap: each region has to end strictly after
 * the other one starts.
 */
static inline bool intersects(unsigned long addr0, unsigned long size0,
			      unsigned long addr1, unsigned long size1)
{
	unsigned long end0 = addr0 + size0;
	unsigned long end1 = addr1 + size1;

	if (end0 <= addr1)
		return false;
	return end1 > addr0;
}
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
+49 −57
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
#include <asm/sclp.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
#include <asm/physmem_info.h>
#include <uapi/asm/ipl.h>
#include "boot.h"

@@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size);
unsigned long __bootdata(early_ipl_comp_list_addr);
unsigned long __bootdata(early_ipl_comp_list_size);

static struct ipl_rb_certificates *certs;
static struct ipl_rb_components *comps;
static bool ipl_report_needs_saving;

/*
 * Iterate over the entries of an IPL report block: start at rb->entries and
 * continue while a complete entry still fits within the rb->len bytes that
 * start at rb. Both arguments are parenthesized so that any expression
 * (e.g. &block) can be passed as rb.
 */
#define for_each_rb_entry(entry, rb) \
	for ((entry) = (rb)->entries; \
	     (void *)(entry) + sizeof(*(entry)) <= (void *)(rb) + (rb)->len; \
	     (entry)++)

/*
 * True when the region starting at addr0 (size0 bytes) and the region
 * starting at addr1 (size1 bytes) overlap.
 */
static inline bool intersects(unsigned long addr0, unsigned long size0,
			      unsigned long addr1, unsigned long size1)
{
	bool first_ends_after_second_starts = addr0 + size0 > addr1;
	bool second_ends_after_first_starts = addr1 + size1 > addr0;

	return first_ends_after_second_starts && second_ends_after_first_starts;
}

static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
					 struct ipl_rb_certificates *certs,
					 unsigned long safe_addr)
static unsigned long get_cert_comp_list_size(void)
{
	struct ipl_rb_certificate_entry *cert;
	struct ipl_rb_component_entry *comp;
@@ -44,44 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
	ipl_cert_list_size = 0;
	for_each_rb_entry(cert, certs)
		ipl_cert_list_size += sizeof(unsigned int) + cert->len;
	size = ipl_cert_list_size + early_ipl_comp_list_size;

	/*
	 * Start from safe_addr to find a free memory area large
	 * enough for the IPL report boot data. This area is used
	 * for ipl_cert_list_addr/ipl_cert_list_size and
	 * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must
	 * not overlap with any component or any certificate.
	 */
repeat:
	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
	    intersects(initrd_data.start, initrd_data.size, safe_addr, size))
		safe_addr = initrd_data.start + initrd_data.size;
	if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) {
		safe_addr = (unsigned long)comps + comps->len;
		goto repeat;
	return ipl_cert_list_size + early_ipl_comp_list_size;
}
	for_each_rb_entry(comp, comps)
		if (intersects(safe_addr, size, comp->addr, comp->len)) {
			safe_addr = comp->addr + comp->len;
			goto repeat;
		}
	if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
		safe_addr = (unsigned long)certs + certs->len;
		goto repeat;

/*
 * Check whether [addr, addr + size) overlaps any certificate referenced by
 * the IPL report certificate list.
 *
 * Only meaningful while ipl_report_needs_saving is set; otherwise no
 * certificates are checked and false is returned.
 *
 * On overlap, *intersection_start is set to the start address of the first
 * overlapping certificate and true is returned. *intersection_start is left
 * untouched when false is returned.
 */
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
				 unsigned long *intersection_start)
{
	struct ipl_rb_certificate_entry *cert;

	if (!ipl_report_needs_saving)
		return false;

	for_each_rb_entry(cert, certs) {
		if (intersects(addr, size, cert->addr, cert->len)) {
			*intersection_start = cert->addr;
			return true;
		}
	}
	return false;
}

static void copy_components_bootdata(struct ipl_rb_components *comps)
static void copy_components_bootdata(void)
{
	struct ipl_rb_component_entry *comp, *ptr;

@@ -90,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps)
		memcpy(ptr++, comp, sizeof(*ptr));
}

static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
static void copy_certificates_bootdata(void)
{
	struct ipl_rb_certificate_entry *cert;
	void *ptr;
@@ -104,10 +84,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
	}
}

unsigned long read_ipl_report(unsigned long safe_addr)
int read_ipl_report(void)
{
	struct ipl_rb_certificates *certs;
	struct ipl_rb_components *comps;
	struct ipl_pl_hdr *pl_hdr;
	struct ipl_rl_hdr *rl_hdr;
	struct ipl_rb_hdr *rb_hdr;
@@ -120,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr)
	 */
	if (!ipl_block_valid ||
	    !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
		return safe_addr;
		return -1;
	ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
	/*
	 * There is an IPL report, to find it load the pointer to the
@@ -158,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr)
	 * With either the component list or the certificate list
	 * missing the kernel will stay ignorant of secure IPL.
	 */
	if (!comps || !certs)
		return safe_addr;
	if (!comps || !certs) {
		certs = NULL;
		return -1;
	}

	/*
	 * Copy component and certificate list to a safe area
	 * where the decompressed kernel can find them.
	 */
	safe_addr = find_bootdata_space(comps, certs, safe_addr);
	copy_components_bootdata(comps);
	copy_certificates_bootdata(certs);
	ipl_report_needs_saving = true;
	physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr,
			(unsigned long)rl_end - (unsigned long)pl_hdr);
	return 0;
}

/*
 * Copy the IPL report component and certificate lists into memory obtained
 * from the top-down allocator, then drop the RR_IPLREPORT reservation.
 *
 * Does nothing unless ipl_report_needs_saving is set. The component list is
 * placed first, directly followed by the certificate list.
 */
void save_ipl_cert_comp_list(void)
{
	unsigned long size;

	if (!ipl_report_needs_saving)
		return;

	size = get_cert_comp_list_size();
	early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
	ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;

	copy_components_bootdata();
	copy_certificates_bootdata();
	/* the lists have been copied, the original report no longer needs protection */
	physmem_free(RR_IPLREPORT);
	ipl_report_needs_saving = false;
}
+8 −105
Original line number Diff line number Diff line
@@ -91,113 +91,16 @@ static int get_random(unsigned long limit, unsigned long *value)
	return 0;
}

/*
 * To randomize kernel base address we have to consider several facts:
 * 1. physical online memory might not be continuous and have holes. physmem
 *    info contains list of online memory ranges we should consider.
 * 2. we have several memory regions which are occupied and we should not
 *    overlap and destroy them. Currently safe_addr tells us the border below
 *    which all those occupied regions are. We are safe to use anything above
 *    safe_addr.
 * 3. the upper limit might apply as well, even if memory above that limit is
 *    online. Currently those limitations are:
 *    3.1. Limit set by "mem=" kernel command line option
 *    3.2. memory reserved at the end for kasan initialization.
 * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
 *    Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
 *    (16 pages when the kernel is built with kasan enabled)
 * Assumptions:
 * 1. kernel size (including .bss size) and upper memory limit are page aligned.
 * 2. physmem online region start is THREAD_SIZE aligned / end is PAGE_SIZE
 *    aligned (in practice memory configurations granularity on z/VM and LPAR
 *    is 1mb).
 *
 * To guarantee uniform distribution of kernel base address among all suitable
 * addresses we generate random value just once. For that we need to build a
 * continuous range in which every value would be suitable. We can build this
 * range by simply counting all suitable addresses (let's call them positions)
 * which would be valid as kernel base address. To count positions we iterate
 * over online memory ranges. For each range which is big enough for the
 * kernel image we count all suitable addresses we can put the kernel image at
 * that is
 * (end - start - kernel_size) / THREAD_SIZE + 1
 * Two functions count_valid_kernel_positions and position_to_address help
 * to count positions in memory range given and then convert position back
 * to address.
 */
/*
 * Count the THREAD_SIZE-spaced addresses within [_min, _max) at which an
 * image of kernel_size bytes fits entirely inside one usable memory range.
 */
static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
						  unsigned long _min,
						  unsigned long _max)
{
	unsigned long lo, hi, total = 0;
	int idx;

	for_each_physmem_usable_range(idx, &lo, &hi) {
		/* range lies completely below the window */
		if (hi <= _min)
			continue;
		/* this range starts at or above the window end: stop scanning */
		if (lo >= _max)
			break;
		/* clamp the range to the window */
		if (lo < _min)
			lo = _min;
		if (hi > _max)
			hi = _max;
		if (hi - lo >= kernel_size)
			total += (hi - lo - kernel_size) / THREAD_SIZE + 1;
	}

	return total;
}

/*
 * Translate a 1-based position, as counted by count_valid_kernel_positions()
 * for the same kernel_size/_min/_max, back into an address. Returns 0 when
 * pos exceeds the number of available positions.
 */
static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
				 unsigned long _min, unsigned long _max)
{
	unsigned long lo, hi, slots;
	int idx;

	for_each_physmem_usable_range(idx, &lo, &hi) {
		if (hi <= _min)
			continue;
		if (lo >= _max)
			break;
		/* clamp the range to the [_min, _max) window */
		if (lo < _min)
			lo = _min;
		if (hi > _max)
			hi = _max;
		if (hi - lo < kernel_size)
			continue;
		/* number of positions this range contributes */
		slots = (hi - lo - kernel_size) / THREAD_SIZE + 1;
		if (pos <= slots)
			return lo + (pos - 1) * THREAD_SIZE;
		pos -= slots;
	}

	return 0;
}

unsigned long get_random_base(unsigned long safe_addr)
unsigned long get_random_base(void)
{
	unsigned long usable_total = get_physmem_usable_total();
	unsigned long memory_limit = get_physmem_usable_end();
	unsigned long base_pos, max_pos, kernel_size;
	int i;

	/*
	 * Avoid putting kernel in the end of physical memory
	 * which vmem and kasan code will use for shadow memory and
	 * pgtable mapping allocations.
	 */
	memory_limit -= kasan_estimate_memory_needs(usable_total);
	memory_limit -= vmem_estimate_memory_needs(usable_total);

	safe_addr = ALIGN(safe_addr, THREAD_SIZE);
	kernel_size = vmlinux.image_size + vmlinux.bss_size;
	if (safe_addr + kernel_size > memory_limit)
		return 0;
	unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
	unsigned long minimal_pos = vmlinux.default_lma + vmlinux_size;
	unsigned long random;

	max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit);
	if (!max_pos) {
		sclp_early_printk("KASLR disabled: not enough memory\n");
	/* [vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size : physmem_info.usable] */
	if (get_random(physmem_info.usable - minimal_pos, &random))
		return 0;
	}

	/* we need a value in the range [1, base_pos] inclusive */
	if (get_random(max_pos, &base_pos))
		return 0;
	return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
	return physmem_alloc_range(RR_VMLINUX, vmlinux_size, THREAD_SIZE,
				   vmlinux.default_lma, minimal_pos + random, false);
}
+2 −3
Original line number Diff line number Diff line
@@ -123,11 +123,10 @@ void decompressor_printk(const char *fmt, ...)
	sclp_early_printk(buf);
}

static noinline void print_stacktrace(void)
void print_stacktrace(unsigned long sp)
{
	struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
					 (unsigned long)_stack_end };
	unsigned long sp = S390_lowcore.gpregs_save_area[15];
	bool first = true;

	decompressor_printk("Call Trace:\n");
@@ -173,7 +172,7 @@ void print_pgm_check_info(void)
			    gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
	decompressor_printk("      %016lx %016lx %016lx %016lx\n",
			    gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
	print_stacktrace();
	print_stacktrace(S390_lowcore.gpregs_save_area[15]);
	decompressor_printk("Last Breaking-Event-Address:\n");
	decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
			    (void *)S390_lowcore.pgm_last_break);
+160 −28
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <linux/processor.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <asm/setup.h>
#include <asm/processor.h>
#include <asm/sclp.h>
#include <asm/sections.h>
#include <asm/physmem_info.h>
#include <asm/stacktrace.h>
#include <asm/boot_data.h>
#include <asm/sparsemem.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sclp.h>
#include <asm/uv.h>
#include "decompressor.h"
#include "boot.h"

struct physmem_info __bootdata(physmem_info);
static unsigned int physmem_alloc_ranges;
static unsigned long physmem_alloc_pos;

/* up to 256 storage elements, 1020 subincrements each */
#define ENTRIES_EXTENDED_MAX						       \
@@ -20,6 +25,11 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
{
	if (n < MEM_INLINED_ENTRIES)
		return &physmem_info.online[n];
	if (unlikely(!physmem_info.online_extended)) {
		physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
			RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
			physmem_alloc_pos, true);
	}
	return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
}

@@ -143,49 +153,171 @@ static unsigned long search_mem_end(void)
	return (offset + 1) << 20;
}

unsigned long detect_memory(unsigned long *safe_addr)
unsigned long detect_max_physmem_end(void)
{
	unsigned long max_physmem_end = 0;

	sclp_early_get_memsize(&max_physmem_end);
	physmem_info.online_extended = (struct physmem_range *)ALIGN(*safe_addr, sizeof(u64));
	if (!sclp_early_get_memsize(&max_physmem_end)) {
		physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
	} else {
		max_physmem_end = search_mem_end();
		physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
	}
	return max_physmem_end;
}

/*
 * Populate the list of online memory ranges.
 *
 * Sources are tried in order: sclp_early_read_storage_info(), then
 * diag260(); each is treated as successful when it returns 0 and then
 * determines physmem_info.info_source. If both fail and max_physmem_end is
 * non-zero, a single range [0, max_physmem_end) is added and info_source is
 * left unchanged.
 */
void detect_physmem_online_ranges(unsigned long max_physmem_end)
{
	if (!sclp_early_read_storage_info()) {
		physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
	} else if (!diag260()) {
		physmem_info.info_source = MEM_DETECT_DIAG260;
	} else if (max_physmem_end) {
		add_physmem_online_range(0, max_physmem_end);
	}
}

/*
 * Set the upper limit of usable physical memory. The same value becomes the
 * starting position of the top-down allocator.
 */
void physmem_set_usable_limit(unsigned long limit)
{
	physmem_alloc_pos = limit;
	physmem_info.usable = limit;
}

void physmem_set_usable_limit(unsigned long limit)
static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
{
	struct physmem_range *range;
	unsigned long start, end, total_mem = 0, total_reserved_mem = 0;
	struct reserved_range *range;
	enum reserved_range_type t;
	int i;

	/* make sure mem_detect.usable ends up within online memory block */
	for (i = 0; i < physmem_info.range_count; i++) {
		range = __get_physmem_range_ptr(i);
		if (range->start >= limit)
			break;
		if (range->end >= limit) {
			physmem_info.usable = limit;
	decompressor_printk("Linux version %s\n", kernel_version);
	if (!is_prot_virt_guest() && early_command_line[0])
		decompressor_printk("Kernel command line: %s\n", early_command_line);
	decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
			    size, align, min, max);
	decompressor_printk("Reserved memory ranges:\n");
	for_each_physmem_reserved_range(t, range, &start, &end) {
		decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
		total_reserved_mem += end - start;
	}
	decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n",
			    get_physmem_info_source(), physmem_info.info_source);
	for_each_physmem_usable_range(i, &start, &end) {
		decompressor_printk("%016lx %016lx\n", start, end);
		total_mem += end - start;
	}
	decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
			    total_mem, total_reserved_mem,
			    total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
	print_stacktrace(current_frame_address());
	sclp_early_printk("\n\n -- System halted\n");
	disabled_wait();
}

void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
{
	physmem_info.reserved[type].start = addr;
	physmem_info.reserved[type].end = addr + size;
}

void physmem_free(enum reserved_range_type type)
{
	physmem_info.reserved[type].start = 0;
	physmem_info.reserved[type].end = 0;
}

/*
 * Check a candidate region [addr, addr + size) against every reserved range
 * type and, if none of them overlaps, against the IPL report certificates.
 *
 * On overlap, *intersection_start receives the start of the overlapping
 * reservation (or certificate) and true is returned.
 */
static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size,
				       unsigned long *intersection_start)
{
	unsigned long rsv_addr, rsv_size;
	int type;

	for (type = 0; type < RR_MAX; type++) {
		/* skip types for which get_physmem_reserved() reports nothing */
		if (get_physmem_reserved(type, &rsv_addr, &rsv_size) &&
		    intersects(addr, size, rsv_addr, rsv_size)) {
			*intersection_start = rsv_addr;
			return true;
		}
	}
	return ipl_report_certs_intersects(addr, size, intersection_start);
}

/*
 * Find a free chunk of @size bytes, aligned to @align, between @min and @max,
 * searching from the highest considered online range downwards.
 *
 * @from_ranges: number of online ranges to consider, counted from index 0;
 *               0 means all physmem_info.range_count ranges.
 * @ranges_left: optional; on success receives the number of ranges that were
 *               still under consideration when the chunk was found.
 * @die_on_oom:  when true, die_oom() is called if nothing fits.
 *
 * Nothing is reserved here; the caller records the allocation.
 *
 * Returns the chunk address, or 0 when nothing fits. A valid chunk at
 * address 0 therefore cannot be told apart from failure.
 */
static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align,
					   unsigned long min, unsigned long max,
					   unsigned int from_ranges, unsigned int *ranges_left,
					   bool die_on_oom)
{
	unsigned int nranges = from_ranges ?: physmem_info.range_count;
	unsigned long range_start, range_end;
	unsigned long intersection_start;
	unsigned long addr, pos = max;

	/* enforce a minimum alignment of 8 bytes */
	align = max(align, 8UL);
	while (nranges) {
		/* NOTE(review): meaning of the last (false) argument is not visible here */
		__get_physmem_range(nranges - 1, &range_start, &range_end, false);
		/* never look above the end of the current online range */
		pos = min(range_end, pos);

		/* the request no longer fits between min and pos: give up */
		if (round_up(min, align) + size > pos)
			break;
		addr = round_down(pos - size, align);
		/* candidate starts below this range: move on to the next lower range */
		if (range_start > addr) {
			nranges--;
			continue;
		}
		/* candidate hits a reservation: retry just below where it starts */
		if (__physmem_alloc_intersects(addr, size, &intersection_start)) {
			pos = intersection_start;
			continue;
		}

		if (ranges_left)
			*ranges_left = nranges;
		return addr;
	}
	if (die_on_oom)
		die_oom(size, align, min, max);
	return 0;
}

/*
 * Single allocation for the given type: find @size bytes aligned to @align
 * between @min and @max (capped at the current top-down allocator position)
 * and record the result as that type's reserved range.
 *
 * Returns the allocated address, or 0 when nothing was found and
 * @die_on_oom is false; in that case no reservation is recorded.
 */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
				  unsigned long align, unsigned long min, unsigned long max,
				  bool die_on_oom)
{
	unsigned long limit = max;
	unsigned long found;

	/* never hand out memory at or above the top-down allocator position */
	if (physmem_alloc_pos < limit)
		limit = physmem_alloc_pos;

	found = __physmem_alloc_range(size, align, min, limit, 0, NULL, die_on_oom);
	if (!found)
		return 0;

	physmem_reserve(type, found, size);
	return found;
}

/*
 * Allocate @size bytes aligned to @align directly below the current
 * top-down allocator position on behalf of @type. May be called repeatedly
 * for the same type.
 *
 * If the new chunk ends exactly where the type's current range starts, the
 * range is simply extended downwards. Otherwise, if the type already has a
 * range, a copy of its descriptor is stored in a freshly allocated
 * struct reserved_range and linked via ->chain before the descriptor in
 * physmem_info.reserved[] is reused for the new chunk.
 *
 * All searches pass die_on_oom == true, so failure ends in die_oom() rather
 * than a 0 return. Updates physmem_alloc_pos and physmem_alloc_ranges so
 * that the next search continues below this allocation.
 *
 * Returns the address of the allocated chunk.
 */
unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
				     unsigned long align)
{
	struct reserved_range *range = &physmem_info.reserved[type];
	struct reserved_range *new_range;
	unsigned int ranges_left;
	unsigned long addr;

	addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
				     &ranges_left, true);
	/* if not a consecutive allocation of the same type or first allocation */
	if (range->start != addr + size) {
		if (range->end) {
			/* room for the chain entry is taken from the top first */
			physmem_alloc_pos = __physmem_alloc_range(
				sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
				physmem_alloc_ranges, &ranges_left, true);
			new_range = (struct reserved_range *)physmem_alloc_pos;
			*new_range = *range;
			range->chain = new_range;
			/* redo the search below the chain entry */
			addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
						     ranges_left, &ranges_left, true);
		}
		range->end = addr + size;
	}
	range->start = addr;
	physmem_alloc_pos = addr;
	physmem_alloc_ranges = ranges_left;
	return addr;
}
Loading