Commit 158dbe9c authored by Petr Tesarik, committed by Christoph Hellwig

swiotlb: separate memory pool data from other allocator data



Carve out memory pool specific fields from struct io_tlb_mem. The original
struct now contains shared data for the whole allocator, while the new
struct io_tlb_pool contains data that is specific to one memory pool of
(potentially) many.
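
To illustrate the split (condensed; the full definitions are in the
include/linux/swiotlb.h hunk below):

	/* Per-pool data: one instance per memory pool. */
	struct io_tlb_pool {
		phys_addr_t start;	/* first bounce-buffer address */
		phys_addr_t end;	/* one past the last address */
		/* ... vaddr, nslabs, late_alloc, areas, slots ... */
	};

	/* Allocator-wide data: one instance per software IO TLB. */
	struct io_tlb_mem {
		struct io_tlb_pool defpool;	/* default (initial) pool */
		unsigned long nslabs;		/* total slots in all pools */
		/* ... debugfs, force_bounce, for_alloc, counters ... */
	};

Code that previously read pool fields straight off the allocator now
goes through the embedded default pool, e.g. mem->start becomes
mem->defpool.start.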

Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent fea18777
include/linux/device.h +1 −1
@@ -625,7 +625,7 @@ struct device_physical_location {
 * @dma_pools:	Dma pools (if dma'ble device).
 * @dma_mem:	Internal for coherent mem override.
 * @cma_area:	Contiguous memory area for dma allocations
- * @dma_io_tlb_mem: Pointer to the swiotlb pool used.  Not for driver use.
+ * @dma_io_tlb_mem: Software IO TLB allocator.  Not for driver use.
 * @archdata:	For arch-specific additions.
 * @of_node:	Associated device tree node.
 * @fwnode:	Associated device node supplied by platform firmware.
include/linux/swiotlb.h +28 −17
@@ -62,8 +62,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
#ifdef CONFIG_SWIOTLB

/**
- * struct io_tlb_mem - IO TLB Memory Pool Descriptor
- *
+ * struct io_tlb_pool - IO TLB memory pool descriptor
 * @start:	The start address of the swiotlb memory pool. Used to do a quick
 *		range check to see if the memory was in fact allocated by this
 *		API.
@@ -73,15 +72,34 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 * @vaddr:	The vaddr of the swiotlb memory pool. The swiotlb memory pool
 *		may be remapped in the memory encrypted case and store virtual
 *		address for bounce buffer operation.
- * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
- *		@end. For default swiotlb, this is command line adjustable via
- *		setup_io_tlb_npages.
+ * @nslabs:	The number of IO TLB slots between @start and @end. For the
+ *		default swiotlb, this can be adjusted with a boot parameter,
+ *		see setup_io_tlb_npages().
+ * @late_alloc:	%true if allocated using the page allocator.
+ * @nareas:	Number of areas in the pool.
+ * @area_nslabs: Number of slots in each area.
+ * @areas:	Array of memory area descriptors.
+ * @slots:	Array of slot descriptors.
+ */
+struct io_tlb_pool {
+	phys_addr_t start;
+	phys_addr_t end;
+	void *vaddr;
+	unsigned long nslabs;
+	bool late_alloc;
+	unsigned int nareas;
+	unsigned int area_nslabs;
+	struct io_tlb_area *areas;
+	struct io_tlb_slot *slots;
+};
+
+/**
+ * struct io_tlb_mem - Software IO TLB allocator
+ * @defpool:	Default (initial) IO TLB memory pool descriptor.
+ * @nslabs:	Total number of IO TLB slabs in all pools.
 * @debugfs:	The dentry to debugfs.
- * @late_alloc:	%true if allocated using the page allocator
 * @force_bounce: %true if swiotlb bouncing is forced
 * @for_alloc:  %true if the pool is used for memory allocation
- * @nareas:  The area number in the pool.
- * @area_nslabs: The slot number in the area.
 * @total_used:	The total number of slots in the pool that are currently used
 *		across all areas. Used only for calculating used_hiwater in
 *		debugfs.
@@ -89,18 +107,11 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 *		in debugfs.
 */
struct io_tlb_mem {
-	phys_addr_t start;
-	phys_addr_t end;
-	void *vaddr;
+	struct io_tlb_pool defpool;
	unsigned long nslabs;
	struct dentry *debugfs;
-	bool late_alloc;
	bool force_bounce;
	bool for_alloc;
-	unsigned int nareas;
-	unsigned int area_nslabs;
-	struct io_tlb_area *areas;
-	struct io_tlb_slot *slots;
#ifdef CONFIG_DEBUG_FS
	atomic_long_t total_used;
	atomic_long_t used_hiwater;
@@ -122,7 +133,7 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
{
	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;

-	return mem && paddr >= mem->start && paddr < mem->end;
+	return mem && paddr >= mem->defpool.start && paddr < mem->defpool.end;
}

static inline bool is_swiotlb_force_bounce(struct device *dev)
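
The conversions in kernel/dma/swiotlb.c below all lean on the same
pool-relative slot arithmetic: a bounce address maps back to a slot
index inside the pool that contains it. A minimal sketch, using a
hypothetical helper name (slot_index_in_pool() does not exist in the
tree; IO_TLB_SHIFT is the log2 of the slot size):

	/* Hypothetical helper; mirrors the index math in swiotlb_bounce()
	 * and swiotlb_release_slots() below. */
	static unsigned int slot_index_in_pool(struct io_tlb_pool *pool,
					       phys_addr_t tlb_addr)
	{
		return (tlb_addr - pool->start) >> IO_TLB_SHIFT;
	}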
kernel/dma/swiotlb.c +111 −64
@@ -209,7 +209,7 @@ void __init swiotlb_adjust_size(unsigned long size)

void swiotlb_print_info(void)
{
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;

	if (!mem->nslabs) {
		pr_warn("No low mem\n");
@@ -238,7 +238,7 @@ static inline unsigned long nr_slots(u64 val)
 */
void __init swiotlb_update_mem_attributes(void)
{
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
	unsigned long bytes;

	if (!mem->nslabs || mem->late_alloc)
@@ -247,9 +247,8 @@ void __init swiotlb_update_mem_attributes(void)
	set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
}

-static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
-		unsigned long nslabs, unsigned int flags,
-		bool late_alloc, unsigned int nareas)
+static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
+		unsigned long nslabs, bool late_alloc, unsigned int nareas)
{
	void *vaddr = phys_to_virt(start);
	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
@@ -261,8 +260,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
	mem->nareas = nareas;
	mem->area_nslabs = nslabs / mem->nareas;

-	mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
-
	for (i = 0; i < mem->nareas; i++) {
		spin_lock_init(&mem->areas[i].lock);
		mem->areas[i].index = 0;
@@ -319,7 +316,7 @@ static void __init *swiotlb_memblock_alloc(unsigned long nslabs,
void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
		int (*remap)(void *tlb, unsigned long nslabs))
{
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
	unsigned long nslabs;
	unsigned int nareas;
	size_t alloc_size;
@@ -330,6 +327,9 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
	if (swiotlb_force_disable)
		return;

+	io_tlb_default_mem.force_bounce =
+		swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
+
	if (!default_nareas)
		swiotlb_adjust_nareas(num_possible_cpus());

@@ -363,8 +363,9 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
		return;
	}

-	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false,
-				default_nareas);
+	swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false,
+				 default_nareas);
+	io_tlb_default_mem.nslabs = nslabs;

	if (flags & SWIOTLB_VERBOSE)
		swiotlb_print_info();
@@ -383,7 +384,7 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
int swiotlb_init_late(size_t size, gfp_t gfp_mask,
		int (*remap)(void *tlb, unsigned long nslabs))
{
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
	unsigned int nareas;
	unsigned char *vstart = NULL;
@@ -397,6 +398,8 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
	if (swiotlb_force_disable)
		return 0;

+	io_tlb_default_mem.force_bounce = swiotlb_force_bounce;
+
	if (!default_nareas)
		swiotlb_adjust_nareas(num_possible_cpus());

@@ -448,8 +451,9 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,

	set_memory_decrypted((unsigned long)vstart,
			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
-	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
-				nareas);
+	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
+				 nareas);
+	io_tlb_default_mem.nslabs = nslabs;

	swiotlb_print_info();
	return 0;
@@ -463,7 +467,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,

void __init swiotlb_exit(void)
{
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
	unsigned long tbl_vaddr;
	size_t tbl_size, slots_size;
	unsigned int area_order;
@@ -519,7 +523,7 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
			   enum dma_data_direction dir)
{
-	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool;
	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
	phys_addr_t orig_addr = mem->slots[index].orig_addr;
	size_t alloc_size = mem->slots[index].alloc_size;
@@ -601,7 +605,7 @@ static inline unsigned long get_max_slots(unsigned long boundary_mask)
	return nr_slots(boundary_mask + 1);
}

-static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
+static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index)
{
	if (index >= mem->area_nslabs)
		return 0;
@@ -645,6 +649,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
/**
 * swiotlb_area_find_slots() - search for slots in one IO TLB memory area
 * @dev:	Device which maps the buffer.
+ * @pool:	Memory pool to be searched.
 * @area_index:	Index of the IO TLB memory area to be searched.
 * @orig_addr:	Original (non-bounced) IO buffer address.
 * @alloc_size: Total requested size of the bounce buffer,
@@ -657,15 +662,14 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
 *
 * Return: Index of the first allocated slot, or -1 on error.
 */
-static int swiotlb_area_find_slots(struct device *dev, int area_index,
-		phys_addr_t orig_addr, size_t alloc_size,
+static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool,
+		int area_index, phys_addr_t orig_addr, size_t alloc_size,
		unsigned int alloc_align_mask)
{
-	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-	struct io_tlb_area *area = mem->areas + area_index;
+	struct io_tlb_area *area = pool->areas + area_index;
	unsigned long boundary_mask = dma_get_seg_boundary(dev);
	dma_addr_t tbl_dma_addr =
-		phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
+		phys_to_dma_unencrypted(dev, pool->start) & boundary_mask;
	unsigned long max_slots = get_max_slots(boundary_mask);
	unsigned int iotlb_align_mask =
		dma_get_min_align_mask(dev) | alloc_align_mask;
@@ -677,7 +681,7 @@ static int swiotlb_area_find_slots(struct device *dev, int area_index,
	unsigned int slot_index;

	BUG_ON(!nslots);
-	BUG_ON(area_index >= mem->nareas);
+	BUG_ON(area_index >= pool->nareas);

	/*
	 * For allocations of PAGE_SIZE or larger only look for page aligned
@@ -694,19 +698,19 @@ static int swiotlb_area_find_slots(struct device *dev, int area_index,
	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;

	spin_lock_irqsave(&area->lock, flags);
-	if (unlikely(nslots > mem->area_nslabs - area->used))
+	if (unlikely(nslots > pool->area_nslabs - area->used))
		goto not_found;

-	slot_base = area_index * mem->area_nslabs;
+	slot_base = area_index * pool->area_nslabs;
	index = area->index;

-	for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
+	for (slots_checked = 0; slots_checked < pool->area_nslabs; ) {
		slot_index = slot_base + index;

		if (orig_addr &&
		    (slot_addr(tbl_dma_addr, slot_index) &
		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
-			index = wrap_area_index(mem, index + 1);
+			index = wrap_area_index(pool, index + 1);
			slots_checked++;
			continue;
		}
@@ -719,10 +723,10 @@ static int swiotlb_area_find_slots(struct device *dev, int area_index,
		if (!iommu_is_span_boundary(slot_index, nslots,
					    nr_slots(tbl_dma_addr),
					    max_slots)) {
-			if (mem->slots[slot_index].list >= nslots)
+			if (pool->slots[slot_index].list >= nslots)
				goto found;
		}
-		index = wrap_area_index(mem, index + stride);
+		index = wrap_area_index(pool, index + stride);
		slots_checked += stride;
	}

@@ -732,58 +736,79 @@ static int swiotlb_area_find_slots(struct device *dev, int area_index,

found:
	for (i = slot_index; i < slot_index + nslots; i++) {
-		mem->slots[i].list = 0;
-		mem->slots[i].alloc_size = alloc_size - (offset +
+		pool->slots[i].list = 0;
+		pool->slots[i].alloc_size = alloc_size - (offset +
				((i - slot_index) << IO_TLB_SHIFT));
	}
	for (i = slot_index - 1;
	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
-	     mem->slots[i].list; i--)
-		mem->slots[i].list = ++count;
+	     pool->slots[i].list; i--)
+		pool->slots[i].list = ++count;

	/*
	 * Update the indices to avoid searching in the next round.
	 */
-	area->index = wrap_area_index(mem, index + nslots);
+	area->index = wrap_area_index(pool, index + nslots);
	area->used += nslots;
	spin_unlock_irqrestore(&area->lock, flags);

-	inc_used_and_hiwater(mem, nslots);
+	inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots);
	return slot_index;
}

/**
- * swiotlb_find_slots() - search for slots in the whole swiotlb
+ * swiotlb_pool_find_slots() - search for slots in one memory pool
 * @dev:	Device which maps the buffer.
+ * @pool:	Memory pool to be searched.
 * @orig_addr:	Original (non-bounced) IO buffer address.
 * @alloc_size: Total requested size of the bounce buffer,
 *		including initial alignment padding.
 * @alloc_align_mask:	Required alignment of the allocated buffer.
 *
- * Search through the whole software IO TLB to find a sequence of slots that
- * match the allocation constraints.
+ * Search through one memory pool to find a sequence of slots that match the
+ * allocation constraints.
 *
 * Return: Index of the first allocated slot, or -1 on error.
 */
-static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
-		size_t alloc_size, unsigned int alloc_align_mask)
+static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool,
+		phys_addr_t orig_addr, size_t alloc_size,
+		unsigned int alloc_align_mask)
{
-	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-	int start = raw_smp_processor_id() & (mem->nareas - 1);
+	int start = raw_smp_processor_id() & (pool->nareas - 1);
	int i = start, index;

	do {
-		index = swiotlb_area_find_slots(dev, i, orig_addr, alloc_size,
-						alloc_align_mask);
+		index = swiotlb_area_find_slots(dev, pool, i, orig_addr,
+						alloc_size, alloc_align_mask);
		if (index >= 0)
			return index;
-		if (++i >= mem->nareas)
+		if (++i >= pool->nareas)
			i = 0;
	} while (i != start);

	return -1;
}

+/**
+ * swiotlb_find_slots() - search for slots in the whole swiotlb
+ * @dev:	Device which maps the buffer.
+ * @orig_addr:	Original (non-bounced) IO buffer address.
+ * @alloc_size: Total requested size of the bounce buffer,
+ *		including initial alignment padding.
+ * @alloc_align_mask:	Required alignment of the allocated buffer.
+ *
+ * Search through the whole software IO TLB to find a sequence of slots that
+ * match the allocation constraints.
+ *
+ * Return: Index of the first allocated slot, or -1 on error.
+ */
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+		size_t alloc_size, unsigned int alloc_align_mask)
+{
+	return swiotlb_pool_find_slots(dev, &dev->dma_io_tlb_mem->defpool,
+				       orig_addr, alloc_size, alloc_align_mask);
+}

#ifdef CONFIG_DEBUG_FS

/**
@@ -802,6 +827,24 @@ static unsigned long mem_used(struct io_tlb_mem *mem)

#else /* !CONFIG_DEBUG_FS */

+/**
+ * mem_pool_used() - get number of used slots in a memory pool
+ * @pool:	Software IO TLB memory pool.
+ *
+ * The result is not accurate, see mem_used().
+ *
+ * Return: Approximate number of used slots.
+ */
+static unsigned long mem_pool_used(struct io_tlb_pool *pool)
+{
+	int i;
+	unsigned long used = 0;
+
+	for (i = 0; i < pool->nareas; i++)
+		used += pool->areas[i].used;
+	return used;
+}

/**
 * mem_used() - get number of used slots in an allocator
 * @mem:	Software IO TLB allocator.
@@ -813,12 +856,7 @@ static unsigned long mem_used(struct io_tlb_mem *mem)
 */
static unsigned long mem_used(struct io_tlb_mem *mem)
{
-	int i;
-	unsigned long used = 0;
-
-	for (i = 0; i < mem->nareas; i++)
-		used += mem->areas[i].used;
-	return used;
+	return mem_pool_used(&mem->defpool);
}

#endif /* CONFIG_DEBUG_FS */
@@ -830,6 +868,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
{
	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+	struct io_tlb_pool *pool;
	unsigned int i;
	int index;
	phys_addr_t tlb_addr;
@@ -864,9 +903,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
	 * This is needed when we sync the memory.  Then we sync the buffer if
	 * needed.
	 */
+	pool = &mem->defpool;
	for (i = 0; i < nr_slots(alloc_size + offset); i++)
-		mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
-	tlb_addr = slot_addr(mem->start, index) + offset;
+		pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
+	tlb_addr = slot_addr(pool->start, index) + offset;
	/*
	 * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
	 * to the tlb buffer, if we knew for sure the device will
@@ -880,7 +920,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,

static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
{
-	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	struct io_tlb_pool *mem = &dev->dma_io_tlb_mem->defpool;
	unsigned long flags;
	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
@@ -924,7 +964,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
	area->used -= nslots;
	spin_unlock_irqrestore(&area->lock, flags);

-	dec_used(mem, nslots);
+	dec_used(dev->dma_io_tlb_mem, nslots);
}

/*
@@ -1033,7 +1073,7 @@ bool is_swiotlb_active(struct device *dev)
 */
phys_addr_t default_swiotlb_base(void)
{
-	return io_tlb_default_mem.start;
+	return io_tlb_default_mem.defpool.start;
}

/**
@@ -1043,7 +1083,7 @@ phys_addr_t default_swiotlb_base(void)
 */
phys_addr_t default_swiotlb_limit(void)
{
-	return io_tlb_default_mem.end - 1;
+	return io_tlb_default_mem.defpool.end - 1;
}

#ifdef CONFIG_DEBUG_FS
@@ -1119,6 +1159,7 @@ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
struct page *swiotlb_alloc(struct device *dev, size_t size)
{
	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	struct io_tlb_pool *pool;
	phys_addr_t tlb_addr;
	int index;

@@ -1129,7 +1170,8 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
	if (index == -1)
		return NULL;

-	tlb_addr = slot_addr(mem->start, index);
+	pool = &mem->defpool;
+	tlb_addr = slot_addr(pool->start, index);

	return pfn_to_page(PFN_DOWN(tlb_addr));
}
@@ -1166,29 +1208,34 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
	 * to it.
	 */
	if (!mem) {
+		struct io_tlb_pool *pool;
+
		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
		if (!mem)
			return -ENOMEM;
+		pool = &mem->defpool;

-		mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
-		if (!mem->slots) {
+		pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL);
+		if (!pool->slots) {
			kfree(mem);
			return -ENOMEM;
		}

-		mem->areas = kcalloc(nareas, sizeof(*mem->areas),
+		pool->areas = kcalloc(nareas, sizeof(*pool->areas),
				GFP_KERNEL);
-		if (!mem->areas) {
-			kfree(mem->slots);
+		if (!pool->areas) {
+			kfree(pool->slots);
			kfree(mem);
			return -ENOMEM;
		}

		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
				     rmem->size >> PAGE_SHIFT);
-		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
-				false, nareas);
+		swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs,
+					 false, nareas);
+		mem->force_bounce = true;
		mem->for_alloc = true;
+		mem->nslabs = nslabs;

		rmem->priv = mem;
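
The tail of rmem_swiotlb_device_init() shows the new initialization
order: pool-local state is set up by swiotlb_init_io_tlb_pool(), then
allocator-wide state is applied to struct io_tlb_mem. Condensed into a
hypothetical helper (not part of the patch):

	static void restricted_pool_setup(struct io_tlb_mem *mem,
					  phys_addr_t base,
					  unsigned long nslabs,
					  unsigned int nareas)
	{
		swiotlb_init_io_tlb_pool(&mem->defpool, base, nslabs,
					 false, nareas);
		mem->force_bounce = true;	/* restricted DMA always bounces */
		mem->for_alloc = true;		/* pool also backs swiotlb_alloc() */
		mem->nslabs = nslabs;		/* allocator-wide slot total */
	}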