Commit 87bd701e authored by Matthew Auld, committed by Lucas De Marchi

drm/i915: enforce min GTT alignment for discrete cards

For local-memory objects we need to align the GTT addresses
to 64K, both for the ppgtt and ggtt.

We need to support vm->min_alignment > 4K, depending
on the vm itself and the type of object we are inserting.
With this in mind, update the GTT selftests to take the
minimum alignment into account.
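
As a rough, self-contained sketch of what vm->min_alignment ends up
being (illustrative names and a standalone function, not the driver
code itself; the real logic is in the intel_gtt.c/intel_gtt.h hunks
below):

	/* Sketch: per-region-type minimum GTT alignment for a vm. */
	#define GTT_PAGE_SIZE_4K	(1ull << 12)
	#define GTT_PAGE_SIZE_64K	(1ull << 16)
	#define GTT_PAGE_SIZE_2M	(1ull << 21)

	enum mem_type { MEM_SYSTEM, MEM_LOCAL, MEM_STOLEN_SYSTEM,
			MEM_STOLEN_LOCAL, MEM_NR };

	static void init_min_alignment(u64 min_alignment[MEM_NR],
				       bool has_64k_pages,
				       bool needs_compact_pt)
	{
		int i;

		/* default: every region type keeps the 4K minimum */
		for (i = 0; i < MEM_NR; i++)
			min_alignment[i] = GTT_PAGE_SIZE_4K;

		if (has_64k_pages) {
			/* compact-pt raises the lmem minimum to 2M */
			u64 a = needs_compact_pt ? GTT_PAGE_SIZE_2M
						 : GTT_PAGE_SIZE_64K;

			min_alignment[MEM_LOCAL] = a;
			min_alignment[MEM_STOLEN_LOCAL] = a;
		}
	}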

For compact-pt we further align and pad lmem object GTT addresses
to 2MB to ensure PDEs contain consistent page sizes as
required by the HW.
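
Concretely (made-up sizes, mirroring the i915_vma_insert() hunk
below): a 68K lmem object on a compact-pt platform has its GTT
reservation rounded up so the whole PDE is backed by one page size:

	u64 size  = 68 * 1024;		/* object size (example)      */
	u64 align = GTT_PAGE_SIZE_2M;	/* lmem minimum on compact-pt */

	size = round_up(size, align);	/* reservation becomes 2M     */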

v3:
	* use needs_compact_pt flag to discriminate between
	  64K and 64K with compact-pt
	* add i915_vm_obj_min_alignment
	* use i915_vm_obj_min_alignment to round up vma reservation
	  if compact-pt instead of hard coding
v5:
	* fix i915_vm_obj_min_alignment for internal objects which
	  have no memory region
v6:
	* make tiled_blits_create pick the largest required alignment
v8:
	* protect i915_vm_min_alignment against array overflow for the mock region

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-7-ramalingam.c@intel.com
parent 132aaaf0
+13 −8
@@ -39,6 +39,7 @@ struct tiled_blits {
	struct blit_buffer scratch;
	struct i915_vma *batch;
	u64 hole;
+	u64 align;
	u32 width;
	u32 height;
};
@@ -410,14 +411,19 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
		goto err_free;
	}

-	hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+	t->align = i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL);
+	t->align = max(t->align,
+		       i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));
+
+	hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
	hole_size *= 2; /* room to maneuver */
-	hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
+	hole_size += 2 * t->align; /* padding on either side */

	mutex_lock(&t->ce->vm->mutex);
	memset(&hole, 0, sizeof(hole));
	err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
-					  hole_size, 0, I915_COLOR_UNEVICTABLE,
+					  hole_size, t->align,
+					  I915_COLOR_UNEVICTABLE,
					  0, U64_MAX,
					  DRM_MM_INSERT_BEST);
	if (!err)
@@ -428,7 +434,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
		goto err_put;
	}

-	t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
+	t->hole = hole.start + t->align;
	pr_info("Using hole at %llx\n", t->hole);

	err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
@@ -455,7 +461,7 @@ static void tiled_blits_destroy(struct tiled_blits *t)
static int tiled_blits_prepare(struct tiled_blits *t,
			       struct rnd_state *prng)
{
-	u64 offset = PAGE_ALIGN(t->width * t->height * 4);
+	u64 offset = round_up(t->width * t->height * 4, t->align);
	u32 *map;
	int err;
	int i;
@@ -486,8 +492,7 @@ static int tiled_blits_prepare(struct tiled_blits *t,

static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
{
-	u64 offset =
-		round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
+	u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
	int err;

	/* We want to check position invariant tiling across GTT eviction */
@@ -500,7 +505,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)

	/* Reposition so that we overlap the old addresses, and slightly off */
	err = tiled_blit(t,
-			 &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
+			 &t->buffers[2], t->hole + t->align,
			 &t->buffers[1], t->hole + 3 * offset / 2);
	if (err)
		return err;
+12 −0
@@ -223,6 +223,18 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

+	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
+		 ARRAY_SIZE(vm->min_alignment));
+
+	if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915)) {
+		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
+		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
+	} else if (HAS_64K_PAGES(vm->i915)) {
+		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+	}
+
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
+22 −0
@@ -29,6 +29,8 @@
#include "i915_selftest.h"
#include "i915_vma_resource.h"
#include "i915_vma_types.h"
#include "i915_params.h"
#include "intel_memory_region.h"

#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)

@@ -223,6 +225,7 @@ struct i915_address_space {
	struct device *dma;
	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
	u64 reserved;		/* size addr space reserved */
+	u64 min_alignment[INTEL_MEMORY_STOLEN_LOCAL + 1];

	unsigned int bind_async_flags;

@@ -384,6 +387,25 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm)
	return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
}

+static inline u64 i915_vm_min_alignment(struct i915_address_space *vm,
+					enum intel_memory_type type)
+{
+	/* avoid INTEL_MEMORY_MOCK overflow */
+	if ((int)type >= ARRAY_SIZE(vm->min_alignment))
+		type = INTEL_MEMORY_SYSTEM;
+
+	return vm->min_alignment[type];
+}
+
+static inline u64 i915_vm_obj_min_alignment(struct i915_address_space *vm,
+					    struct drm_i915_gem_object *obj)
+{
+	struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+	enum intel_memory_type type = mr ? mr->type : INTEL_MEMORY_SYSTEM;
+
+	return i915_vm_min_alignment(vm, type);
+}
+
static inline bool
i915_vm_has_cache_coloring(struct i915_address_space *vm)
{
+9 −0
@@ -756,6 +756,14 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));

+	alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));
+	/*
+	 * for compact-pt we round up the reservation to prevent
+	 * any smaller pages being used within the same PDE
+	 */
+	if (NEEDS_COMPACT_PT(vma->vm->i915))
+		size = round_up(size, alignment);
+
	/* If binding the object/GGTT view requires more space than the entire
	 * aperture has, reject it early before evicting everything in a vain
	 * attempt to find space.
@@ -768,6 +776,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
	}

	color = 0;

	if (i915_vm_has_cache_coloring(vma->vm))
		color = vma->obj->cache_level;

+63 −33
@@ -238,6 +238,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
			 u64 hole_start, u64 hole_end,
			 unsigned long end_time)
{
+	const unsigned int min_alignment =
+		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
	I915_RND_STATE(seed_prng);
	struct i915_vma_resource *mock_vma_res;
	unsigned int size;
@@ -251,9 +253,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
		I915_RND_SUBSTATE(prng, seed_prng);
		struct drm_i915_gem_object *obj;
		unsigned int *order, count, n;
-		u64 hole_size;
+		u64 hole_size, aligned_size;

-		hole_size = (hole_end - hole_start) >> size;
+		aligned_size = max_t(u32, ilog2(min_alignment), size);
+		hole_size = (hole_end - hole_start) >> aligned_size;
		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
		count = hole_size >> 1;
@@ -274,8 +277,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
		}
		GEM_BUG_ON(!order);

-		GEM_BUG_ON(count * BIT_ULL(size) > vm->total);
-		GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end);
+		GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total);
+		GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > hole_end);

		/* Ignore allocation failures (i.e. don't report them as
		 * a test failure) as we are purposefully allocating very
@@ -298,10 +301,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
		}

		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
			intel_wakeref_t wakeref;

-			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
+			GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total);

			if (igt_timeout(end_time,
					"%s timed out before %d/%d\n",
@@ -344,7 +347,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
			}

			mock_vma_res->bi.pages = obj->mm.pages;
-			mock_vma_res->node_size = BIT_ULL(size);
+			mock_vma_res->node_size = BIT_ULL(aligned_size);
			mock_vma_res->start = addr;

			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
@@ -355,7 +358,7 @@ static int lowlevel_hole(struct i915_address_space *vm,

		i915_random_reorder(order, count, &prng);
		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
			intel_wakeref_t wakeref;

			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
@@ -399,8 +402,10 @@ static int fill_hole(struct i915_address_space *vm,
{
	const u64 hole_size = hole_end - hole_start;
	struct drm_i915_gem_object *obj;
+	const unsigned int min_alignment =
+		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
	const unsigned long max_pages =
-		min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT);
+		min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> ilog2(min_alignment));
	const unsigned long max_step = max(int_sqrt(max_pages), 2UL);
	unsigned long npages, prime, flags;
	struct i915_vma *vma;
@@ -441,14 +446,17 @@ static int fill_hole(struct i915_address_space *vm,

				offset = p->offset;
				list_for_each_entry(obj, &objects, st_link) {
+					u64 aligned_size = round_up(obj->base.size,
+								    min_alignment);
+
					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
-						if (offset < hole_start + obj->base.size)
+						if (offset < hole_start + aligned_size)
							break;
-						offset -= obj->base.size;
+						offset -= aligned_size;
					}

					err = i915_vma_pin(vma, 0, 0, offset | flags);
@@ -470,22 +478,25 @@ static int fill_hole(struct i915_address_space *vm,
					i915_vma_unpin(vma);

					if (p->step > 0) {
-						if (offset + obj->base.size > hole_end)
+						if (offset + aligned_size > hole_end)
							break;
-						offset += obj->base.size;
+						offset += aligned_size;
					}
				}

				offset = p->offset;
				list_for_each_entry(obj, &objects, st_link) {
+					u64 aligned_size = round_up(obj->base.size,
+								    min_alignment);
+
					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
-						if (offset < hole_start + obj->base.size)
+						if (offset < hole_start + aligned_size)
							break;
-						offset -= obj->base.size;
+						offset -= aligned_size;
					}

					if (!drm_mm_node_allocated(&vma->node) ||
@@ -506,22 +517,25 @@ static int fill_hole(struct i915_address_space *vm,
					}

					if (p->step > 0) {
-						if (offset + obj->base.size > hole_end)
+						if (offset + aligned_size > hole_end)
							break;
-						offset += obj->base.size;
+						offset += aligned_size;
					}
				}

				offset = p->offset;
				list_for_each_entry_reverse(obj, &objects, st_link) {
+					u64 aligned_size = round_up(obj->base.size,
+								    min_alignment);
+
					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
-						if (offset < hole_start + obj->base.size)
+						if (offset < hole_start + aligned_size)
							break;
-						offset -= obj->base.size;
+						offset -= aligned_size;
					}

					err = i915_vma_pin(vma, 0, 0, offset | flags);
@@ -543,22 +557,25 @@ static int fill_hole(struct i915_address_space *vm,
					i915_vma_unpin(vma);

					if (p->step > 0) {
-						if (offset + obj->base.size > hole_end)
+						if (offset + aligned_size > hole_end)
							break;
-						offset += obj->base.size;
+						offset += aligned_size;
					}
				}

				offset = p->offset;
				list_for_each_entry_reverse(obj, &objects, st_link) {
+					u64 aligned_size = round_up(obj->base.size,
+								    min_alignment);
+
					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
-						if (offset < hole_start + obj->base.size)
+						if (offset < hole_start + aligned_size)
							break;
-						offset -= obj->base.size;
+						offset -= aligned_size;
					}

					if (!drm_mm_node_allocated(&vma->node) ||
@@ -579,9 +596,9 @@ static int fill_hole(struct i915_address_space *vm,
					}

					if (p->step > 0) {
-						if (offset + obj->base.size > hole_end)
+						if (offset + aligned_size > hole_end)
							break;
-						offset += obj->base.size;
+						offset += aligned_size;
					}
				}
			}
@@ -611,6 +628,7 @@ static int walk_hole(struct i915_address_space *vm,
	const u64 hole_size = hole_end - hole_start;
	const unsigned long max_pages =
		min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT);
+	unsigned long min_alignment;
	unsigned long flags;
	u64 size;

@@ -620,6 +638,8 @@ static int walk_hole(struct i915_address_space *vm,
	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
	for_each_prime_number_from(size, 1, max_pages) {
		struct drm_i915_gem_object *obj;
		struct i915_vma *vma;
@@ -638,7 +658,7 @@ static int walk_hole(struct i915_address_space *vm,

		for (addr = hole_start;
		     addr + obj->base.size < hole_end;
-		     addr += obj->base.size) {
+		     addr += round_up(obj->base.size, min_alignment)) {
			err = i915_vma_pin(vma, 0, 0, addr | flags);
			if (err) {
				pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n",
@@ -690,6 +710,7 @@ static int pot_hole(struct i915_address_space *vm,
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
+	unsigned int min_alignment;
	unsigned long flags;
	unsigned int pot;
	int err = 0;
@@ -698,6 +719,8 @@ static int pot_hole(struct i915_address_space *vm,
	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
	obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);
@@ -710,13 +733,13 @@ static int pot_hole(struct i915_address_space *vm,

	/* Insert a pair of pages across every pot boundary within the hole */
	for (pot = fls64(hole_end - 1) - 1;
-	     pot > ilog2(2 * I915_GTT_PAGE_SIZE);
+	     pot > ilog2(2 * min_alignment);
	     pot--) {
		u64 step = BIT_ULL(pot);
		u64 addr;

-		for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE;
-		     addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE;
+		for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
+		     addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment;
		     addr += step) {
			err = i915_vma_pin(vma, 0, 0, addr | flags);
			if (err) {
@@ -761,6 +784,7 @@ static int drunk_hole(struct i915_address_space *vm,
		      unsigned long end_time)
{
	I915_RND_STATE(prng);
+	unsigned int min_alignment;
	unsigned int size;
	unsigned long flags;

@@ -768,15 +792,18 @@ static int drunk_hole(struct i915_address_space *vm,
	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
	/* Keep creating larger objects until one cannot fit into the hole */
	for (size = 12; (hole_end - hole_start) >> size; size++) {
		struct drm_i915_gem_object *obj;
		unsigned int *order, count, n;
		struct i915_vma *vma;
-		u64 hole_size;
+		u64 hole_size, aligned_size;
		int err = -ENODEV;

-		hole_size = (hole_end - hole_start) >> size;
+		aligned_size = max_t(u32, ilog2(min_alignment), size);
+		hole_size = (hole_end - hole_start) >> aligned_size;
		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
		count = hole_size >> 1;
@@ -816,7 +843,7 @@ static int drunk_hole(struct i915_address_space *vm,
		GEM_BUG_ON(vma->size != BIT_ULL(size));

		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);

			err = i915_vma_pin(vma, 0, 0, addr | flags);
			if (err) {
@@ -868,11 +895,14 @@ static int __shrink_hole(struct i915_address_space *vm,
{
	struct drm_i915_gem_object *obj;
	unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+	unsigned int min_alignment;
	unsigned int order = 12;
	LIST_HEAD(objects);
	int err = 0;
	u64 addr;

+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
	/* Keep creating larger objects until one cannot fit into the hole */
	for (addr = hole_start; addr < hole_end; ) {
		struct i915_vma *vma;
@@ -913,7 +943,7 @@ static int __shrink_hole(struct i915_address_space *vm,
		}

		i915_vma_unpin(vma);
-		addr += size;
+		addr += round_up(size, min_alignment);

		/*
		 * Since we are injecting allocation faults at random intervals,