Commit 5189e312 authored by Matthew Auld, committed by Lucas De Marchi
Browse files

drm/i915: support 64K GTT pages for discrete cards



Discrete cards optimise 64K GTT pages for local-memory, since everything
should be allocated at 64K granularity. We say goodbye to sparse
entries, and instead get a compact 256B page-table for 64K pages,
which should be more cache friendly. 4K pages for local-memory
are no longer supported by the HW.

v4: don't return uninitialized err in igt_ppgtt_compact
Reported-by: kernel test robot <lkp@intel.com>

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: default avatarLucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-8-ramalingam.c@intel.com
parent 87bd701e
Loading
Loading
Loading
Loading
+60 −0
Original line number Diff line number Diff line
@@ -1478,6 +1478,65 @@ static int igt_ppgtt_sanity_check(void *arg)
	return err;
}

/*
 * Live selftest for compact 64K page-tables.
 *
 * A compact page-table is 256B, i.e. 32 entries, yet the page backing it is
 * still 4K. Anything we wrongly write past those 32 entries should be ignored
 * by the HW, so if we ever hit such a bug some of the writes issued by this
 * test would land in scratch and the test should catch it.
 *
 * Returns 0 on success or when the test is skipped, a negative error code
 * otherwise. -ENOMEM seen after the object has been created is reported as
 * success.
 */
static int igt_ppgtt_compact(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *lmem_obj;
	int ret;

	if (!HAS_64K_PAGES(i915)) {
		pr_info("device lacks compact 64K page support, skipping\n");
		return 0;
	}

	if (!HAS_LMEM(i915)) {
		pr_info("device lacks LMEM support, skipping\n");
		return 0;
	}

	/* 4M so that the mapping straddles more than one page-table. */
	lmem_obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
	if (IS_ERR(lmem_obj))
		return PTR_ERR(lmem_obj);

	ret = i915_gem_object_pin_pages_unlocked(lmem_obj);
	if (!ret) {
		if (lmem_obj->mm.page_sizes.phys >= I915_GTT_PAGE_SIZE_64K) {
			/*
			 * Advertise only 64K for the GTT insertion so that
			 * 2M GTT pages are never picked.
			 */
			lmem_obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;

			ret = igt_write_huge(i915, lmem_obj);
			if (ret)
				pr_err("LMEM compact write-huge failed\n");
		} else {
			/* Not a failure: ret is still 0 from the pin above. */
			pr_info("LMEM compact unable to allocate huge-page(s)\n");
		}

		i915_gem_object_unpin_pages(lmem_obj);
	}

	i915_gem_object_put(lmem_obj);

	/* Running out of memory from here on is not treated as a failure. */
	return ret == -ENOMEM ? 0 : ret;
}

static int igt_tmpfs_fallback(void *arg)
{
	struct drm_i915_private *i915 = arg;
@@ -1735,6 +1794,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
		SUBTEST(igt_tmpfs_fallback),
		SUBTEST(igt_ppgtt_smoke_huge),
		SUBTEST(igt_ppgtt_sanity_check),
		SUBTEST(igt_ppgtt_compact),
	};

	if (!HAS_PPGTT(i915)) {
+105 −3
Original line number Diff line number Diff line
@@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
						   start, end, lvl);
		} else {
			unsigned int count;
			unsigned int pte = gen8_pd_index(start, 0);
			unsigned int num_ptes;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
@@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			num_ptes = count;
			if (pt->is_compact) {
				GEM_BUG_ON(num_ptes % 16);
				GEM_BUG_ON(pte % 16);
				num_ptes /= 16;
				pte /= 16;
			}

			vaddr = px_vaddr(pt);
			memset64(vaddr + gen8_pd_index(start, 0),
			memset64(vaddr + pte,
				 vm->scratch[0]->encode,
				 count);
				 num_ptes);

			atomic_sub(count, &pt->used);
			start += count;
@@ -453,6 +463,95 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
	return idx;
}

/*
 * Write the page-table entries for @vma_res into the 4-level ppgtt @vm,
 * walking the DMA segments in @iter and starting at vma_res->start.
 *
 * For each stretch the function picks one of three entry sizes:
 *  - a 2M entry written directly into the page directory,
 *  - a 64K entry in a compact page table, used whenever the encoded PTE
 *    carries GEN12_PPGTT_PTE_LM,
 *  - an I915_GTT_PAGE_SIZE entry in a regular page table otherwise.
 *
 * Every page size actually used is OR'ed into vma_res->page_sizes_gtt.
 * @cache_level and @flags are only passed to vm->pte_encode() to build the
 * base PTE value. The page directories and page tables covering the range
 * are looked up, not allocated, here.
 */
static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
			  struct i915_vma_resource *vma_res,
			  struct sgt_dma *iter,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	/* Base PTE bits with a zero address; the DMA address is OR'ed in per entry. */
	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
	/* Bytes left in the current sg segment. */
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	/* One iteration per (page directory / page table, page size) stretch. */
	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		struct i915_page_table *pt =
			i915_pt_entry(pd, __gen8_pte_index(start, 1));
		gen8_pte_t encode = pte_encode;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index, max;

		/* Entry limit for this stretch; shrunk below for compact tables. */
		max = I915_PDES;

		/*
		 * A 2M entry is only usable when the backing store advertises
		 * 2M pages, the DMA address is 2M aligned, at least 2M remains
		 * in the segment and start sits at index 0 of its page table.
		 */
		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			/* 2M entries live in the page directory itself. */
			vaddr = px_vaddr(pd);
		} else {
			if (encode & GEN12_PPGTT_PTE_LM) {
				/*
				 * Compact 64K layout: the start index, the
				 * remaining length and the DMA address must
				 * all be 64K aligned/sized.
				 */
				GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
				GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
				GEM_BUG_ON(!IS_ALIGNED(iter->dma,
						       I915_GTT_PAGE_SIZE_64K));

				/* One compact entry replaces 16 regular ones. */
				index = __gen8_pte_index(start, 0) / 16;
				page_size = I915_GTT_PAGE_SIZE_64K;

				max /= 16;

				/* Flag the PDE pointing at this table as 64K. */
				vaddr = px_vaddr(pd);
				vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

				/* Remembered so that clearing can scale its indices too. */
				pt->is_compact = true;
			} else {
				/* A table already marked compact must not get regular PTEs. */
				GEM_BUG_ON(pt->is_compact);
				index =  __gen8_pte_index(start, 0);
				page_size = I915_GTT_PAGE_SIZE;
			}

			/* In both sub-cases the entries go into the page table. */
			vaddr = px_vaddr(pt);
		}

		/* Fill entries of the chosen size until the table or segment runs out. */
		do {
			GEM_BUG_ON(rem < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			/* Current segment consumed: move on to the next one. */
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				/* No more segments: the outer loop ends as well. */
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				/* A zero-length segment also ends the outer loop. */
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				/*
				 * The new segment cannot continue at this page
				 * size; leave so the outer loop picks a size
				 * again.
				 */
				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
				   struct i915_vma_resource *vma_res,
				   struct sgt_dma *iter,
@@ -586,6 +685,9 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
	struct sgt_dma iter = sgt_dma(vma_res);

	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
		if (HAS_64K_PAGES(vm->i915))
			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
		else
			gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
	} else  {
		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
+3 −0
Original line number Diff line number Diff line
@@ -92,6 +92,8 @@ typedef u64 gen8_pte_t;

#define GEN12_GGTT_PTE_LM	BIT_ULL(1)

/*
 * Page-directory-entry flag: OR'ed into the PDE that points at a page table
 * holding compact 64K entries.
 */
#define GEN12_PDE_64K BIT(6)

/*
 * Cacheability Control is a 4-bit value. The low three bits are stored in bits
 * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
@@ -160,6 +162,7 @@ struct i915_page_table {
		atomic_t used;
		struct i915_page_table *stash;
	};
	bool is_compact;
};

struct i915_page_directory {
+1 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
		return ERR_PTR(-ENOMEM);
	}

	pt->is_compact = false;
	atomic_set(&pt->used, 0);
	return pt;
}