Commit 36ebaeb4 authored by Dave Airlie

Merge tag 'drm-intel-fixes-2021-07-22' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

A couple of reverts from Jason getting rid of asynchronous command parsing
and fence error propagation, plus a GVT fix from Colin making shadow ppgtt
invalidation work with proper D3 state tracking.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YPl1sIyruD0U5Orl@intel.com
parents 995a1460 6e0b6528
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +13 −214
@@ -25,10 +25,8 @@
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
#include "i915_memcpy.h"

struct eb_vma {
	struct i915_vma *vma;
@@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
		int err;
		struct intel_engine_cs *engine = eb->engine;

		/* If we need to copy for the cmdparser, we will stall anyway */
		if (eb_use_cmdparser(eb))
			return ERR_PTR(-EWOULDBLOCK);

		if (!reloc_can_use_engine(engine)) {
			engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
			if (!engine)
@@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
	return vma;
}

struct eb_parse_work {
	struct dma_fence_work base;
	struct intel_engine_cs *engine;
	struct i915_vma *batch;
	struct i915_vma *shadow;
	struct i915_vma *trampoline;
	unsigned long batch_offset;
	unsigned long batch_length;
	unsigned long *jump_whitelist;
	const void *batch_map;
	void *shadow_map;
};

static int __eb_parse(struct dma_fence_work *work)
{
	struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
	int ret;
	bool cookie;

	cookie = dma_fence_begin_signalling();
	ret = intel_engine_cmd_parser(pw->engine,
				      pw->batch,
				      pw->batch_offset,
				      pw->batch_length,
				      pw->shadow,
				      pw->jump_whitelist,
				      pw->shadow_map,
				      pw->batch_map);
	dma_fence_end_signalling(cookie);

	return ret;
}

static void __eb_parse_release(struct dma_fence_work *work)
{
	struct eb_parse_work *pw = container_of(work, typeof(*pw), base);

	if (!IS_ERR_OR_NULL(pw->jump_whitelist))
		kfree(pw->jump_whitelist);

	if (pw->batch_map)
		i915_gem_object_unpin_map(pw->batch->obj);
	else
		i915_gem_object_unpin_pages(pw->batch->obj);

	i915_gem_object_unpin_map(pw->shadow->obj);

	if (pw->trampoline)
		i915_active_release(&pw->trampoline->active);
	i915_active_release(&pw->shadow->active);
	i915_active_release(&pw->batch->active);
}

static const struct dma_fence_work_ops eb_parse_ops = {
	.name = "eb_parse",
	.work = __eb_parse,
	.release = __eb_parse_release,
};

static inline int
__parser_mark_active(struct i915_vma *vma,
		     struct intel_timeline *tl,
		     struct dma_fence *fence)
{
	struct intel_gt_buffer_pool_node *node = vma->private;

	return i915_active_ref(&node->active, tl->fence_context, fence);
}

static int
parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
{
	int err;

	mutex_lock(&tl->mutex);

	err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
	if (err)
		goto unlock;

	if (pw->trampoline) {
		err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
		if (err)
			goto unlock;
	}

unlock:
	mutex_unlock(&tl->mutex);
	return err;
}

static int eb_parse_pipeline(struct i915_execbuffer *eb,
			     struct i915_vma *shadow,
			     struct i915_vma *trampoline)
{
	struct eb_parse_work *pw;
	struct drm_i915_gem_object *batch = eb->batch->vma->obj;
	bool needs_clflush;
	int err;

	GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
	GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));

	pw = kzalloc(sizeof(*pw), GFP_KERNEL);
	if (!pw)
		return -ENOMEM;

	err = i915_active_acquire(&eb->batch->vma->active);
	if (err)
		goto err_free;

	err = i915_active_acquire(&shadow->active);
	if (err)
		goto err_batch;

	if (trampoline) {
		err = i915_active_acquire(&trampoline->active);
		if (err)
			goto err_shadow;
	}

	pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB);
	if (IS_ERR(pw->shadow_map)) {
		err = PTR_ERR(pw->shadow_map);
		goto err_trampoline;
	}

	needs_clflush =
		!(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);

	pw->batch_map = ERR_PTR(-ENODEV);
	if (needs_clflush && i915_has_memcpy_from_wc())
		pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);

	if (IS_ERR(pw->batch_map)) {
		err = i915_gem_object_pin_pages(batch);
		if (err)
			goto err_unmap_shadow;
		pw->batch_map = NULL;
	}

	pw->jump_whitelist =
		intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len,
							     trampoline);
	if (IS_ERR(pw->jump_whitelist)) {
		err = PTR_ERR(pw->jump_whitelist);
		goto err_unmap_batch;
	}

	dma_fence_work_init(&pw->base, &eb_parse_ops);

	pw->engine = eb->engine;
	pw->batch = eb->batch->vma;
	pw->batch_offset = eb->batch_start_offset;
	pw->batch_length = eb->batch_len;
	pw->shadow = shadow;
	pw->trampoline = trampoline;

	/* Mark active refs early for this worker, in case we get interrupted */
	err = parser_mark_active(pw, eb->context->timeline);
	if (err)
		goto err_commit;

	err = dma_resv_reserve_shared(pw->batch->resv, 1);
	if (err)
		goto err_commit;

	err = dma_resv_reserve_shared(shadow->resv, 1);
	if (err)
		goto err_commit;

	/* Wait for all writes (and relocs) into the batch to complete */
	err = i915_sw_fence_await_reservation(&pw->base.chain,
					      pw->batch->resv, NULL, false,
					      0, I915_FENCE_GFP);
	if (err < 0)
		goto err_commit;

	/* Keep the batch alive and unwritten as we parse */
	dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);

	/* Force execution to wait for completion of the parser */
	dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);

	dma_fence_work_commit_imm(&pw->base);
	return 0;

err_commit:
	i915_sw_fence_set_error_once(&pw->base.chain, err);
	dma_fence_work_commit_imm(&pw->base);
	return err;

err_unmap_batch:
	if (pw->batch_map)
		i915_gem_object_unpin_map(batch);
	else
		i915_gem_object_unpin_pages(batch);
err_unmap_shadow:
	i915_gem_object_unpin_map(shadow->obj);
err_trampoline:
	if (trampoline)
		i915_active_release(&trampoline->active);
err_shadow:
	i915_active_release(&shadow->active);
err_batch:
	i915_active_release(&eb->batch->vma->active);
err_free:
	kfree(pw);
	return err;
}

static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
{
	/*
@@ -2672,7 +2463,15 @@ static int eb_parse(struct i915_execbuffer *eb)
		goto err_trampoline;
	}

	err = eb_parse_pipeline(eb, shadow, trampoline);
	err = dma_resv_reserve_shared(shadow->resv, 1);
	if (err)
		goto err_trampoline;

	err = intel_engine_cmd_parser(eb->engine,
				      eb->batch->vma,
				      eb->batch_start_offset,
				      eb->batch_len,
				      shadow, trampoline);
	if (err)
		goto err_unpin_batch;

drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +4 −0
@@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg)
	intel_gt_pm_get(&eb.i915->gt);

	for_each_uabi_engine(eb.engine, eb.i915) {
		if (intel_engine_requires_cmd_parser(eb.engine) ||
		    intel_engine_using_cmd_parser(eb.engine))
			continue;

		reloc_cache_init(&eb.reloc_cache, eb.i915);
		memset(map, POISON_INUSE, 4096);

drivers/gpu/drm/i915/gvt/handlers.c +15 −0
@@ -1977,6 +1977,21 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
	if (drm_WARN_ON(&i915->drm, !engine))
		return -EINVAL;

	/*
	 * Because d3_entered is used to indicate that PPGTT invalidation
	 * should be skipped on vGPU reset, it is set on the D0->D3 PCI config
	 * write and cleared after the vGPU reset when resuming.
	 * On S0ix exit the device power state also transitions from D3 to D0,
	 * as in S3 resume, but without a vGPU reset (which would be triggered
	 * by the QEMU device model). After S0ix exit all engines continue to
	 * work, yet d3_entered remains set, which breaks the next vGPU reset
	 * (the expected PPGTT invalidation is missed).
	 * Engines can only work in D0, so the first elsp write gives GVT a
	 * chance to clear d3_entered.
	 */
	if (vgpu->d3_entered)
		vgpu->d3_entered = false;

	execlist = &vgpu->submission.execlist[engine->id];

	execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;
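
For readers unfamiliar with the GVT flow, the following stand-alone C model walks through the d3_entered lifecycle described in the comment above. It is purely illustrative: enter_d3(), vgpu_reset(), s0ix_exit() and elsp_write() are hypothetical stand-ins for the real config-space, reset and ELSP submission paths, not GVT functions.

/*
 * Illustrative model of the d3_entered flag (not driver code).
 */
#include <stdbool.h>
#include <stdio.h>

struct vgpu_model {
	bool d3_entered;	/* set on the D0->D3 PCI config write */
	bool ppgtt_valid;	/* stands in for the shadow PPGTT state */
};

static void enter_d3(struct vgpu_model *v)
{
	v->d3_entered = true;	/* D0->D3: PPGTT survives, remember it */
}

static void vgpu_reset(struct vgpu_model *v)
{
	if (v->d3_entered) {
		/* resume-from-D3 path: keep the shadow PPGTT */
		v->d3_entered = false;
		return;
	}
	/* real reset: shadow PPGTT must be invalidated and rebuilt */
	v->ppgtt_valid = false;
}

static void s0ix_exit(struct vgpu_model *v)
{
	/* D3->D0 without a vGPU reset: d3_entered is left set (the bug) */
	(void)v;
}

static void elsp_write(struct vgpu_model *v)
{
	/* engines only run in D0, so the first ELSP write clears the flag */
	if (v->d3_entered)
		v->d3_entered = false;
}

int main(void)
{
	struct vgpu_model v = { .d3_entered = false, .ppgtt_valid = true };

	enter_d3(&v);
	s0ix_exit(&v);
	elsp_write(&v);	/* the fix: stale flag cleared on first submission */
	vgpu_reset(&v);	/* the next reset now invalidates as expected */

	printf("ppgtt_valid after reset: %d (expect 0)\n", v.ppgtt_valid);
	return 0;
}

Without the elsp_write() hook, a reset that eventually follows an S0ix exit would still see the stale flag and skip the expected PPGTT invalidation.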
drivers/gpu/drm/i915/i915_cmd_parser.c +74 −62
@@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
		       struct drm_i915_gem_object *src_obj,
		       unsigned long offset, unsigned long length,
		       void *dst, const void *src)
		       bool *needs_clflush_after)
{
	bool needs_clflush =
		!(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
	unsigned int src_needs_clflush;
	unsigned int dst_needs_clflush;
	void *dst, *src;
	int ret;

	if (src) {
		GEM_BUG_ON(!needs_clflush);
		i915_unaligned_memcpy_from_wc(dst, src + offset, length);
	} else {
		struct scatterlist *sg;
	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
	if (ret)
		return ERR_PTR(ret);

	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
	i915_gem_object_finish_access(dst_obj);
	if (IS_ERR(dst))
		return dst;

	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
	if (ret) {
		i915_gem_object_unpin_map(dst_obj);
		return ERR_PTR(ret);
	}

	src = ERR_PTR(-ENODEV);
	if (src_needs_clflush && i915_has_memcpy_from_wc()) {
		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
		if (!IS_ERR(src)) {
			i915_unaligned_memcpy_from_wc(dst,
						      src + offset,
						      length);
			i915_gem_object_unpin_map(src_obj);
		}
	}
	if (IS_ERR(src)) {
		unsigned long x, n, remain;
		void *ptr;
		unsigned int x, sg_ofs;
		unsigned long remain;

		/*
		 * We can avoid clflushing partial cachelines before the write
@@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
		 * validate up to the end of the batch.
		 */
		remain = length;
		if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		if (dst_needs_clflush & CLFLUSH_BEFORE)
			remain = round_up(remain,
					  boot_cpu_data.x86_clflush_size);

		ptr = dst;
		x = offset_in_page(offset);
		sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false);

		while (remain) {
			unsigned long sg_max = sg->length >> PAGE_SHIFT;
		for (n = offset >> PAGE_SHIFT; remain; n++) {
			int len = min(remain, PAGE_SIZE - x);

			for (; remain && sg_ofs < sg_max; sg_ofs++) {
				unsigned long len = min(remain, PAGE_SIZE - x);
				void *map;

				map = kmap_atomic(nth_page(sg_page(sg), sg_ofs));
				if (needs_clflush)
					drm_clflush_virt_range(map + x, len);
				memcpy(ptr, map + x, len);
				kunmap_atomic(map);
			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
			if (src_needs_clflush)
				drm_clflush_virt_range(src + x, len);
			memcpy(ptr, src + x, len);
			kunmap_atomic(src);

			ptr += len;
			remain -= len;
			x = 0;
		}

			sg_ofs = 0;
			sg = sg_next(sg);
		}
	}

	i915_gem_object_finish_access(src_obj);

	memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));

	/* dst_obj is returned with vmap pinned */
	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;

	return dst;
}
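
As a quick worked example of the CLFLUSH_BEFORE rounding in copy_batch() above, the snippet below (illustrative user-space C, assuming a 64-byte clflush line as on typical x86 parts) shows why the copied length is extended to the next cacheline boundary:

/*
 * Worked example of the round_up() above (illustrative only): extending the
 * copy to whole cachelines avoids having to clflush a partial trailing line
 * before the write; validating slightly past the end of the batch is harmless.
 */
#include <stdio.h>

static unsigned long round_up_to(unsigned long x, unsigned long align)
{
	return ((x + align - 1) / align) * align;
}

int main(void)
{
	unsigned long length = 1000;	/* bytes of batch to copy/validate */
	unsigned long clflush = 64;	/* boot_cpu_data.x86_clflush_size on x86 */

	printf("copy %lu bytes -> validate %lu bytes\n",
	       length, round_up_to(length, clflush));
	return 0;
}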

@@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
	if (target_cmd_index == offset)
		return 0;

	if (IS_ERR(jump_whitelist))
		return PTR_ERR(jump_whitelist);

	if (!test_bit(target_cmd_index, jump_whitelist)) {
		DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
			  jump_target);
@@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
	return 0;
}

/**
 * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser()
 * @batch_length: length of the commands in batch_obj
 * @trampoline: Whether jump trampolines are used.
 *
 * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser().
 * This has to be preallocated, because the command parser runs in signaling context,
 * and may not allocate any memory.
 *
 * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use
 * IS_ERR() to check for errors. Must be freed with kfree().
 *
 * NULL is a valid value, meaning no allocation was required.
 */
unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
							    bool trampoline)
static unsigned long *alloc_whitelist(u32 batch_length)
{
	unsigned long *jmp;

	if (trampoline)
		return NULL;

	/*
	 * We expect batch_length to be less than 256KiB for known users,
	 * i.e. we need at most an 8KiB bitmap allocation which should be
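
The 8 KiB figure in the (truncated) comment above follows from simple sizing arithmetic; a minimal stand-alone check, for illustration only:

/*
 * Jump-whitelist sizing sketch (illustrative): one bit per 4-byte command,
 * so a 256 KiB batch needs a 65536-bit (8 KiB) bitmap.
 */
#include <stdio.h>

int main(void)
{
	unsigned long batch_length = 256 * 1024;	/* bytes */
	unsigned long n_cmds = batch_length / 4;	/* 4-byte dwords */
	unsigned long bitmap_bytes = n_cmds / 8;	/* bits -> bytes */

	printf("%lu commands -> %lu KiB bitmap\n", n_cmds, bitmap_bytes / 1024);
	return 0;
}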
@@ -1415,9 +1416,7 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
 * @batch_offset: byte offset in the batch at which execution starts
 * @batch_length: length of the commands in batch_obj
 * @shadow: validated copy of the batch buffer in question
 * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist()
 * @shadow_map: mapping to @shadow vma
 * @batch_map: mapping to @batch vma
 * @trampoline: true if we need to trampoline into privileged execution
 *
 * Parses the specified batch buffer looking for privilege violations as
 * described in the overview.
@@ -1425,21 +1424,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
 * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
 * if the batch appears legal but should use hardware parsing
 */

int intel_engine_cmd_parser(struct intel_engine_cs *engine,
			    struct i915_vma *batch,
			    unsigned long batch_offset,
			    unsigned long batch_length,
			    struct i915_vma *shadow,
			    unsigned long *jump_whitelist,
			    void *shadow_map,
			    const void *batch_map)
			    bool trampoline)
{
	u32 *cmd, *batch_end, offset = 0;
	struct drm_i915_cmd_descriptor default_desc = noop_desc;
	const struct drm_i915_cmd_descriptor *desc = &default_desc;
	bool needs_clflush_after = false;
	unsigned long *jump_whitelist;
	u64 batch_addr, shadow_addr;
	int ret = 0;
	bool trampoline = !jump_whitelist;

	GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd)));
	GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd)));
@@ -1447,8 +1446,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
				     batch->size));
	GEM_BUG_ON(!batch_length);

	cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length,
			 shadow_map, batch_map);
	cmd = copy_batch(shadow->obj, batch->obj,
			 batch_offset, batch_length,
			 &needs_clflush_after);
	if (IS_ERR(cmd)) {
		DRM_DEBUG("CMD: Failed to copy batch\n");
		return PTR_ERR(cmd);
	}

	jump_whitelist = NULL;
	if (!trampoline)
		/* Defer failure until attempted use */
		jump_whitelist = alloc_whitelist(batch_length);

	shadow_addr = gen8_canonical_addr(shadow->node.start);
	batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
@@ -1549,6 +1558,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,

	i915_gem_object_flush_map(shadow->obj);

	if (!IS_ERR_OR_NULL(jump_whitelist))
		kfree(jump_whitelist);
	i915_gem_object_unpin_map(shadow->obj);
	return ret;
}

drivers/gpu/drm/i915/i915_drv.h +1 −6
@@ -1906,17 +1906,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
							    bool trampoline);

int intel_engine_cmd_parser(struct intel_engine_cs *engine,
			    struct i915_vma *batch,
			    unsigned long batch_offset,
			    unsigned long batch_length,
			    struct i915_vma *shadow,
			    unsigned long *jump_whitelist,
			    void *shadow_map,
			    const void *batch_map);
			    bool trampoline);
#define I915_CMD_PARSER_TRAMPOLINE_SIZE 8

/* intel_device_info.c */