Merge tag 'amd-drm-next-5.19-2022-05-26-2' of... (bf23729c) · Commits · EulixOS / Software / Kernel

drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

+91 −86

Original line number	Diff line number	Diff line
		@@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
		return hw_prio;
		}

		/*
		 * Calculate the time a single fence has spent on the hardware.
		 *
		 * @fence: fence taken from a context entity slot, may be NULL
		 *
		 * Returns zero for an empty slot or for a fence whose scheduled part
		 * carries no timestamp yet. Otherwise returns the span from the
		 * scheduled timestamp up to the finished timestamp, or up to the
		 * current time when the finished part carries no timestamp yet.
		 */
		static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
		{
			struct drm_sched_fence *sched_fence;
			ktime_t begin, end;

			/* An empty slot contributes nothing */
			if (!fence)
				return ns_to_ktime(0);

			sched_fence = to_drm_sched_fence(fence);

			/* Not even scheduled yet, so no time can have been spent */
			if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
				      &sched_fence->scheduled.flags))
				return ns_to_ktime(0);

			begin = sched_fence->scheduled.timestamp;

			/* Still running: account what has been spent up to now */
			if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
				     &sched_fence->finished.flags))
				end = sched_fence->finished.timestamp;
			else
				end = ktime_get();

			return ktime_sub(end, begin);
		}

		/*
		 * Sum up the hardware time of all fence slots of one context entity.
		 *
		 * @ctx: context owning the entity; its ring_lock is held during the walk
		 * @centity: entity whose fences[] slots are accounted
		 *
		 * Walks amdgpu_sched_jobs slots and adds up what
		 * amdgpu_ctx_fence_time() reports for each of them.
		 */
		static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
						      struct amdgpu_ctx_entity *centity)
		{
			ktime_t total = ns_to_ktime(0);
			uint32_t slot;

			spin_lock(&ctx->ring_lock);
			for (slot = 0; slot < amdgpu_sched_jobs; slot++) {
				ktime_t spent = amdgpu_ctx_fence_time(centity->fences[slot]);

				total = ktime_add(total, spent);
			}
			spin_unlock(&ctx->ring_lock);

			return total;
		}

		static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
		const u32 ring)
		{
		struct drm_gpu_scheduler *scheds = NULL, sched = NULL;
		struct amdgpu_device *adev = ctx->mgr->adev;
		struct amdgpu_ctx_entity *entity;
		struct drm_gpu_scheduler *scheds = NULL, sched = NULL;
		unsigned num_scheds = 0;
		int32_t ctx_prio;
		unsigned int hw_prio;
		enum drm_sched_priority drm_prio;
		unsigned int hw_prio, num_scheds;
		int32_t ctx_prio;
		int r;

		entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
		@@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,

		ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
		ctx->init_priority : ctx->override_priority;
		entity->hw_ip = hw_ip;
		entity->sequence = 1;
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
		@@ -220,6 +254,23 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
		return r;
		}

		/*
		 * Tear down a context entity and report the time its fences spent.
		 *
		 * @entity: entity to destroy, may be NULL
		 *
		 * For each of the amdgpu_sched_jobs fence slots the time reported by
		 * amdgpu_ctx_fence_time() is added to the result before the fence
		 * reference is put. The entity itself is freed afterwards.
		 *
		 * Returns the accumulated time, zero when @entity is NULL.
		 */
		static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
		{
			ktime_t total = ns_to_ktime(0);
			int slot;

			if (!entity)
				return total;

			for (slot = 0; slot < amdgpu_sched_jobs; slot++) {
				struct dma_fence *fence = entity->fences[slot];

				/* Account first, the fence may go away with the put */
				total = ktime_add(total, amdgpu_ctx_fence_time(fence));
				dma_fence_put(fence);
			}

			kfree(entity);

			return total;
		}

		static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
		struct drm_file filp, struct amdgpu_ctx ctx)
		{
		@@ -246,20 +297,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
		return 0;
		}

		/*
		 * Put every fence slot of a context entity and free the entity.
		 *
		 * @entity: entity to destroy, may be NULL (then nothing is done)
		 *
		 * NOTE(review): a ktime_t-returning definition with the same name
		 * appears earlier in this diff; this void variant looks like the
		 * pre-change body shown as removed - confirm before relying on it.
		 */
		static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
		{

		int i;

		/* Nothing to release for a missing entity */
		if (!entity)
		return;

		/* Put all amdgpu_sched_jobs fence slots of the entity */
		for (i = 0; i < amdgpu_sched_jobs; ++i)
		dma_fence_put(entity->fences[i]);

		kfree(entity);
		}

		static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
		u32 *stable_pstate)
		{
		@@ -351,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref)

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
		amdgpu_ctx_fini_entity(ctx->entities[i][j]);
		ctx->entities[i][j] = NULL;
		ktime_t spend;

		spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
		atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
		}

		@@ -689,6 +728,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
		centity->sequence++;
		spin_unlock(&ctx->ring_lock);

		atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		&ctx->mgr->time_spend[centity->hw_ip]);

		dma_fence_put(other);
		return seq;
		}
		@@ -795,9 +837,14 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
		/**
		 * amdgpu_ctx_mgr_init - initialize a context manager
		 * @mgr: manager to set up
		 * @adev: device stored in the manager's adev field
		 *
		 * Initializes the manager lock and the context handle IDR and sets
		 * the time_spend counter of every hardware IP to zero.
		 */
		void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
					 struct amdgpu_device *adev)
		{
			unsigned int hw_ip;

			mgr->adev = adev;
			mutex_init(&mgr->lock);
			idr_init(&mgr->ctx_handles);

			/* One accumulated-time counter per hardware IP, all start at zero */
			for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; hw_ip++)
				atomic64_set(&mgr->time_spend[hw_ip], 0);
		}

		long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
		@@ -873,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
		mutex_destroy(&mgr->lock);
		}

		static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
		struct amdgpu_ctx_entity centity, ktime_t total, ktime_t *max)
		void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
		ktime_t usage[AMDGPU_HW_IP_NUM])
		{
		ktime_t now, t1;
		uint32_t i;

		total = max = 0;

		now = ktime_get();
		for (i = 0; i < amdgpu_sched_jobs; i++) {
		struct dma_fence *fence;
		struct drm_sched_fence *s_fence;

		spin_lock(&ctx->ring_lock);
		fence = dma_fence_get(centity->fences[i]);
		spin_unlock(&ctx->ring_lock);
		if (!fence)
		continue;
		s_fence = to_drm_sched_fence(fence);
		if (!dma_fence_is_signaled(&s_fence->scheduled)) {
		dma_fence_put(fence);
		continue;
		}
		t1 = s_fence->scheduled.timestamp;
		if (!ktime_before(t1, now)) {
		dma_fence_put(fence);
		continue;
		}
		if (dma_fence_is_signaled(&s_fence->finished) &&
		s_fence->finished.timestamp < now)
		*total += ktime_sub(s_fence->finished.timestamp, t1);
		else
		*total += ktime_sub(now, t1);
		t1 = ktime_sub(now, t1);
		dma_fence_put(fence);
		max = max(t1, max);
		}
		}

		ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
		uint32_t idx, uint64_t *elapsed)
		{
		struct idr *idp;
		struct amdgpu_ctx *ctx;
		unsigned int hw_ip, i;
		uint32_t id;
		struct amdgpu_ctx_entity *centity;
		ktime_t total = 0, max = 0;

		if (idx >= AMDGPU_MAX_ENTITY_NUM)
		return 0;
		idp = &mgr->ctx_handles;
		/*
		* This is a little bit racy because it can be that a ctx or a fence are
		* destroyed just in the moment we try to account them. But that is ok
		* since exactly that case is explicitly allowed by the interface.
		*/
		mutex_lock(&mgr->lock);
		idr_for_each_entry(idp, ctx, id) {
		ktime_t ttotal, tmax;
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		if (!ctx->entities[hwip][idx])
		continue;
		usage[hw_ip] = ns_to_ktime(ns);
		}

		centity = ctx->entities[hwip][idx];
		amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
		idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
		struct amdgpu_ctx_entity *centity;
		ktime_t spend;

		/* Harmonic mean approximation diverges for very small
		* values. If ratio < 0.01% ignore
		*/
		if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
		centity = ctx->entities[hw_ip][i];
		if (!centity)
		continue;

		total = ktime_add(total, ttotal);
		max = ktime_after(tmax, max) ? tmax : max;
		spend = amdgpu_ctx_entity_time(ctx, centity);
		usage[hw_ip] = ktime_add(usage[hw_ip], spend);
		}
		}
		}

		mutex_unlock(&mgr->lock);
		if (elapsed)
		*elapsed = max;

		return total;
		}

drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

+9 −3

Original line number	Diff line number	Diff line
		@@ -23,16 +23,20 @@
		#ifndef __AMDGPU_CTX_H__
		#define __AMDGPU_CTX_H__

		#include <linux/ktime.h>
		#include <linux/types.h>

		#include "amdgpu_ring.h"

		struct drm_device;
		struct drm_file;
		struct amdgpu_fpriv;
		struct amdgpu_ctx_mgr;

		#define AMDGPU_MAX_ENTITY_NUM 4
		#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))

		struct amdgpu_ctx_entity {
		uint32_t hw_ip;
		uint64_t sequence;
		struct drm_sched_entity entity;
		struct dma_fence *fences[];
		@@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr {
		struct mutex lock;
		/* protected by lock */
		struct idr ctx_handles;
		atomic64_t time_spend[AMDGPU_HW_IP_NUM];
		};

		extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
		@@ -90,6 +95,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
		void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
		long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
		void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
		ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
		uint32_t idx, uint64_t *elapsed);
		void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
		ktime_t usage[AMDGPU_HW_IP_NUM]);

		#endif

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

+3 −2

Original line number	Diff line number	Diff line
		@@ -99,10 +99,11 @@
		* - 3.43.0 - Add device hot plug/unplug support
		* - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
		* - 3.45.0 - Add context ioctl stable pstate interface
		* * 3.46.0 - To enable hot plug amdgpu tests in libdrm
		* - 3.46.0 - To enable hot plug amdgpu tests in libdrm
		* - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
		*/
		#define KMS_DRIVER_MAJOR 3
		#define KMS_DRIVER_MINOR 46
		#define KMS_DRIVER_MINOR 47
		#define KMS_DRIVER_PATCHLEVEL 0

		int amdgpu_vram_limit;

drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c

+30 −38

Original line number	Diff line number	Diff line
		@@ -32,6 +32,7 @@

		#include <drm/amdgpu_drm.h>
		#include <drm/drm_debugfs.h>
		#include <drm/drm_drv.h>

		#include "amdgpu.h"
		#include "amdgpu_vm.h"
		@@ -54,58 +55,49 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {

		void amdgpu_show_fdinfo(struct seq_file m, struct file f)
		{
		struct amdgpu_fpriv *fpriv;
		uint32_t bus, dev, fn, i, domain;
		uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
		struct drm_file *file = f->private_data;
		struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
		struct amdgpu_bo *root;
		struct amdgpu_fpriv *fpriv = file->driver_priv;
		struct amdgpu_vm *vm = &fpriv->vm;

		uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
		ktime_t usage[AMDGPU_HW_IP_NUM];
		uint32_t bus, dev, fn, domain;
		unsigned int hw_ip;
		int ret;

		ret = amdgpu_file_to_fpriv(f, &fpriv);
		if (ret)
		return;
		bus = adev->pdev->bus->number;
		domain = pci_domain_nr(adev->pdev->bus);
		dev = PCI_SLOT(adev->pdev->devfn);
		fn = PCI_FUNC(adev->pdev->devfn);

		root = amdgpu_bo_ref(fpriv->vm.root.bo);
		if (!root)
		ret = amdgpu_bo_reserve(vm->root.bo, false);
		if (ret)
		return;

		ret = amdgpu_bo_reserve(root, false);
		if (ret) {
		DRM_ERROR("Fail to reserve bo\n");
		return;
		}
		amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
		amdgpu_bo_unreserve(root);
		amdgpu_bo_unref(&root);
		amdgpu_vm_get_memory(vm, &vram_mem, &gtt_mem, &cpu_mem);
		amdgpu_bo_unreserve(vm->root.bo);

		seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
		dev, fn, fpriv->vm.pasid);
		seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
		seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
		seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
		for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		uint32_t count = amdgpu_ctx_num_entities[i];
		int idx = 0;
		uint64_t total = 0, min = 0;
		uint32_t perc, frac;
		amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);

		for (idx = 0; idx < count; idx++) {
		total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
		i, idx, &min);
		if ((total == 0) \|\| (min == 0))
		continue;
		/*
		* ******************************************************************
		* For text output format description please see drm-usage-stats.rst!
		* ******************************************************************
		*/

		perc = div64_u64(10000 * total, min);
		frac = perc % 100;
		seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
		seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
		seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
		seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context);
		seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
		seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL);
		seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL);
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		if (!usage[hw_ip])
		continue;

		seq_printf(m, "%s%d:\t%d.%d%%\n",
		amdgpu_ip_name[i],
		idx, perc/100, frac);
		}
		seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
		ktime_to_ns(usage[hw_ip]));
		}
		}

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

+6 −3

Original line number	Diff line number	Diff line
		@@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device dev, void data,
		AMDGPU_GEM_CREATE_VRAM_CLEARED \|
		AMDGPU_GEM_CREATE_VM_ALWAYS_VALID \|
		AMDGPU_GEM_CREATE_EXPLICIT_SYNC \|
		AMDGPU_GEM_CREATE_ENCRYPTED))

		AMDGPU_GEM_CREATE_ENCRYPTED \|
		AMDGPU_GEM_CREATE_DISCARDABLE))
		return -EINVAL;

		/* reject invalid gem domains */
		@@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
		pte_flag \|= AMDGPU_PTE_WRITEABLE;
		if (flags & AMDGPU_VM_PAGE_PRT)
		pte_flag \|= AMDGPU_PTE_PRT;
		if (flags & AMDGPU_VM_PAGE_NOALLOC)
		pte_flag \|= AMDGPU_PTE_NOALLOC;

		if (adev->gmc.gmc_funcs->map_mtype)
		pte_flag \|= amdgpu_gmc_map_mtype(adev,
		@@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device dev, void data,
		{
		const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE \|
		AMDGPU_VM_PAGE_READABLE \| AMDGPU_VM_PAGE_WRITEABLE \|
		AMDGPU_VM_PAGE_EXECUTABLE \| AMDGPU_VM_MTYPE_MASK;
		AMDGPU_VM_PAGE_EXECUTABLE \| AMDGPU_VM_MTYPE_MASK \|
		AMDGPU_VM_PAGE_NOALLOC;
		const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE \|
		AMDGPU_VM_PAGE_PRT;

Original line number	Diff line number	Diff line
		@@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device dev, void data,
		AMDGPU_GEM_CREATE_VRAM_CLEARED \|
		AMDGPU_GEM_CREATE_VM_ALWAYS_VALID \|
		AMDGPU_GEM_CREATE_EXPLICIT_SYNC \|
		AMDGPU_GEM_CREATE_ENCRYPTED))

		AMDGPU_GEM_CREATE_ENCRYPTED \|
		AMDGPU_GEM_CREATE_DISCARDABLE))
		return -EINVAL;

		/* reject invalid gem domains */
		@@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
		pte_flag \|= AMDGPU_PTE_WRITEABLE;
		if (flags & AMDGPU_VM_PAGE_PRT)
		pte_flag \|= AMDGPU_PTE_PRT;
		if (flags & AMDGPU_VM_PAGE_NOALLOC)
		pte_flag \|= AMDGPU_PTE_NOALLOC;

		if (adev->gmc.gmc_funcs->map_mtype)
		pte_flag \|= amdgpu_gmc_map_mtype(adev,
		@@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device dev, void data,
		{
		const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE \|
		AMDGPU_VM_PAGE_READABLE \| AMDGPU_VM_PAGE_WRITEABLE \|
		AMDGPU_VM_PAGE_EXECUTABLE \| AMDGPU_VM_MTYPE_MASK;
		AMDGPU_VM_PAGE_EXECUTABLE \| AMDGPU_VM_MTYPE_MASK \|
		AMDGPU_VM_PAGE_NOALLOC;
		const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE \|
		AMDGPU_VM_PAGE_PRT;