Commit bf23729c authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'amd-drm-next-5.19-2022-05-26-2' of...

Merge tag 'amd-drm-next-5.19-2022-05-26-2' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-5.19-2022-05-26-2:

amdgpu:
- Update fdinfo to the common drm format

UAPI:
- Add VM_NOALLOC GPUVM attribute to prevent buffers for going into the MALL
  Add AMDGPU_GEM_CREATE_DISCARDABLE flag to create buffers that can be discarded on eviction
  Mesa code which uses these: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466



Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220526202546.66860-1-alexander.deucher@amd.com
parents e573a3cd 9bdc1992
Loading
Loading
Loading
Loading
+91 −86
Original line number Diff line number Diff line
@@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
	return hw_prio;
}

/* Calculate the time spend on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence is not even scheduled it can't have spend time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running account how much already spend */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}

static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	}
	spin_unlock(&ctx->ring_lock);
	return res;
}

static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	unsigned num_scheds = 0;
	int32_t ctx_prio;
	unsigned int hw_prio;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
@@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
@@ -220,6 +254,23 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
	return r;
}

static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	kfree(entity);
	return res;
}

static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
@@ -246,20 +297,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
	return 0;
}

static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{

	int i;

	if (!entity)
		return;

	for (i = 0; i < amdgpu_sched_jobs; ++i)
		dma_fence_put(entity->fences[i]);

	kfree(entity);
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
@@ -351,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref)

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
			ctx->entities[i][j] = NULL;
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

@@ -689,6 +728,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}
@@ -795,9 +837,14 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}

long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
@@ -873,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
	mutex_destroy(&mgr->lock);
}

static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
		struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	ktime_t now, t1;
	uint32_t i;

	*total = *max = 0;

	now = ktime_get();
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		struct dma_fence *fence;
		struct drm_sched_fence *s_fence;

		spin_lock(&ctx->ring_lock);
		fence = dma_fence_get(centity->fences[i]);
		spin_unlock(&ctx->ring_lock);
		if (!fence)
			continue;
		s_fence = to_drm_sched_fence(fence);
		if (!dma_fence_is_signaled(&s_fence->scheduled)) {
			dma_fence_put(fence);
			continue;
		}
		t1 = s_fence->scheduled.timestamp;
		if (!ktime_before(t1, now)) {
			dma_fence_put(fence);
			continue;
		}
		if (dma_fence_is_signaled(&s_fence->finished) &&
			s_fence->finished.timestamp < now)
			*total += ktime_sub(s_fence->finished.timestamp, t1);
		else
			*total += ktime_sub(now, t1);
		t1 = ktime_sub(now, t1);
		dma_fence_put(fence);
		*max = max(t1, *max);
	}
}

ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
		uint32_t idx, uint64_t *elapsed)
{
	struct idr *idp;
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;
	struct amdgpu_ctx_entity *centity;
	ktime_t total = 0, max = 0;

	if (idx >= AMDGPU_MAX_ENTITY_NUM)
		return 0;
	idp = &mgr->ctx_handles;
	/*
	 * This is a little bit racy because it can be that a ctx or a fence are
	 * destroyed just in the moment we try to account them. But that is ok
	 * since exactly that case is explicitely allowed by the interface.
	 */
	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		ktime_t ttotal, tmax;
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		if (!ctx->entities[hwip][idx])
			continue;
		usage[hw_ip] = ns_to_ktime(ns);
	}

		centity = ctx->entities[hwip][idx];
		amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

		/* Harmonic mean approximation diverges for very small
		 * values. If ratio < 0.01% ignore
		 */
		if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;

		total = ktime_add(total, ttotal);
		max = ktime_after(tmax, max) ? tmax : max;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}

	mutex_unlock(&mgr->lock);
	if (elapsed)
		*elapsed = max;

	return total;
}
+9 −3
Original line number Diff line number Diff line
@@ -23,16 +23,20 @@
#ifndef __AMDGPU_CTX_H__
#define __AMDGPU_CTX_H__

#include <linux/ktime.h>
#include <linux/types.h>

#include "amdgpu_ring.h"

struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
struct amdgpu_ctx_mgr;

#define AMDGPU_MAX_ENTITY_NUM 4
#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))

struct amdgpu_ctx_entity {
	uint32_t		hw_ip;
	uint64_t		sequence;
	struct drm_sched_entity	entity;
	struct dma_fence	*fences[];
@@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr {
	struct mutex		lock;
	/* protected by lock */
	struct idr		ctx_handles;
	atomic64_t		time_spend[AMDGPU_HW_IP_NUM];
};

extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
@@ -90,6 +95,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
		uint32_t idx, uint64_t *elapsed);
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM]);

#endif
+3 −2
Original line number Diff line number Diff line
@@ -99,10 +99,11 @@
 * - 3.43.0 - Add device hot plug/unplug support
 * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
 * - 3.45.0 - Add context ioctl stable pstate interface
 * * 3.46.0 - To enable hot plug amdgpu tests in libdrm
 * - 3.46.0 - To enable hot plug amdgpu tests in libdrm
 * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
 */
#define KMS_DRIVER_MAJOR	3
#define KMS_DRIVER_MINOR	46
#define KMS_DRIVER_MINOR	47
#define KMS_DRIVER_PATCHLEVEL	0

int amdgpu_vram_limit;
+30 −38
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@

#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vm.h"
@@ -54,58 +55,49 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {

void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct amdgpu_fpriv *fpriv;
	uint32_t bus, dev, fn, i, domain;
	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
	struct drm_file *file = f->private_data;
	struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
	struct amdgpu_bo *root;
	struct amdgpu_fpriv *fpriv = file->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;

	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
	ktime_t usage[AMDGPU_HW_IP_NUM];
	uint32_t bus, dev, fn, domain;
	unsigned int hw_ip;
	int ret;

	ret = amdgpu_file_to_fpriv(f, &fpriv);
	if (ret)
		return;
	bus = adev->pdev->bus->number;
	domain = pci_domain_nr(adev->pdev->bus);
	dev = PCI_SLOT(adev->pdev->devfn);
	fn = PCI_FUNC(adev->pdev->devfn);

	root = amdgpu_bo_ref(fpriv->vm.root.bo);
	if (!root)
	ret = amdgpu_bo_reserve(vm->root.bo, false);
	if (ret)
		return;

	ret = amdgpu_bo_reserve(root, false);
	if (ret) {
		DRM_ERROR("Fail to reserve bo\n");
		return;
	}
	amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
	amdgpu_bo_unreserve(root);
	amdgpu_bo_unref(&root);
	amdgpu_vm_get_memory(vm, &vram_mem, &gtt_mem, &cpu_mem);
	amdgpu_bo_unreserve(vm->root.bo);

	seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
			dev, fn, fpriv->vm.pasid);
	seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
	seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
	seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		uint32_t count = amdgpu_ctx_num_entities[i];
		int idx = 0;
		uint64_t total = 0, min = 0;
		uint32_t perc, frac;
	amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);

		for (idx = 0; idx < count; idx++) {
			total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
				i, idx, &min);
			if ((total == 0) || (min == 0))
				continue;
	/*
	 * ******************************************************************
	 * For text output format description please see drm-usage-stats.rst!
	 * ******************************************************************
	 */

			perc = div64_u64(10000 * total, min);
			frac = perc % 100;
	seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
	seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
	seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
	seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context);
	seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
	seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL);
	seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		if (!usage[hw_ip])
			continue;

			seq_printf(m, "%s%d:\t%d.%d%%\n",
					amdgpu_ip_name[i],
					idx, perc/100, frac);
		}
		seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
			   ktime_to_ns(usage[hw_ip]));
	}
}
+6 −3
Original line number Diff line number Diff line
@@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
		      AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
		      AMDGPU_GEM_CREATE_ENCRYPTED))

		      AMDGPU_GEM_CREATE_ENCRYPTED |
		      AMDGPU_GEM_CREATE_DISCARDABLE))
		return -EINVAL;

	/* reject invalid gem domains */
@@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
		pte_flag |= AMDGPU_PTE_WRITEABLE;
	if (flags & AMDGPU_VM_PAGE_PRT)
		pte_flag |= AMDGPU_PTE_PRT;
	if (flags & AMDGPU_VM_PAGE_NOALLOC)
		pte_flag |= AMDGPU_PTE_NOALLOC;

	if (adev->gmc.gmc_funcs->map_mtype)
		pte_flag |= amdgpu_gmc_map_mtype(adev,
@@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
{
	const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
		AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
		AMDGPU_VM_PAGE_NOALLOC;
	const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
		AMDGPU_VM_PAGE_PRT;

Loading