Commit 31f33243 authored by Eric Huang's avatar Eric Huang Committed by Alex Deucher
Browse files

drm/amdkfd: Make TLB flush conditional on mapping



This optimizes memory mapping latency and also avoids
a page fault in a corner case where a valid PDE is
changed into a PTE.

Signed-off-by: default avatarEric Huang <jinhuieric.huang@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 075e8080
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -269,7 +269,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
		uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
+11 −8
Original line number Diff line number Diff line
@@ -1070,7 +1070,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,

static int update_gpuvm_pte(struct kgd_mem *mem,
			    struct kfd_mem_attachment *entry,
			    struct amdgpu_sync *sync)
			    struct amdgpu_sync *sync,
			    bool *table_freed)
{
	struct amdgpu_bo_va *bo_va = entry->bo_va;
	struct amdgpu_device *adev = entry->adev;
@@ -1081,7 +1082,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
		return ret;

	/* Update the page tables  */
	ret = amdgpu_vm_bo_update(adev, bo_va, false);
	ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed);
	if (ret) {
		pr_err("amdgpu_vm_bo_update failed\n");
		return ret;
@@ -1093,7 +1094,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
static int map_bo_to_gpuvm(struct kgd_mem *mem,
			   struct kfd_mem_attachment *entry,
			   struct amdgpu_sync *sync,
			   bool no_update_pte)
			   bool no_update_pte,
			   bool *table_freed)
{
	int ret;

@@ -1110,7 +1112,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
	if (no_update_pte)
		return 0;

	ret = update_gpuvm_pte(mem, entry, sync);
	ret = update_gpuvm_pte(mem, entry, sync, table_freed);
	if (ret) {
		pr_err("update_gpuvm_pte() failed\n");
		goto update_gpuvm_pte_failed;
@@ -1608,7 +1610,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}

int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
		struct kgd_dev *kgd, struct kgd_mem *mem,
		void *drm_priv, bool *table_freed)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1696,7 +1699,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			 entry->va, entry->va + bo_size, entry);

		ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
				      is_invalid_userptr);
				      is_invalid_userptr, table_freed);
		if (ret) {
			pr_err("Failed to map bo to gpuvm\n");
			goto out_unreserve;
@@ -2146,7 +2149,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
				continue;

			kfd_mem_dmaunmap_attachment(mem, attachment);
			ret = update_gpuvm_pte(mem, attachment, &sync);
			ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
			if (ret) {
				pr_err("%s: update PTE failed\n", __func__);
				/* make sure this gets validated again */
@@ -2352,7 +2355,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
				continue;

			kfd_mem_dmaunmap_attachment(mem, attachment);
			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
			ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
			if (ret) {
				pr_debug("Memory eviction: update PTE failed. Try again\n");
				goto validate_map_fail;
+13 −10
Original line number Diff line number Diff line
@@ -1393,6 +1393,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;
	bool table_freed = false;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
@@ -1450,7 +1451,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
			peer->kgd, (struct kgd_mem *)mem,
			peer_pdd->drm_priv, &table_freed);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
@@ -1468,6 +1470,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	if (table_freed) {
		for (i = 0; i < args->n_devices; i++) {
			peer = kfd_device_by_id(devices_arr[i]);
			if (WARN_ON_ONCE(!peer))
@@ -1477,7 +1480,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
				continue;
			kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
		}

	}
	kfree(devices_arr);

	return err;
+2 −1
Original line number Diff line number Diff line
@@ -672,7 +672,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
			pdd->drm_priv, NULL);
	if (err)
		goto err_map_mem;