Commit dd03daec authored by Christian König, committed by Alex Deucher
Browse files

drm/amdgpu: restructure amdgpu_vram_mgr_new



Merge the two loops, loosen the restriction for big allocations.
This reduces the CPU overhead in the good case, but increases
it a bit under memory pressure.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-and-Tested-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c0f76fc8
Loading
Loading
Loading
Loading
+27 −31
Original line number | Diff line number | Diff line
@@ -358,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
			       const struct ttm_place *place,
			       struct ttm_resource *mem)
{
	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
	struct amdgpu_device *adev = to_amdgpu_device(mgr);
	uint64_t vis_usage = 0, mem_bytes, max_bytes;
	struct drm_mm *mm = &mgr->mm;
	struct drm_mm_node *nodes;
	enum drm_mm_insert_mode mode;
	unsigned long lpfn, num_nodes, pages_per_node, pages_left;
	uint64_t vis_usage = 0, mem_bytes, max_bytes;
	struct drm_mm_node *nodes;
	unsigned i;
	int r;

@@ -391,9 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
		pages_per_node = HPAGE_PMD_NR;
#else
		/* default to 2MB */
		pages_per_node = (2UL << (20UL - PAGE_SHIFT));
		pages_per_node = 2UL << (20UL - PAGE_SHIFT);
#endif
		pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment);
		pages_per_node = max_t(uint32_t, pages_per_node,
				       mem->page_alignment);
		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
	}

@@ -411,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
	mem->start = 0;
	pages_left = mem->num_pages;

	spin_lock(&mgr->lock);
	for (i = 0; pages_left >= pages_per_node; ++i) {
		unsigned long pages = rounddown_pow_of_two(pages_left);

	/* Limit maximum size to 2GB due to SG table limitations */
		pages = min(pages, (2UL << (30 - PAGE_SHIFT)));

		r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
						pages_per_node, 0,
						place->fpfn, lpfn,
						mode);
		if (unlikely(r))
			break;

		vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
		amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
		pages_left -= pages;
	}
	pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));

	for (; pages_left; ++i) {
		unsigned long pages = min(pages_left, pages_per_node);
	i = 0;
	spin_lock(&mgr->lock);
	while (pages_left) {
		uint32_t alignment = mem->page_alignment;

		if (pages == pages_per_node)
		if (pages >= pages_per_node)
			alignment = pages_per_node;

		r = drm_mm_insert_node_in_range(mm, &nodes[i],
						pages, alignment, 0,
						place->fpfn, lpfn,
						mode);
		if (unlikely(r))
		r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment,
						0, place->fpfn, lpfn, mode);
		if (unlikely(r)) {
			if (pages > pages_per_node) {
				if (is_power_of_2(pages))
					pages = pages / 2;
				else
					pages = rounddown_pow_of_two(pages);
				continue;
			}
			goto error;
		}

		vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
		amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
		pages_left -= pages;
		++i;

		if (pages > pages_left)
			pages = pages_left;
	}
	spin_unlock(&mgr->lock);