Commit a04b7cd9 authored by Ofir Bitton's avatar Ofir Bitton Committed by Oded Gabbay
Browse files

habanalabs: create internal CB pool



Create a device MMU-mapped internal command buffer pool, in order to allow
the driver to allocate CBs for the signal/wait operations
that are fetched by the queues when they are configured with the user's
address space ID.

We must pre-map this internal pool due to performance issues.

This pool is needed for future ASIC support and it is currently unused in
GOYA and GAUDI.

Signed-off-by: default avatarOfir Bitton <obitton@habana.ai>
Reviewed-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent eb8b293e
Loading
Loading
Loading
Loading
+52 −30
Original line number Diff line number Diff line
@@ -10,12 +10,18 @@

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/genalloc.h>

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				cb->kernel_address, cb->size);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
				(void *) (uintptr_t) cb->kernel_address,
				cb->bus_address);

	kfree(cb);
}

@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
}

static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id)
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb;
	u32 cb_offset;
	void *p;

	/*
@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
	if (!cb)
		return NULL;

	if (ctx_id == HL_KERNEL_ASID_ID)
	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address =  hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address, GFP_ATOMIC);
	else
	} else {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d of dma memory for CB\n",
@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
}

int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
			u32 cb_size, u64 *handle, int ctx_id)
			u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
{
	struct hl_cb *cb;
	bool alloc_new_cb = true;
@@ -112,6 +131,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
		goto out_err;
	}

	if (!internal_cb) {
		/* Minimum allocation must be PAGE SIZE */
		if (cb_size < PAGE_SIZE)
			cb_size = PAGE_SIZE;
@@ -121,8 +141,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,

			spin_lock(&hdev->cb_pool_lock);
			if (!list_empty(&hdev->cb_pool)) {
			cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
					pool_list);
				cb = list_first_entry(&hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&hdev->cb_pool_lock);
				alloc_new_cb = false;
@@ -131,9 +151,10 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
				dev_dbg(hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(hdev, cb_size, ctx_id);
		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
		if (!cb) {
			rc = -ENOMEM;
			goto out_err;
@@ -230,7 +251,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
		} else {
			rc = hl_cb_create(hdev, &hpriv->cb_mgr,
					args->in.cb_size, &handle,
						hpriv->ctx->asid);
					hpriv->ctx->asid, false);
		}

		memset(args, 0, sizeof(*args));
@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
	idr_destroy(&mgr->cb_handles);
}

struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
			HL_KERNEL_ASID_ID);
			HL_KERNEL_ASID_ID, internal_cb);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID);
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
+7 −6
Original line number Diff line number Diff line
@@ -919,7 +919,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		goto put_cs;
	}

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	cb = hl_cb_kernel_create(hdev, cb_size,
				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
	if (!cb) {
		ctx->cs_counters.out_of_mem_drop_cnt++;
		kfree(job);
@@ -927,11 +933,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		goto put_cs;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
+8 −0
Original line number Diff line number Diff line
@@ -153,10 +153,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
			rc = -ENOMEM;
			goto mem_ctx_err;
		}

		rc = hdev->asic_funcs->ctx_init(ctx);
		if (rc) {
			dev_err(hdev->dev, "ctx_init failed\n");
			goto ctx_init_err;
		}
	}

	return 0;

ctx_init_err:
	hl_vm_ctx_fini(ctx);
mem_ctx_err:
	if (ctx->asid != HL_KERNEL_ASID_ID)
		hl_asid_free(hdev, ctx->asid);
+16 −2
Original line number Diff line number Diff line
@@ -392,6 +392,7 @@ struct hl_cb_mgr {
 * @ctx_id: holds the ID of the owner's context.
 * @mmap: true if the CB is currently mmaped to user.
 * @is_pool: true if CB was acquired from the pool, false otherwise.
 * @is_internal: internaly allocated
 */
struct hl_cb {
	struct kref		refcount;
@@ -408,6 +409,7 @@ struct hl_cb {
	u32			ctx_id;
	u8			mmap;
	u8			is_pool;
	u8			is_internal;
};


@@ -643,6 +645,7 @@ enum div_select_defs {
 * @rreg: Read a register. Needed for simulator support.
 * @wreg: Write a register. Needed for simulator support.
 * @halt_coresight: stop the ETF and ETR traces.
 * @ctx_init: context dependent initialization.
 * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
 * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
 * @read_device_fw_version: read the device's firmware versions that are
@@ -745,6 +748,7 @@ struct hl_asic_funcs {
	u32 (*rreg)(struct hl_device *hdev, u32 reg);
	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
	void (*halt_coresight)(struct hl_device *hdev);
	int (*ctx_init)(struct hl_ctx *ctx);
	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
	void (*read_device_fw_version)(struct hl_device *hdev,
@@ -1432,6 +1436,10 @@ struct hl_device_idle_busy_ts {
 * @hl_debugfs: device's debugfs manager.
 * @cb_pool: list of preallocated CBs.
 * @cb_pool_lock: protects the CB pool.
 * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
 * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
 * @internal_cb_pool: internal command buffer memory pool.
 * @internal_cb_va_base: internal cb pool mmu virtual address base
 * @fpriv_list: list of file private data structures. Each structure is created
 *              when a user opens the device
 * @fpriv_list_lock: protects the fpriv_list
@@ -1531,6 +1539,11 @@ struct hl_device {
	struct list_head		cb_pool;
	spinlock_t			cb_pool_lock;

	void				*internal_cb_pool_virt_addr;
	dma_addr_t			internal_cb_pool_dma_addr;
	struct gen_pool			*internal_cb_pool;
	u64				internal_cb_va_base;

	struct list_head		fpriv_list;
	struct mutex			fpriv_list_lock;

@@ -1741,7 +1754,7 @@ int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);

int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
		u64 *handle, int ctx_id);
		u64 *handle, int ctx_id, bool internal_cb);
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
@@ -1749,7 +1762,8 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
void hl_cb_put(struct hl_cb *cb);
void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb);
int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev);

+12 −8
Original line number Diff line number Diff line
@@ -635,7 +635,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

@@ -4048,9 +4048,8 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
				parser->patched_cb_size,
				&patched_cb_handle, HL_KERNEL_ASID_ID);
	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
			&patched_cb_handle, HL_KERNEL_ASID_ID, false);

	if (rc) {
		dev_err(hdev->dev,
@@ -4122,9 +4121,8 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
				parser->patched_cb_size,
				&patched_cb_handle, HL_KERNEL_ASID_ID);
	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -4257,7 +4255,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

@@ -6229,6 +6227,11 @@ static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
	return RREG32(mmHW_STATE);
}

int gaudi_ctx_init(struct hl_ctx *ctx)
{
	return 0;
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
@@ -6532,6 +6535,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
Loading