Commit dadf17ab authored by farah kassabri's avatar farah kassabri Committed by Oded Gabbay
Browse files

habanalabs: add support for encapsulated signals reservation



The capability to signal from within an encapsulated OP is merged into
the existing stream architecture, so that an encapsulated OP can trigger
multiple signals, each at the point in the graph execution where its
event completes. This avoids the need to wait for the whole encapsulated
OP to finish executing before the stream can signal.

This commit implements only the reserve/unreserve part.

Signed-off-by: default avatarfarah kassabri <fkassabri@habana.ai>
Reviewed-by: default avatarOded Gabbay <ogabbay@kernel.org>
Signed-off-by: default avatarOded Gabbay <ogabbay@kernel.org>
parent 8ca2072e
Loading
Loading
Loading
Loading
+246 −9
Original line number Diff line number Diff line
@@ -38,7 +38,11 @@ static void hl_sob_reset(struct kref *ref)
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);

	hdev->asic_funcs->reset_sob(hdev, hw_sob);

	hw_sob->need_reset = false;
}

void hl_sob_reset_error(struct kref *ref)
@@ -52,7 +56,7 @@ void hl_sob_reset_error(struct kref *ref)
		hw_sob->q_idx, hw_sob->sob_id);
}

static void hw_sob_put(struct hl_hw_sob *hw_sob)
void hw_sob_put(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset);
@@ -64,7 +68,7 @@ static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset_error);
}

static void hw_sob_get(struct hl_hw_sob *hw_sob)
void hw_sob_get(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_get(&hw_sob->kref);
@@ -576,7 +580,8 @@ static inline void cs_release_sob_reset_handler(struct hl_device *hdev,

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
			(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
			(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
			(!!hl_cs_cmpl->encaps_signals)) {
		dev_dbg(hdev->dev,
				"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
				hl_cs_cmpl->cs_seq,
@@ -829,6 +834,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	cs_cmpl->encaps_signals = false;
	spin_lock_init(&cs_cmpl->lock);
	INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
	cs->fence = &cs_cmpl->base_fence;
@@ -1115,6 +1121,10 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
		return CS_TYPE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
		return CS_TYPE_COLLECTIVE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
		return CS_RESERVE_SIGNALS;
	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
		return CS_UNRESERVE_SIGNALS;
	else
		return CS_TYPE_DEFAULT;
}
@@ -1652,10 +1662,17 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 * hl_cs_signal_sob_wraparound_handler: handle SOB value wraparound case.
 * if the SOB value reaches the max value move to the other SOB reserved
 * to the queue.
 * @hdev: pointer to device structure
 * @q_idx: stream queue index
 * @hw_sob: the H/W SOB used in this signal CS.
 * @count: signals count
 * @encaps_sig: tells whether it's reservation for encaps signals or not.
 *
 * Note that this function must be called while hw_queues_lock is taken.
 */
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			struct hl_hw_sob **hw_sob, u32 count)
			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)

{
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *sob = *hw_sob, *other_sob;
@@ -1688,12 +1705,34 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			return -EINVAL;
		}

		prop->next_sob_val = 1;
		prop->next_sob_val = count;

		/* only two SOBs are currently in use */
		prop->curr_sob_offset = other_sob_offset;
		*hw_sob = other_sob;

		/*
		 * check if other_sob needs reset, then do it before using it
		 * for the reservation or the next signal cs.
		 * we do it here, and for both encaps and regular signal cs
		 * cases in order to avoid possible races of two kref_put
		 * of the sob which can occur at the same time if we move the
		 * sob reset(kref_put) to cs_do_release function.
		 * in addition, if we have combination of cs signal and
		 * encaps, and at the point we need to reset the sob there was
		 * no more reservations and only signal cs keep coming,
		 * in such case we need to signal_cs to put the refcount and
		 * reset the sob.
		 */
		if (other_sob->need_reset)
			kref_put(&other_sob->kref, hl_sob_reset);

		if (encaps_sig) {
			/* set reset indication for the sob */
			sob->need_reset = true;
			hw_sob_get(other_sob);
		}

		dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
				prop->curr_sob_offset, q_idx);
	} else {
@@ -1817,6 +1856,187 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
	return 0;
}

/*
 * cs_ioctl_reserve_signals - reserve a number of signals on a stream queue.
 * @hpriv: private data of the calling process.
 * @q_idx: stream queue index on which the signals are reserved.
 * @count: number of signals to reserve, must be below HL_MAX_SOB_VAL.
 * @handle_id: output, idr id of the newly created reservation handle.
 * @sob_addr: output, sob_addr of the SOB that backs the reservation.
 * @signals_count: output, the queue's next_sob_val after the reservation.
 *
 * Allocates an encaps signals handle, registers it in the context's
 * signals manager idr and advances the queue's SOB value by @count through
 * hl_cs_signal_sob_wraparound_handler(). The output parameters are written
 * only on success.
 *
 * Return: 0 on success, -EINVAL on invalid arguments, idr allocation failure
 * or SOB switch failure, -ENOMEM if the handle could not be allocated.
 */
static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
				u32 q_idx, u32 count,
				u32 *handle_id, u32 *sob_addr,
				u32 *signals_count)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_sync_stream_properties *prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_encaps_sig_handle *handle;
	struct hl_encaps_signals_mgr *mgr;
	struct hl_hw_sob *hw_sob;
	int hdl_id;
	int rc = 0;

	if (count >= HL_MAX_SOB_VAL) {
		dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
						count);
		rc = -EINVAL;
		goto out;
	}

	if (q_idx >= hdev->asic_prop.max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			q_idx);
		rc = -EINVAL;
		goto out;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];

	if (!hw_queue_prop->supports_sync_stream) {
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
									q_idx);
		rc = -EINVAL;
		goto out;
	}

	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle) {
		rc = -ENOMEM;
		goto out;
	}

	/*
	 * Fill in everything that is already known before the handle becomes
	 * visible through the idr.
	 * NOTE(review): hw_sob and pre_sob_val are still assigned only after
	 * the idr insertion below, so a concurrent lookup of this id can
	 * observe a handle with hw_sob == NULL - confirm whether that window
	 * needs closing.
	 */
	handle->count = count;
	handle->q_idx = q_idx;
	handle->hdev = hdev;
	kref_init(&handle->refcount);

	mgr = &hpriv->ctx->sig_mgr;

	/*
	 * mgr->lock is a spinlock, so the idr allocation must not sleep:
	 * use GFP_ATOMIC rather than GFP_KERNEL.
	 */
	spin_lock(&mgr->lock);
	hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
	spin_unlock(&mgr->lock);

	if (hdl_id < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
		rc = -EINVAL;
		/* nothing was inserted into the idr, only free the handle */
		goto free_handle;
	}

	handle->id = hdl_id;

	hdev->asic_funcs->hw_queues_lock(hdev);

	hw_sob = &prop->hw_sob[prop->curr_sob_offset];

	/*
	 * Increment the SOB value by count by user request
	 * to reserve those signals
	 * check if the signals amount to reserve is not exceeding the max sob
	 * value, if yes then switch sob.
	 */
	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
						true);
	if (rc) {
		dev_err(hdev->dev, "Failed to switch SOB\n");
		hdev->asic_funcs->hw_queues_unlock(hdev);
		rc = -EINVAL;
		goto remove_idr;
	}

	/* set the hw_sob to the handle after calling the sob wraparound handler
	 * since sob could have changed.
	 */
	handle->hw_sob = hw_sob;

	/* store the current sob value for unreserve validity check, and
	 * signal offset support
	 */
	handle->pre_sob_val = prop->next_sob_val - handle->count;

	*signals_count = prop->next_sob_val;
	hdev->asic_funcs->hw_queues_unlock(hdev);

	*sob_addr = handle->hw_sob->sob_addr;
	*handle_id = hdl_id;

	dev_dbg(hdev->dev,
		"Signals reserved, sob_id: %d, sob addr: 0x%x, sob val: 0x%x, q_idx: %d, hdl_id: %d\n",
			hw_sob->sob_id, handle->hw_sob->sob_addr,
			prop->next_sob_val, q_idx, hdl_id);
	goto out;

remove_idr:
	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, hdl_id);
	spin_unlock(&mgr->lock);

free_handle:
	kfree(handle);
out:
	return rc;
}

/*
 * cs_ioctl_unreserve_signals - undo a previous signals reservation.
 * @hpriv: private data of the calling process.
 * @handle_id: idr id of the reservation handle to release.
 *
 * Looks the handle up in the context's signals manager and, if the queue's
 * SOB state still matches what was recorded in the handle, rolls
 * next_sob_val back by the reserved count, drops a reference on the current
 * SOB and frees the handle. The whole operation runs under mgr->lock; the
 * SOB bookkeeping additionally runs under the ASIC hw queues lock.
 *
 * Return: 0 on success, -EINVAL if the handle is unknown or the SOB state
 * no longer matches the reservation.
 */
static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_encaps_signals_mgr *mgr = &hpriv->ctx->sig_mgr;
	struct hl_cs_encaps_sig_handle *sig_hdl;
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *curr_sob;
	u32 curr_sob_addr;
	int rc = 0;

	spin_lock(&mgr->lock);

	sig_hdl = idr_find(&mgr->handles, handle_id);
	if (!sig_hdl) {
		rc = -EINVAL;
		dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
		goto out;
	}

	dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
			handle_id, sig_hdl->hw_sob->sob_addr,
				sig_hdl->count);

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* Snapshot the SOB the queue is using right now */
	prop = &hdev->kernel_queues[sig_hdl->q_idx].sync_stream_prop;
	curr_sob = &prop->hw_sob[prop->curr_sob_offset];
	curr_sob_addr = hdev->asic_funcs->get_sob_addr(hdev, curr_sob->sob_id);

	/* The reservation can only be rolled back if nothing moved the SOB
	 * value since it was made: no other signal submissions were handled
	 * in between, and the queue did not switch to the other SOB upon
	 * reaching the SOB max value.
	 */
	if (sig_hdl->pre_sob_val + sig_hdl->count != prop->next_sob_val ||
			curr_sob_addr != sig_hdl->hw_sob->sob_addr) {
		dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
			sig_hdl->pre_sob_val,
			(prop->next_sob_val - sig_hdl->count));

		hdev->asic_funcs->hw_queues_unlock(hdev);
		rc = -EINVAL;
		goto out;
	}

	/* Give the reserved signals back by rewinding the SOB value */
	prop->next_sob_val -= sig_hdl->count;

	hdev->asic_funcs->hw_queues_unlock(hdev);

	hw_sob_put(curr_sob);

	/* Drop the id from the idr and free the handle memory */
	idr_remove(&mgr->handles, handle_id);
	kfree(sig_hdl);

out:
	spin_unlock(&mgr->lock);

	return rc;
}

static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, u32 flags, u32 timeout)
@@ -1996,10 +2216,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cs_args *args = data;
	enum hl_cs_type cs_type;
	enum hl_cs_type cs_type = 0;
	u64 cs_seq = ULONG_MAX;
	void __user *chunks;
	u32 num_chunks, flags, timeout;
	u32 num_chunks, flags, timeout,
		signals_count = 0, sob_addr = 0, handle_id = 0;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
@@ -2036,18 +2257,34 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
					&cs_seq, args->in.cs_flags, timeout);
		break;
	case CS_RESERVE_SIGNALS:
		rc = cs_ioctl_reserve_signals(hpriv,
					args->in.encaps_signals_q_idx,
					args->in.encaps_signals_count,
					&handle_id, &sob_addr, &signals_count);
		break;
	case CS_UNRESERVE_SIGNALS:
		rc = cs_ioctl_unreserve_signals(hpriv,
					args->in.encaps_sig_handle_id);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
						args->in.cs_flags, timeout);
		break;
	}

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;

		if (cs_type == CS_RESERVE_SIGNALS) {
			args->out.handle_id = handle_id;
			args->out.sob_base_addr_offset = sob_addr;
			args->out.count = signals_count;
		} else {
			args->out.seq = cs_seq;
		}
		args->out.status = rc;
	}

	return rc;
}
+56 −0
Original line number Diff line number Diff line
@@ -9,6 +9,59 @@

#include <linux/slab.h>

/*
 * hl_encaps_handle_do_release - kref release callback of an encaps signals
 * handle.
 * @ref: the refcount member embedded in the handle being released.
 *
 * Removes the handle's id from the signals manager of the device's compute
 * context and frees the handle.
 */
void hl_encaps_handle_do_release(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *sig_hdl;
	struct hl_encaps_signals_mgr *sig_mgr;

	sig_hdl = container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
	sig_mgr = &sig_hdl->hdev->compute_ctx->sig_mgr;

	idr_remove(&sig_mgr->handles, sig_hdl->id);
	kfree(sig_hdl);
}

/*
 * hl_encaps_handle_do_release_sob - kref release callback of an encaps
 * signals handle which also drops the handle's H/W SOB reference.
 * @ref: the refcount member embedded in the handle being released.
 */
static void hl_encaps_handle_do_release_sob(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *sig_hdl;
	struct hl_encaps_signals_mgr *sig_mgr;

	sig_hdl = container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
	sig_mgr = &sig_hdl->hdev->compute_ctx->sig_mgr;

	/* Reaching this point means signals were reserved but no cs with
	 * encaps signals was ever submitted for them, so the hw_sob refcount
	 * taken at reservation time has to be put here.
	 */
	hw_sob_put(sig_hdl->hw_sob);

	idr_remove(&sig_mgr->handles, sig_hdl->id);
	kfree(sig_hdl);
}

/*
 * hl_encaps_sig_mgr_init - prepare an encaps signals manager for use.
 * @mgr: the manager to initialize.
 *
 * Sets up an empty handles idr and the spinlock that guards it.
 */
static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
{
	idr_init(&mgr->handles);
	spin_lock_init(&mgr->lock);
}

/*
 * hl_encaps_sig_mgr_fini - tear down an encaps signals manager.
 * @hdev: pointer to device structure, used for logging.
 * @mgr: the manager to tear down.
 *
 * If handles are still registered, a warning is printed and a reference is
 * put on each of them using the release callback that also drops the
 * handle's H/W SOB reference. The idr itself is destroyed in any case.
 */
static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
			struct hl_encaps_signals_mgr *mgr)
{
	struct idr *handles = &mgr->handles;
	struct hl_cs_encaps_sig_handle *entry;
	u32 idx;

	if (idr_is_empty(handles))
		goto destroy;

	dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");

	idr_for_each_entry(handles, entry, idx)
		kref_put(&entry->refcount, hl_encaps_handle_do_release_sob);

destroy:
	idr_destroy(handles);
}

static void hl_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
@@ -53,6 +106,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
		hl_cb_va_pool_fini(ctx);
		hl_vm_ctx_fini(ctx);
		hl_asid_free(hdev, ctx->asid);
		hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);

		/* Scrub both SRAM and DRAM */
		hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
@@ -200,6 +254,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
			goto err_cb_va_pool_fini;
		}

		hl_encaps_sig_mgr_init(&ctx->sig_mgr);

		dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
	}

+52 −3
Original line number Diff line number Diff line
@@ -242,7 +242,9 @@ enum hl_cs_type {
	CS_TYPE_DEFAULT,
	CS_TYPE_SIGNAL,
	CS_TYPE_WAIT,
	CS_TYPE_COLLECTIVE_WAIT
	CS_TYPE_COLLECTIVE_WAIT,
	CS_RESERVE_SIGNALS,
	CS_UNRESERVE_SIGNALS
};

/*
@@ -287,13 +289,17 @@ enum queue_cb_alloc_flags {
 * @hdev: habanalabs device structure.
 * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
 * @sob_id: id of this SOB.
 * @sob_addr: the sob offset from the base address.
 * @q_idx: the H/W queue that uses this SOB.
 * @need_reset: reset indication set when switching to the other sob.
 */
struct hl_hw_sob {
	struct hl_device	*hdev;
	struct kref		kref;
	u32			sob_id;
	u32			sob_addr;
	u32			q_idx;
	bool			need_reset;
};

enum hl_collective_mode {
@@ -608,6 +614,8 @@ struct hl_fence {
 * @type: type of the CS - signal/wait.
 * @sob_val: the SOB value that is used in this signal/wait CS.
 * @sob_group: the SOB group that is used in this collective wait CS.
 * @encaps_signals: indication whether it's a completion object of cs with
 * encaps signals or not.
 */
struct hl_cs_compl {
	struct work_struct	sob_reset_work;
@@ -619,6 +627,7 @@ struct hl_cs_compl {
	enum hl_cs_type		type;
	u16			sob_val;
	u16			sob_group;
	bool			encaps_signals;
};

/*
@@ -730,6 +739,17 @@ struct hl_sync_stream_properties {
	u8		curr_sob_offset;
};

/**
 * struct hl_encaps_signals_mgr - manager of the sync stream encapsulated
 * signals handles.
 * @lock: spinlock that protects @handles.
 * @handles: an idr that maps a handle id to its encapsulated signals handle
 *           (struct hl_cs_encaps_sig_handle).
 */
struct hl_encaps_signals_mgr {
	spinlock_t		lock;
	struct idr		handles;
};

/**
 * struct hl_hw_queue - describes a H/W transport queue.
 * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
@@ -1135,6 +1155,7 @@ struct fw_load_mgr {
 * @init_firmware_loader: initialize data for FW loader.
 * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
 * @state_dump_init: initialize constants required for state dump
 * @get_sob_addr: get SOB base address offset.
 */
struct hl_asic_funcs {
	int (*early_init)(struct hl_device *hdev);
@@ -1261,6 +1282,7 @@ struct hl_asic_funcs {
	void (*init_firmware_loader)(struct hl_device *hdev);
	void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
	void (*state_dump_init)(struct hl_device *hdev);
	u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
};


@@ -1353,6 +1375,7 @@ struct hl_pending_cb {
 * @cs_counters: context command submission counters.
 * @cb_va_pool: device VA pool for command buffers which are mapped to the
 *              device's MMU.
 * @sig_mgr: encaps signals handle manager.
 * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
 *			to user so user could inquire about CS. It is used as
 *			index to cs_pending array.
@@ -1392,6 +1415,7 @@ struct hl_ctx {
	struct list_head		hw_block_mem_list;
	struct hl_cs_counters_atomic	cs_counters;
	struct gen_pool			*cb_va_pool;
	struct hl_encaps_signals_mgr	sig_mgr;
	u64				cs_sequence;
	u64				*dram_default_hops;
	spinlock_t			pending_cb_lock;
@@ -2504,7 +2528,6 @@ struct hl_device {

	struct multi_cs_completion	multi_cs_completion[
							MULTI_CS_MAX_USER_CTX];

	atomic64_t			dram_used_mem;
	u64				timeout_jiffies;
	u64				max_power;
@@ -2576,6 +2599,29 @@ struct hl_device {
};


/**
 * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure,
 * describing a single signals reservation.
 * @refcount: refcount used to protect removing this id when several
 *            wait cs are used to wait on the reserved encaps signals.
 * @hdev: pointer to habanalabs device structure.
 * @hw_sob: pointer to the H/W SOB used in the reservation.
 * @cs_seq: staged cs sequence which contains encapsulated signals.
 * @id: idr id of this handle, used to look the handle up.
 * @q_idx: stream queue index the signals were reserved on.
 * @pre_sob_val: the queue's SOB value before the reservation, i.e. the
 *               next SOB value after the reservation minus @count.
 * @count: number of reserved signals.
 */
struct hl_cs_encaps_sig_handle {
	struct kref refcount;
	struct hl_device *hdev;
	struct hl_hw_sob *hw_sob;
	u64  cs_seq;
	u32  id;
	u32  q_idx;
	u32  pre_sob_val;
	u32  count;
};

/*
 * IOCTLs
 */
@@ -2889,9 +2935,12 @@ int hl_set_voltage(struct hl_device *hdev,
			int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
			int sensor_index, u32 attr, long value);
void hl_encaps_handle_do_release(struct kref *ref);
void hw_sob_get(struct hl_hw_sob *hw_sob);
void hw_sob_put(struct hl_hw_sob *hw_sob);
void hl_release_pending_user_interrupts(struct hl_device *hdev);
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			struct hl_hw_sob **hw_sob, u32 count);
			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);

int hl_state_dump(struct hl_device *hdev);
const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id);
+0 −1
Original line number Diff line number Diff line
@@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp)

out_err:
	mutex_unlock(&hdev->fpriv_list_lock);

	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
	filp->private_data = NULL;
+4 −1
Original line number Diff line number Diff line
@@ -426,7 +426,8 @@ static int init_signal_cs(struct hl_device *hdev,
	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
				cs_cmpl->hw_sob->sob_id, 0, true);

	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);
	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
								false);

	return rc;
}
@@ -850,6 +851,8 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
		hw_sob = &sync_stream_prop->hw_sob[sob];
		hw_sob->hdev = hdev;
		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
		hw_sob->sob_addr =
			hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
Loading