Commit be24dd48 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2021-10-18' of...

Merge tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.16:

- Add a new uAPI (under the memory ioctl) to request that the driver
  export a DMA-BUF object that represents a memory region on
  the device's DRAM. This is needed to enable peer-to-peer over PCIe
  between a habana device and an RDMA adapter (e.g. an mlx5 or efa
  RDMA adapter).

- Add a debugfs node to dynamically configure the CS timeout. Up until
  now, it was only configurable through a kernel module parameter.

- Fetch more comprehensive power information from the firmware.

- Always take timestamp when waiting for user interrupt, as the user
  needs that information to optimize the graph runtime compilation.

- Modify the user interrupt wait to treat the user value used as a
  fence as 64-bit, instead of 32-bit.

- Bypass reset in case of a repeated h/w error event after a device
  reset. This is to prevent an endless loop of device resets.

- Fix several bugs in multi CS completion code.

- Fix race condition in fd close/open.

- Update to latest firmware headers

- Add select CRC32 in kconfig

- Small fixes, cosmetics

* tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (25 commits)
  habanalabs: refactor fence handling in hl_cs_poll_fences
  habanalabs: context cleanup cosmetics
  habanalabs: simplify wait for interrupt with timestamp flow
  habanalabs: initialize hpriv fields before adding new node
  habanalabs: Unify frequency set/get functionality
  habanalabs: select CRC32
  habanalabs: add support for dma-buf exporter
  habanalabs: define uAPI to export FD for DMA-BUF
  habanalabs: fix NULL pointer dereference
  habanalabs: fix race condition in multi CS completion
  habanalabs: use only u32
  habanalabs: update firmware files
  habanalabs: bypass reset for continuous h/w error event
  habanalabs: take timestamp on wait for interrupt
  habanalabs: prevent race between fd close/open
  habanalabs: refactor reset log message
  habanalabs: define soft-reset as inference op
  habanalabs: fix debugfs device memory MMU VA translation
  habanalabs: add support for a long interrupt target value
  habanalabs: remove redundant cs validity checks
  ...
parents 2b74240b b2faac38
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -226,6 +226,12 @@ Description: Gets the state dump occurring on a CS timeout or failure.
                Writing an integer X discards X state dumps, so that the
                next read would return X+1-st newest state dump.

What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date:           Sep 2021
KernelVersion:  5.16
Contact:        obitton@habana.ai
Description:    Sets the command submission timeout value in seconds.

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
+2 −0
Original line number Diff line number Diff line
@@ -8,6 +8,8 @@ config HABANA_AI
	depends on PCI && HAS_IOMEM
	select GENERIC_ALLOCATOR
	select HWMON
	select DMA_SHARED_BUFFER
	select CRC32
	help
	  Enables PCIe card driver for Habana's AI Processors (AIP) that are
	  designed to accelerate Deep Learning inference and training workloads.
+1 −1
Original line number Diff line number Diff line
@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
		common/command_buffer.o common/hw_queue.o common/irq.o \
		common/sysfs.o common/hwmon.o common/memory.o \
		common/command_submission.o common/firmware_if.o \
		common/state_dump.o
		common/state_dump.o common/hwmgr.o
+72 −33
Original line number Diff line number Diff line
@@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence)
	fence->cs_sequence = sequence;
	fence->error = 0;
	fence->timestamp = ktime_set(0, 0);
	fence->mcs_handling_done = false;
	init_completion(&fence->completion);
}

@@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
	/* Don't cancel TDR in case this CS was timedout because we might be
	 * running from the TDR context
	 */
	if (cs && (cs->timedout ||
			hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
		return;

	if (cs && cs->tdr_active)
	if (cs->tdr_active)
		cancel_delayed_work_sync(&cs->work_tdr);

	spin_lock(&hdev->cs_mirror_lock);
@@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
				mcs_compl->timestamp =
						ktime_to_ns(fence->timestamp);
			complete_all(&mcs_compl->completion);

			/*
			 * Setting mcs_handling_done inside the lock ensures
			 * at least one fence has mcs_handling_done set to
			 * true before wait for mcs finish. This ensures at
			 * least one CS will be set as completed when polling
			 * mcs fences.
			 */
			fence->mcs_handling_done = true;
		}

		spin_unlock(&mcs_compl->lock);
	}
	/* In case CS completed without mcs completion initialized */
	fence->mcs_handling_done = true;
}

static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
@@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
			break;
		}

		mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;

		if (status == CS_WAIT_STATUS_BUSY)
			continue;
		switch (status) {
		case CS_WAIT_STATUS_BUSY:
			/* CS did not finish, keep waiting on its QID */
			mcs_data->stream_master_qid_map |=
					fence->stream_master_qid_map;
			break;
		case CS_WAIT_STATUS_COMPLETED:
			/*
			 * Use mcs_handling_done to avoid reporting the CS as
			 * completed to the user before all of its mcs handling
			 * has finished, which would cause a race the next time
			 * the user waits for mcs.
			 */
			if (!fence->mcs_handling_done)
				break;

			mcs_data->completion_bitmap |= BIT(i);

			/*
		 * best effort to extract timestamp. few notes:
		 * - if even single fence is gone we cannot extract timestamp
		 *   (as fence not exist anymore)
		 * - for all completed CSs we take the earliest timestamp.
		 *   for this we have to validate that:
		 *       1. given timestamp was indeed set
		 *       2. the timestamp is earliest of all timestamps so far
			 * For all completed CSs we take the earliest timestamp.
			 * For this we have to validate that the timestamp is
			 * earliest of all timestamps so far.
			 */

		if (status == CS_WAIT_STATUS_GONE) {
			if (mcs_data->update_ts &&
					(ktime_compare(fence->timestamp, first_cs_time) < 0))
				first_cs_time = fence->timestamp;
			break;
		case CS_WAIT_STATUS_GONE:
			mcs_data->update_ts = false;
			mcs_data->gone_cs = true;
		} else if (mcs_data->update_ts &&
			(ktime_compare(fence->timestamp,
						ktime_set(0, 0)) > 0) &&
			(ktime_compare(fence->timestamp, first_cs_time) < 0)) {
			first_cs_time = fence->timestamp;
			/*
			 * The user may pass old sequence numbers that refer to
			 * already completed CSs whose fences are already gone.
			 * In this case, the CS is set as completed but there is
			 * no need to consider its QID for mcs completion.
			 */
			mcs_data->completion_bitmap |= BIT(i);
			break;
		default:
			dev_err(hdev->dev, "Invalid fence status\n");
			return -EINVAL;
		}

	}

	hl_fences_put(mcs_data->fence_arr, arr_len);
@@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)

static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u32 timeout_us, u64 user_address,
				u32 target_value, u16 interrupt_offset,
				enum hl_cs_wait_status *status)
				u64 target_value, u16 interrupt_offset,
				enum hl_cs_wait_status *status,
				u64 *timestamp)
{
	struct hl_user_pending_interrupt *pend;
	struct hl_user_interrupt *interrupt;
	unsigned long timeout, flags;
	u32 completion_value;
	u64 completion_value;
	long completion_rc;
	int rc = 0;

@@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
		dev_err(hdev->dev, "Failed to copy completion value from user\n");
		rc = -EFAULT;
		goto remove_pending_user_interrupt;
	}

	if (completion_value >= target_value)
	if (completion_value >= target_value) {
		*status = CS_WAIT_STATUS_COMPLETED;
	else
		/* There was no interrupt, we assume the completion is now. */
		pend->fence.timestamp = ktime_get();
	} else
		*status = CS_WAIT_STATUS_BUSY;

	if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
@@ -2812,7 +2842,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
		reinit_completion(&pend->fence.completion);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
			dev_err(hdev->dev, "Failed to copy completion value from user\n");
			rc = -EFAULT;

@@ -2839,6 +2869,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	*timestamp = ktime_to_ns(pend->fence.timestamp);

	kfree(pend);
	hl_ctx_put(ctx);

@@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
	struct asic_fixed_properties *prop;
	union hl_wait_cs_args *args = data;
	enum hl_cs_wait_status status;
	u64 timestamp;
	int rc;

	prop = &hdev->asic_prop;
@@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)

	rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
				args->in.target, interrupt_offset, &status);
				args->in.target, interrupt_offset, &status,
				&timestamp);

	if (rc) {
		if (rc != -EINTR)
@@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)

	memset(args, 0, sizeof(*args));

	if (timestamp) {
		args->out.timestamp_nsec = timestamp;
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
	}

	switch (status) {
	case CS_WAIT_STATUS_COMPLETED:
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+1 −7
Original line number Diff line number Diff line
@@ -181,12 +181,6 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
	return rc;
}

/*
 * hl_ctx_free - drop one reference on a context
 * @hdev: habanalabs device structure (not used by this function)
 * @ctx: context whose refcount is put
 *
 * hl_ctx_do_release is handed to kref_put() as the release callback.
 * The result of kref_put() is not acted upon; the function simply
 * returns afterwards.
 */
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{
	kref_put(&ctx->refcount, hl_ctx_do_release);
}

int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
	int rc = 0;
@@ -392,7 +386,7 @@ void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id)
		hl_ctx_free(hdev, ctx);
		kref_put(&ctx->refcount, hl_ctx_do_release);

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->ctx_lock);
Loading