Commit 5ffc06eb authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull habanalabs updates from Greg KH:
 "Here is another round of misc driver patches for 5.15-rc1.

  In here is only updates for the Habanalabs driver. This request is
  late because the previously-objected-to dma-buf patches are all
  removed and some fixes that you and others found are now included in
  here as well.

  All of these have been in linux-next for well over a week with no
  reports of problems, and they are all self-contained to only this one
  driver. Full details are in the shortlog"

* tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (61 commits)
  habanalabs/gaudi: hwmon default card name
  habanalabs: add support for f/w reset
  habanalabs/gaudi: block ICACHE_BASE_ADDERESS_HIGH in TPC
  habanalabs: cannot sleep while holding spinlock
  habanalabs: never copy_from_user inside spinlock
  habanalabs: remove unnecessary device status check
  habanalabs: disable IRQ in user interrupts spinlock
  habanalabs: add "in device creation" status
  habanalabs/gaudi: invalidate PMMU mem cache on init
  habanalabs/gaudi: size should be printed in decimal
  habanalabs/gaudi: define DC POWER for secured PMC
  habanalabs/gaudi: unmask out of bounds SLM access interrupt
  habanalabs: add userptr_lookup node in debugfs
  habanalabs/gaudi: fetch TPC/MME ECC errors from F/W
  habanalabs: modify multi-CS to wait on stream masters
  habanalabs/gaudi: add monitored SOBs to state dump
  habanalabs/gaudi: restore user registers when context opens
  habanalabs/gaudi: increase boot fit timeout
  habanalabs: update to latest firmware headers
  habanalabs/gaudi: minimize number of register reads
  ...
parents a668acb8 4cd67adc
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -215,6 +215,17 @@ Description: Sets the skip reset on timeout option for the device. Value of
                "0" means device will be reset in case some CS has timed out,
                otherwise it will not be reset.

What:           /sys/kernel/debug/habanalabs/hl<n>/state_dump
Date:           Oct 2021
KernelVersion:  5.15
Contact:        ynudelman@habana.ai
Description:    Gets the state dump occurring on a CS timeout or failure.
                State dump is used for debug and is created each time in case of
                a problem in a CS execution, before reset.
                Reading from the node returns the newest state dump available.
                Writing an integer X discards X state dumps, so that the
                next read would return X+1-st newest state dump.

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
@@ -230,6 +241,14 @@ Description: Displays a list with information about the currently user
                pointers (user virtual addresses) that are pinned and mapped
                to DMA addresses

What:           /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
Date:           Aug 2021
KernelVersion:  5.15
Contact:        ogabbay@kernel.org
Description:    Allows to search for specific user pointers (user virtual
                addresses) that are pinned and mapped to DMA addresses, and see
                their resolution to the specific dma address.

What:           /sys/kernel/debug/habanalabs/hl<n>/vm
Date:           Jan 2019
KernelVersion:  5.1
+2 −1
Original line number Diff line number Diff line
@@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
		common/asid.o common/habanalabs_ioctl.o \
		common/command_buffer.o common/hw_queue.o common/irq.o \
		common/sysfs.o common/hwmon.o common/memory.o \
		common/command_submission.o common/firmware_if.o
		common/command_submission.o common/firmware_if.o \
		common/state_dump.o
+1 −3
Original line number Diff line number Diff line
@@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,

	spin_lock(&mgr->cb_lock);
	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
	if (rc < 0)
		rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
	spin_unlock(&mgr->cb_lock);

	if (rc < 0) {
@@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)

	vma->vm_private_data = cb;

	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
	rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
	if (rc) {
		spin_lock(&cb->lock);
+1049 −338

File changed.

Preview size limit exceeded, changes collapsed.

+126 −20
Original line number Diff line number Diff line
@@ -9,16 +9,70 @@

#include <linux/slab.h>

void hl_encaps_handle_do_release(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *handle =
		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
	struct hl_ctx *ctx = handle->hdev->compute_ctx;
	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;

	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, handle->id);
	spin_unlock(&mgr->lock);

	kfree(handle);
}

static void hl_encaps_handle_do_release_sob(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *handle =
		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
	struct hl_ctx *ctx = handle->hdev->compute_ctx;
	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;

	/* if we're here, then there was a signals reservation but cs with
	 * encaps signals wasn't submitted, so need to put refcount
	 * to hw_sob taken at the reservation.
	 */
	hw_sob_put(handle->hw_sob);

	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, handle->id);
	spin_unlock(&mgr->lock);

	kfree(handle);
}

static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
{
	spin_lock_init(&mgr->lock);
	idr_init(&mgr->handles);
}

static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
			struct hl_encaps_signals_mgr *mgr)
{
	struct hl_cs_encaps_sig_handle *handle;
	struct idr *idp;
	u32 id;

	idp = &mgr->handles;

	if (!idr_is_empty(idp)) {
		dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");
		idr_for_each_entry(idp, handle, id)
			kref_put(&handle->refcount,
					hl_encaps_handle_do_release_sob);
	}

	idr_destroy(&mgr->handles);
}

static void hl_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	int i;

	/* Release all allocated pending cb's, those cb's were never
	 * scheduled so it is safe to release them here
	 */
	hl_pending_cb_list_flush(ctx);

	/* Release all allocated HW block mapped list entries and destroy
	 * the mutex.
	 */
@@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
		hl_cb_va_pool_fini(ctx);
		hl_vm_ctx_fini(ctx);
		hl_asid_free(hdev, ctx->asid);
		hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);

		/* Scrub both SRAM and DRAM */
		hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
@@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{
	if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
		return;

	dev_warn(hdev->dev,
		"user process released device but its command submissions are still executing\n");
}

int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
	kref_init(&ctx->refcount);

	ctx->cs_sequence = 1;
	INIT_LIST_HEAD(&ctx->pending_cb_list);
	spin_lock_init(&ctx->pending_cb_lock);
	spin_lock_init(&ctx->cs_lock);
	atomic_set(&ctx->thread_ctx_switch_token, 1);
	atomic_set(&ctx->thread_pending_cb_token, 1);
	ctx->thread_ctx_switch_wait_token = 0;
	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
				sizeof(struct hl_fence *),
@@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
			goto err_cb_va_pool_fini;
		}

		hl_encaps_sig_mgr_init(&ctx->sig_mgr);

		dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
	}

@@ -229,31 +280,86 @@ int hl_ctx_put(struct hl_ctx *ctx)
	return kref_put(&ctx->refcount, hl_ctx_do_release);
}

struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
/*
 * hl_ctx_get_fence_locked - get CS fence under CS lock
 *
 * @ctx: pointer to the context structure.
 * @seq: CS sequences number
 *
 * @return valid fence pointer on success, NULL if fence is gone, otherwise
 *         error pointer.
 *
 * NOTE: this function shall be called with cs_lock locked
 */
static struct hl_fence *hl_ctx_get_fence_locked(struct hl_ctx *ctx, u64 seq)
{
	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
	struct hl_fence *fence;

	spin_lock(&ctx->cs_lock);

	if (seq >= ctx->cs_sequence) {
		spin_unlock(&ctx->cs_lock);
	if (seq >= ctx->cs_sequence)
		return ERR_PTR(-EINVAL);
	}

	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
		spin_unlock(&ctx->cs_lock);
	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence)
		return NULL;
	}

	fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
	hl_fence_get(fence);
	return fence;
}

struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
	struct hl_fence *fence;

	spin_lock(&ctx->cs_lock);

	fence = hl_ctx_get_fence_locked(ctx, seq);

	spin_unlock(&ctx->cs_lock);

	return fence;
}

/*
 * hl_ctx_get_fences - get multiple CS fences under the same CS lock
 *
 * @ctx: pointer to the context structure.
 * @seq_arr: array of CS sequences to wait for
 * @fence: fence array to store the CS fences
 * @arr_len: length of seq_arr and fence_arr
 *
 * @return 0 on success, otherwise non 0 error code
 */
int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
				struct hl_fence **fence, u32 arr_len)
{
	struct hl_fence **fence_arr_base = fence;
	int i, rc = 0;

	spin_lock(&ctx->cs_lock);

	for (i = 0; i < arr_len; i++, fence++) {
		u64 seq = seq_arr[i];

		*fence = hl_ctx_get_fence_locked(ctx, seq);

		if (IS_ERR(*fence)) {
			dev_err(ctx->hdev->dev,
				"Failed to get fence for CS with seq 0x%llx\n",
					seq);
			rc = PTR_ERR(*fence);
			break;
		}
	}

	spin_unlock(&ctx->cs_lock);

	if (rc)
		hl_fences_put(fence_arr_base, i);

	return rc;
}

/*
 * hl_ctx_mgr_init - initialize the context manager
 *
Loading