Commit 0acfbe9c authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-fixes-2020-12-30' of...

Merge tag 'misc-habanalabs-fixes-2020-12-30' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus

Oded writes:

This tag contains the following fixes for 5.11-rc2:

- Fixes that are needed for supporting the new F/W with security features:
  - Correctly fetch PLL information in GOYA when security is enabled in F/W
  - Fix hard-reset support when F/W is in its preboot stage
  - Disable clock gating when initializing the H/W
  - Fix hard-reset procedure
  - Fix PCI controller initialization
- Remove setting of Engine-Barrier in collective wait operations. This
  barrier created a drop in performance
- Retry loading the TPC firmware in case of EINTR during loading
- Fix CS counters
- Register to PCI shutdown callback to fix handling of VM shutdown
- Fix order of status check
- Fix memory leak in reset procedure
- Fix and add comments and fix indentations

* tag 'misc-habanalabs-fixes-2020-12-30' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs: Fix memleak in hl_device_reset
  habanalabs: fix order of status check
  habanalabs: register to pci shutdown callback
  habanalabs: add validation cs counter, fix misplaced counters
  habanalabs/gaudi: retry loading TPC f/w on -EINTR
  habanalabs: adjust pci controller init to new firmware
  habanalabs: update comment in hl_boot_if.h
  habanalabs/gaudi: enhance reset message
  habanalabs: full FW hard reset support
  habanalabs/gaudi: disable CGM at HW initialization
  habanalabs: Revise comment to align with mirror list name
  habanalabs/gaudi: do not set EB in collective slave queues
  habanalabs: preboot hard reset support
  habanalabs: remove generic gaudi get_pll_freq function
  habanalabs: fetch PSOC PLL frequency from F/W in goya
  habanalabs: add comment for pll frequency ioctl opcode
  habanalabs: Fix a missing-braces warning
parents 5c8fe583 b000700d
Loading
Loading
Loading
Loading
+58 −19
Original line number Diff line number Diff line
@@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cntr = &hdev->aggregated_cs_counters;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
	if (!cs) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	cs->ctx = ctx;
	cs->submitted = false;
@@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,

	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_cs;
	}
@@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_fence;
	}
@@ -562,7 +569,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);

	/* Make sure we don't have leftovers in the H/W queues mirror list */
	/* Make sure we don't have leftovers in the CS mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
		cs_get(cs);
		cs->aborted = true;
@@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)

static int hl_cs_copy_chunk_array(struct hl_device *hdev,
					struct hl_cs_chunk **cs_chunk_array,
					void __user *chunks, u32 num_chunks)
					void __user *chunks, u32 num_chunks,
					struct hl_ctx *ctx)
{
	u32 size_to_copy;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
@@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,

	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
					GFP_ATOMIC);
	if (!*cs_chunk_array)
	if (!*cs_chunk_array) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		kfree(*cs_chunk_array);
		return -EFAULT;
@@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
@@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			hpriv->ctx);
	if (rc)
		goto out;

@@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
		rc = validate_queue_index(hdev, chunk, &queue_type,
						&is_kernel_allocated_cb);
		if (rc) {
			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&cntr->parsing_drop_cnt);
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			goto free_cs_object;
		}

@@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				atomic64_inc(
				&hpriv->ctx->cs_counters.parsing_drop_cnt);
				atomic64_inc(&cntr->parsing_drop_cnt);
					&ctx->cs_counters.validation_drop_cnt);
				atomic64_inc(&cntr->validation_drop_cnt);
				rc = -EINVAL;
				goto free_cs_object;
			}
@@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			atomic64_inc(
			&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
			atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
			atomic64_inc(&cntr->out_of_mem_drop_cnt);
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
@@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,

		rc = cs_parser(hpriv, job);
		if (rc) {
			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&cntr->parsing_drop_cnt);
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
	}

	if (int_queues_only) {
		atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
		atomic64_inc(&cntr->parsing_drop_cnt);
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Reject CS %d.%llu because only internal queues jobs are present\n",
			cs->ctx->asid, cs->sequence);
@@ -1042,7 +1058,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
}

static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
		struct hl_cs_chunk *chunk, u64 *signal_seq)
		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
{
	u64 *signal_seq_arr = NULL;
	u32 size_to_copy, signal_seq_arr_len;
@@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,

	/* currently only one signal seq is supported */
	if (signal_seq_arr_len != 1) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Wait for signal CS supports only one signal CS seq\n");
		return -EINVAL;
@@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
	if (!signal_seq_arr)
	if (!signal_seq_arr) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
	if (copy_from_user(signal_seq_arr,
				u64_to_user_ptr(chunk->signal_seq_arr),
				size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Failed to copy signal seq array from user\n");
		rc = -EFAULT;
@@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_compl *sig_waitcs_cmpl;
	u32 q_idx, collective_engine_id = 0;
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *sig_fence = NULL;
	struct hl_ctx *ctx = hpriv->ctx;
	enum hl_queue_type q_type;
@@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	u64 signal_seq;
	int rc;

	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			ctx);
	if (rc)
		goto out;

@@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
@@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	q_type = hw_queue_prop->type;

	if (!hw_queue_prop->supports_sync_stream) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
@@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,

	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Queue index %d is invalid\n", q_idx);
			rc = -EINVAL;
@@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	}

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
		if (rc)
			goto free_cs_chunk_array;

		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
@@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
@@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
				cs, q_idx, collective_engine_id);
	else
	else {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		rc = -EINVAL;
	}

	if (rc)
		goto free_cs_object;
+5 −3
Original line number Diff line number Diff line
@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (hdev->disabled)
		status = HL_DEVICE_STATUS_MALFUNCTION;
	else if (atomic_read(&hdev->in_reset))
	if (atomic_read(&hdev->in_reset))
		status = HL_DEVICE_STATUS_IN_RESET;
	else if (hdev->needs_reset)
		status = HL_DEVICE_STATUS_NEEDS_RESET;
	else if (hdev->disabled)
		status = HL_DEVICE_STATUS_MALFUNCTION;
	else
		status = HL_DEVICE_STATUS_OPERATIONAL;

@@ -1092,6 +1092,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

@@ -1103,6 +1104,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}
+44 −16
Original line number Diff line number Diff line
@@ -627,23 +627,36 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
	security_status = RREG32(cpu_security_boot_status_reg);

	/* We read security status multiple times during boot:
	 * 1. preboot - we check if fw security feature is supported
	 * 2. boot cpu - we get boot cpu security status
	 * 3. FW application - we get FW application security status
	 * 1. preboot - a. Check whether the security status bits are valid
	 *              b. Check whether fw security is enabled
	 *              c. Check whether hard reset is done by preboot
	 * 2. boot cpu - a. Fetch boot cpu security status
	 *               b. Check whether hard reset is done by boot cpu
	 * 3. FW application - a. Fetch fw application security status
	 *                     b. Check whether hard reset is done by fw app
	 *
	 * Preboot:
	 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
	 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
	 */
	if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
		hdev->asic_prop.fw_security_status_valid = 1;
		prop->fw_security_disabled =
			!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
		prop->fw_security_status_valid = 1;

		if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
			prop->fw_security_disabled = false;
		else
			prop->fw_security_disabled = true;

		if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
			prop->hard_reset_done_by_fw = true;
	} else {
		hdev->asic_prop.fw_security_status_valid = 0;
		prop->fw_security_status_valid = 0;
		prop->fw_security_disabled = true;
	}

	dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
			prop->hard_reset_done_by_fw ? "enabled" : "disabled");

	dev_info(hdev->dev, "firmware-level security is %s\n",
			prop->fw_security_disabled ? "disabled" : "enabled");

@@ -655,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
			bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 status;
	int rc;

@@ -723,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
	/* Read U-Boot version now in case we will later fail */
	hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);

	/* Clear reset status since we need to read it again from boot CPU */
	prop->hard_reset_done_by_fw = false;

	/* Read boot_cpu security bits */
	if (hdev->asic_prop.fw_security_status_valid)
		hdev->asic_prop.fw_boot_cpu_security_map =
	if (prop->fw_security_status_valid) {
		prop->fw_boot_cpu_security_map =
				RREG32(cpu_security_boot_status_reg);

		if (prop->fw_boot_cpu_security_map &
				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
			prop->hard_reset_done_by_fw = true;
	}

	dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
			prop->hard_reset_done_by_fw ? "enabled" : "disabled");

	if (rc) {
		detect_cpu_boot_status(hdev, status);
		rc = -EIO;
@@ -796,18 +821,21 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
		goto out;
	}

	/* Clear reset status since we need to read again from app */
	prop->hard_reset_done_by_fw = false;

	/* Read FW application security bits */
	if (hdev->asic_prop.fw_security_status_valid) {
		hdev->asic_prop.fw_app_security_map =
	if (prop->fw_security_status_valid) {
		prop->fw_app_security_map =
				RREG32(cpu_security_boot_status_reg);

		if (hdev->asic_prop.fw_app_security_map &
		if (prop->fw_app_security_map &
				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
			hdev->asic_prop.hard_reset_done_by_fw = true;
			prop->hard_reset_done_by_fw = true;
	}

	dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
		hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
	dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
			prop->hard_reset_done_by_fw ? "enabled" : "disabled");

	dev_info(hdev->dev, "Successfully loaded firmware to device\n");

+3 −1
Original line number Diff line number Diff line
@@ -944,7 +944,7 @@ struct hl_asic_funcs {
	u32 (*get_signal_cb_size)(struct hl_device *hdev);
	u32 (*get_wait_cb_size)(struct hl_device *hdev);
	u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
			u32 size);
			u32 size, bool eb);
	u32 (*gen_wait_cb)(struct hl_device *hdev,
			struct hl_gen_wait_properties *prop);
	void (*reset_sob)(struct hl_device *hdev, void *data);
@@ -1000,6 +1000,7 @@ struct hl_va_range {
 * @queue_full_drop_cnt: dropped due to queue full
 * @device_in_reset_drop_cnt: dropped due to device in reset
 * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
 * @validation_drop_cnt: dropped due to error in validation
 */
struct hl_cs_counters_atomic {
	atomic64_t out_of_mem_drop_cnt;
@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic {
	atomic64_t queue_full_drop_cnt;
	atomic64_t device_in_reset_drop_cnt;
	atomic64_t max_cs_in_flight_drop_cnt;
	atomic64_t validation_drop_cnt;
};

/**
+1 −0
Original line number Diff line number Diff line
@@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = {
	.id_table = ids,
	.probe = hl_pci_probe,
	.remove = hl_pci_remove,
	.shutdown = hl_pci_remove,
	.driver.pm = &hl_pm_ops,
	.err_handler = &hl_pci_err_handler,
};
Loading