Commit 3e1d5b0f authored by Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-fixes-2021-09-19' of...

Merge tag 'misc-habanalabs-fixes-2021-09-19' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus

Oded writes:

This tag contains the following fixes for 5.15-rc3:

- Fix potential race when the user is waiting in the interrupt wait ioctl
- Prevent possible kernel oops in staged CS ioctl
- Use direct MSI mechanism in Gaudi as a WA for a H/W issue
  regarding FLR
- Fail the collective wait ioctl operation when it
  is not supported, e.g. when the NIC ports are disabled
- Fix configuration of one of the security mechanisms.
- Change error print to be rate-limited as it can be initiated
  by the user and spam the kernel log
- Fix return value of CS ioctl when doing staged CS
- Fix CS ioctl code when user doesn't supply an offset for
  the memory area that we use as fence.
- Spelling mistake fix

* tag 'misc-habanalabs-fixes-2021-09-19' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs: expose a single cs seq in staged submissions
  habanalabs: fix wait offset handling
  habanalabs: rate limit multi CS completion errors
  habanalabs/gaudi: fix LBW RR configuration
  habanalabs: Fix spelling mistake "FEADBACK" -> "FEEDBACK"
  habanalabs: fail collective wait when not supported
  habanalabs/gaudi: use direct MSI in single mode
  habanalabs: fix kernel OOPs related to staged cs
  habanalabs: fix potential race in interrupt wait ioctl
parents 25a14332 c8fee419
Loading
Loading
Loading
Loading
+51 −20
Original line number Diff line number Diff line
@@ -405,7 +405,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
	bool next_entry_found = false;
	struct hl_cs *next;
	struct hl_cs *next, *first_cs;

	if (!cs_needs_timeout(cs))
		return;
@@ -415,9 +415,16 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
	/* We need to handle tdr only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first which is
	 * the CS marked as 'staged_last'.
	 * In case single staged cs was submitted which has both first and last
	 * indications, then "cs_find_first" below will return NULL, since we
	 * removed the cs node from the list before getting here,
	 * in such cases just continue with the cs to cancel its TDR work.
	 */
	if (cs->staged_cs && cs->staged_last)
		cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
	if (cs->staged_cs && cs->staged_last) {
		first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (first_cs)
			cs = first_cs;
	}

	spin_unlock(&hdev->cs_mirror_lock);

@@ -1288,6 +1295,12 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
	if (rc)
		goto free_cs_object;

	/* If this is a staged submission we must return the staged sequence
	 * rather than the internal CS sequence
	 */
	if (cs->staged_cs)
		*cs_seq = cs->staged_sequence;

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
@@ -1988,6 +2001,15 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
			goto free_cs_chunk_array;
		}

		if (!hdev->nic_ports_mask) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Collective operations not supported when NIC ports are disabled");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		collective_engine_id = chunk->collective_engine_id;
	}

@@ -2026,9 +2048,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
			spin_unlock(&ctx->sig_mgr.lock);

			if (!handle_found) {
				dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
				/* treat as signal CS already finished */
				dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
						signal_seq);
				rc = -EINVAL;
				rc = 0;
				goto free_cs_chunk_array;
			}

@@ -2613,7 +2636,8 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		 * completed after the poll function.
		 */
		if (!mcs_data.completion_bitmap) {
			dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n");
			dev_warn_ratelimited(hdev->dev,
				"Multi-CS got completion on wait but no CS completed\n");
			rc = -EFAULT;
		}
	}
@@ -2740,10 +2764,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	else
		interrupt = &hdev->user_interrupt[interrupt_offset];

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
		dev_err(hdev->dev, "Failed to copy completion value from user\n");
		rc = -EFAULT;
		goto free_fence;
		goto remove_pending_user_interrupt;
	}

	if (completion_value >= target_value)
@@ -2752,14 +2786,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
		*status = CS_WAIT_STATUS_BUSY;

	if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
		goto free_fence;

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		goto remove_pending_user_interrupt;

wait_again:
	/* Wait for interrupt handler to signal completion */
@@ -2770,6 +2797,15 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	 * If comparison fails, keep waiting until timeout expires
	 */
	if (completion_rc > 0) {
		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		/* reinit_completion must be called before we check for user
		 * completion value, otherwise, if interrupt is received after
		 * the comparison and before the next wait_for_completion,
		 * we will reach timeout and fail
		 */
		reinit_completion(&pend->fence.completion);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
			dev_err(hdev->dev, "Failed to copy completion value from user\n");
			rc = -EFAULT;
@@ -2780,11 +2816,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
		if (completion_value >= target_value) {
			*status = CS_WAIT_STATUS_COMPLETED;
		} else {
			spin_lock_irqsave(&interrupt->wait_list_lock, flags);
			reinit_completion(&pend->fence.completion);
			timeout = completion_rc;

			spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
			goto wait_again;
		}
	} else if (completion_rc == -ERESTARTSYS) {
@@ -2802,7 +2834,6 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

free_fence:
	kfree(pend);
	hl_ctx_put(ctx);

+7 −2
Original line number Diff line number Diff line
@@ -437,6 +437,7 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
			struct hl_cs_compl *cs_cmpl)
{
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	u32 offset = 0;

	cs_cmpl->hw_sob = handle->hw_sob;

@@ -446,9 +447,13 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
	 * set offset 1 for example he mean to wait only for the first
	 * signal only, which will be pre_sob_val, and if he set offset 2
	 * then the value required is (pre_sob_val + 1) and so on...
	 * if user set wait offset to 0, then treat it as legacy wait cs,
	 * wait for the next signal.
	 */
	cs_cmpl->sob_val = handle->pre_sob_val +
			(job->encaps_sig_wait_offset - 1);
	if (job->encaps_sig_wait_offset)
		offset = job->encaps_sig_wait_offset - 1;

	cs_cmpl->sob_val = handle->pre_sob_val + offset;
}

static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+7 −4
Original line number Diff line number Diff line
@@ -395,7 +395,7 @@ static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
@@ -5802,6 +5802,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct packet_msg_prot *cq_pkt;
	u64 msi_addr;
	u32 tmp;

	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
@@ -5823,10 +5824,12 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(1);

	if (!gaudi->multi_msi_mode)
		msi_vec = 0;
	if (gaudi->multi_msi_mode)
		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
	else
		msi_addr = mmPCIE_CORE_MSI_REQ;

	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}

static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
+67 −48
Original line number Diff line number Diff line
@@ -8,16 +8,21 @@
#include "gaudiP.h"
#include "../include/gaudi/asic_reg/gaudi_regs.h"
#define GAUDI_NUMBER_OF_RR_REGS		24
#define GAUDI_NUMBER_OF_LBW_RANGES	12
#define GAUDI_NUMBER_OF_LBW_RR_REGS	28
#define GAUDI_NUMBER_OF_HBW_RR_REGS	24
#define GAUDI_NUMBER_OF_LBW_RANGES	10
static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
	mmDMA_IF_W_S_SOB_HIT_WPROT,
	mmDMA_IF_W_S_DMA0_HIT_WPROT,
	mmDMA_IF_W_S_DMA1_HIT_WPROT,
	mmDMA_IF_E_S_SOB_HIT_WPROT,
	mmDMA_IF_E_S_DMA0_HIT_WPROT,
	mmDMA_IF_E_S_DMA1_HIT_WPROT,
	mmDMA_IF_W_N_SOB_HIT_WPROT,
	mmDMA_IF_W_N_DMA0_HIT_WPROT,
	mmDMA_IF_W_N_DMA1_HIT_WPROT,
	mmDMA_IF_E_N_SOB_HIT_WPROT,
	mmDMA_IF_E_N_DMA0_HIT_WPROT,
	mmDMA_IF_E_N_DMA1_HIT_WPROT,
	mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AW,
@@ -38,13 +43,17 @@ static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AW,
};
static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
	mmDMA_IF_W_S_SOB_HIT_RPROT,
	mmDMA_IF_W_S_DMA0_HIT_RPROT,
	mmDMA_IF_W_S_DMA1_HIT_RPROT,
	mmDMA_IF_E_S_SOB_HIT_RPROT,
	mmDMA_IF_E_S_DMA0_HIT_RPROT,
	mmDMA_IF_E_S_DMA1_HIT_RPROT,
	mmDMA_IF_W_N_SOB_HIT_RPROT,
	mmDMA_IF_W_N_DMA0_HIT_RPROT,
	mmDMA_IF_W_N_DMA1_HIT_RPROT,
	mmDMA_IF_E_N_SOB_HIT_RPROT,
	mmDMA_IF_E_N_DMA0_HIT_RPROT,
	mmDMA_IF_E_N_DMA1_HIT_RPROT,
	mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AR,
@@ -65,13 +74,17 @@ static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AR,
};
static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
	mmDMA_IF_W_S_SOB_MIN_WPROT_0,
	mmDMA_IF_W_S_DMA0_MIN_WPROT_0,
	mmDMA_IF_W_S_DMA1_MIN_WPROT_0,
	mmDMA_IF_E_S_SOB_MIN_WPROT_0,
	mmDMA_IF_E_S_DMA0_MIN_WPROT_0,
	mmDMA_IF_E_S_DMA1_MIN_WPROT_0,
	mmDMA_IF_W_N_SOB_MIN_WPROT_0,
	mmDMA_IF_W_N_DMA0_MIN_WPROT_0,
	mmDMA_IF_W_N_DMA1_MIN_WPROT_0,
	mmDMA_IF_E_N_SOB_MIN_WPROT_0,
	mmDMA_IF_E_N_DMA0_MIN_WPROT_0,
	mmDMA_IF_E_N_DMA1_MIN_WPROT_0,
	mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AW_0,
@@ -92,13 +105,17 @@ static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AW_0,
};
static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
	mmDMA_IF_W_S_SOB_MAX_WPROT_0,
	mmDMA_IF_W_S_DMA0_MAX_WPROT_0,
	mmDMA_IF_W_S_DMA1_MAX_WPROT_0,
	mmDMA_IF_E_S_SOB_MAX_WPROT_0,
	mmDMA_IF_E_S_DMA0_MAX_WPROT_0,
	mmDMA_IF_E_S_DMA1_MAX_WPROT_0,
	mmDMA_IF_W_N_SOB_MAX_WPROT_0,
	mmDMA_IF_W_N_DMA0_MAX_WPROT_0,
	mmDMA_IF_W_N_DMA1_MAX_WPROT_0,
	mmDMA_IF_E_N_SOB_MAX_WPROT_0,
	mmDMA_IF_E_N_DMA0_MAX_WPROT_0,
	mmDMA_IF_E_N_DMA1_MAX_WPROT_0,
	mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AW_0,
@@ -119,13 +136,17 @@ static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AW_0,
};
static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
	mmDMA_IF_W_S_SOB_MIN_RPROT_0,
	mmDMA_IF_W_S_DMA0_MIN_RPROT_0,
	mmDMA_IF_W_S_DMA1_MIN_RPROT_0,
	mmDMA_IF_E_S_SOB_MIN_RPROT_0,
	mmDMA_IF_E_S_DMA0_MIN_RPROT_0,
	mmDMA_IF_E_S_DMA1_MIN_RPROT_0,
	mmDMA_IF_W_N_SOB_MIN_RPROT_0,
	mmDMA_IF_W_N_DMA0_MIN_RPROT_0,
	mmDMA_IF_W_N_DMA1_MIN_RPROT_0,
	mmDMA_IF_E_N_SOB_MIN_RPROT_0,
	mmDMA_IF_E_N_DMA0_MIN_RPROT_0,
	mmDMA_IF_E_N_DMA1_MIN_RPROT_0,
	mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AR_0,
@@ -146,13 +167,17 @@ static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AR_0,
};
static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
	mmDMA_IF_W_S_SOB_MAX_RPROT_0,
	mmDMA_IF_W_S_DMA0_MAX_RPROT_0,
	mmDMA_IF_W_S_DMA1_MAX_RPROT_0,
	mmDMA_IF_E_S_SOB_MAX_RPROT_0,
	mmDMA_IF_E_S_DMA0_MAX_RPROT_0,
	mmDMA_IF_E_S_DMA1_MAX_RPROT_0,
	mmDMA_IF_W_N_SOB_MAX_RPROT_0,
	mmDMA_IF_W_N_DMA0_MAX_RPROT_0,
	mmDMA_IF_W_N_DMA1_MAX_RPROT_0,
	mmDMA_IF_E_N_SOB_MAX_RPROT_0,
	mmDMA_IF_E_N_DMA0_MAX_RPROT_0,
	mmDMA_IF_E_N_DMA1_MAX_RPROT_0,
	mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AR_0,
@@ -173,7 +198,7 @@ static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AR_0,
};
static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AW,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AW,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AW,
@@ -200,7 +225,7 @@ static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AW
};
static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AR,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AR,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AR,
@@ -227,7 +252,7 @@ static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AR
};
static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AW_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
@@ -254,7 +279,7 @@ static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AW_0
};
static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AW_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
@@ -281,7 +306,7 @@ static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AW_0
};
static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AW_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
@@ -308,7 +333,7 @@ static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AW_0
};
static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AW_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
@@ -335,7 +360,7 @@ static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_HIGH_AW_0
};
static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AR_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
@@ -362,7 +387,7 @@ static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AR_0
};
static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AR_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
@@ -389,7 +414,7 @@ static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AR_0
};
static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AR_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
@@ -416,7 +441,7 @@ static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
	mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AR_0
};
static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
	mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
	mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AR_0,
	mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
@@ -12849,50 +12874,44 @@ static void gaudi_init_range_registers_lbw(struct hl_device *hdev)
	u32 lbw_rng_end[GAUDI_NUMBER_OF_LBW_RANGES];
	int i, j;
	lbw_rng_start[0]  = (0xFBFE0000 & 0x3FFFFFF) - 1;
	lbw_rng_end[0]    = (0xFBFFF000 & 0x3FFFFFF) + 1;
	lbw_rng_start[0]  = (0xFC0E8000 & 0x3FFFFFF) - 1; /* 0x000E7FFF */
	lbw_rng_end[0]    = (0xFC11FFFF & 0x3FFFFFF) + 1; /* 0x00120000 */
	lbw_rng_start[1]  = (0xFC0E8000 & 0x3FFFFFF) - 1;
	lbw_rng_end[1]    = (0xFC120000 & 0x3FFFFFF) + 1;
	lbw_rng_start[1]  = (0xFC1E8000 & 0x3FFFFFF) - 1; /* 0x001E7FFF */
	lbw_rng_end[1]    = (0xFC48FFFF & 0x3FFFFFF) + 1; /* 0x00490000 */
	lbw_rng_start[2]  = (0xFC1E8000 & 0x3FFFFFF) - 1;
	lbw_rng_end[2]    = (0xFC48FFFF & 0x3FFFFFF) + 1;
	lbw_rng_start[2]  = (0xFC600000 & 0x3FFFFFF) - 1; /* 0x005FFFFF */
	lbw_rng_end[2]    = (0xFCC48FFF & 0x3FFFFFF) + 1; /* 0x00C49000 */
	lbw_rng_start[3]  = (0xFC600000 & 0x3FFFFFF) - 1;
	lbw_rng_end[3]    = (0xFCC48FFF & 0x3FFFFFF) + 1;
	lbw_rng_start[3]  = (0xFCC4A000 & 0x3FFFFFF) - 1; /* 0x00C49FFF */
	lbw_rng_end[3]    = (0xFCCDFFFF & 0x3FFFFFF) + 1; /* 0x00CE0000 */
	lbw_rng_start[4]  = (0xFCC4A000 & 0x3FFFFFF) - 1;
	lbw_rng_end[4]    = (0xFCCDFFFF & 0x3FFFFFF) + 1;
	lbw_rng_start[4]  = (0xFCCE4000 & 0x3FFFFFF) - 1; /* 0x00CE3FFF */
	lbw_rng_end[4]    = (0xFCD1FFFF & 0x3FFFFFF) + 1; /* 0x00D20000 */
	lbw_rng_start[5]  = (0xFCCE4000 & 0x3FFFFFF) - 1;
	lbw_rng_end[5]    = (0xFCD1FFFF & 0x3FFFFFF) + 1;
	lbw_rng_start[5]  = (0xFCD24000 & 0x3FFFFFF) - 1; /* 0x00D23FFF */
	lbw_rng_end[5]    = (0xFCD5FFFF & 0x3FFFFFF) + 1; /* 0x00D60000 */
	lbw_rng_start[6]  = (0xFCD24000 & 0x3FFFFFF) - 1;
	lbw_rng_end[6]    = (0xFCD5FFFF & 0x3FFFFFF) + 1;
	lbw_rng_start[6]  = (0xFCD64000 & 0x3FFFFFF) - 1; /* 0x00D63FFF */
	lbw_rng_end[6]    = (0xFCD9FFFF & 0x3FFFFFF) + 1; /* 0x00DA0000 */
	lbw_rng_start[7]  = (0xFCD64000 & 0x3FFFFFF) - 1;
	lbw_rng_end[7]    = (0xFCD9FFFF & 0x3FFFFFF) + 1;
	lbw_rng_start[7]  = (0xFCDA4000 & 0x3FFFFFF) - 1; /* 0x00DA3FFF */
	lbw_rng_end[7]    = (0xFCDDFFFF & 0x3FFFFFF) + 1; /* 0x00DE0000 */
	lbw_rng_start[8]  = (0xFCDA4000 & 0x3FFFFFF) - 1;
	lbw_rng_end[8]    = (0xFCDDFFFF & 0x3FFFFFF) + 1;
	lbw_rng_start[8]  = (0xFCDE4000 & 0x3FFFFFF) - 1; /* 0x00DE3FFF */
	lbw_rng_end[8]    = (0xFCE05FFF & 0x3FFFFFF) + 1; /* 0x00E06000 */
	lbw_rng_start[9]  = (0xFCDE4000 & 0x3FFFFFF) - 1;
	lbw_rng_end[9]    = (0xFCE05FFF & 0x3FFFFFF) + 1;
	lbw_rng_start[9]  = (0xFCFC9000 & 0x3FFFFFF) - 1; /* 0x00FC8FFF */
	lbw_rng_end[9]    = (0xFFFFFFFE & 0x3FFFFFF) + 1; /* 0x03FFFFFF */
	lbw_rng_start[10]  = (0xFEC43000 & 0x3FFFFFF) - 1;
	lbw_rng_end[10]    = (0xFEC43FFF & 0x3FFFFFF) + 1;
	lbw_rng_start[11] = (0xFE484000 & 0x3FFFFFF) - 1;
	lbw_rng_end[11]   = (0xFE484FFF & 0x3FFFFFF) + 1;
	for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
	for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) {
		WREG32(gaudi_rr_lbw_hit_aw_regs[i],
				(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
		WREG32(gaudi_rr_lbw_hit_ar_regs[i],
				(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
	}
	for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++)
	for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++)
		for (j = 0 ; j < GAUDI_NUMBER_OF_LBW_RANGES ; j++) {
			WREG32(gaudi_rr_lbw_min_aw_regs[i] + (j << 2),
							lbw_rng_start[j]);
@@ -12939,12 +12958,12 @@ static void gaudi_init_range_registers_hbw(struct hl_device *hdev)
	 * 6th range is the host
	 */
	for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
	for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
		WREG32(gaudi_rr_hbw_hit_aw_regs[i], 0x1F);
		WREG32(gaudi_rr_hbw_hit_ar_regs[i], 0x1D);
	}
	for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
	for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
		WREG32(gaudi_rr_hbw_base_low_aw_regs[i], dram_addr_lo);
		WREG32(gaudi_rr_hbw_base_low_ar_regs[i], dram_addr_lo);
+2 −0
Original line number Diff line number Diff line
@@ -308,6 +308,8 @@
#define mmPCIE_AUX_FLR_CTRL                                          0xC07394
#define mmPCIE_AUX_DBI                                               0xC07490

#define mmPCIE_CORE_MSI_REQ                                          0xC04100

#define mmPSOC_PCI_PLL_NR                                            0xC72100
#define mmSRAM_W_PLL_NR                                              0x4C8100
#define mmPSOC_HBM_PLL_NR                                            0xC74100