Commit 20cd88a7 authored by Ohad Sharabi, committed by Oded Gabbay
Browse files

habanalabs: fixes to the poll-timeout macros



- use conventional internal macro variables (double underscore prefix)
- adjust address casting
- when polling a register through ELBI, also use an ELBI read (rather than
  a BAR read) for the final re-read on the timeout path
- remove unused macro

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 3fc25267
Loading
Loading
Loading
Loading
+90 −29
Original line number Diff line number Diff line
@@ -2473,9 +2473,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
/* Timeout should be longer when working with simulator but cap the
 * increased timeout to some maximum
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
#define hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, elbi) \
({ \
	ktime_t __timeout; \
	u32 __elbi_read; \
	int __rc = 0; \
	if (hdev->pdev) \
		__timeout = ktime_add_us(ktime_get(), timeout_us); \
	else \
@@ -2484,19 +2486,103 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		(val) = RREG32(addr); \
		if (elbi) { \
			__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
			if (__rc) \
				break; \
			(val) = __elbi_read; \
		} else {\
			(val) = RREG32((u32)addr); \
		} \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = RREG32(addr); \
			if (elbi) { \
				__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
				if (__rc) \
					break; \
				(val) = __elbi_read; \
			} else {\
				(val) = RREG32((u32)addr); \
			} \
			break; \
		} \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
	__rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \
})

/*
 * Poll a single register until @cond holds or the timeout expires.
 * Forwards to hl_poll_timeout_common() with the ELBI flag cleared, so the
 * register is read with RREG32().
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
	hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, \
				timeout_us, false)

/*
 * Poll a single register until @cond holds or the timeout expires.
 * Forwards to hl_poll_timeout_common() with the ELBI flag set, so the
 * register is read with hl_pci_elbi_read() and a failed read ends the poll
 * with that function's error code.
 */
#define hl_poll_timeout_elbi(hdev, addr, val, cond, sleep_us, timeout_us) \
	hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, \
				timeout_us, true)

/*
 * Poll an array of register addresses until every register reads the
 * expected value, or until the timeout expires.
 *
 * A register that already matched is cleared from the pending bitmask and is
 * not polled again. This is done both for efficiency and because some
 * registers are "clear on read".
 *
 * @hdev: device handle; hdev->pdev selects the timeout scaling and the handle
 *        is passed to hl_pci_elbi_read() on the ELBI path
 * @addr_arr: array of register addresses to poll
 * @arr_size: number of entries in @addr_arr; must be smaller than 64 because
 *            pending entries are tracked in a 64-bit mask
 * @expected_val: value each register has to read for the poll to complete
 * @sleep_us: upper bound of the sleep between polling rounds, in usec
 *            (0 means no sleep between rounds)
 * @timeout_us: timeout in usec (0 means the timeout is never checked)
 * @elbi: true to read with hl_pci_elbi_read(), false to read with RREG32()
 *
 * When hdev->pdev is not set the timeout is multiplied by 10, capped at
 * HL_SIM_MAX_TIMEOUT_US.
 *
 * Evaluates to 0 when all registers matched, -EINVAL when @arr_size >= 64,
 * the hl_pci_elbi_read() error code when an ELBI read failed, and -ETIMEDOUT
 * otherwise.
 *
 * TODO: use read from PCI bar in other places in the code (SW-91406)
 */
#define hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
						timeout_us, elbi) \
({ \
	ktime_t __timeout; \
	u64 __elem_bitmask = 0; \
	u32 __read_val; \
	u8 __arr_idx; \
	int __rc = 0; \
	\
	if ((hdev)->pdev) \
		__timeout = ktime_add_us(ktime_get(), (timeout_us)); \
	else \
		__timeout = ktime_add_us(ktime_get(), \
				min(((u64)(timeout_us) * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	\
	might_sleep_if(sleep_us); \
	/* one pending bit per array entry, so the array must fit in a u64 */ \
	if ((arr_size) >= 64) \
		__rc = -EINVAL; \
	else \
		__elem_bitmask = BIT_ULL(arr_size) - 1; \
	for (;;) { \
		if (__rc) \
			break; \
		for (__arr_idx = 0; __arr_idx < (arr_size); __arr_idx++) { \
			/* skip entries that already matched */ \
			if (!(__elem_bitmask & BIT_ULL(__arr_idx))) \
				continue; \
			if (elbi) { \
				__rc = hl_pci_elbi_read(hdev, (addr_arr)[__arr_idx], &__read_val); \
				if (__rc) \
					break; \
			} else { \
				__read_val = RREG32((u32)(addr_arr)[__arr_idx]); \
			} \
			if (__read_val == (expected_val)) \
				__elem_bitmask &= ~BIT_ULL(__arr_idx); \
		} \
		if (__rc || (__elem_bitmask == 0)) \
			break; \
		if ((timeout_us) && ktime_compare(ktime_get(), __timeout) > 0) \
			break; \
		if (sleep_us) \
			usleep_range(((sleep_us) >> 2) + 1, (sleep_us)); \
	} \
	__rc ? __rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \
})

/*
 * Poll an array of registers with RREG32() until all of them read
 * @expected_val or the timeout expires.
 * Forwards to hl_poll_reg_array_timeout_common() with the ELBI flag cleared.
 */
#define hl_poll_reg_array_timeout(hdev, addr_arr, arr_size, expected_val, sleep_us, \
					timeout_us) \
	hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, \
						expected_val, sleep_us, \
						timeout_us, false)

/*
 * Poll an array of registers with hl_pci_elbi_read() until all of them read
 * @expected_val or the timeout expires.
 * Forwards to hl_poll_reg_array_timeout_common() with the ELBI flag set.
 */
#define hl_poll_reg_array_timeout_elbi(hdev, addr_arr, arr_size, expected_val, sleep_us, \
					timeout_us) \
	hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, \
						expected_val, sleep_us, \
						timeout_us, true)

/*
 * address in this macro points always to a memory location in the
 * host's (server's) memory. That location is updated asynchronously
@@ -2540,31 +2626,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
	(cond) ? 0 : -ETIMEDOUT; \
})

/*
 * Poll a location that is read with readl() until @cond holds or the timeout
 * expires.
 *
 * @hdev: device handle; only hdev->pdev is inspected by this macro
 * @addr: address handed to readl() on every poll
 * @val: lvalue that receives each value read (@cond is expected to test it)
 * @cond: break condition, evaluated after every read and once more at exit
 * @sleep_us: upper bound of the sleep between reads, in usec
 *            (0 means no sleep between reads)
 * @timeout_us: timeout in usec (0 means the timeout is never checked)
 *
 * When hdev->pdev is not set the timeout is multiplied by 10, capped at
 * HL_SIM_MAX_TIMEOUT_US.
 *
 * Evaluates to 0 if @cond is true at exit, -ETIMEDOUT otherwise.
 */
#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
					timeout_us) \
({ \
	ktime_t __timeout; \
	if ((hdev)->pdev) \
		__timeout = ktime_add_us(ktime_get(), (timeout_us)); \
	else \
		/* widen before multiplying so a 32-bit timeout cannot wrap */ \
		__timeout = ktime_add_us(ktime_get(), \
				min(((u64)(timeout_us) * 10), \
					(u64) HL_SIM_MAX_TIMEOUT_US)); \
	might_sleep_if(sleep_us); \
	for (;;) { \
		(val) = readl(addr); \
		if (cond) \
			break; \
		if ((timeout_us) && ktime_compare(ktime_get(), __timeout) > 0) { \
			/* read once more so the final @cond check sees a fresh value */ \
			(val) = readl(addr); \
			break; \
		} \
		if (sleep_us) \
			usleep_range(((sleep_us) >> 2) + 1, (sleep_us)); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})

#define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \
({ \
	struct user_mapped_block *p = blk; \