Commit cd5def80 authored by Koby Elbaz, committed by Oded Gabbay
Browse files

habanalabs: improve utilization calculation



The new approach is based on the notion that the device's current
power consumption, relative to its idle (DC) power and its maximum
power, is proportional to the device's true utilization.
The reported utilization is a percentage in the range [0,100].
Currently, the dc_power values are hard-coded.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent e8f9392a
Loading
Loading
Loading
Loading
+0 −18
Original line number Diff line number Diff line
@@ -505,24 +505,6 @@ static void cs_do_release(struct kref *ref)
		goto out;
	}

	hdev->asic_funcs->hw_queues_lock(hdev);

	hdev->cs_active_cnt--;
	if (!hdev->cs_active_cnt) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
		ts->busy_to_idle_ts = ktime_get();

		if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
			hdev->idle_busy_ts_idx = 0;
	} else if (hdev->cs_active_cnt < 0) {
		dev_crit(hdev->dev, "CS active cnt %d is negative\n",
			hdev->cs_active_cnt);
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Need to update CI for all queue jobs that does not get completion */
	hl_hw_queue_update_ci(cs);

+13 −108
Original line number Diff line number Diff line
@@ -383,17 +383,9 @@ static int device_early_init(struct hl_device *hdev)
		goto free_sob_reset_wq;
	}

	hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
					sizeof(struct hl_device_idle_busy_ts),
					(GFP_KERNEL | __GFP_ZERO));
	if (!hdev->idle_busy_ts_arr) {
		rc = -ENOMEM;
		goto free_chip_info;
	}

	rc = hl_mmu_if_set_funcs(hdev);
	if (rc)
		goto free_idle_busy_ts_arr;
		goto free_chip_info;

	hl_cb_mgr_init(&hdev->kernel_cb_mgr);

@@ -422,8 +414,6 @@ static int device_early_init(struct hl_device *hdev)

free_cb_mgr:
	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_idle_busy_ts_arr:
	kfree(hdev->idle_busy_ts_arr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_sob_reset_wq:
@@ -461,7 +451,6 @@ static void device_early_fini(struct hl_device *hdev)

	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

	kfree(hdev->idle_busy_ts_arr);
	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->sob_reset_wq);
@@ -582,100 +571,24 @@ static void device_late_fini(struct hl_device *hdev)
	hdev->late_init_done = false;
}

uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
	struct hl_device_idle_busy_ts *ts;
	ktime_t zero_ktime, curr = ktime_get();
	u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
	s64 period_us, last_start_us, last_end_us, last_busy_time_us,
		total_busy_time_us = 0, total_busy_time_ms;

	zero_ktime = ktime_set(0, 0);
	period_us = period_ms * USEC_PER_MSEC;
	ts = &hdev->idle_busy_ts_arr[last_index];

	/* check case that device is currently in idle */
	if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
			!ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {

		last_index--;
		/* Handle case idle_busy_ts_idx was 0 */
		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;

		ts = &hdev->idle_busy_ts_arr[last_index];
	}

	while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
		/* Check if we are in last sample case. i.e. if the sample
		 * begun before the sampling period. This could be a real
		 * sample or 0 so need to handle both cases
		 */
		last_start_us = ktime_to_us(
				ktime_sub(curr, ts->idle_to_busy_ts));

		if (last_start_us > period_us) {

			/* First check two cases:
			 * 1. If the device is currently busy
			 * 2. If the device was idle during the whole sampling
			 *    period
			 */

			if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
				/* Check if the device is currently busy */
				if (ktime_compare(ts->idle_to_busy_ts,
						zero_ktime))
					return 100;

				/* We either didn't have any activity or we
				 * reached an entry which is 0. Either way,
				 * exit and return what was accumulated so far
				 */
				break;
			}

			/* If sample has finished, check it is relevant */
			last_end_us = ktime_to_us(
					ktime_sub(curr, ts->busy_to_idle_ts));

			if (last_end_us > period_us)
				break;

			/* It is relevant so add it but with adjustment */
			last_busy_time_us = ktime_to_us(
						ktime_sub(ts->busy_to_idle_ts,
						ts->idle_to_busy_ts));
			total_busy_time_us += last_busy_time_us -
					(last_start_us - period_us);
			break;
		}

		/* Check if the sample is finished or still open */
		if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
			last_busy_time_us = ktime_to_us(
						ktime_sub(ts->busy_to_idle_ts,
						ts->idle_to_busy_ts));
		else
			last_busy_time_us = ktime_to_us(
					ktime_sub(curr, ts->idle_to_busy_ts));
	u64 max_power, curr_power, dc_power, dividend;
	int rc;

		total_busy_time_us += last_busy_time_us;
	max_power = hdev->asic_prop.max_power_default;
	dc_power = hdev->asic_prop.dc_power_default;
	rc = hl_fw_cpucp_power_get(hdev, &curr_power);

		last_index--;
		/* Handle case idle_busy_ts_idx was 0 */
		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
	if (rc)
		return rc;

		ts = &hdev->idle_busy_ts_arr[last_index];
	curr_power = clamp(curr_power, dc_power, max_power);

		overlap_cnt++;
	}
	dividend = (curr_power - dc_power) * 100;
	*utilization = (u32) div_u64(dividend, (max_power - dc_power));

	total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
						USEC_PER_MSEC);

	return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
	return 0;
}

/*
@@ -1110,14 +1023,6 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	hdev->idle_busy_ts_idx = 0;
	hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
	hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);

	if (hdev->cs_active_cnt)
		dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
			hdev->cs_active_cnt);

	mutex_lock(&hdev->fpriv_list_lock);

	/* Make sure the context switch phase will run again */
+3 −22
Original line number Diff line number Diff line
@@ -61,8 +61,6 @@

#define HL_SIM_MAX_TIMEOUT_US		10000000 /* 10s */

#define HL_IDLE_BUSY_TS_ARR_SIZE	4096

#define HL_COMMON_USER_INTERRUPT_ID	0xFFF

/* Memory */
@@ -391,6 +389,7 @@ struct hl_mmu_properties {
 * @dram_size: DRAM total size.
 * @dram_pci_bar_size: size of PCI bar towards DRAM.
 * @max_power_default: max power of the device after reset
 * @dc_power_default: power consumed by the device in idle mode.
 * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
 *                                      fault.
 * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
@@ -463,6 +462,7 @@ struct asic_fixed_properties {
	u64				dram_size;
	u64				dram_pci_bar_size;
	u64				max_power_default;
	u64				dc_power_default;
	u64				dram_size_for_default_page_mapping;
	u64				pcie_dbi_base_address;
	u64				pcie_aux_dbi_reg_addr;
@@ -1760,16 +1760,6 @@ struct hl_device_reset_work {
	struct hl_device		*hdev;
};

/**
 * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
 * @idle_to_busy_ts: timestamp where device changed from idle to busy.
 * @busy_to_idle_ts: timestamp where device changed from busy to idle.
 */
struct hl_device_idle_busy_ts {
	ktime_t				idle_to_busy_ts;
	ktime_t				busy_to_idle_ts;
};

/**
 * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop
 * information.
@@ -1941,8 +1931,6 @@ struct hl_mmu_funcs {
 *              when a user opens the device
 * @fpriv_list_lock: protects the fpriv_list
 * @compute_ctx: current compute context executing.
 * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
 *                    and vice-versa
 * @aggregated_cs_counters: aggregated cs counters among all contexts
 * @mmu_priv: device-specific MMU data.
 * @mmu_func: device-related MMU functions.
@@ -1960,13 +1948,10 @@ struct hl_mmu_funcs {
 * @curr_pll_profile: current PLL profile.
 * @card_type: Various ASICs have several card types. This indicates the card
 *             type of the current device.
 * @cs_active_cnt: number of active command submissions on this device (active
 *                 means already in H/W queues)
 * @major: habanalabs kernel driver major.
 * @high_pll: high PLL profile frequency.
 * @soft_reset_cnt: number of soft reset since the driver was loaded.
 * @hard_reset_cnt: number of hard reset since the driver was loaded.
 * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
 * @clk_throttling_reason: bitmask represents the current clk throttling reasons
 * @id: device minor.
 * @id_control: minor of the control device
@@ -2065,8 +2050,6 @@ struct hl_device {

	struct hl_ctx			*compute_ctx;

	struct hl_device_idle_busy_ts	*idle_busy_ts_arr;

	struct hl_cs_counters_atomic	aggregated_cs_counters;

	struct hl_mmu_priv		mmu_priv;
@@ -2081,12 +2064,10 @@ struct hl_device {
	atomic_t			in_reset;
	enum hl_pll_frequency		curr_pll_profile;
	enum cpucp_card_types		card_type;
	int				cs_active_cnt;
	u32				major;
	u32				high_pll;
	u32				soft_reset_cnt;
	u32				hard_reset_cnt;
	u32				idle_busy_ts_idx;
	u32				clk_throttling_reason;
	u16				id;
	u16				id_control;
@@ -2275,7 +2256,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags);
void hl_hpriv_get(struct hl_fpriv *hpriv);
int hl_hpriv_put(struct hl_fpriv *hpriv);
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
int hl_device_utilization(struct hl_device *hdev, u32 *utilization);

int hl_build_hwmon_channel_info(struct hl_device *hdev,
		struct cpucp_sensor *sensors_arr);
+3 −8
Original line number Diff line number Diff line
@@ -226,19 +226,14 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
	struct hl_info_device_utilization device_util = {0};
	u32 max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
	int rc;

	if ((!max_size) || (!out))
		return -EINVAL;

	if ((args->period_ms < 100) || (args->period_ms > 1000) ||
		(args->period_ms % 100)) {
		dev_err(hdev->dev,
			"period %u must be between 100 - 1000 and must be divisible by 100\n",
			args->period_ms);
	rc = hl_device_utilization(hdev, &device_util.utilization);
	if (rc)
		return -EINVAL;
	}

	device_util.utilization = hl_device_utilization(hdev, args->period_ms);

	return copy_to_user(out, &device_util,
		min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
+0 −8
Original line number Diff line number Diff line
@@ -635,14 +635,6 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)

	spin_unlock(&hdev->cs_mirror_lock);

	if (!hdev->cs_active_cnt++) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
		ts->busy_to_idle_ts = ktime_set(0, 0);
		ts->idle_to_busy_ts = ktime_get();
	}

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
Loading