Commit b11f623c authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-fixes-2021-01-21' of...

Merge tag 'misc-habanalabs-fixes-2021-01-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus

Oded writes:

This tag contains the following bug fixes for 5.11-rc5/6:

- Clear the fence field in the PCI counters packet before sending
  the packet to the F/W. Not clearing it might cause the driver
  and F/W to get out-of-sync

- Fix backward compatibility in the uapi of IDLE check that is
  part of the INFO IOCTL.

- Tell the F/W to not access the Host (device outbound) while
  the driver removes the device. If that happens, the server
  might crash.

* tag 'misc-habanalabs-fixes-2021-01-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs: disable FW events on device removal
  habanalabs: fix backward compatibility of idle check
  habanalabs: zero pci counters packet before submit to FW
parents cb5c681a 2dc4a6d7
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -1487,6 +1487,15 @@ void hl_device_fini(struct hl_device *hdev)
		}
	}

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked disable, the
	 * message won't be send. Also, in case of heartbeat, the device CPU is
	 * marked as disable so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev,	CPUCP_PACKET_DISABLE_PCI_ACCESS);

	/* Mark device as disabled */
	hdev->disabled = true;

+5 −0
Original line number Diff line number Diff line
@@ -402,6 +402,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
	}
	counters->rx_throughput = result;

	memset(&pkt, 0, sizeof(pkt));
	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
			CPUCP_PKT_CTL_OPCODE_SHIFT);

	/* Fetch PCI tx counter */
	pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -414,6 +418,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
	counters->tx_throughput = result;

	/* Fetch PCI replay counter */
	memset(&pkt, 0, sizeof(pkt));
	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
			CPUCP_PKT_CTL_OPCODE_SHIFT);

+2 −0
Original line number Diff line number Diff line
@@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)

	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
					&hw_idle.busy_engines_mask_ext, NULL);
	hw_idle.busy_engines_mask =
			lower_32_bits(hw_idle.busy_engines_mask_ext);

	return copy_to_user(out, &hw_idle,
		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;