Commit d3222595 authored by Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2022-09-21' of...

Merge tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

  "This tag contains habanalabs driver changes for v6.1:

   - Support new notifier event for device state change through eventfd.
   - Add uAPI to retrieve device attestation information for Gaudi2.
   - Add uAPI to retrieve the h/w status of all h/w blocks.
   - Add uAPI to control the running mode of the engine cores in Gaudi2.
   - Expose whether the device runs with secured firmware through the INFO ioctl
     and sysfs.
   - Support trace events in DMA allocations and MMU map/unmap operations.
   - Notify firmware when the device was acquired by a user process and when it
     was released. This is done as part of the RAS that the f/w performs.
   - Multiple bug fixes, refactors and renames.
   - Cleanup of error messages, moving some to debug level.
   - Enhance log prints in case of h/w error events for Gaudi2."

* tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (68 commits)
  habanalabs: eliminate aggregate use warning
  habanalabs/gaudi: use 8KB aligned address for TPC kernels
  habanalabs: remove some f/w descriptor validations
  habanalabs: build ASICs from new to old
  habanalabs/gaudi2: allow user to flush PCIE by read
  habanalabs: failure to open device due to reset is debug level
  habanalabs/gaudi2: Remove unnecessary (void*) conversions
  habanalabs/gaudi2: add secured attestation info uapi
  habanalabs/gaudi2: add handling to pmmu events in eqe handler
  habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err
  habanalabs: rename error info structure
  habanalabs/gaudi2: get f/w reset status register dynamically
  habanalabs/gaudi2: increase hard-reset sleep time to 2 sec
  habanalabs/gaudi2: print RAZWI info upon PCIe access error
  habanalabs: MMU invalidation h/w is per device
  habanalabs: new notifier events for device state
  habanalabs/gaudi2: free event irq if init fails
  habanalabs: fix resetting the DRAM BAR
  habanalabs: add support for new cpucp return codes
  habanalabs/gaudi2: read F/W security indication after hard reset
  ...
parents 8be7dfc6 259cee1c
Loading
Loading
Loading
Loading
+9 −3
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ Description: Version of the application running on the device's CPU

What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
Date:           Jun 2019
KernelVersion:  not yet upstreamed
KernelVersion:  5.7
Contact:        ogabbay@kernel.org
Description:    Allows the user to set the maximum clock frequency, in MHz.
                The device clock might be set to lower value than the maximum.
@@ -26,7 +26,7 @@ Description: Allows the user to set the maximum clock frequency, in MHz.

What:           /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
Date:           Jun 2019
KernelVersion:  not yet upstreamed
KernelVersion:  5.7
Contact:        ogabbay@kernel.org
Description:    Displays the current frequency, in MHz, of the device clock.
                This property is valid only for the Gaudi ASIC family
@@ -176,6 +176,12 @@ KernelVersion: 5.1
Contact:        ogabbay@kernel.org
Description:    Version of the device's preboot F/W code

What:           /sys/class/habanalabs/hl<n>/security_enabled
Date:           Oct 2022
KernelVersion:  6.1
Contact:        obitton@habana.ai
Description:    Displays the device's security status

What:           /sys/class/habanalabs/hl<n>/soft_reset
Date:           Jan 2019
KernelVersion:  5.1
@@ -230,6 +236,6 @@ Description: Version of the u-boot running on the device's CPU

What:           /sys/class/habanalabs/hl<n>/vrm_ver
Date:           Jan 2022
KernelVersion:  not yet upstreamed
KernelVersion:  5.17
Contact:        ogabbay@kernel.org
Description:    Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
+1 −0
Original line number Diff line number Diff line
@@ -8878,6 +8878,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
F:	Documentation/ABI/testing/debugfs-driver-habanalabs
F:	Documentation/ABI/testing/sysfs-driver-habanalabs
F:	drivers/misc/habanalabs/
F:	include/trace/events/habanalabs.h
F:	include/uapi/misc/habanalabs.h
HACKRF MEDIA DRIVER
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ config HABANA_AI
	select HWMON
	select DMA_SHARED_BUFFER
	select CRC32
	select FW_LOADER
	help
	  Enables PCIe card driver for Habana's AI Processors (AIP) that are
	  designed to accelerate Deep Learning inference and training workloads.
+4 −4
Original line number Diff line number Diff line
@@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o
include $(src)/common/Makefile
habanalabs-y += $(HL_COMMON_FILES)

include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
include $(src)/gaudi2/Makefile
habanalabs-y += $(HL_GAUDI2_FILES)

include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)

include $(src)/gaudi2/Makefile
habanalabs-y += $(HL_GAUDI2_FILES)
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)

habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
+35 −92
Original line number Diff line number Diff line
@@ -12,20 +12,18 @@
#include <linux/slab.h>
#include <linux/uaccess.h>

#define CB_VA_POOL_SIZE		(4UL * SZ_1G)

static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm_va_block *va_block, *tmp;
	dma_addr_t bus_addr;
	u64 virt_addr;
	u32 page_size = prop->pmmu.page_size;
	s32 offset;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because no VA range is allocated for CB mapping\n");
				"Mapping a CB to the device's MMU is not supported\n");
		return -EINVAL;
	}

@@ -35,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
		return -EINVAL;
	}

	INIT_LIST_HEAD(&cb->va_block_list);
	if (cb->is_mmu_mapped)
		return 0;

	for (bus_addr = cb->bus_address;
			bus_addr < cb->bus_address + cb->size;
			bus_addr += page_size) {
	cb->roundup_size = roundup(cb->size, page_size);

		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
		if (!virt_addr) {
			dev_err(hdev->dev,
				"Failed to allocate device virtual address for CB\n");
			rc = -ENOMEM;
			goto err_va_pool_free;
		}

		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
		if (!va_block) {
			rc = -ENOMEM;
			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
			goto err_va_pool_free;
		}

		va_block->start = virt_addr;
		va_block->end = virt_addr + page_size - 1;
		va_block->size = page_size;
		list_add_tail(&va_block->node, &cb->va_block_list);
	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
	if (!cb->virtual_addr) {
		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
		return -ENOMEM;
	}

	mutex_lock(&ctx->mmu_lock);

	bus_addr = cb->bus_address;
	offset = 0;
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
				va_block->size, list_is_last(&va_block->node,
							&cb->va_block_list));
	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
	if (rc) {
			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
				va_block->start);
		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
		goto err_va_umap;
	}

		bus_addr += va_block->size;
		offset += va_block->size;
	}

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);

	mutex_unlock(&ctx->mmu_lock);
	mutex_unlock(&hdev->mmu_lock);

	cb->is_mmu_mapped = true;

	return rc;

err_va_umap:
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		if (offset <= 0)
			break;
		hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
				offset <= va_block->size);
		offset -= va_block->size;
	}

	rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

err_va_pool_free:
	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}

	mutex_unlock(&hdev->mmu_lock);
	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
	return rc;
}

static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm_va_block *va_block, *tmp;

	mutex_lock(&ctx->mmu_lock);

	list_for_each_entry(va_block, &cb->va_block_list, node)
		if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list)))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap CB's va 0x%llx\n",
					va_block->start);

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	mutex_unlock(&ctx->mmu_lock);

	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}
	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
@@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
	struct hl_vm_va_block *va_block;
	struct hl_cb *cb;
	int rc = 0;

@@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
		va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
		if (va_block) {
			*device_va = va_block->start;
		if (cb->is_mmu_mapped) {
			*device_va = cb->virtual_addr;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
@@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx)
		return -ENOMEM;
	}

	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!ctx->cb_va_pool_base) {
		rc = -ENOMEM;
		goto err_pool_destroy;
	}
	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_pool_destroy;
		goto err_unreserve_va_block;
	}

	return 0;

err_unreserve_va_block:
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

@@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}
Loading