Commit f5944964 authored by Vasant Hegde, committed by Joerg Roedel
Browse files

iommu/amd: Add 5 level guest page table support



Newer AMD IOMMUs support a 5-level guest page table (v2 page table). If both
the processor and the IOMMU support 5-level page tables, enable 5-level mode.
Otherwise, fall back to a 4-level page table.

Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Link: https://lore.kernel.org/r/20230310090000.1117786-1-vasant.hegde@amd.com


Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 4d4a0dba
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -34,6 +34,7 @@ extern int amd_iommu_reenable(int);
extern int amd_iommu_enable_faulting(void);
extern int amd_iommu_enable_faulting(void);
extern int amd_iommu_guest_ir;
extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;


/* IOMMUv2 specific functions */
/* IOMMUv2 specific functions */
struct iommu_domain;
struct iommu_domain;
+7 −0
Original line number Original line Diff line number Diff line
@@ -93,6 +93,8 @@
#define FEATURE_GA		(1ULL<<7)
#define FEATURE_GA		(1ULL<<7)
#define FEATURE_HE		(1ULL<<8)
#define FEATURE_HE		(1ULL<<8)
#define FEATURE_PC		(1ULL<<9)
#define FEATURE_PC		(1ULL<<9)
#define FEATURE_GATS_SHIFT	(12)
#define FEATURE_GATS_MASK	(3ULL)
#define FEATURE_GAM_VAPIC	(1ULL<<21)
#define FEATURE_GAM_VAPIC	(1ULL<<21)
#define FEATURE_GIOSUP		(1ULL<<48)
#define FEATURE_GIOSUP		(1ULL<<48)
#define FEATURE_EPHSUP		(1ULL<<50)
#define FEATURE_EPHSUP		(1ULL<<50)
@@ -305,6 +307,9 @@
#define PAGE_MODE_6_LEVEL 0x06
#define PAGE_MODE_6_LEVEL 0x06
#define PAGE_MODE_7_LEVEL 0x07
#define PAGE_MODE_7_LEVEL 0x07


#define GUEST_PGTABLE_4_LEVEL	0x00
#define GUEST_PGTABLE_5_LEVEL	0x01

#define PM_LEVEL_SHIFT(x)	(12 + ((x) * 9))
#define PM_LEVEL_SHIFT(x)	(12 + ((x) * 9))
#define PM_LEVEL_SIZE(x)	(((x) < 6) ? \
#define PM_LEVEL_SIZE(x)	(((x) < 6) ? \
				  ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
				  ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
@@ -398,6 +403,8 @@
#define DTE_GCR3_SHIFT_B	16
#define DTE_GCR3_SHIFT_B	16
#define DTE_GCR3_SHIFT_C	43
#define DTE_GCR3_SHIFT_C	43


#define DTE_GPT_LEVEL_SHIFT	54

#define GCR3_VALID		0x01ULL
#define GCR3_VALID		0x01ULL


#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
+21 −2
Original line number Original line Diff line number Diff line
@@ -153,6 +153,8 @@ bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
bool amd_iommu_irq_remap __read_mostly;


enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;


int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
@@ -306,6 +308,11 @@ static bool check_feature_on_all_iommus(u64 mask)
	return !!(amd_iommu_efr & mask);
	return !!(amd_iommu_efr & mask);
}
}


/*
 * Read the GATS field out of the global amd_iommu_efr value.
 *
 * The field is isolated by shifting right by FEATURE_GATS_SHIFT and masking
 * with FEATURE_GATS_MASK. The result is the raw field encoding, which callers
 * compare against the GUEST_PGTABLE_*_LEVEL constants; it is not a
 * PAGE_MODE_*_LEVEL value.
 */
static inline int check_feature_gpt_level(void)
{
	const int gats = (amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK;

	return gats;
}

/*
/*
 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
 * Default to IVHD EFR since it is available sooner
 * Default to IVHD EFR since it is available sooner
@@ -2155,8 +2162,10 @@ static void print_iommu_info(void)
		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
			pr_info("X2APIC enabled\n");
			pr_info("X2APIC enabled\n");
	}
	}
	if (amd_iommu_pgtable == AMD_IOMMU_V2)
	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
		pr_info("V2 page table enabled\n");
		pr_info("V2 page table enabled (Paging mode : %d level)\n",
			amd_iommu_gpt_level);
	}
}
}


static int __init amd_iommu_init_pci(void)
static int __init amd_iommu_init_pci(void)
@@ -3026,6 +3035,11 @@ static int __init early_amd_iommu_init(void)
	if (ret)
	if (ret)
		goto out;
		goto out;


	/* 5 level guest page table */
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;

	/* Disable any previously enabled IOMMUs */
	/* Disable any previously enabled IOMMUs */
	if (!is_kdump_kernel() || amd_iommu_disabled)
	if (!is_kdump_kernel() || amd_iommu_disabled)
		disable_iommus();
		disable_iommus();
@@ -3557,6 +3571,11 @@ __setup("ivrs_acpihid", parse_ivrs_acpihid);


bool amd_iommu_v2_supported(void)
bool amd_iommu_v2_supported(void)
{
{
	/* CPU page table size should match IOMMU guest page table size */
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
		return false;

	/*
	/*
	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
+6 −3
Original line number Original line Diff line number Diff line
@@ -37,8 +37,7 @@


static inline int get_pgtable_level(void)
static inline int get_pgtable_level(void)
{
{
	/* 5 level page table is not supported */
	return amd_iommu_gpt_level;
	return PAGE_MODE_4_LEVEL;
}
}


static inline bool is_large_pte(u64 pte)
static inline bool is_large_pte(u64 pte)
@@ -379,6 +378,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
	struct protection_domain *pdom = (struct protection_domain *)cookie;
	struct protection_domain *pdom = (struct protection_domain *)cookie;
	int ret;
	int ret;
	int ias = IOMMU_IN_ADDR_BIT_SIZE;


	pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC);
	pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC);
	if (!pgtable->pgd)
	if (!pgtable->pgd)
@@ -388,12 +388,15 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
	if (ret)
	if (ret)
		goto err_free_pgd;
		goto err_free_pgd;


	if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
		ias = 57;

	pgtable->iop.ops.map_pages    = iommu_v2_map_pages;
	pgtable->iop.ops.map_pages    = iommu_v2_map_pages;
	pgtable->iop.ops.unmap_pages  = iommu_v2_unmap_pages;
	pgtable->iop.ops.unmap_pages  = iommu_v2_unmap_pages;
	pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;
	pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;


	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2,
	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2,
	cfg->ias           = IOMMU_IN_ADDR_BIT_SIZE,
	cfg->ias           = ias,
	cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE,
	cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE,
	cfg->tlb           = &v2_flush_ops;
	cfg->tlb           = &v2_flush_ops;


+5 −0
Original line number Original line Diff line number Diff line
@@ -1611,6 +1611,11 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
		tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
		tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
		flags    |= tmp;
		flags    |= tmp;


		if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
			dev_table[devid].data[2] |=
				((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
		}

		if (domain->flags & PD_GIOV_MASK)
		if (domain->flags & PD_GIOV_MASK)
			pte_root |= DTE_FLAG_GIOV;
			pte_root |= DTE_FLAG_GIOV;
	}
	}