Commit 6c7c3c21 authored by Kirill A. Shutemov's avatar Kirill A. Shutemov Committed by Paolo Bonzini
Browse files

x86: implement la57 paging mode



The new paging more is extension of IA32e mode with more additional page
table level.

It brings support of 57-bit vitrual address space (128PB) and 52-bit
physical address space (4PB).

The structure of new page table level is identical to pml4.

The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].

CR4.LA57[bit 12] need to be set when pageing enables to activate 5-level
paging mode.

Signed-off-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Message-Id: <20161215001305.146807-1-kirill.shutemov@linux.intel.com>
[Drop changes to target-i386/translate.c. - Paolo]
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent c52ab08a
Loading
Loading
Loading
Loading
+37 −5
Original line number Diff line number Diff line
@@ -220,7 +220,8 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as,

/* IA-32e Paging */
static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
                       hwaddr pml4e_start_addr, int32_t a20_mask)
                       hwaddr pml4e_start_addr, int32_t a20_mask,
                       target_ulong start_line_addr)
{
    hwaddr pml4e_addr, pdpe_start_addr;
    uint64_t pml4e;
@@ -236,11 +237,34 @@ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
            continue;
        }

        line_addr = ((i & 0x1ffULL) << 39) | (0xffffULL << 48);
        line_addr = start_line_addr | ((i & 0x1ffULL) << 39);
        pdpe_start_addr = (pml4e & PLM4_ADDR_MASK) & a20_mask;
        walk_pdpe(list, as, pdpe_start_addr, a20_mask, line_addr);
    }
}

static void walk_pml5e(MemoryMappingList *list, AddressSpace *as,
                       hwaddr pml5e_start_addr, int32_t a20_mask)
{
    hwaddr pml5e_addr, pml4e_start_addr;
    uint64_t pml5e;
    target_ulong line_addr;
    int i;

    for (i = 0; i < 512; i++) {
        pml5e_addr = (pml5e_start_addr + i * 8) & a20_mask;
        pml5e = address_space_ldq(as, pml5e_addr, MEMTXATTRS_UNSPECIFIED,
                                  NULL);
        if (!(pml5e & PG_PRESENT_MASK)) {
            /* not present */
            continue;
        }

        line_addr = (0x7fULL << 57) | ((i & 0x1ffULL) << 48);
        pml4e_start_addr = (pml5e & PLM4_ADDR_MASK) & a20_mask;
        walk_pml4e(list, as, pml4e_start_addr, a20_mask, line_addr);
    }
}
#endif

void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
@@ -257,10 +281,18 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
    if (env->cr[4] & CR4_PAE_MASK) {
#ifdef TARGET_X86_64
        if (env->hflags & HF_LMA_MASK) {
            if (env->cr[4] & CR4_LA57_MASK) {
                hwaddr pml5e_addr;

                pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
                walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask);
            } else {
                hwaddr pml4e_addr;

                pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
            walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask);
                walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask,
                        0xffffULL << 48);
            }
        } else
#endif
        {
+10 −6
Original line number Diff line number Diff line
@@ -238,7 +238,8 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
          CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
          CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
          CPUID_7_0_EBX_RDSEED */
#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | \
          CPUID_7_0_ECX_LA57)
#define TCG_7_0_EDX_FEATURES 0
#define TCG_APM_FEATURES 0
#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
@@ -435,7 +436,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
            "ospke", NULL, NULL, NULL,
            NULL, NULL, NULL, NULL,
            NULL, NULL, NULL, NULL,
            NULL, NULL, NULL, NULL,
            "la57", NULL, NULL, NULL,
            NULL, NULL, "rdpid", NULL,
            NULL, NULL, NULL, NULL,
            NULL, NULL, NULL, NULL,
@@ -2742,10 +2743,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
    case 0x80000008:
        /* virtual & phys address size in low 2 bytes. */
        if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
            /* 64 bit processor, 48 bits virtual, configurable
             * physical bits.
             */
            *eax = 0x00003000 + cpu->phys_bits;
            /* 64 bit processor */
            *eax = cpu->phys_bits; /* configurable physical bits */
            if  (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) {
                *eax |= 0x00003900; /* 57 bits virtual */
            } else {
                *eax |= 0x00003000; /* 48 bits virtual */
            }
        } else {
            *eax = cpu->phys_bits;
        }
+2 −0
Original line number Diff line number Diff line
@@ -224,6 +224,7 @@
#define CR4_OSFXSR_SHIFT 9
#define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT)
#define CR4_OSXMMEXCPT_MASK  (1U << 10)
#define CR4_LA57_MASK   (1U << 12)
#define CR4_VMXE_MASK   (1U << 13)
#define CR4_SMXE_MASK   (1U << 14)
#define CR4_FSGSBASE_MASK (1U << 16)
@@ -629,6 +630,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_ECX_UMIP     (1U << 2)
#define CPUID_7_0_ECX_PKU      (1U << 3)
#define CPUID_7_0_ECX_OSPKE    (1U << 4)
#define CPUID_7_0_ECX_LA57     (1U << 16)
#define CPUID_7_0_ECX_RDPID    (1U << 22)

#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */
+45 −9
Original line number Diff line number Diff line
@@ -651,11 +651,11 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
    uint32_t hflags;

#if defined(DEBUG_MMU)
    printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
    printf("CR4 update: %08x -> %08x\n", (uint32_t)env->cr[4], new_cr4);
#endif
    if ((new_cr4 ^ env->cr[4]) &
        (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
         CR4_SMEP_MASK | CR4_SMAP_MASK)) {
         CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) {
        tlb_flush(CPU(cpu), 1);
    }

@@ -757,19 +757,41 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,

#ifdef TARGET_X86_64
        if (env->hflags & HF_LMA_MASK) {
            bool la57 = env->cr[4] & CR4_LA57_MASK;
            uint64_t pml5e_addr, pml5e;
            uint64_t pml4e_addr, pml4e;
            int32_t sext;

            /* test virtual address sign extension */
            sext = (int64_t)addr >> 47;
            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
            if (sext != 0 && sext != -1) {
                env->error_code = 0;
                cs->exception_index = EXCP0D_GPF;
                return 1;
            }

            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                env->a20_mask;
            if (la57) {
                pml5e_addr = ((env->cr[3] & ~0xfff) +
                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
                pml5e = x86_ldq_phys(cs, pml5e_addr);
                if (!(pml5e & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!(pml5e & PG_ACCESSED_MASK)) {
                    pml5e |= PG_ACCESSED_MASK;
                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
                }
                ptep = pml5e ^ PG_NX_MASK;
            } else {
                pml5e = env->cr[3];
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }

            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
            pml4e = x86_ldq_phys(cs, pml4e_addr);
            if (!(pml4e & PG_PRESENT_MASK)) {
                goto do_fault;
@@ -781,7 +803,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
                pml4e |= PG_ACCESSED_MASK;
                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
            }
            ptep = pml4e ^ PG_NX_MASK;
            ptep &= pml4e ^ PG_NX_MASK;
            pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                env->a20_mask;
            pdpe = x86_ldq_phys(cs, pdpe_addr);
@@ -1024,16 +1046,30 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)

#ifdef TARGET_X86_64
        if (env->hflags & HF_LMA_MASK) {
            bool la57 = env->cr[4] & CR4_LA57_MASK;
            uint64_t pml5e_addr, pml5e;
            uint64_t pml4e_addr, pml4e;
            int32_t sext;

            /* test virtual address sign extension */
            sext = (int64_t)addr >> 47;
            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
            if (sext != 0 && sext != -1) {
                return -1;
            }
            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                env->a20_mask;

            if (la57) {
                pml5e_addr = ((env->cr[3] & ~0xfff) +
                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
                pml5e = x86_ldq_phys(cs, pml5e_addr);
                if (!(pml5e & PG_PRESENT_MASK)) {
                    return -1;
                }
            } else {
                pml5e = env->cr[3];
            }

            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
            pml4e = x86_ldq_phys(cs, pml4e_addr);
            if (!(pml4e & PG_PRESENT_MASK)) {
                return -1;
+180 −54
Original line number Diff line number Diff line
@@ -30,14 +30,19 @@
#include "hmp.h"


static void print_pte(Monitor *mon, hwaddr addr,
                      hwaddr pte,
                      hwaddr mask)
static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
                      hwaddr pte, hwaddr mask)
{
#ifdef TARGET_X86_64
    if (env->cr[4] & CR4_LA57_MASK) {
        if (addr & (1ULL << 56)) {
            addr |= -1LL << 57;
        }
    } else {
        if (addr & (1ULL << 47)) {
            addr |= -1LL << 48;
        }
    }
#endif
    monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
                   " %c%c%c%c%c%c%c%c%c\n",
@@ -66,13 +71,13 @@ static void tlb_info_32(Monitor *mon, CPUArchState *env)
        if (pde & PG_PRESENT_MASK) {
            if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
                /* 4M pages */
                print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1));
                print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1));
            } else {
                for(l2 = 0; l2 < 1024; l2++) {
                    cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4);
                    pte = le32_to_cpu(pte);
                    if (pte & PG_PRESENT_MASK) {
                        print_pte(mon, (l1 << 22) + (l2 << 12),
                        print_pte(mon, env, (l1 << 22) + (l2 << 12),
                                  pte & ~PG_PSE_MASK,
                                  ~0xfff);
                    }
@@ -100,7 +105,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
                if (pde & PG_PRESENT_MASK) {
                    if (pde & PG_PSE_MASK) {
                        /* 2M pages with PAE, CR4.PSE is ignored */
                        print_pte(mon, (l1 << 30 ) + (l2 << 21), pde,
                        print_pte(mon, env, (l1 << 30) + (l2 << 21), pde,
                                  ~((hwaddr)(1 << 20) - 1));
                    } else {
                        pt_addr = pde & 0x3fffffffff000ULL;
@@ -108,7 +113,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
                            cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8);
                            pte = le64_to_cpu(pte);
                            if (pte & PG_PRESENT_MASK) {
                                print_pte(mon, (l1 << 30 ) + (l2 << 21)
                                print_pte(mon, env, (l1 << 30) + (l2 << 21)
                                          + (l3 << 12),
                                          pte & ~PG_PSE_MASK,
                                          ~(hwaddr)0xfff);
@@ -122,38 +127,50 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
}

#ifdef TARGET_X86_64
static void tlb_info_64(Monitor *mon, CPUArchState *env)
static void tlb_info_la48(Monitor *mon, CPUArchState *env,
        uint64_t l0, uint64_t pml4_addr)
{
    uint64_t l1, l2, l3, l4;
    uint64_t pml4e, pdpe, pde, pte;
    uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr;
    uint64_t pdp_addr, pd_addr, pt_addr;

    pml4_addr = env->cr[3] & 0x3fffffffff000ULL;
    for (l1 = 0; l1 < 512; l1++) {
        cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
        pml4e = le64_to_cpu(pml4e);
        if (pml4e & PG_PRESENT_MASK) {
        if (!(pml4e & PG_PRESENT_MASK)) {
            continue;
        }

        pdp_addr = pml4e & 0x3fffffffff000ULL;
        for (l2 = 0; l2 < 512; l2++) {
            cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
            pdpe = le64_to_cpu(pdpe);
                if (pdpe & PG_PRESENT_MASK) {
            if (!(pdpe & PG_PRESENT_MASK)) {
                continue;
            }

            if (pdpe & PG_PSE_MASK) {
                /* 1G pages, CR4.PSE is ignored */
                        print_pte(mon, (l1 << 39) + (l2 << 30), pdpe,
                                  0x3ffffc0000000ULL);
                    } else {
                print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30),
                        pdpe, 0x3ffffc0000000ULL);
                continue;
            }

            pd_addr = pdpe & 0x3fffffffff000ULL;
            for (l3 = 0; l3 < 512; l3++) {
                cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
                pde = le64_to_cpu(pde);
                            if (pde & PG_PRESENT_MASK) {
                if (!(pde & PG_PRESENT_MASK)) {
                    continue;
                }

                if (pde & PG_PSE_MASK) {
                    /* 2M pages, CR4.PSE is ignored */
                                    print_pte(mon, (l1 << 39) + (l2 << 30) +
                                              (l3 << 21), pde,
                                              0x3ffffffe00000ULL);
                                } else {
                    print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) +
                            (l3 << 21), pde, 0x3ffffffe00000ULL);
                    continue;
                }

                pt_addr = pde & 0x3fffffffff000ULL;
                for (l4 = 0; l4 < 512; l4++) {
                    cpu_physical_memory_read(pt_addr
@@ -161,19 +178,28 @@ static void tlb_info_64(Monitor *mon, CPUArchState *env)
                            &pte, 8);
                    pte = le64_to_cpu(pte);
                    if (pte & PG_PRESENT_MASK) {
                                            print_pte(mon, (l1 << 39) +
                                                      (l2 << 30) +
                                                      (l3 << 21) + (l4 << 12),
                                                      pte & ~PG_PSE_MASK,
                                                      0x3fffffffff000ULL);
                                        }
                                    }
                        print_pte(mon, env, (l0 << 48) + (l1 << 39) +
                                (l2 << 30) + (l3 << 21) + (l4 << 12),
                                pte & ~PG_PSE_MASK, 0x3fffffffff000ULL);
                    }
                }
            }
        }
    }
}

static void tlb_info_la57(Monitor *mon, CPUArchState *env)
{
    uint64_t l0;
    uint64_t pml5e;
    uint64_t pml5_addr;

    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
    for (l0 = 0; l0 < 512; l0++) {
        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
        pml5e = le64_to_cpu(pml5e);
        if (pml5e & PG_PRESENT_MASK) {
            tlb_info_la48(mon, env, l0, pml5e & 0x3fffffffff000ULL);
        }
    }
}
@@ -192,7 +218,11 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict)
    if (env->cr[4] & CR4_PAE_MASK) {
#ifdef TARGET_X86_64
        if (env->hflags & HF_LMA_MASK) {
            tlb_info_64(mon, env);
            if (env->cr[4] & CR4_LA57_MASK) {
                tlb_info_la57(mon, env);
            } else {
                tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL);
            }
        } else
#endif
        {
@@ -324,7 +354,7 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env)


#ifdef TARGET_X86_64
static void mem_info_64(Monitor *mon, CPUArchState *env)
static void mem_info_la48(Monitor *mon, CPUArchState *env)
{
    int prot, last_prot;
    uint64_t l1, l2, l3, l4;
@@ -400,6 +430,98 @@ static void mem_info_64(Monitor *mon, CPUArchState *env)
    /* Flush last range */
    mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0);
}

static void mem_info_la57(Monitor *mon, CPUArchState *env)
{
    int prot, last_prot;
    uint64_t l0, l1, l2, l3, l4;
    uint64_t pml5e, pml4e, pdpe, pde, pte;
    uint64_t pml5_addr, pml4_addr, pdp_addr, pd_addr, pt_addr, start, end;

    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
    last_prot = 0;
    start = -1;
    for (l0 = 0; l0 < 512; l0++) {
        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
        pml4e = le64_to_cpu(pml5e);
        end = l0 << 48;
        if (!(pml5e & PG_PRESENT_MASK)) {
            prot = 0;
            mem_print(mon, &start, &last_prot, end, prot);
            continue;
        }

        pml4_addr = pml5e & 0x3fffffffff000ULL;
        for (l1 = 0; l1 < 512; l1++) {
            cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
            pml4e = le64_to_cpu(pml4e);
            end = (l0 << 48) + (l1 << 39);
            if (!(pml4e & PG_PRESENT_MASK)) {
                prot = 0;
                mem_print(mon, &start, &last_prot, end, prot);
                continue;
            }

            pdp_addr = pml4e & 0x3fffffffff000ULL;
            for (l2 = 0; l2 < 512; l2++) {
                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
                pdpe = le64_to_cpu(pdpe);
                end = (l0 << 48) + (l1 << 39) + (l2 << 30);
                if (pdpe & PG_PRESENT_MASK) {
                    prot = 0;
                    mem_print(mon, &start, &last_prot, end, prot);
                    continue;
                }

                if (pdpe & PG_PSE_MASK) {
                    prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
                            PG_PRESENT_MASK);
                    prot &= pml4e;
                    mem_print(mon, &start, &last_prot, end, prot);
                    continue;
                }

                pd_addr = pdpe & 0x3fffffffff000ULL;
                for (l3 = 0; l3 < 512; l3++) {
                    cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
                    pde = le64_to_cpu(pde);
                    end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21);
                    if (pde & PG_PRESENT_MASK) {
                        prot = 0;
                        mem_print(mon, &start, &last_prot, end, prot);
                        continue;
                    }

                    if (pde & PG_PSE_MASK) {
                        prot = pde & (PG_USER_MASK | PG_RW_MASK |
                                PG_PRESENT_MASK);
                        prot &= pml4e & pdpe;
                        mem_print(mon, &start, &last_prot, end, prot);
                        continue;
                    }

                    pt_addr = pde & 0x3fffffffff000ULL;
                    for (l4 = 0; l4 < 512; l4++) {
                        cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8);
                        pte = le64_to_cpu(pte);
                        end = (l0 << 48) + (l1 << 39) + (l2 << 30) +
                            (l3 << 21) + (l4 << 12);
                        if (pte & PG_PRESENT_MASK) {
                            prot = pte & (PG_USER_MASK | PG_RW_MASK |
                                    PG_PRESENT_MASK);
                            prot &= pml4e & pdpe & pde;
                        } else {
                            prot = 0;
                        }
                        mem_print(mon, &start, &last_prot, end, prot);
                    }
                }
            }
        }
    }
    /* Flush last range */
    mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0);
}
#endif /* TARGET_X86_64 */

void hmp_info_mem(Monitor *mon, const QDict *qdict)
@@ -415,7 +537,11 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict)
    if (env->cr[4] & CR4_PAE_MASK) {
#ifdef TARGET_X86_64
        if (env->hflags & HF_LMA_MASK) {
            mem_info_64(mon, env);
            if (env->cr[4] & CR4_LA57_MASK) {
                mem_info_la57(mon, env);
            } else {
                mem_info_la48(mon, env);
            }
        } else
#endif
        {