Commit 10a85e2c authored by Richard Henderson's avatar Richard Henderson Committed by Peter Maydell
Browse files

target/arm: Reuse sve_probe_page for gather loads



Reviewed-by: default avatarPeter Maydell <peter.maydell@linaro.org>
Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
Message-id: 20200508154359.7494-19-richard.henderson@linaro.org
Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parent 88a660a4
Loading
Loading
Loading
Loading
+123 −113
Original line number Diff line number Diff line
@@ -5124,130 +5124,140 @@ static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
    return *(uint64_t *)(reg + reg_ofs);
}

static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
                       target_ulong base, uint32_t desc, uintptr_t ra,
                       zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
static inline QEMU_ALWAYS_INLINE
void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
               target_ulong base, uint32_t desc, uintptr_t retaddr,
               int esize, int msize, zreg_off_fn *off_fn,
               sve_ldst1_host_fn *host_fn,
               sve_ldst1_tlb_fn *tlb_fn)
{
    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
    intptr_t i, oprsz = simd_oprsz(desc);
    ARMVectorReg scratch = { };
    const int mmu_idx = cpu_mmu_index(env, false);
    const intptr_t reg_max = simd_oprsz(desc);
    ARMVectorReg scratch;
    intptr_t reg_off;
    SVEHostPage info, info2;

    for (i = 0; i < oprsz; ) {
        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
    memset(&scratch, 0, reg_max);
    reg_off = 0;
    do {
        uint64_t pg = vg[reg_off >> 6];
        do {
            if (likely(pg & 1)) {
                target_ulong off = off_fn(vm, i);
                tlb_fn(env, &scratch, i, base + (off << scale), ra);
            }
            i += 4, pg >>= 4;
        } while (i & 15);
    }

    /* Wait until all exceptions have been raised to write back.  */
    memcpy(vd, &scratch, oprsz);
}
                target_ulong addr = base + (off_fn(vm, reg_off) << scale);
                target_ulong in_page = -(addr | TARGET_PAGE_MASK);

static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
                       target_ulong base, uint32_t desc, uintptr_t ra,
                       zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
{
    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
    intptr_t i, oprsz = simd_oprsz(desc) / 8;
    ARMVectorReg scratch = { };
                sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD,
                               mmu_idx, retaddr);

    for (i = 0; i < oprsz; i++) {
        uint8_t pg = *(uint8_t *)(vg + H1(i));
        if (likely(pg & 1)) {
            target_ulong off = off_fn(vm, i * 8);
            tlb_fn(env, &scratch, i * 8, base + (off << scale), ra);
                if (likely(in_page >= msize)) {
                    if (unlikely(info.flags & TLB_WATCHPOINT)) {
                        cpu_check_watchpoint(env_cpu(env), addr, msize,
                                             info.attrs, BP_MEM_READ, retaddr);
                    }
                    /* TODO: MTE check */
                    host_fn(&scratch, reg_off, info.host);
                } else {
                    /* Element crosses the page boundary. */
                    sve_probe_page(&info2, false, env, addr + in_page, 0,
                                   MMU_DATA_LOAD, mmu_idx, retaddr);
                    if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) {
                        cpu_check_watchpoint(env_cpu(env), addr,
                                             msize, info.attrs,
                                             BP_MEM_READ, retaddr);
                    }
                    /* TODO: MTE check */
                    tlb_fn(env, &scratch, reg_off, addr, retaddr);
                }
            }
            reg_off += esize;
            pg >>= esize;
        } while (reg_off & 63);
    } while (reg_off < reg_max);

    /* Wait until all exceptions have been raised to write back.  */
    memcpy(vd, &scratch, oprsz * 8);
    memcpy(vd, &scratch, reg_max);
}

#define DO_LD1_ZPZ_S(MEM, OFS) \
void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \
    (CPUARMState *env, void *vd, void *vg, void *vm,         \
     target_ulong base, uint32_t desc)                       \
#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \
void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg,       \
                                 void *vm, target_ulong base, uint32_t desc) \
{                                                                            \
    sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(),         \
              off_##OFS##_s, sve_ld1##MEM##_tlb);            \
    sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ,             \
              off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb);       \
}

#define DO_LD1_ZPZ_D(MEM, OFS) \
void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \
    (CPUARMState *env, void *vd, void *vg, void *vm,         \
     target_ulong base, uint32_t desc)                       \
#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \
void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg,       \
                                 void *vm, target_ulong base, uint32_t desc) \
{                                                                            \
    sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(),         \
               off_##OFS##_d, sve_ld1##MEM##_tlb);           \
}

DO_LD1_ZPZ_S(bsu, zsu)
DO_LD1_ZPZ_S(bsu, zss)
DO_LD1_ZPZ_D(bdu, zsu)
DO_LD1_ZPZ_D(bdu, zss)
DO_LD1_ZPZ_D(bdu, zd)

DO_LD1_ZPZ_S(bss, zsu)
DO_LD1_ZPZ_S(bss, zss)
DO_LD1_ZPZ_D(bds, zsu)
DO_LD1_ZPZ_D(bds, zss)
DO_LD1_ZPZ_D(bds, zd)

DO_LD1_ZPZ_S(hsu_le, zsu)
DO_LD1_ZPZ_S(hsu_le, zss)
DO_LD1_ZPZ_D(hdu_le, zsu)
DO_LD1_ZPZ_D(hdu_le, zss)
DO_LD1_ZPZ_D(hdu_le, zd)

DO_LD1_ZPZ_S(hsu_be, zsu)
DO_LD1_ZPZ_S(hsu_be, zss)
DO_LD1_ZPZ_D(hdu_be, zsu)
DO_LD1_ZPZ_D(hdu_be, zss)
DO_LD1_ZPZ_D(hdu_be, zd)

DO_LD1_ZPZ_S(hss_le, zsu)
DO_LD1_ZPZ_S(hss_le, zss)
DO_LD1_ZPZ_D(hds_le, zsu)
DO_LD1_ZPZ_D(hds_le, zss)
DO_LD1_ZPZ_D(hds_le, zd)

DO_LD1_ZPZ_S(hss_be, zsu)
DO_LD1_ZPZ_S(hss_be, zss)
DO_LD1_ZPZ_D(hds_be, zsu)
DO_LD1_ZPZ_D(hds_be, zss)
DO_LD1_ZPZ_D(hds_be, zd)

DO_LD1_ZPZ_S(ss_le, zsu)
DO_LD1_ZPZ_S(ss_le, zss)
DO_LD1_ZPZ_D(sdu_le, zsu)
DO_LD1_ZPZ_D(sdu_le, zss)
DO_LD1_ZPZ_D(sdu_le, zd)

DO_LD1_ZPZ_S(ss_be, zsu)
DO_LD1_ZPZ_S(ss_be, zss)
DO_LD1_ZPZ_D(sdu_be, zsu)
DO_LD1_ZPZ_D(sdu_be, zss)
DO_LD1_ZPZ_D(sdu_be, zd)

DO_LD1_ZPZ_D(sds_le, zsu)
DO_LD1_ZPZ_D(sds_le, zss)
DO_LD1_ZPZ_D(sds_le, zd)

DO_LD1_ZPZ_D(sds_be, zsu)
DO_LD1_ZPZ_D(sds_be, zss)
DO_LD1_ZPZ_D(sds_be, zd)

DO_LD1_ZPZ_D(dd_le, zsu)
DO_LD1_ZPZ_D(dd_le, zss)
DO_LD1_ZPZ_D(dd_le, zd)

DO_LD1_ZPZ_D(dd_be, zsu)
DO_LD1_ZPZ_D(dd_be, zss)
DO_LD1_ZPZ_D(dd_be, zd)
    sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ,             \
              off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb);       \
}

DO_LD1_ZPZ_S(bsu, zsu, MO_8)
DO_LD1_ZPZ_S(bsu, zss, MO_8)
DO_LD1_ZPZ_D(bdu, zsu, MO_8)
DO_LD1_ZPZ_D(bdu, zss, MO_8)
DO_LD1_ZPZ_D(bdu, zd, MO_8)

DO_LD1_ZPZ_S(bss, zsu, MO_8)
DO_LD1_ZPZ_S(bss, zss, MO_8)
DO_LD1_ZPZ_D(bds, zsu, MO_8)
DO_LD1_ZPZ_D(bds, zss, MO_8)
DO_LD1_ZPZ_D(bds, zd, MO_8)

DO_LD1_ZPZ_S(hsu_le, zsu, MO_16)
DO_LD1_ZPZ_S(hsu_le, zss, MO_16)
DO_LD1_ZPZ_D(hdu_le, zsu, MO_16)
DO_LD1_ZPZ_D(hdu_le, zss, MO_16)
DO_LD1_ZPZ_D(hdu_le, zd, MO_16)

DO_LD1_ZPZ_S(hsu_be, zsu, MO_16)
DO_LD1_ZPZ_S(hsu_be, zss, MO_16)
DO_LD1_ZPZ_D(hdu_be, zsu, MO_16)
DO_LD1_ZPZ_D(hdu_be, zss, MO_16)
DO_LD1_ZPZ_D(hdu_be, zd, MO_16)

DO_LD1_ZPZ_S(hss_le, zsu, MO_16)
DO_LD1_ZPZ_S(hss_le, zss, MO_16)
DO_LD1_ZPZ_D(hds_le, zsu, MO_16)
DO_LD1_ZPZ_D(hds_le, zss, MO_16)
DO_LD1_ZPZ_D(hds_le, zd, MO_16)

DO_LD1_ZPZ_S(hss_be, zsu, MO_16)
DO_LD1_ZPZ_S(hss_be, zss, MO_16)
DO_LD1_ZPZ_D(hds_be, zsu, MO_16)
DO_LD1_ZPZ_D(hds_be, zss, MO_16)
DO_LD1_ZPZ_D(hds_be, zd, MO_16)

DO_LD1_ZPZ_S(ss_le, zsu, MO_32)
DO_LD1_ZPZ_S(ss_le, zss, MO_32)
DO_LD1_ZPZ_D(sdu_le, zsu, MO_32)
DO_LD1_ZPZ_D(sdu_le, zss, MO_32)
DO_LD1_ZPZ_D(sdu_le, zd, MO_32)

DO_LD1_ZPZ_S(ss_be, zsu, MO_32)
DO_LD1_ZPZ_S(ss_be, zss, MO_32)
DO_LD1_ZPZ_D(sdu_be, zsu, MO_32)
DO_LD1_ZPZ_D(sdu_be, zss, MO_32)
DO_LD1_ZPZ_D(sdu_be, zd, MO_32)

DO_LD1_ZPZ_D(sds_le, zsu, MO_32)
DO_LD1_ZPZ_D(sds_le, zss, MO_32)
DO_LD1_ZPZ_D(sds_le, zd, MO_32)

DO_LD1_ZPZ_D(sds_be, zsu, MO_32)
DO_LD1_ZPZ_D(sds_be, zss, MO_32)
DO_LD1_ZPZ_D(sds_be, zd, MO_32)

DO_LD1_ZPZ_D(dd_le, zsu, MO_64)
DO_LD1_ZPZ_D(dd_le, zss, MO_64)
DO_LD1_ZPZ_D(dd_le, zd, MO_64)

DO_LD1_ZPZ_D(dd_be, zsu, MO_64)
DO_LD1_ZPZ_D(dd_be, zss, MO_64)
DO_LD1_ZPZ_D(dd_be, zd, MO_64)

#undef DO_LD1_ZPZ_S
#undef DO_LD1_ZPZ_D