Commit 85aa8081 authored by Richard Henderson

tcg: Support arbitrary size + alignment

Previously we allowed fully unaligned operations, but not operations
that are aligned but with less alignment than the operation size.

In addition, arm32, ia64, mips, and sparc had been omitted from the
previous overalignment patch, which meant those backends kept enforcing
the full operation-size alignment regardless of what was requested.

Signed-off-by: Richard Henderson <rth@twiddle.net>
parent ebc231d7
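
For reference, here is a minimal standalone sketch (not QEMU's code; names and values are illustrative) of the semantics the patch enables. Assuming get_alignment_bits() now returns an unsigned log2 of the required alignment, with 0 meaning no alignment required, a single mask test covers both the old cases and the new "aligned, but with less alignment than the access size" case, e.g. a 4-byte load that only requires 2-byte alignment:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* a_bits is log2 of the required alignment; 0 is always satisfied. */
    static bool aligned_enough(uint64_t addr, unsigned a_bits)
    {
        return (addr & (((uint64_t)1 << a_bits) - 1)) == 0;
    }

    int main(void)
    {
        /* A 4-byte access (size log2 = 2) requiring only 2-byte alignment
           (a_bits = 1): previously inexpressible in the helpers. */
        printf("%d\n", aligned_enough(0x1002, 1)); /* 1: 2-byte aligned */
        printf("%d\n", aligned_enough(0x1002, 2)); /* 0: not 4-byte aligned */
        return 0;
    }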

softmmu_template.h +8 −8

@@ -146,14 +146,14 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
     unsigned mmu_idx = get_mmuidx(oi);
     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
-    int a_bits = get_alignment_bits(get_memop(oi));
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
     DATA_TYPE res;
 
     /* Adjust the given return address.  */
     retaddr -= GETPC_ADJ;
 
-    if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
+    if (addr & ((1 << a_bits) - 1)) {
         cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
                              mmu_idx, retaddr);
     }
@@ -220,14 +220,14 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
     unsigned mmu_idx = get_mmuidx(oi);
     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
-    int a_bits = get_alignment_bits(get_memop(oi));
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
     DATA_TYPE res;
 
     /* Adjust the given return address.  */
     retaddr -= GETPC_ADJ;
 
-    if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
+    if (addr & ((1 << a_bits) - 1)) {
         cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
                              mmu_idx, retaddr);
     }
@@ -331,13 +331,13 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     unsigned mmu_idx = get_mmuidx(oi);
     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
-    int a_bits = get_alignment_bits(get_memop(oi));
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
 
     /* Adjust the given return address.  */
     retaddr -= GETPC_ADJ;
 
-    if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
+    if (addr & ((1 << a_bits) - 1)) {
         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
                              mmu_idx, retaddr);
     }
@@ -414,13 +414,13 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     unsigned mmu_idx = get_mmuidx(oi);
     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
-    int a_bits = get_alignment_bits(get_memop(oi));
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
 
     /* Adjust the given return address.  */
     retaddr -= GETPC_ADJ;
 
-    if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
+    if (addr & ((1 << a_bits) - 1)) {
         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
                              mmu_idx, retaddr);
     }
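
The dropped "a_bits > 0 &&" guard is redundant once a_bits is unsigned: for a_bits == 0 the mask (1 << a_bits) - 1 is 0, so the test can never fire. A small sketch of just the condition (illustrative, not the helper itself):

    #include <assert.h>
    #include <stdint.h>

    /* The helpers' new alignment condition in isolation. */
    static int misaligned(uint64_t addr, unsigned a_bits)
    {
        return (addr & (((uint64_t)1 << a_bits) - 1)) != 0;
    }

    int main(void)
    {
        assert(!misaligned(0x12345, 0)); /* no requirement: check vanishes */
        assert( misaligned(0x12345, 2)); /* 4-byte requirement violated */
        assert(!misaligned(0x12344, 2)); /* 4-byte requirement satisfied */
        return 0;
    }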

tcg/aarch64/tcg-target.inc.c +7 −6

@@ -1081,23 +1081,24 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
     int tlb_offset = is_read ?
         offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
-    int a_bits = get_alignment_bits(opc);
+    unsigned a_bits = get_alignment_bits(opc);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_mask = (1u << a_bits) - 1;
+    unsigned s_mask = (1u << s_bits) - 1;
     TCGReg base = TCG_AREG0, x3;
     uint64_t tlb_mask;
 
     /* For aligned accesses, we check the first byte and include the alignment
        bits within the address.  For unaligned access, we check that we don't
        cross pages using the address of the last byte of the access.  */
-    if (a_bits >= 0) {
-        /* A byte access or an alignment check required */
-        tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+    if (a_bits >= s_bits) {
         x3 = addr_reg;
     } else {
         tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
-                     TCG_REG_X3, addr_reg, (1 << (opc & MO_SIZE)) - 1);
-        tlb_mask = TARGET_PAGE_MASK;
+                     TCG_REG_X3, addr_reg, s_mask - a_mask);
         x3 = TCG_REG_X3;
     }
+    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
 
     /* Extract the TLB index from the address into X0.
        X0<CPU_TLB_BITS:0> =
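
The arithmetic behind the unified compare value is worth spelling out. s_mask - a_mask is a multiple of the alignment, so adding it advances the address to the page of the access's last byte without disturbing the low a_bits; folding a_mask into tlb_mask then lets one masked compare against the page-aligned TLB tag reject both misalignment and page-crossing. A standalone model (fixed 4K page and hypothetical tlb_hit() helper, not QEMU's code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_BITS 12
    #define PAGE_MASK (~(((uint64_t)1 << PAGE_BITS) - 1))

    static bool tlb_hit(uint64_t addr, unsigned a_bits, unsigned s_bits,
                        uint64_t tlb_tag /* page-aligned tag from the TLB */)
    {
        uint64_t a_mask = ((uint64_t)1 << a_bits) - 1;
        uint64_t s_mask = ((uint64_t)1 << s_bits) - 1;
        /* One compare: the low a_bits must be zero to match the tag, and
           the page of the access's last byte must equal the tag's page. */
        uint64_t cmp = (addr + (s_mask - a_mask)) & (PAGE_MASK | a_mask);
        return cmp == tlb_tag;
    }

    int main(void)
    {
        /* 8-byte load (s_bits = 3) requiring 2-byte alignment (a_bits = 1),
           checked against a TLB entry for page 0x1000. */
        printf("%d\n", tlb_hit(0x1ff8, 1, 3, 0x1000)); /* 1: in page, aligned */
        printf("%d\n", tlb_hit(0x1ff9, 1, 3, 0x1000)); /* 0: misaligned */
        printf("%d\n", tlb_hit(0x1ffa, 1, 3, 0x1000)); /* 0: crosses the page */
        return 0;
    }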

tcg/arm/tcg-target.inc.c +12 −7

@@ -1168,7 +1168,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                               TCGMemOp s_bits, int mem_index, bool is_load)
+                               TCGMemOp opc, int mem_index, bool is_load)
 {
     TCGReg base = TCG_AREG0;
     int cmp_off =
@@ -1176,6 +1176,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
          ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
          : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_bits = get_alignment_bits(opc);
 
     /* Should generate something like the following:
      *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
@@ -1216,10 +1218,13 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         }
     }
 
-    /* Check alignment.  */
-    if (s_bits) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_TST,
-                        0, addrlo, (1 << s_bits) - 1);
+    /* Check alignment.  We don't support inline unaligned accesses,
+       but we can easily support overalignment checks.  */
+    if (a_bits < s_bits) {
+        a_bits = s_bits;
+    }
+    if (a_bits) {
+        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
     }
 
     /* Load the tlb addend.  */
@@ -1499,7 +1504,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
+    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
 
     /* This is a conditional BL only to load a pointer within this opcode into LR
        for the slow path.  We will not be using the value for a tail call.  */
@@ -1630,7 +1635,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
+    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
 
     tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
 
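
Since the arm backend never handles unaligned accesses inline, the requested alignment is clamped up to the access size: the old behaviour is preserved, and any stricter (overaligned) request is honoured by the same TST. A one-line model of the policy (hypothetical helper name):

    #include <assert.h>

    /* Check at least natural alignment; more if the memop requests it. */
    static unsigned inline_check_bits(unsigned a_bits, unsigned s_bits)
    {
        return a_bits < s_bits ? s_bits : a_bits;
    }

    int main(void)
    {
        assert(inline_check_bits(0, 2) == 2); /* unaligned 4-byte op: natural */
        assert(inline_check_bits(3, 2) == 3); /* 8-byte overalignment kept */
        return 0;
    }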

tcg/i386/tcg-target.inc.c +10 −9

@@ -1202,7 +1202,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     TCGType ttype = TCG_TYPE_I32;
     TCGType tlbtype = TCG_TYPE_I32;
     int trexw = 0, hrexw = 0, tlbrexw = 0;
-    int a_bits = get_alignment_bits(opc);
+    unsigned a_bits = get_alignment_bits(opc);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_mask = (1 << a_bits) - 1;
+    unsigned s_mask = (1 << s_bits) - 1;
     target_ulong tlb_mask;
 
     if (TCG_TARGET_REG_BITS == 64) {
@@ -1220,17 +1223,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     }
 
     tcg_out_mov(s, tlbtype, r0, addrlo);
-    if (a_bits >= 0) {
-        /* A byte access or an alignment check required */
+    /* If the required alignment is at least as large as the access, simply
+       copy the address and mask.  For lesser alignments, check that we don't
+       cross pages for the complete access.  */
+    if (a_bits >= s_bits) {
         tcg_out_mov(s, ttype, r1, addrlo);
-        tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
     } else {
-        /* For unaligned access check that we don't cross pages using
-           the page address of the last byte.  */
-        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo,
-                             (1 << (opc & MO_SIZE)) - 1);
-        tlb_mask = TARGET_PAGE_MASK;
+        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
     }
+    tlb_mask = TARGET_PAGE_MASK | a_mask;
 
     tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
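
What the i386 change buys can be seen by modelling the old and new compare values side by side: for an access whose required alignment is smaller than its size, the old sequence only caught page-crossing, while the new one also faults on sub-size misalignment. A hedged model (illustrative constants, not the backend's code):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_MASK (~(uint64_t)0xfff)

    /* Old unaligned path: page of the last byte only. */
    static uint64_t old_cmp(uint64_t addr, unsigned s_bits)
    {
        return (addr + (((uint64_t)1 << s_bits) - 1)) & PAGE_MASK;
    }

    /* New unified path: last byte's page plus the low alignment bits. */
    static uint64_t new_cmp(uint64_t addr, unsigned a_bits, unsigned s_bits)
    {
        uint64_t a_mask = ((uint64_t)1 << a_bits) - 1;
        uint64_t s_mask = ((uint64_t)1 << s_bits) - 1;
        return (addr + (s_mask - a_mask)) & (PAGE_MASK | a_mask);
    }

    int main(void)
    {
        uint64_t tag = 0x1000;  /* page-aligned TLB tag */
        uint64_t addr = 0x1001; /* odd address: an in-page 4-byte load that
                                   is meant to require 2-byte alignment */
        printf("old: %s\n", old_cmp(addr, 2) == tag ? "hit" : "miss");    /* hit */
        printf("new: %s\n", new_cmp(addr, 1, 2) == tag ? "hit" : "miss"); /* miss */
        return 0;
    }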

tcg/ia64/tcg-target.inc.c +15 −7

@@ -1496,9 +1496,17 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
    R1, R3 are clobbered, leaving R56 free for...
    BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.  */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
-                                    TCGMemOp s_bits, int off_rw, int off_add,
+                                    TCGMemOp opc, int off_rw, int off_add,
                                     uint64_t bswap1, uint64_t bswap2)
 {
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_bits = get_alignment_bits(opc);
+
+    /* We don't support unaligned accesses, but overalignment is easy.  */
+    if (a_bits < s_bits) {
+        a_bits = s_bits;
+    }
+
     /*
         .mii
         mov	r2 = off_rw
@@ -1536,8 +1544,8 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                                TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS,
                                63 - CPU_TLB_ENTRY_BITS),
                    tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0,
-                               TCG_REG_R57, 63 - s_bits,
-                               TARGET_PAGE_BITS - s_bits - 1));
+                               TCG_REG_R57, 63 - a_bits,
+                               TARGET_PAGE_BITS - a_bits - 1));
     tcg_out_bundle(s, MmI,
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
                                TCG_REG_R2, TCG_REG_R2, TCG_REG_R3),
@@ -1661,7 +1669,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
     s_bits = opc & MO_SIZE;
 
     /* Read the TLB entry */
-    tcg_out_qemu_tlb(s, addr_reg, s_bits,
+    tcg_out_qemu_tlb(s, addr_reg, opc,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
                      offsetof(CPUArchState, tlb_table[mem_index][0].addend),
                      INSN_NOP_I, INSN_NOP_I);
@@ -1739,7 +1747,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
         pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg);
     }
 
-    tcg_out_qemu_tlb(s, addr_reg, s_bits,
+    tcg_out_qemu_tlb(s, addr_reg, opc,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
                      offsetof(CPUArchState, tlb_table[mem_index][0].addend),
                      pre1, pre2);
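
On ia64 the deposit instruction builds the compare value directly: it zeroes the in-page bits above the (clamped) alignment, so the result keeps both the page number and the low a_bits, and a misaligned address can never match the page-aligned TLB tag. A rough C model of the value the dep computes (fixed 4K page, illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_BITS 12

    /* Zero bits [a_bits, PAGE_BITS) of addr, keeping the page number and
       the low a_bits that must be zero for an aligned access. */
    static uint64_t cmp_value(uint64_t addr, unsigned a_bits)
    {
        uint64_t clear = (((uint64_t)1 << PAGE_BITS) - 1)
                       & ~(((uint64_t)1 << a_bits) - 1);
        return addr & ~clear;
    }

    int main(void)
    {
        printf("%#llx\n", (unsigned long long)cmp_value(0x1ab4, 2)); /* 0x1000: matches tag */
        printf("%#llx\n", (unsigned long long)cmp_value(0x1ab6, 2)); /* 0x1002: misses tag */
        return 0;
    }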