tcg/arm/tcg-target.h  +1 −1

@@ -60,7 +60,7 @@ extern int arm_arch;
 #undef TCG_TARGET_STACK_GROWSUP
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
-#define TCG_TARGET_IMPLEMENTS_DYN_TLB 0
+#define TCG_TARGET_IMPLEMENTS_DYN_TLB 1
 
 typedef enum {
     TCG_REG_R0 = 0,

tcg/arm/tcg-target.inc.c  +69 −70

@@ -500,6 +500,12 @@ static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
     tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
 }
 
+static inline void tcg_out_ldrd_rwb(TCGContext *s, int cond, TCGReg rt,
+                                    TCGReg rn, TCGReg rm)
+{
+    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
+}
+
 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
 {

@@ -1229,8 +1235,13 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
 
 #define TLB_SHIFT   (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
-/* We're expecting to use an 8-bit immediate and to mask.  */
-QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
+/* We expect tlb_mask to be before tlb_table.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
+                  offsetof(CPUArchState, tlb_mask));
+
+/* We expect to use a 20-bit unsigned offset from ENV.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
+                  > 0xfffff);
 
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */

@@ -1238,84 +1249,72 @@ QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                TCGMemOp opc, int mem_index, bool is_load)
 {
-    TCGReg base = TCG_AREG0;
-    int cmp_off =
-        (is_load
-         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
-         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
-    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
-    int mask_off;
+    int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
+                   : offsetof(CPUTLBEntry, addr_write));
+    int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
+    int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
+    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
-    /* V7 generates the following:
-     *   ubfx   r0, addrlo, #TARGET_PAGE_BITS, #CPU_TLB_BITS
-     *   add    r2, env, #high
-     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
-     *   ldr    r0, [r2, #cmp]
-     *   ldr    r2, [r2, #add]
-     *   movw   tmp, #page_align_mask
-     *   bic    tmp, addrlo, tmp
-     *   cmp    r0, tmp
-     *
-     * Otherwise we generate:
-     *   shr    tmp, addrlo, #TARGET_PAGE_BITS
-     *   add    r2, env, #high
-     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)
-     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
-     *   ldr    r0, [r2, #cmp]
-     *   ldr    r2, [r2, #add]
-     *   tst    addrlo, #s_mask
-     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
-     */
-    if (use_armv7_instructions) {
-        tcg_out_extract(s, COND_AL, TCG_REG_R0, addrlo,
-                        TARGET_PAGE_BITS, CPU_TLB_BITS);
-    } else {
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
-                        0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
-    }
-
-    /* Add portions of the offset until the memory access is in range.
-     * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
-     * we can use a 12-bit offset.  */
-    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-        mask_off = 0xff;
-    } else {
-        mask_off = 0xfff;
-    }
-    while (cmp_off > mask_off) {
-        int shift = ctz32(cmp_off & ~mask_off) & ~1;
-        int rot = ((32 - shift) << 7) & 0xf00;
-        int addend = cmp_off & (0xff << shift);
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                        rot | ((cmp_off >> shift) & 0xff));
-        base = TCG_REG_R2;
-        add_off -= addend;
-        cmp_off -= addend;
-    }
-
-    if (!use_armv7_instructions) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                        TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
-    }
-    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
-
-    /* Load the tlb comparator.  Use ldrd if needed and available,
-       but due to how the pointer needs setting up, ldm isn't useful.
-       Base arm5 doesn't have ldrd, but armv5te does.  */
-    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
-    } else {
-        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
-        if (TARGET_LONG_BITS == 64) {
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
-        }
-    }
+    if (table_off > 0xfff) {
+        int mask_hi = mask_off & ~0xfff;
+        int table_hi = table_off & ~0xfff;
+        int rot;
+
+        table_base = TCG_REG_R2;
+        if (mask_hi == table_hi) {
+            mask_base = table_base;
+        } else if (mask_hi) {
+            mask_base = TCG_REG_TMP;
+            rot = encode_imm(mask_hi);
+            assert(rot >= 0);
+            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, mask_base, TCG_AREG0,
+                            rotl(mask_hi, rot) | (rot << 7));
+        }
+        rot = encode_imm(table_hi);
+        assert(rot >= 0);
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, table_base, TCG_AREG0,
+                        rotl(table_hi, rot) | (rot << 7));
+
+        mask_off -= mask_hi;
+        table_off -= table_hi;
+    }
+
+    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
+    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP, mask_base, mask_off);
+    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R2, table_base, table_off);
+
+    /* Extract the tlb index from the address into TMP.  */
+    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, addrlo,
+                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+
+    /*
+     * Add the tlb_table pointer, creating the CPUTLBEntry address in R2.
+     * Load the tlb comparator into R0/R1 and the fast path addend into R2.
+     */
+    if (cmp_off == 0) {
+        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
+        } else {
+            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
+        }
+    } else {
+        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+                        TCG_REG_R2, TCG_REG_R2, TCG_REG_TMP, 0);
+        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
+        } else {
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
+        }
+    }
+    if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
+    }
 
     /* Load the tlb addend.  */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
+                    offsetof(CPUTLBEntry, addend));
 
     /* Check alignment.  We don't support inline unaligned accesses,
        but we can easily support overalignment checks.  */
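A note on what the new fast path emits: the patch drops the old "V7 generates the following" block comment, so the listing below is an illustrative reconstruction from the tcg_out_* calls above, not part of the patch. Register roles follow the code; mask_off and table_off are the (possibly reduced) offsets from env.

/* Illustrative reconstruction -- in the style of the removed comment:
 *
 *   ldr    tmp, [env, #mask_off]     @ tmp = tlb_mask[mmu_idx]
 *   ldr    r2, [env, #table_off]     @ r2  = tlb_table[mmu_idx]
 *   and    tmp, tmp, addrlo, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *   add    r2, r2, tmp               @ r2  = &tlb_table[mmu_idx][index]
 *   ldr    r0, [r2, #cmp_off]        @ comparator (r1 gets the high word
 *                                    @ for 64-bit guests, ldrd if available)
 *   ldr    r2, [r2, #offsetof(CPUTLBEntry, addend)]
 *
 * When table_off > 0xfff, up to two ADDs first move the high offset bits
 * into a scratch base; when cmp_off == 0, the "add" is folded into the
 * comparator load via register-offset addressing with writeback (the new
 * tcg_out_ldrd_rwb, or the existing tcg_out_ld32_rwb).
 */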
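In C terms, the single AND performs both the index extraction and the dynamic-size bound. A minimal sketch, assuming illustrative values for TARGET_PAGE_BITS and CPU_TLB_ENTRY_BITS (the real ones come from QEMU's headers) and relying on tlb_mask[mmu_idx] being pre-scaled to a byte-offset mask:

#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS    12
#define CPU_TLB_ENTRY_BITS  5   /* log2(sizeof(CPUTLBEntry)); host-dependent */

/* tlb_mask[mmu_idx] is (n_entries - 1) << CPU_TLB_ENTRY_BITS, so one AND
 * extracts the page index and scales it to a byte offset into the table:
 * what "and tmp, tmp, addrlo, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)"
 * computes. */
static uintptr_t tlb_entry_offset(uint32_t addr, uintptr_t tlb_mask)
{
    return tlb_mask & (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
}

int main(void)
{
    uintptr_t mask = (256 - 1) << CPU_TLB_ENTRY_BITS;   /* 256-entry table */
    printf("%#zx\n", (size_t)tlb_entry_offset(0x12345678, mask)); /* 0x8a0 */
    return 0;
}

The CPUTLBEntry address is then tlb_table[mmu_idx] plus that offset, which is the add (or the load-with-writeback) in the sequence above.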
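When table_off exceeds the 12-bit offset reach of ldr, the patch peels off the high bits with one ADD of an ARM rotated immediate, via the backend's encode_imm() and rotl(). A standalone sketch of that encoding (the helper names and the search loop here are illustrative; only the instruction format is fixed by the architecture):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t rotl32(uint32_t x, int r)
{
    return r ? (x << r) | (x >> (32 - r)) : x;
}

/* An ARM data-processing immediate is an 8-bit value rotated right by an
 * even amount.  Return the even rotate-left count that brings x into
 * [0, 0xff], or -1 if x is not encodable -- the role encode_imm() plays
 * in the backend. */
static int encode_imm32(uint32_t x)
{
    for (int rot = 0; rot < 32; rot += 2) {
        if (rotl32(x, rot) <= 0xff) {
            return rot;
        }
    }
    return -1;
}

int main(void)
{
    uint32_t table_hi = 0x1f0a0 & ~0xfffu;   /* hypothetical high part */
    int rot = encode_imm32(table_hi);
    assert(rot >= 0);
    /* Operand field as built in the patch: value8 in bits [7:0], rotate/2
     * in bits [11:8] -- hence "rotl(table_hi, rot) | (rot << 7)". */
    printf("rot=%d field=%#x\n", rot, rotl32(table_hi, rot) | (rot << 7));
    return 0;
}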
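The new 20-bit QEMU_BUILD_BUG_ON is what guarantees this split always succeeds with a single ADD: for any offset below 0x100000, the high part off & ~0xfff occupies at most bits [12,19], eight significant bits, so it is always a valid rotated immediate. A throwaway check of that reasoning:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    for (uint32_t off = 0; off <= 0xfffff; off++) {
        uint32_t hi = off & ~0xfffu;
        /* rotl32(hi, 20): bits [12,19] land in [0,7]; the rest shift out. */
        uint32_t rotated = (hi >> 12) | (hi << 20);
        assert(rotated <= 0xff);
    }
    return 0;
}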