Commit a0d4aac7 authored by Peter Maydell

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170605' into staging



Queued TCG patches

# gpg: Signature made Mon 05 Jun 2017 17:48:42 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170605: (26 commits)
  target/alpha: Use goto_tb for fallthru between TBs
  target/alpha: Implement WTINT inline
  target/mips: optimize indirect branches
  target/mips: optimize cross-page direct jumps in softmmu
  target/aarch64: optimize indirect branches
  target/aarch64: optimize cross-page direct jumps in softmmu
  target/hppa: Use tcg_gen_lookup_and_goto_ptr
  target/s390: Use tcg_gen_lookup_and_goto_ptr
  tcg/mips: implement goto_ptr
  tcg/arm: Implement goto_ptr
  tcg/arm: Clarify tcg_out_bx for arm4 host
  tcg/s390: Implement goto_ptr
  tcg/sparc: Implement goto_ptr
  tcg/aarch64: Implement goto_ptr
  tcg/ppc: Implement goto_ptr
  tb-hash: improve tb_jmp_cache hash function in user mode
  target/i386: optimize indirect branches
  target/i386: optimize cross-page direct jumps in softmmu
  target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr
  target/arm: optimize indirect branches
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 199e19ee 2d826cdc
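
The common thread in the list above is tcg_gen_lookup_and_goto_ptr: rather than exiting to the exec loop after every indirect branch or cross-page jump, the translator emits a goto_ptr that chains directly into the next TB when a lookup hits. A minimal front-end sketch of the pattern, assuming a hypothetical target-specific gen_set_pc helper and using the no-argument form of tcg_gen_lookup_and_goto_ptr (its argument list has varied across QEMU versions):

/* Hedged sketch, not the exact code from any one target. */
static void gen_jr(DisasContext *s, TCGv dest)
{
    gen_set_pc(s, dest);              /* assumed helper: commit dest to env pc */
    tcg_gen_lookup_and_goto_ptr();    /* hit: jump into the next TB;
                                       * miss: fall back to the epilogue */
}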
configure +3 −3
@@ -1213,12 +1213,12 @@ case "$cpu" in
            LDFLAGS="-m64 $LDFLAGS"
            ;;
     sparc)
-           LDFLAGS="-m32 $LDFLAGS"
-           CPU_CFLAGS="-m32 -mcpu=ultrasparc"
+           CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc"
+           LDFLAGS="-m32 -mv8plus $LDFLAGS"
            ;;
     sparc64)
-           LDFLAGS="-m64 $LDFLAGS"
            CPU_CFLAGS="-m64 -mcpu=ultrasparc"
+           LDFLAGS="-m64 $LDFLAGS"
            ;;
     s390)
            CPU_CFLAGS="-m31"
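
For context, -mv8plus lets the compiler emit 64-bit v9 instructions (e.g. casx) while keeping the 32-bit ABI, which is what lets ATOMIC_REG_SIZE be 8 on sparc in the atomic.h change below. A hedged illustration of the kind of operation this enables, assuming a build with gcc -m32 -mv8plus:

/* Illustration only: with -mv8plus a 64-bit compare-and-swap can be
 * inlined on a 32-bit sparc host instead of calling out to a helper. */
#include <stdint.h>

uint64_t cas64(uint64_t *p, uint64_t old, uint64_t new)
{
    __atomic_compare_exchange_n(p, &old, new, false,
                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return old;
}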
cpu-exec.c +2 −4
@@ -309,10 +309,8 @@ static bool tb_cmp(const void *p, const void *d)
     return false;
 }
 
-static TranslationBlock *tb_htable_lookup(CPUState *cpu,
-                                          target_ulong pc,
-                                          target_ulong cs_base,
-                                          uint32_t flags)
+TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                   target_ulong cs_base, uint32_t flags)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
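
tb_htable_lookup loses its static qualifier so the new lookup-and-goto_ptr machinery can call it from outside cpu-exec.c. A rough sketch of such a caller, with ENV_GET_CPU, cpu_get_tb_cpu_state, tcg_ctx.code_gen_epilogue, and tb->tc_ptr taken as assumptions about the surrounding QEMU code of this era:

/* Sketch of a goto_ptr runtime helper; not the exact QEMU code. */
void *lookup_tb_ptr_sketch(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    target_ulong cs_base, pc;
    uint32_t flags;
    TranslationBlock *tb;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
    tb = tb_htable_lookup(cpu, pc, cs_base, flags);
    if (tb == NULL) {
        return tcg_ctx.code_gen_epilogue;  /* no TB yet: exit to the loop */
    }
    return tb->tc_ptr;                     /* chain straight into the TB */
}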
include/exec/exec-all.h +2 −0
@@ -368,6 +368,8 @@ struct TranslationBlock {
 void tb_free(TranslationBlock *tb);
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
+TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                   target_ulong cs_base, uint32_t flags);
 
 #if defined(USE_DIRECT_JUMP)
 
include/exec/tb-hash.h +12 −0
@@ -22,6 +22,8 @@
 
 #include "exec/tb-hash-xx.h"
 
+#ifdef CONFIG_SOFTMMU
+
 /* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
    addresses on the same page.  The top bits are the same.  This allows
    TLB invalidation to quickly clear a subset of the hash table.  */
@@ -45,6 +47,16 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
            | (tmp & TB_JMP_ADDR_MASK));
 }
 
+#else
+
+/* In user-mode we can get better hashing because we do not have a TLB */
+static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
+{
+    return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
+}
+
+#endif /* CONFIG_SOFTMMU */
+
 static inline
 uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
 {
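
A standalone way to see what the user-mode variant buys: folding the high bits of pc into the index spreads entries that share low bits, whereas the softmmu variant deliberately keeps same-page entries clustered so TLB invalidation can clear them cheaply. The constants below are assumptions (TB_JMP_CACHE_BITS is 12 in QEMU of this period):

#include <stdint.h>
#include <stdio.h>

#define TB_JMP_CACHE_BITS 12
#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)

/* The user-mode hash from the hunk above, lifted verbatim. */
static unsigned int user_hash(uint64_t pc)
{
    return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
}

int main(void)
{
    /* Two page-aligned pcs: a plain low-bits mask would send both to
     * slot 0, but xor-folding the page number in separates them. */
    printf("%u %u\n", user_hash(0x401000), user_hash(0x402000));
    return 0;
}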
include/qemu/atomic.h +26 −8
@@ -88,6 +88,24 @@
 #define smp_read_barrier_depends()   barrier()
 #endif
 
+/* Sanity check that the size of an atomic operation isn't "overly large".
+ * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
+ * want to use them because we ought not need them, and this lets us do a
+ * bit of sanity checking that other 32-bit hosts might build.
+ *
+ * That said, we have a problem on 64-bit ILP32 hosts in that in order to
+ * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS.
+ * We'd prefer not to pull in everything else TCG related, so handle
+ * those few cases by hand.
+ *
+ * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for
+ * Sparc we always force the use of sparcv9 in configure.
+ */
+#if defined(__x86_64__) || defined(__sparc__)
+# define ATOMIC_REG_SIZE  8
+#else
+# define ATOMIC_REG_SIZE  sizeof(void *)
+#endif
 
 /* Weak atomic operations prevent the compiler moving other
  * loads/stores past the atomic operation load/store. However there is
@@ -104,7 +122,7 @@
 
 #define atomic_read(ptr)                              \
     ({                                                \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     atomic_read__nocheck(ptr);                        \
     })
 
@@ -112,7 +130,7 @@
     __atomic_store_n(ptr, i, __ATOMIC_RELAXED)
 
 #define atomic_set(ptr, i)  do {                      \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     atomic_set__nocheck(ptr, i);                      \
 } while(0)
 
@@ -130,27 +148,27 @@
 
 #define atomic_rcu_read(ptr)                          \
     ({                                                \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     typeof_strip_qual(*ptr) _val;                     \
     atomic_rcu_read__nocheck(ptr, &_val);             \
     _val;                                             \
     })
 
 #define atomic_rcu_set(ptr, i) do {                   \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     __atomic_store_n(ptr, i, __ATOMIC_RELEASE);       \
 } while(0)
 
 #define atomic_load_acquire(ptr)                        \
     ({                                                  \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));   \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);  \
     typeof_strip_qual(*ptr) _val;                       \
     __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);        \
     _val;                                               \
     })
 
 #define atomic_store_release(ptr, i)  do {              \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));   \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);  \
     __atomic_store_n(ptr, i, __ATOMIC_RELEASE);         \
 } while(0)
 
@@ -162,7 +180,7 @@
 })
 
 #define atomic_xchg(ptr, i)    ({                           \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);      \
     atomic_xchg__nocheck(ptr, i);                           \
 })
 
@@ -175,7 +193,7 @@
 })
 
 #define atomic_cmpxchg(ptr, old, new)    ({                             \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));                   \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);                  \
     atomic_cmpxchg__nocheck(ptr, old, new);                             \
 })

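The net effect of switching the QEMU_BUILD_BUG_ON checks from sizeof(void *) to ATOMIC_REG_SIZE: 64-bit ILP32 hosts (x32, sparc with -mv8plus) may now use 64-bit atomics, while genuine 32-bit hosts still get a compile-time error. A hedged example of code this change newly permits on x32:

#include "qemu/osdep.h"
#include "qemu/atomic.h"

/* On x32, sizeof(void *) == 4 but ATOMIC_REG_SIZE == 8, so this now
 * compiles; on a true 32-bit host the QEMU_BUILD_BUG_ON still fires. */
uint64_t read_counter(uint64_t *p)
{
    return atomic_read(p);
}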