Commit da2fdd0b authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160611' into staging



TB hashing improvements

# gpg: Signature made Sun 12 Jun 2016 01:12:50 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160611:
  translate-all: add tb hash bucket info to 'info jit' dump
  tb hash: track translated blocks with qht
  qht: add test-qht-par to invoke qht-bench from 'check' target
  qht: add qht-bench, a performance benchmark
  qht: add test program
  qht: QEMU's fast, resizable and scalable Hash Table
  qdist: add test program
  qdist: add module to represent frequency distributions of data
  tb hash: hash phys_pc, pc, and flags with xxhash
  exec: add tb_hash_func5, derived from xxhash
  qemu-thread: add simple test-and-set spinlock
  include/processor.h: define cpu_relax()
  seqlock: rename write_lock/unlock to write_begin/end
  seqlock: remove optional mutex
  compiler.h: add QEMU_ALIGNED() to enforce struct alignment

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents a93c1bdf 329844d4
Loading
Loading
Loading
Loading
+46 −46
Original line number Diff line number Diff line
@@ -225,57 +225,57 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
}
#endif

static TranslationBlock *tb_find_physical(CPUState *cpu,
                                          target_ulong pc,
                                          target_ulong cs_base,
                                          uint32_t flags)
{
    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
    TranslationBlock *tb, **tb_hash_head, **ptb1;
    unsigned int h;
    tb_page_addr_t phys_pc, phys_page1;

    /* find translated block using physical mappings */
    phys_pc = get_page_addr_code(env, pc);
    phys_page1 = phys_pc & TARGET_PAGE_MASK;
    h = tb_phys_hash_func(phys_pc);

    /* Start at head of the hash entry */
    ptb1 = tb_hash_head = &tcg_ctx.tb_ctx.tb_phys_hash[h];
    tb = *ptb1;
struct tb_desc {
    target_ulong pc;
    target_ulong cs_base;
    CPUArchState *env;
    tb_page_addr_t phys_page1;
    uint32_t flags;
};

    while (tb) {
        if (tb->pc == pc &&
            tb->page_addr[0] == phys_page1 &&
            tb->cs_base == cs_base &&
            tb->flags == flags) {
static bool tb_cmp(const void *p, const void *d)
{
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

    if (tb->pc == desc->pc &&
        tb->page_addr[0] == desc->phys_page1 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags) {
        /* check next page if needed */
        if (tb->page_addr[1] == -1) {
                /* done, we have a match */
                break;
            return true;
        } else {
                /* check next page if needed */
                target_ulong virt_page2 = (pc & TARGET_PAGE_MASK) +
                                          TARGET_PAGE_SIZE;
                tb_page_addr_t phys_page2 = get_page_addr_code(env, virt_page2);
            tb_page_addr_t phys_page2;
            target_ulong virt_page2;

            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
            phys_page2 = get_page_addr_code(desc->env, virt_page2);
            if (tb->page_addr[1] == phys_page2) {
                    break;
                return true;
            }
        }
    }

        ptb1 = &tb->phys_hash_next;
        tb = *ptb1;
    return false;
}

    if (tb) {
        /* Move the TB to the head of the list */
        *ptb1 = tb->phys_hash_next;
        tb->phys_hash_next = *tb_hash_head;
        *tb_hash_head = tb;
    }
    return tb;
static TranslationBlock *tb_find_physical(CPUState *cpu,
                                          target_ulong pc,
                                          target_ulong cs_base,
                                          uint32_t flags)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.env = (CPUArchState *)cpu->env_ptr;
    desc.cs_base = cs_base;
    desc.flags = flags;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
    h = tb_hash_func(phys_pc, pc, flags);
    return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
}

static TranslationBlock *tb_find_slow(CPUState *cpu,
+15 −15
Original line number Diff line number Diff line
@@ -249,13 +249,13 @@ int64_t cpu_get_clock(void)
void cpu_enable_ticks(void)
{
    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
@@ -265,13 +265,13 @@ void cpu_enable_ticks(void)
void cpu_disable_ticks(void)
{
    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
@@ -294,7 +294,7 @@ static void icount_adjust(void)
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

@@ -315,7 +315,7 @@ static void icount_adjust(void)
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
@@ -355,7 +355,7 @@ static void icount_warp_rt(void)
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
@@ -374,7 +374,7 @@ static void icount_warp_rt(void)
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -399,9 +399,9 @@ void qtest_clock_warp(int64_t dest)
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
@@ -468,9 +468,9 @@ void qemu_start_warp_timer(void)
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
@@ -481,11 +481,11 @@ void qemu_start_warp_timer(void)
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
@@ -621,7 +621,7 @@ int cpu_throttle_get_percentage(void)

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
+0 −2
Original line number Diff line number Diff line
@@ -215,8 +215,6 @@ struct TranslationBlock {

    void *tc_ptr;    /* pointer to the translated code */
    uint8_t *tc_search;  /* pointer to search data */
    /* next matching tb for physical address. */
    struct TranslationBlock *phys_hash_next;
    /* original tb when cflags has CF_NOCACHE */
    struct TranslationBlock *orig_tb;
    /* first and second physical page containing code. The lower bit
+4 −3
Original line number Diff line number Diff line
@@ -21,9 +21,10 @@
#define QEMU_TB_CONTEXT_H_

#include "qemu/thread.h"
#include "qemu/qht.h"

#define CODE_GEN_PHYS_HASH_BITS     15
#define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
#define CODE_GEN_HTABLE_BITS     15
#define CODE_GEN_HTABLE_SIZE     (1 << CODE_GEN_HTABLE_BITS)

typedef struct TranslationBlock TranslationBlock;
typedef struct TBContext TBContext;
@@ -31,7 +32,7 @@ typedef struct TBContext TBContext;
struct TBContext {

    TranslationBlock *tbs;
    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
    struct qht htable;
    int nb_tbs;
    /* any access to the tbs or the page table must use this lock */
    QemuMutex tb_lock;
+94 −0
Original line number Diff line number Diff line
/*
 * xxHash - Fast Hash algorithm
 * Copyright (C) 2012-2016, Yann Collet
 *
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * + Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * + Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - xxHash source repository : https://github.com/Cyan4973/xxHash
 */
#ifndef EXEC_TB_HASH_XX
#define EXEC_TB_HASH_XX

#include <qemu/bitops.h>

#define PRIME32_1   2654435761U
#define PRIME32_2   2246822519U
#define PRIME32_3   3266489917U
#define PRIME32_4    668265263U
#define PRIME32_5    374761393U

#define TB_HASH_XX_SEED 1

/*
 * xxhash32, customized for input variables that are not guaranteed to be
 * contiguous in memory.
 */
static inline
uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
{
    uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
    uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
    uint32_t v3 = TB_HASH_XX_SEED + 0;
    uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1;
    uint32_t a = a0 >> 32;
    uint32_t b = a0;
    uint32_t c = b0 >> 32;
    uint32_t d = b0;
    uint32_t h32;

    v1 += a * PRIME32_2;
    v1 = rol32(v1, 13);
    v1 *= PRIME32_1;

    v2 += b * PRIME32_2;
    v2 = rol32(v2, 13);
    v2 *= PRIME32_1;

    v3 += c * PRIME32_2;
    v3 = rol32(v3, 13);
    v3 *= PRIME32_1;

    v4 += d * PRIME32_2;
    v4 = rol32(v4, 13);
    v4 *= PRIME32_1;

    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
    h32 += 20;

    h32 += e * PRIME32_3;
    h32  = rol32(h32, 17) * PRIME32_4;

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}

#endif /* EXEC_TB_HASH_XX */
Loading