Commit 5e5f07e0 authored by Evgeny Voevodin, committed by Blue Swirl

TCG: Move translation block variables to new context inside tcg_ctx: tb_ctx



It's worthwhile to clean up the translation block variables and move them
into one context, as was suggested by Blue Swirl.
Also, if we use this context directly inside tcg_ctx, it speeds up code
generation a bit.

Signed-off-by: Evgeny Voevodin <evgenyvoevodin@gmail.com>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
parent 0b0d3320
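
For readers skimming the diffs below, the following is a minimal, self-contained sketch of the access-pattern change this commit makes. The types are simplified stand-ins, not QEMU's real definitions; only the field names mirror the TBContext introduced in the diff.

/*
 * Simplified model of the refactoring: the formerly free-standing globals
 * (tb_phys_hash[], nb_tbs, tb_invalidated_flag, the statistics counters)
 * are grouped into a TBContext that is embedded in the single global
 * TCGContext, so call sites reach them as tcg_ctx.tb_ctx.<field>.
 */
#include <stdio.h>

#define PHYS_HASH_SIZE 32                /* stand-in for CODE_GEN_PHYS_HASH_SIZE */

typedef struct TBContext {
    void *tb_phys_hash[PHYS_HASH_SIZE];  /* was: global tb_phys_hash[]      */
    int nb_tbs;                          /* was: static int nb_tbs          */
    int tb_invalidated_flag;             /* was: extern int in exec-all.h   */
    int tb_flush_count;                  /* was: static statistics counter  */
    int tb_phys_invalidate_count;        /* was: static statistics counter  */
} TBContext;

typedef struct TCGContext {
    /* ... code generation buffer fields elided ... */
    TBContext tb_ctx;                    /* new: TB state embedded here     */
} TCGContext;

TCGContext tcg_ctx;

int main(void)
{
    /* Call sites that used to touch the globals directly now go through
     * the embedded sub-context, in the style of tb_find_slow()/tb_flush(): */
    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
    tcg_ctx.tb_ctx.nb_tbs++;
    tcg_ctx.tb_ctx.tb_flush_count++;
    printf("nb_tbs=%d flushes=%d\n",
           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.tb_flush_count);
    return 0;
}
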
+8 −10
@@ -23,8 +23,6 @@
#include "qemu/atomic.h"
#include "sysemu/qtest.h"

int tb_invalidated_flag;

//#define CONFIG_DEBUG_EXEC

bool qemu_cpu_has_work(CPUState *cpu)
@@ -90,13 +88,13 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
    tb_page_addr_t phys_pc, phys_page1;
    target_ulong virt_page2;

    tb_invalidated_flag = 0;
    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;

    /* find translated block using physical mappings */
    phys_pc = get_page_addr_code(env, pc);
    phys_page1 = phys_pc & TARGET_PAGE_MASK;
    h = tb_phys_hash_func(phys_pc);
    ptb1 = &tb_phys_hash[h];
    ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
    for(;;) {
        tb = *ptb1;
        if (!tb)
@@ -128,8 +126,8 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
    /* Move the last found TB to the head of the list */
    if (likely(*ptb1)) {
        *ptb1 = tb->phys_hash_next;
        tb->phys_hash_next = tb_phys_hash[h];
        tb_phys_hash[h] = tb;
        tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
    }
    /* we add the TB in the virtual pc hash table */
    env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
@@ -563,16 +561,16 @@ int cpu_exec(CPUArchState *env)
#endif
                }
#endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
                spin_lock(&tb_lock);
                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
                tb = tb_find_fast(env);
                /* Note: we do it here to avoid a gcc bug on Mac OS X when
                   doing it in tb_find_slow */
                if (tb_invalidated_flag) {
                if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
                    /* as some TB could have been invalidated because
                       of memory exceptions while generating the code, we
                       must recompute the hash index here */
                    next_tb = 0;
                    tb_invalidated_flag = 0;
                    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
                }
#ifdef CONFIG_DEBUG_EXEC
                qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
@@ -585,7 +583,7 @@ int cpu_exec(CPUArchState *env)
                if (next_tb != 0 && tb->page_addr[1] == -1) {
                    tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
                }
                spin_unlock(&tb_lock);
                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);

                /* cpu_interrupt might be called while translating the
                   TB, but before it is linked into a potentially
+19 −8
@@ -168,6 +168,25 @@ struct TranslationBlock {
    uint32_t icount;
};

#include "exec/spinlock.h"

typedef struct TBContext TBContext;

struct TBContext {

    TranslationBlock *tbs;
    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
    int nb_tbs;
    /* any access to the tbs or the page table must use this lock */
    spinlock_t tb_lock;

    /* statistics */
    int tb_flush_count;
    int tb_phys_invalidate_count;

    int tb_invalidated_flag;
};

static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
{
    target_ulong tmp;
@@ -192,8 +211,6 @@ void tb_free(TranslationBlock *tb);
void tb_flush(CPUArchState *env);
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);

extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];

#if defined(USE_DIRECT_JUMP)

#if defined(CONFIG_TCG_INTERPRETER)
@@ -275,12 +292,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
    }
}

#include "exec/spinlock.h"

extern spinlock_t tb_lock;

extern int tb_invalidated_flag;

/* The return address may point to the start of the next instruction.
   Subtracting one gets us the call instruction itself.  */
#if defined(CONFIG_TCG_INTERPRETER)
+3 −3
@@ -111,7 +111,7 @@ static int pending_cpus;
/* Make sure everything is in a consistent state for calling fork().  */
void fork_start(void)
{
    pthread_mutex_lock(&tb_lock);
    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
    pthread_mutex_lock(&exclusive_lock);
    mmap_fork_start();
}
@@ -129,11 +129,11 @@ void fork_end(int child)
        pthread_mutex_init(&cpu_list_mutex, NULL);
        pthread_cond_init(&exclusive_cond, NULL);
        pthread_cond_init(&exclusive_resume, NULL);
        pthread_mutex_init(&tb_lock, NULL);
        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
        gdbserver_fork(thread_env);
    } else {
        pthread_mutex_unlock(&exclusive_lock);
        pthread_mutex_unlock(&tb_lock);
        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
    }
}

+2 −0
@@ -471,6 +471,8 @@ struct TCGContext {
    size_t code_gen_buffer_max_size;
    uint8_t *code_gen_ptr;

    TBContext tb_ctx;

#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
    /* labels info for qemu_ld/st IRs
       The labels help to generate TLB miss case codes at the end of TB */
+47 −49
@@ -72,13 +72,6 @@

#define SMC_BITMAP_USE_THRESHOLD 10

/* Translation blocks */
static TranslationBlock *tbs;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;

typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
@@ -125,10 +118,6 @@ uintptr_t qemu_host_page_mask;
   The bottom level has pointers to PageDesc.  */
static void *l1_map[V_L1_SIZE];

/* statistics */
static int tb_flush_count;
static int tb_phys_invalidate_count;

/* code generation context */
TCGContext tcg_ctx;

@@ -589,7 +578,8 @@ static inline void code_gen_alloc(size_t tb_size)
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
            CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
    tcg_ctx.tb_ctx.tbs =
            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
}

/* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -620,12 +610,12 @@ static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    if (nb_tbs >= tcg_ctx.code_gen_max_blocks ||
    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
        (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
         tcg_ctx.code_gen_buffer_max_size) {
        return NULL;
    }
    tb = &tbs[nb_tbs++];
    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
    tb->pc = pc;
    tb->cflags = 0;
    return tb;
@@ -636,9 +626,10 @@ void tb_free(TranslationBlock *tb)
    /* In practice this is mostly used for single use temporary TB
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated.  */
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
    if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
        tcg_ctx.code_gen_ptr = tb->tc_ptr;
        nb_tbs--;
        tcg_ctx.tb_ctx.nb_tbs--;
    }
}

@@ -693,27 +684,28 @@ void tb_flush(CPUArchState *env1)
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
           ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
           nb_tbs : 0);
           tcg_ctx.tb_ctx.nb_tbs : 0);
#endif
    if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
        > tcg_ctx.code_gen_buffer_size) {
        cpu_abort(env1, "Internal error: code buffer overflow\n");
    }
    nb_tbs = 0;
    tcg_ctx.tb_ctx.nb_tbs = 0;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
    }

    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
            CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
    page_flush_tb();

    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
    tcg_ctx.tb_ctx.tb_flush_count++;
}

#ifdef DEBUG_TB_CHECK
@@ -725,7 +717,7 @@ static void tb_invalidate_check(target_ulong address)

    address &= TARGET_PAGE_MASK;
    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
        for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                  address >= tb->pc + tb->size)) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
@@ -743,7 +735,8 @@ static void tb_page_check(void)
    int i, flags1, flags2;

    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
                tb = tb->phys_hash_next) {
            flags1 = page_get_flags(tb->pc);
            flags2 = page_get_flags(tb->pc + tb->size - 1);
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
@@ -835,7 +828,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_hash_remove(&tb_phys_hash[h], tb);
    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
@@ -849,7 +842,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;
    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
@@ -878,7 +871,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
    }
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
    tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
}

static inline void set_bits(uint8_t *tab, int start, int len)
@@ -955,7 +948,7 @@ TranslationBlock *tb_gen_code(CPUArchState *env,
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info.  */
        tb_invalidated_flag = 1;
        tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
    }
    tc_ptr = tcg_ctx.code_gen_ptr;
    tb->tc_ptr = tc_ptr;
@@ -1273,7 +1266,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

@@ -1323,7 +1316,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
    uintptr_t v;
    TranslationBlock *tb;

    if (nb_tbs <= 0) {
    if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
        return NULL;
    }
    if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
@@ -1332,10 +1325,10 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
    }
    /* binary search (cf Knuth) */
    m_min = 0;
    m_max = nb_tbs - 1;
    m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
    while (m_min <= m_max) {
        m = (m_min + m_max) >> 1;
        tb = &tbs[m];
        tb = &tcg_ctx.tb_ctx.tbs[m];
        v = (uintptr_t)tb->tc_ptr;
        if (v == tc_ptr) {
            return tb;
@@ -1345,7 +1338,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
            m_min = m + 1;
        }
    }
    return &tbs[m_max];
    return &tcg_ctx.tb_ctx.tbs[m_max];
}

static void tb_reset_jump_recursive(TranslationBlock *tb);
@@ -1566,8 +1559,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for (i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
    for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
        tb = &tcg_ctx.tb_ctx.tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size) {
            max_target_code_size = tb->size;
@@ -1588,27 +1581,32 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
                tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                tcg_ctx.code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, tcg_ctx.code_gen_max_blocks);
            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
            tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
                    tcg_ctx.tb_ctx.nb_tbs : 0,
            max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
                        nb_tbs : 0,
                target_code_size ?
                (double) (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
            tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
                                     tcg_ctx.code_gen_buffer) /
                                     tcg_ctx.tb_ctx.nb_tbs : 0,
                target_code_size ? (double) (tcg_ctx.code_gen_ptr -
                                             tcg_ctx.code_gen_buffer) /
                                             target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
            cross_page,
            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
            tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
                                    tcg_ctx.tb_ctx.nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
                        tcg_ctx.tb_ctx.nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
                        tcg_ctx.tb_ctx.nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TB flush count      %d\n", tcg_ctx.tb_ctx.tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n",
            tcg_ctx.tb_ctx.tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}