Commit b125f9dc authored by Richard Henderson's avatar Richard Henderson
Browse files

tcg: Check for overflow via highwater mark



We currently pre-compute an worst case code size for any TB, which
works out to be 122kB.  Since the average TB size is near 1kB, this
wastes quite a lot of storage.

Instead, check for overflow in between generating code for each opcode.
The overhead of the check isn't measurable and wastage is minimized.

Reviewed-by: default avatarPeter Maydell <peter.maydell@linaro.org>
Signed-off-by: default avatarRichard Henderson <rth@twiddle.net>
parent f293709c
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -62,12 +62,6 @@ typedef struct TranslationBlock TranslationBlock;
#define OPC_BUF_SIZE 640
#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)

/* Maximum size a TCG op can expand to.  This is complicated because a
   single op may require several host instructions and register reloads.
   For now take a wild guess at 192 bytes, which should allow at least
   a couple of fixup instructions per argument.  */
#define TCG_MAX_OP_SIZE 192

#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)

#include "qemu/log.h"
+11 −3
Original line number Diff line number Diff line
@@ -385,9 +385,10 @@ void tcg_prologue_init(TCGContext *s)
    total_size = s->code_gen_buffer_size - prologue_size;
    s->code_gen_buffer_size = total_size;

    /* Compute a high-water mark, at which we voluntarily flush the
       buffer and start over.  */
    s->code_gen_buffer_max_size = total_size - TCG_MAX_OP_SIZE * OPC_BUF_SIZE;
    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);

    tcg_register_jit(s->code_gen_buffer, total_size);

@@ -2438,6 +2439,13 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
#ifndef NDEBUG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely(s->code_gen_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
+3 −2
Original line number Diff line number Diff line
@@ -559,10 +559,11 @@ struct TCGContext {
    void *code_gen_prologue;
    void *code_gen_buffer;
    size_t code_gen_buffer_size;
    /* threshold to flush the translated code buffer */
    size_t code_gen_buffer_max_size;
    void *code_gen_ptr;

    /* Threshold to flush the translated code buffer.  */
    void *code_gen_highwater;

    TBContext tb_ctx;

    /* The TCGBackendData structure is private to tcg-target.c.  */
+26 −5
Original line number Diff line number Diff line
@@ -223,6 +223,7 @@ static target_long decode_sleb128(uint8_t **pp)

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx.code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

@@ -241,6 +242,14 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
        }
        prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
@@ -756,9 +765,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
        (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
         tcg_ctx.code_gen_buffer_max_size) {
    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
        return NULL;
    }
    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
@@ -1063,12 +1070,15 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
    if (use_icount) {
        cflags |= CF_USE_ICOUNT;
    }

    tb = tb_alloc(pc);
    if (!tb) {
    if (unlikely(!tb)) {
 buffer_overflow:
        /* flush must be done */
        tb_flush(cpu);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        assert(tb != NULL);
        /* Don't forget to invalidate previous TB info.  */
        tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
    }
@@ -1109,8 +1119,19 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
    tcg_ctx.code_time -= profile_getclock();
#endif

    /* ??? Overflow could be handled better here.  In particular, we
       don't need to re-do gen_intermediate_code, nor should we re-do
       the tcg optimization currently hidden inside tcg_gen_code.  All
       that should be required is to flush the TBs, allocate a new TB,
       re-initialize it per above, and re-do the actual code generation.  */
    gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf);
    if (unlikely(gen_code_size < 0)) {
        goto buffer_overflow;
    }
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }

#ifdef CONFIG_PROFILER
    tcg_ctx.code_time += profile_getclock();
@@ -1681,7 +1702,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%zd\n",
                tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                tcg_ctx.code_gen_buffer_max_size);
                tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
    cpu_fprintf(f, "TB count            %d/%d\n",
            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",