Commit 3a9d8b17 authored by Paolo Bonzini, committed by Blue Swirl
Browse files

optimize: track nonzero bits of registers



Add a "mask" field to the tcg_temp_info struct.  A bit that is zero
in "mask" will always be zero in the corresponding temporary.
Zero bits in the mask can be produced from moves of immediates,
zero-extensions, ANDs with constants, shifts; they can then be
propagated by logical operations, shifts, sign-extensions,
negations, deposit operations, and conditional moves.  Other
operations will just reset the mask to all-ones, i.e. unknown.

[rth: s/target_ulong/tcg_target_ulong/]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
parent d193a14a
Loading
Loading
Loading
Loading
+110 −22
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@ struct tcg_temp_info {
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
@@ -63,6 +64,7 @@ static void reset_temp(TCGArg temp)
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them.  */
@@ -71,6 +73,7 @@ static void reset_all_temps(int nb_temps)
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}

@@ -149,6 +152,7 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                            TCGArg dst, TCGArg src)
{
    reset_temp(dst);
    temps[dst].mask = temps[src].mask;
    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
@@ -173,6 +177,7 @@ static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    temps[dst].mask = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}
@@ -479,6 +484,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    tcg_target_ulong mask;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
@@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
            break;
        }

        /* Simplify using known-zero bits */
        mask = -1;
        switch (op) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                ;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(sar):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ((tcg_target_long)temps[args[1]].mask
                        >> temps[args[2]].val);
            }
            break;

        CASE_OP_32_64(shr):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = temps[args[1]].mask >> temps[args[2]].val;
            }
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = temps[args[1]].mask << temps[args[2]].val;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 the rightmost possibly-nonzero bit and all bits to
               its left; bits below it are known zero in the result too.  */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            tmp = ((1ull << args[4]) - 1);
            mask = ((temps[args[1]].mask & ~(tmp << args[3]))
                    | ((temps[args[2]].mask & tmp) << args[3]));
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        default:
            break;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(and):
@@ -947,7 +1034,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
            /* Default case: we know nothing about operation (or were unable
               to compute the operation result) so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {