Commit f75da298 authored by Richard Henderson's avatar Richard Henderson
Browse files

tcg: Add support for vector compare select



Perform a per-element conditional move.  This combination operation is
easier to implement on some host vector units than plain cmp+bitsel.
Omit the usual gvec interface, as this is intended to be used by
target-specific gvec expansion call-backs.

Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
parent 38dc1294
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -631,6 +631,13 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.

  Bitwise select, v0 = (v2 & v1) | (v3 & ~v1), across the entire vector.

* cmpsel_vec v0, c1, c2, v3, v4, cond

  Select elements based on comparison results:
  for (i = 0; i < n; ++i) {
    v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i].
  }

*********

Note 1: Some shortcuts are defined when the last operand is known to be
+1 −0
Original line number Diff line number Diff line
@@ -141,6 +141,7 @@ typedef enum {
#define TCG_TARGET_HAS_sat_vec          1
#define TCG_TARGET_HAS_minmax_vec       1
#define TCG_TARGET_HAS_bitsel_vec       0
#define TCG_TARGET_HAS_cmpsel_vec       0

#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP     1
+1 −0
Original line number Diff line number Diff line
@@ -191,6 +191,7 @@ extern bool have_avx2;
#define TCG_TARGET_HAS_sat_vec          1
#define TCG_TARGET_HAS_minmax_vec       1
#define TCG_TARGET_HAS_bitsel_vec       0
#define TCG_TARGET_HAS_cmpsel_vec       0

#define TCG_TARGET_deposit_i32_valid(ofs, len) \
    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
+59 −0
Original line number Diff line number Diff line
@@ -119,6 +119,11 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
@@ -159,6 +164,20 @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
@@ -717,3 +736,43 @@ void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
+2 −0
Original line number Diff line number Diff line
@@ -1002,6 +1002,8 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c);
void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d);

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
Loading