Commit 5d0ceda9 authored by Richard Henderson's avatar Richard Henderson
Browse files

tcg: Implement gvec support for rotate by vector



No host backend support yet, but the interfaces for rotlv
and rotrv are in place.

Reviewed-by: default avatarAlex Bennée <alex.bennee@linaro.org>
Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
---
v3: Drop the generic expansion from rot to shift; we can do better
    for each backend, and then this code becomes unused.
parent b0f7e744
Loading
Loading
Loading
Loading
+96 −0
Original line number Diff line number Diff line
@@ -908,6 +908,102 @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        uint8_t sh = *(uint8_t *)(b + i) & 7;
        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        uint8_t sh = *(uint16_t *)(b + i) & 15;
        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint8_t sh = *(uint32_t *)(b + i) & 31;
        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint8_t sh = *(uint64_t *)(b + i) & 63;
        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        uint8_t sh = *(uint8_t *)(b + i) & 7;
        *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        uint8_t sh = *(uint16_t *)(b + i) & 15;
        *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint8_t sh = *(uint32_t *)(b + i) & 31;
        *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint8_t sh = *(uint64_t *)(b + i) & 63;
        *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
    }
    clear_high(d, oprsz, desc);
}

#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
{                                                                          \
+10 −0
Original line number Diff line number Diff line
@@ -279,6 +279,16 @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+4 −0
Original line number Diff line number Diff line
@@ -356,6 +356,10 @@ void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);

void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
                      uint32_t aofs, uint32_t bofs,
+2 −0
Original line number Diff line number Diff line
@@ -1009,6 +1009,8 @@ void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                     TCGv_vec a, TCGv_vec b);
+2 −0
Original line number Diff line number Diff line
@@ -257,6 +257,8 @@ DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))

DEF(cmp_vec, 1, 2, 1, IMPLVEC)

Loading