Commit a04b68e1 authored by Richard Henderson's avatar Richard Henderson Committed by Peter Maydell
Browse files

target/arm: Convert aes and sm4 to gvec helpers



With this conversion, we will be able to use the same helpers
with sve.  In particular, pass 3 vector parameters for the
3-operand operations; for advsimd the destination register
is also an input.

This also fixes a bug in which we failed to clear the high bits
of the SVE register after an AdvSIMD operation.

Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
Message-id: 20200514212831.31248-2-richard.henderson@linaro.org
Reviewed-by: default avatarPeter Maydell <peter.maydell@linaro.org>
Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parent fc417e5b
Loading
Loading
Loading
Loading
+52 −20
Original line number Diff line number Diff line
@@ -13,7 +13,9 @@

#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes.h"
#include "vec_internal.h"

union CRYPTO_STATE {
    uint8_t    bytes[16];
@@ -29,18 +31,15 @@ union CRYPTO_STATE {
#define CR_ST_WORD(state, i)   (state.words[i])
#endif

void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt)
static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
                           uint64_t *rm, bool decrypt)
{
    static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
    static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
    union CRYPTO_STATE st = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
    int i;

    assert(decrypt < 2);

    /* xor state vector with round key */
    rk.l[0] ^= st.l[0];
    rk.l[1] ^= st.l[1];
@@ -54,7 +53,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt)
    rd[1] = st.l[1];
}

void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);
    bool decrypt = simd_data(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
{
    static uint32_t const mc[][256] = { {
        /* MixColumns lookup table */
@@ -190,13 +200,9 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
        0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
    } };

    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
    int i;

    assert(decrypt < 2);

    for (i = 0; i < 16; i += 4) {
        CR_ST_WORD(st, i >> 2) =
            mc[decrypt][CR_ST_BYTE(st, i)] ^
@@ -209,6 +215,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
    rd[1] = st.l[1];
}

void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);
    bool decrypt = simd_data(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_aesmc(vd + i, vm + i, decrypt);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

/*
 * SHA-1 logical functions
 */
@@ -638,12 +655,10 @@ static uint8_t const sm4_sbox[] = {
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};

void HELPER(crypto_sm4e)(void *vd, void *vn)
static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
    uint32_t t, i;

    for (i = 0; i < 4; i++) {
@@ -665,11 +680,18 @@ void HELPER(crypto_sm4e)(void *vd, void *vn)
    rd[1] = d.l[1];
}

void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4e(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d;
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
@@ -693,3 +715,13 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
    rd[0] = d.l[0];
    rd[1] = d.l[1];
}

void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}
+3 −3
Original line number Diff line number Diff line
@@ -510,7 +510,7 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr)

DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -531,8 +531,8 @@ DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)

DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+35 −20
Original line number Diff line number Diff line
@@ -571,6 +571,15 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
@@ -13403,9 +13412,8 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int decrypt;
    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
    TCGv_i32 tcg_decrypt;
    CryptoThreeOpIntFn *genfn;
    gen_helper_gvec_2 *genfn2 = NULL;
    gen_helper_gvec_3 *genfn3 = NULL;

    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
        unallocated_encoding(s);
@@ -13415,19 +13423,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
    switch (opcode) {
    case 0x4: /* AESE */
        decrypt = 0;
        genfn = gen_helper_crypto_aese;
        genfn3 = gen_helper_crypto_aese;
        break;
    case 0x6: /* AESMC */
        decrypt = 0;
        genfn = gen_helper_crypto_aesmc;
        genfn2 = gen_helper_crypto_aesmc;
        break;
    case 0x5: /* AESD */
        decrypt = 1;
        genfn = gen_helper_crypto_aese;
        genfn3 = gen_helper_crypto_aese;
        break;
    case 0x7: /* AESIMC */
        decrypt = 1;
        genfn = gen_helper_crypto_aesmc;
        genfn2 = gen_helper_crypto_aesmc;
        break;
    default:
        unallocated_encoding(s);
@@ -13437,16 +13445,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
    if (!fp_access_check(s)) {
        return;
    }

    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
    tcg_decrypt = tcg_const_i32(decrypt);

    genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);

    tcg_temp_free_ptr(tcg_rd_ptr);
    tcg_temp_free_ptr(tcg_rn_ptr);
    tcg_temp_free_i32(tcg_decrypt);
    if (genfn2) {
        gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
    } else {
        gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
    }
}

/* Crypto three-reg SHA
@@ -13595,7 +13598,8 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;
    CryptoThreeOpFn *genfn;
    CryptoThreeOpFn *genfn = NULL;
    gen_helper_gvec_3 *oolfn = NULL;

    if (o == 0) {
        switch (opcode) {
@@ -13630,7 +13634,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
            break;
        case 2: /* SM4EKEY */
            feature = dc_isar_feature(aa64_sm4, s);
            genfn = gen_helper_crypto_sm4ekey;
            oolfn = gen_helper_crypto_sm4ekey;
            break;
        default:
            unallocated_encoding(s);
@@ -13647,6 +13651,11 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
        return;
    }

    if (oolfn) {
        gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
        return;
    }

    if (genfn) {
        TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;

@@ -13699,6 +13708,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
    bool feature;
    CryptoTwoOpFn *genfn;
    gen_helper_gvec_3 *oolfn = NULL;

    switch (opcode) {
    case 0: /* SHA512SU0 */
@@ -13707,7 +13717,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
        break;
    case 1: /* SM4E */
        feature = dc_isar_feature(aa64_sm4, s);
        genfn = gen_helper_crypto_sm4e;
        oolfn = gen_helper_crypto_sm4e;
        break;
    default:
        unallocated_encoding(s);
@@ -13723,6 +13733,11 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
        return;
    }

    if (oolfn) {
        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, oolfn);
        return;
    }

    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
    tcg_rn_ptr = vec_full_reg_ptr(s, rn);

+14 −13
Original line number Diff line number Diff line
@@ -6350,22 +6350,23 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
                        return 1;
                    }
                    ptr1 = vfp_reg_ptr(true, rd);
                    ptr2 = vfp_reg_ptr(true, rm);

                     /* Bit 6 is the lowest opcode bit; it distinguishes between
                      * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
                    /*
                     * Bit 6 is the lowest opcode bit; it distinguishes
                     * between encryption (AESE/AESMC) and decryption
                     * (AESD/AESIMC).
                     */
                    tmp3 = tcg_const_i32(extract32(insn, 6, 1));

                    if (op == NEON_2RM_AESE) {
                        gen_helper_crypto_aese(ptr1, ptr2, tmp3);
                        tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd),
                                           vfp_reg_offset(true, rd),
                                           vfp_reg_offset(true, rm),
                                           16, 16, extract32(insn, 6, 1),
                                           gen_helper_crypto_aese);
                    } else {
                        gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
                        tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd),
                                           vfp_reg_offset(true, rm),
                                           16, 16, extract32(insn, 6, 1),
                                           gen_helper_crypto_aesmc);
                    }
                    tcg_temp_free_ptr(ptr1);
                    tcg_temp_free_ptr(ptr2);
                    tcg_temp_free_i32(tmp3);
                    break;
                case NEON_2RM_SHA1H:
                    if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
+1 −11
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "fpu/softfloat.h"

#include "vec_internal.h"

/* Note that vector data is stored in host-endian 64-bit chunks,
   so addressing units smaller than that needs a host-endian fixup.  */
@@ -36,16 +36,6 @@
#define H4(x)  (x)
#endif

static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    uint64_t *d = vd + opr_sz;
    uintptr_t i;

    for (i = opr_sz; i < max_sz; i += 8) {
        *d++ = 0;
    }
}

/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
                               int16_t src3, uint32_t *sat)
Loading