Commit 807e6773 authored by Aleksandar Markovic's avatar Aleksandar Markovic
Browse files

target/mips: Unroll loops for MSA float max/min instructions



Slight preformance improvement for MSA float max/min instructions.

Signed-off-by: default avatarAleksandar Markovic <amarkovic@wavecomp.com>
Reviewed-by: default avatarAleksandar Rikalo <arikalo@wavecomp.com>
Message-Id: <1562068213-11307-7-git-send-email-aleksandar.markovic@rt-rk.com>
parent 44da090b
Loading
Loading
Loading
Loading
+125 −73
Original line number Diff line number Diff line
@@ -3850,35 +3850,65 @@ void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    uint32_t i;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
            if (NUMBER_QNAN_PAIR(pws->w[i], pwt->w[i], 32, status)) {
                MSA_FLOAT_MAXOP(pwx->w[i], min, pws->w[i], pws->w[i], 32);
            } else if (NUMBER_QNAN_PAIR(pwt->w[i], pws->w[i], 32, status)) {
                MSA_FLOAT_MAXOP(pwx->w[i], min, pwt->w[i], pwt->w[i], 32);
    if (df == DF_WORD) {

        if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
        } else {
                MSA_FLOAT_MAXOP(pwx->w[i], min, pws->w[i], pwt->w[i], 32);
            MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
        }

        if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
        } else {
            MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
        }
        break;
    case DF_DOUBLE:
        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
            if (NUMBER_QNAN_PAIR(pws->d[i], pwt->d[i], 64, status)) {
                MSA_FLOAT_MAXOP(pwx->d[i], min, pws->d[i], pws->d[i], 64);
            } else if (NUMBER_QNAN_PAIR(pwt->d[i], pws->d[i], 64, status)) {
                MSA_FLOAT_MAXOP(pwx->d[i], min, pwt->d[i], pwt->d[i], 64);

        if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
        } else {
                MSA_FLOAT_MAXOP(pwx->d[i], min, pws->d[i], pwt->d[i], 64);
            MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
        }

        if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
        } else {
            MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
        }
        break;
    default:

    } else if (df == DF_DOUBLE) {

        if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
        } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
        } else {
            MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
        }

        if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
        } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
        } else {
            MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
        }

    } else {

        assert(0);

    }

    check_msacsr_cause(env, GETPC());
@@ -3894,22 +3924,18 @@ void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    uint32_t i;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
            FMAXMIN_A(min, max, pwx->w[i], pws->w[i], pwt->w[i], 32, status);
        }
        break;
    case DF_DOUBLE:
        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
            FMAXMIN_A(min, max, pwx->d[i], pws->d[i], pwt->d[i], 64, status);
        }
        break;
    default:
    if (df == DF_WORD) {
        FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
        FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
        FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
        FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
    } else if (df == DF_DOUBLE) {
        FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
        FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
    } else {
        assert(0);
    }

@@ -3926,35 +3952,65 @@ void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    uint32_t i;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
            if (NUMBER_QNAN_PAIR(pws->w[i], pwt->w[i], 32, status)) {
                MSA_FLOAT_MAXOP(pwx->w[i], max, pws->w[i], pws->w[i], 32);
            } else if (NUMBER_QNAN_PAIR(pwt->w[i], pws->w[i], 32, status)) {
                MSA_FLOAT_MAXOP(pwx->w[i], max, pwt->w[i], pwt->w[i], 32);
    if (df == DF_WORD) {

        if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
        } else {
                MSA_FLOAT_MAXOP(pwx->w[i], max, pws->w[i], pwt->w[i], 32);
            MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
        }

        if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
        } else {
            MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
        }
        break;
    case DF_DOUBLE:
        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
            if (NUMBER_QNAN_PAIR(pws->d[i], pwt->d[i], 64, status)) {
                MSA_FLOAT_MAXOP(pwx->d[i], max, pws->d[i], pws->d[i], 64);
            } else if (NUMBER_QNAN_PAIR(pwt->d[i], pws->d[i], 64, status)) {
                MSA_FLOAT_MAXOP(pwx->d[i], max, pwt->d[i], pwt->d[i], 64);

        if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
        } else {
                MSA_FLOAT_MAXOP(pwx->d[i], max, pws->d[i], pwt->d[i], 64);
            MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
        }

        if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
        } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
            MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
        } else {
            MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
        }
        break;
    default:

    } else if (df == DF_DOUBLE) {

        if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
        } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
        } else {
            MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
        }

        if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
        } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
            MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
        } else {
            MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
        }

    } else {

        assert(0);

    }

    check_msacsr_cause(env, GETPC());
@@ -3970,22 +4026,18 @@ void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    uint32_t i;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
            FMAXMIN_A(max, min, pwx->w[i], pws->w[i], pwt->w[i], 32, status);
        }
        break;
    case DF_DOUBLE:
        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
            FMAXMIN_A(max, min, pwx->d[i], pws->d[i], pwt->d[i], 64, status);
        }
        break;
    default:
    if (df == DF_WORD) {
        FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
        FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
        FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
        FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
    } else if (df == DF_DOUBLE) {
        FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
        FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
    } else {
        assert(0);
    }