Commit 079911cb authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/rth/tags/pull-fpu-20181005' into staging



Testing infrastructure for softfpu (not run by default).
Drop countLeadingZeros.
Fix div_floats.
Add udiv_qrnnd specializations for x86_64, s390x, ppc64 hosts.

# gpg: Signature made Fri 05 Oct 2018 19:00:09 BST
# gpg:                using RSA key 64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-fpu-20181005:
  softfloat: Specialize udiv_qrnnd for ppc64
  softfloat: Specialize udiv_qrnnd for s390x
  softfloat: Specialize udiv_qrnnd for x86_64
  softfloat: Fix division
  softfloat: Replace countLeadingZeros32/64 with clz32/64
  tests/fp/fp-test: add floating point tests
  gitmodules: add berkeley's softfloat + testfloat version 3
  softfloat: remove float64_trunc_to_int

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents bb94c119 27ae5109
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -43,3 +43,9 @@
[submodule "roms/u-boot-sam460ex"]
	path = roms/u-boot-sam460ex
	url = git://git.qemu.org/u-boot-sam460ex.git
[submodule "tests/fp/berkeley-testfloat-3"]
	path = tests/fp/berkeley-testfloat-3
	url = git://github.com/cota/berkeley-testfloat-3
[submodule "tests/fp/berkeley-softfloat-3"]
	path = tests/fp/berkeley-softfloat-3
	url = git://github.com/cota/berkeley-softfloat-3
+4 −0
Original line number Diff line number Diff line
@@ -296,6 +296,8 @@ if test -e "$source_path/.git"
then
    git_update=yes
    git_submodules="ui/keycodemapdb"
    git_submodules="$git_submodules tests/fp/berkeley-testfloat-3"
    git_submodules="$git_submodules tests/fp/berkeley-softfloat-3"
else
    git_update=no
    git_submodules=""
@@ -7449,12 +7451,14 @@ fi

# build tree in object directory in case the source is not in the current directory
DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests tests/vm"
DIRS="$DIRS tests/fp"
DIRS="$DIRS docs docs/interop fsdev scsi"
DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
DIRS="$DIRS roms/seabios roms/vgabios"
FILES="Makefile tests/tcg/Makefile qdict-test-data.txt"
FILES="$FILES tests/tcg/cris/Makefile tests/tcg/cris/.gdbinit"
FILES="$FILES tests/tcg/lm32/Makefile tests/tcg/xtensa/Makefile po/Makefile"
FILES="$FILES tests/fp/Makefile"
FILES="$FILES pc-bios/optionrom/Makefile pc-bios/keymaps"
FILES="$FILES pc-bios/spapr-rtas/Makefile"
FILES="$FILES pc-bios/s390-ccw/Makefile"
+40 −28
Original line number Diff line number Diff line
@@ -1112,19 +1112,38 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t temp_lo, temp_hi;
        uint64_t n0, n1, q, r;
        int exp = a.exp - b.exp;

        /*
         * We want a 2*N / N-bit division to produce exactly an N-bit
         * result, so that we do not lose any precision and so that we
         * do not have to renormalize afterward.  If A.frac < B.frac,
         * then division would produce an (N-1)-bit result; shift A left
         * by one to produce the an N-bit result, and decrement the
         * exponent to match.
         *
         * The udiv_qrnnd algorithm that we're using requires normalization,
         * i.e. the msb of the denominator must be set.  Since we know that
         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
         * by one (more), and the remainder must be shifted right by one.
         */
        if (a.frac < b.frac) {
            exp -= 1;
            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
                              &temp_hi, &temp_lo);
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
        } else {
            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
                              &temp_hi, &temp_lo);
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
        }
        /* LSB of quot is set if inexact which roundandpack will use
         * to set flags. Yet again we re-use a for the result */
        a.frac = div128To64(temp_lo, temp_hi, b.frac);
        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);

        /*
         * Set lsb if there is a remainder, to set inexact.
         * As mentioned above, to find the actual value of the remainder we
         * would need to shift right, but (1) we are only concerned about
         * non-zero-ness, and (2) the remainder will always be even because
         * both inputs to the division primitive are even.
         */
        a.frac = q | (r != 0);
        a.sign = sign;
        a.exp = exp;
        return a;
@@ -1409,13 +1428,6 @@ float64 float64_round_to_int(float64 a, float_status *s)
    return float64_round_pack_canonical(pr, s);
}

float64 float64_trunc_to_int(float64 a, float_status *s)
{
    FloatParts pa = float64_unpack_canonical(a, s);
    FloatParts pr = round_to_int(pa, float_round_to_zero, 0, s);
    return float64_round_pack_canonical(pr, s);
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
@@ -2690,7 +2702,7 @@ static void
{
    int8_t shiftCount;

    shiftCount = countLeadingZeros32( aSig ) - 8;
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;

@@ -2798,7 +2810,7 @@ static float32
{
    int8_t shiftCount;

    shiftCount = countLeadingZeros32( zSig ) - 1;
    shiftCount = clz32(zSig) - 1;
    return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
                               status);

@@ -2831,7 +2843,7 @@ static void
{
    int8_t shiftCount;

    shiftCount = countLeadingZeros64( aSig ) - 11;
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;

@@ -2969,7 +2981,7 @@ static float64
{
    int8_t shiftCount;

    shiftCount = countLeadingZeros64( zSig ) - 1;
    shiftCount = clz64(zSig) - 1;
    return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
                               status);

@@ -2987,7 +2999,7 @@ void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
{
    int8_t shiftCount;

    shiftCount = countLeadingZeros64( aSig );
    shiftCount = clz64(aSig);
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
@@ -3226,7 +3238,7 @@ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = countLeadingZeros64( zSig0 );
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
@@ -3303,7 +3315,7 @@ static void
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            *zSig0Ptr = aSig1>>( - shiftCount );
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
@@ -3315,7 +3327,7 @@ static void
        *zExpPtr = - shiftCount - 63;
    }
    else {
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }
@@ -3504,7 +3516,7 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
    shiftCount = clz64(zSig0) - 15;
    if ( 0 <= shiftCount ) {
        zSig2 = 0;
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
@@ -3536,7 +3548,7 @@ floatx80 int32_to_floatx80(int32_t a, float_status *status)
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    absA = zSign ? - a : a;
    shiftCount = countLeadingZeros32( absA ) + 32;
    shiftCount = clz32(absA) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

@@ -3558,7 +3570,7 @@ float128 int32_to_float128(int32_t a, float_status *status)
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    absA = zSign ? - a : a;
    shiftCount = countLeadingZeros32( absA ) + 17;
    shiftCount = clz32(absA) + 17;
    zSig0 = absA;
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );

@@ -3580,7 +3592,7 @@ floatx80 int64_to_floatx80(int64_t a, float_status *status)
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    absA = zSign ? - a : a;
    shiftCount = countLeadingZeros64( absA );
    shiftCount = clz64(absA);
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );

}
@@ -3602,7 +3614,7 @@ float128 int64_to_float128(int64_t a, float_status *status)
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    absA = zSign ? - a : a;
    shiftCount = countLeadingZeros64( absA ) + 49;
    shiftCount = clz64(absA) + 49;
    zExp = 0x406E - shiftCount;
    if ( 64 <= shiftCount ) {
        zSig1 = 0;
+53 −96
Original line number Diff line number Diff line
@@ -79,17 +79,6 @@ this code that are retained.
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/*----------------------------------------------------------------------------
| This macro tests for minimum version of the GNU C compiler.
*----------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#else
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#endif


/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
@@ -340,15 +329,30 @@ static inline void
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void
 shortShift128Left(
     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
                                     uint64_t *z0Ptr, uint64_t *z1Ptr)
{

    *z1Ptr = a1 << count;
    *z0Ptr =
        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
| number of bits given in `count'.  Any bits shifted off are lost.  The value
| of `count' may be greater than 64.  The result is broken into two 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
                                uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    if (count < 64) {
        *z1Ptr = a1 << count;
        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
    } else {
        *z1Ptr = 0;
        *z0Ptr = a1 << (count - 64);
    }
}

/*----------------------------------------------------------------------------
@@ -630,8 +634,36 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
 *
 * Licensed under the GPLv2/LGPLv3
 */
static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__)
    /* Need to use a TImode type to get an even register pair for DLGR.  */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64)
    /* From Power ISA 3.0B, programming note for divdeu.  */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    if (R >= d || R < r2) { /* overflow implies R > d */
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
@@ -669,8 +701,9 @@ static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
    }
    r0 -= m;

    /* Return remainder in LSB */
    return (q1 << 32) | q0 | (r0 != 0);
    *r = r0;
    return (q1 << 32) | q0;
#endif
}

/*----------------------------------------------------------------------------
@@ -712,82 +745,6 @@ static inline uint32_t estimateSqrt32(int aExp, uint32_t a)

}

/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'.  If `a' is zero, 32 is returned.
*----------------------------------------------------------------------------*/

static inline int8_t countLeadingZeros32(uint32_t a)
{
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
    if (a) {
        return __builtin_clz(a);
    } else {
        return 32;
    }
#else
    static const int8_t countLeadingZerosHigh[] = {
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    int8_t shiftCount;

    shiftCount = 0;
    if ( a < 0x10000 ) {
        shiftCount += 16;
        a <<= 16;
    }
    if ( a < 0x1000000 ) {
        shiftCount += 8;
        a <<= 8;
    }
    shiftCount += countLeadingZerosHigh[ a>>24 ];
    return shiftCount;
#endif
}

/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'.  If `a' is zero, 64 is returned.
*----------------------------------------------------------------------------*/

static inline int8_t countLeadingZeros64(uint64_t a)
{
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
    if (a) {
        return __builtin_clzll(a);
    } else {
        return 64;
    }
#else
    int8_t shiftCount;

    shiftCount = 0;
    if ( a < ( (uint64_t) 1 )<<32 ) {
        shiftCount += 32;
    }
    else {
        a >>= 32;
    }
    shiftCount += countLeadingZeros32( a );
    return shiftCount;
#endif
}

/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+0 −1
Original line number Diff line number Diff line
@@ -535,7 +535,6 @@ float128 float64_to_float128(float64, float_status *status);
| Software IEC/IEEE double-precision operations.
*----------------------------------------------------------------------------*/
float64 float64_round_to_int(float64, float_status *status);
float64 float64_trunc_to_int(float64, float_status *status);
float64 float64_add(float64, float64, float_status *status);
float64 float64_sub(float64, float64, float_status *status);
float64 float64_mul(float64, float64, float_status *status);
Loading