target/i386: reimplement fyl2x using floatx80 operations (1f18a1e6) · Commits · SUMMER2020 / students / proj-2021291

target/i386/fpu_helper.c

+262 −111

Original line number	Diff line number	Diff line
		@@ -1223,21 +1223,6 @@ void helper_f2xm1(CPUX86State *env)
		merge_exception_flags(env, old_flags);
		}

		void helper_fyl2x(CPUX86State *env)
		{
		double fptemp = floatx80_to_double(env, ST0);

		if (fptemp > 0.0) {
		fptemp = log(fptemp) / log(2.0); /* log2(ST) */
		fptemp *= floatx80_to_double(env, ST1);
		ST1 = double_to_floatx80(env, fptemp);
		fpop(env);
		} else {
		env->fpus &= ~0x4700;
		env->fpus \|= 0x400;
		}
		}

		void helper_fptan(CPUX86State *env)
		{
		double fptemp = floatx80_to_double(env, ST0);
		@@ -1395,6 +1380,118 @@ void helper_fprem(CPUX86State *env)
		#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
		#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

		/*
		* Compute an approximation of log2(1+arg), where 1+arg is in the
		* interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
		* function is called, rounding precision is set to 80 and the
		* round-to-nearest mode is in effect. arg must not be exactly zero,
		* and must not be so close to zero that underflow might occur.
		*/
		static void helper_fyl2x_common(CPUX86State env, floatx80 arg, int32_t exp,
		uint64_t sig0, uint64_t sig1)
		{
		uint64_t arg0_sig = extractFloatx80Frac(arg);
		int32_t arg0_exp = extractFloatx80Exp(arg);
		bool arg0_sign = extractFloatx80Sign(arg);
		bool asign;
		int32_t dexp, texp, aexp;
		uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
		uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
		uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
		floatx80 t2, accum;

		/*
		* Compute an approximation of arg/(2+arg), with extra precision,
		* as the argument to a polynomial approximation. The extra
		* precision is only needed for the first term of the
		* approximation, with subsequent terms being significantly
		* smaller; the approximation only uses odd exponents, and the
		* square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
		*/
		if (arg0_sign) {
		dexp = 0x3fff;
		shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
		sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
		} else {
		dexp = 0x4000;
		shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
		dsig0 \|= 0x8000000000000000ULL;
		}
		texp = arg0_exp - dexp + 0x3ffe;
		rsig0 = arg0_sig;
		rsig1 = 0;
		rsig2 = 0;
		if (dsig0 <= rsig0) {
		shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
		++texp;
		}
		tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
		mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
		sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
		&rsig0, &rsig1, &rsig2);
		while ((int64_t) rsig0 < 0) {
		--tsig0;
		add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
		&rsig0, &rsig1, &rsig2);
		}
		tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
		/*
		* No need to correct any estimation error in tsig1; even with
		* such error, it is accurate enough. Now compute the square of
		* that approximation.
		*/
		mul128To256(tsig0, tsig1, tsig0, tsig1,
		&t2sig0, &t2sig1, &t2sig2, &t2sig3);
		t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
		t2sig0, t2sig1, &env->fp_status);

		/* Compute the lower parts of the polynomial expansion. */
		accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

		/*
		* The full polynomial expansion is fyl2x_coeff_0 + accum (where
		* accum has much lower magnitude, and so, in particular, carry
		* out of the addition is not possible), multiplied by t. (This
		* expansion is only accurate to about 70 bits, not 128 bits.)
		*/
		aexp = extractFloatx80Exp(fyl2x_coeff_0);
		asign = extractFloatx80Sign(fyl2x_coeff_0);
		shift128RightJamming(extractFloatx80Frac(accum), 0,
		aexp - extractFloatx80Exp(accum),
		&asig0, &asig1);
		bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
		bsig1 = 0;
		if (asign == extractFloatx80Sign(accum)) {
		add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
		} else {
		sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
		}
		/* Multiply by t to compute the required result. */
		mul128To256(asig0, asig1, tsig0, tsig1,
		&asig0, &asig1, &asig2, &asig3);
		aexp += texp - 0x3ffe;
		*exp = aexp;
		*sig0 = asig0;
		*sig1 = asig1;
		}

		void helper_fyl2xp1(CPUX86State *env)
		{
		uint8_t old_flags = save_exception_flags(env);
		@@ -1462,109 +1559,161 @@ void helper_fyl2xp1(CPUX86State *env)
		ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
		sig0, sig1, &env->fp_status);
		} else {
		bool asign;
		uint32_t dexp, texp, aexp;
		uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
		uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
		uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
		floatx80 t2, accum;
		int32_t aexp;
		uint64_t asig0, asig1, asig2;
		FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
		signed char save_prec = env->fp_status.floatx80_rounding_precision;
		env->fp_status.float_rounding_mode = float_round_nearest_even;
		env->fp_status.floatx80_rounding_precision = 80;

		helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
		/*
		* Compute an approximation of ST0/(2+ST0), with extra
		* precision, as the argument to a polynomial approximation.
		* The extra precision is only needed for the first term of
		* the approximation, with subsequent terms being
		* significantly smaller; the approximation only uses odd
		* exponents, and the square of ST0/(2+ST0) is at most
		* 17-12*sqrt(2) = 0.029....
		* Multiply by the second argument to compute the required
		* result.
		*/
		if (arg0_sign) {
		dexp = 0x3fff;
		shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
		sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
		} else {
		dexp = 0x4000;
		shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
		dsig0 \|= 0x8000000000000000ULL;
		if (arg1_exp == 0) {
		normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
		}
		texp = arg0_exp - dexp + 0x3ffe;
		rsig0 = arg0_sig;
		rsig1 = 0;
		rsig2 = 0;
		if (dsig0 <= rsig0) {
		shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
		++texp;
		mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
		aexp += arg1_exp - 0x3ffe;
		/* This result is inexact. */
		asig1 \|= 1;
		env->fp_status.float_rounding_mode = save_mode;
		ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
		asig0, asig1, &env->fp_status);
		env->fp_status.floatx80_rounding_precision = save_prec;
		}
		tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
		mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
		sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
		&rsig0, &rsig1, &rsig2);
		while ((int64_t) rsig0 < 0) {
		--tsig0;
		add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
		&rsig0, &rsig1, &rsig2);
		fpop(env);
		merge_exception_flags(env, old_flags);
		}
		tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
		/*
		* No need to correct any estimation error in tsig1; even with
		* such error, it is accurate enough. Now compute the square
		* of that approximation.
		*/
		mul128To256(tsig0, tsig1, tsig0, tsig1,
		&t2sig0, &t2sig1, &t2sig2, &t2sig3);
		t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
		t2sig0, t2sig1, &env->fp_status);

		/* Compute the lower parts of the polynomial expansion. */
		accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
		accum = floatx80_mul(accum, t2, &env->fp_status);
		accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
		void helper_fyl2x(CPUX86State *env)
		{
		uint8_t old_flags = save_exception_flags(env);
		uint64_t arg0_sig = extractFloatx80Frac(ST0);
		int32_t arg0_exp = extractFloatx80Exp(ST0);
		bool arg0_sign = extractFloatx80Sign(ST0);
		uint64_t arg1_sig = extractFloatx80Frac(ST1);
		int32_t arg1_exp = extractFloatx80Exp(ST1);
		bool arg1_sign = extractFloatx80Sign(ST1);

		/*
		* The full polynomial expansion is fyl2x_coeff_0 + accum
		* (where accum has much lower magnitude, and so, in
		* particular, carry out of the addition is not possible),
		* multiplied by t. (This expansion is only accurate to about
		* 70 bits, not 128 bits.)
		*/
		aexp = extractFloatx80Exp(fyl2x_coeff_0);
		asign = extractFloatx80Sign(fyl2x_coeff_0);
		shift128RightJamming(extractFloatx80Frac(accum), 0,
		aexp - extractFloatx80Exp(accum),
		if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
		float_raise(float_flag_invalid, &env->fp_status);
		ST1 = floatx80_silence_nan(ST0, &env->fp_status);
		} else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
		float_raise(float_flag_invalid, &env->fp_status);
		ST1 = floatx80_silence_nan(ST1, &env->fp_status);
		} else if (floatx80_invalid_encoding(ST0) \|\|
		floatx80_invalid_encoding(ST1)) {
		float_raise(float_flag_invalid, &env->fp_status);
		ST1 = floatx80_default_nan(&env->fp_status);
		} else if (floatx80_is_any_nan(ST0)) {
		ST1 = ST0;
		} else if (floatx80_is_any_nan(ST1)) {
		/* Pass this NaN through. */
		} else if (arg0_sign && !floatx80_is_zero(ST0)) {
		float_raise(float_flag_invalid, &env->fp_status);
		ST1 = floatx80_default_nan(&env->fp_status);
		} else if (floatx80_is_infinity(ST1)) {
		FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
		&env->fp_status);
		switch (cmp) {
		case float_relation_less:
		ST1 = floatx80_chs(ST1);
		break;
		case float_relation_greater:
		/* Result is infinity of the same sign as ST1. */
		break;
		default:
		float_raise(float_flag_invalid, &env->fp_status);
		ST1 = floatx80_default_nan(&env->fp_status);
		break;
		}
		} else if (floatx80_is_infinity(ST0)) {
		if (floatx80_is_zero(ST1)) {
		float_raise(float_flag_invalid, &env->fp_status);
		ST1 = floatx80_default_nan(&env->fp_status);
		} else if (arg1_sign) {
		ST1 = floatx80_chs(ST0);
		} else {
		ST1 = ST0;
		}
		} else if (floatx80_is_zero(ST0)) {
		if (floatx80_is_zero(ST1)) {
		float_raise(float_flag_invalid, &env->fp_status);
		ST1 = floatx80_default_nan(&env->fp_status);
		} else {
		/* Result is infinity with opposite sign to ST1. */
		float_raise(float_flag_divbyzero, &env->fp_status);
		ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
		0x8000000000000000ULL);
		}
		} else if (floatx80_is_zero(ST1)) {
		if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
		ST1 = floatx80_chs(ST1);
		}
		/* Otherwise, ST1 is already the correct result. */
		} else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
		if (arg1_sign) {
		ST1 = floatx80_chs(floatx80_zero);
		} else {
		ST1 = floatx80_zero;
		}
		} else {
		int32_t int_exp;
		floatx80 arg0_m1;
		FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
		signed char save_prec = env->fp_status.floatx80_rounding_precision;
		env->fp_status.float_rounding_mode = float_round_nearest_even;
		env->fp_status.floatx80_rounding_precision = 80;

		if (arg0_exp == 0) {
		normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
		}
		if (arg1_exp == 0) {
		normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
		}
		int_exp = arg0_exp - 0x3fff;
		if (arg0_sig > 0xb504f333f9de6484ULL) {
		++int_exp;
		}
		arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
		&env->fp_status),
		floatx80_one, &env->fp_status);
		if (floatx80_is_zero(arg0_m1)) {
		/* Exact power of 2; multiply by ST1. */
		env->fp_status.float_rounding_mode = save_mode;
		ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
		ST1, &env->fp_status);
		} else {
		bool asign = extractFloatx80Sign(arg0_m1);
		int32_t aexp;
		uint64_t asig0, asig1, asig2;
		helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
		if (int_exp != 0) {
		bool isign = (int_exp < 0);
		int32_t iexp;
		uint64_t isig;
		int shift;
		int_exp = isign ? -int_exp : int_exp;
		shift = clz32(int_exp) + 32;
		isig = int_exp;
		isig <<= shift;
		iexp = 0x403e - shift;
		shift128RightJamming(asig0, asig1, iexp - aexp,
		&asig0, &asig1);
		bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
		bsig1 = 0;
		if (asign == extractFloatx80Sign(accum)) {
		add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
		if (asign == isign) {
		add128(isig, 0, asig0, asig1, &asig0, &asig1);
		} else {
		sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
		sub128(isig, 0, asig0, asig1, &asig0, &asig1);
		}
		aexp = iexp;
		asign = isign;
		}
		/*
		* Multiply by t and by the second argument to compute the
		* required result.
		* Multiply by the second argument to compute the required
		* result.
		*/
		mul128To256(asig0, asig1, tsig0, tsig1,
		&asig0, &asig1, &asig2, &asig3);
		aexp += texp - 0x3ffe;
		if (arg1_exp == 0) {
		normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
		}
		@@ -1573,8 +1722,10 @@ void helper_fyl2xp1(CPUX86State *env)
		/* This result is inexact. */
		asig1 \|= 1;
		env->fp_status.float_rounding_mode = save_mode;
		ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
		ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
		asig0, asig1, &env->fp_status);
		}

		env->fp_status.floatx80_rounding_precision = save_prec;
		}
		fpop(env);

tests/tcg/i386/test-i386-fyl2x.c

0 → 100644

+1161 −0

File added.

Preview size limit exceeded, changes collapsed.