Files
gcc/libgcc/soft-fp/floatbitintdd.c
Jakub Jelinek a57ea0a189 libgcc: Add DPD support + fix big-endian support of _BitInt <-> dfp conversions
The following patch fixes
FAIL: gcc.dg/dfp/bitint-1.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-2.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-3.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-4.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-5.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-6.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-8.c (test for excess errors)
FAIL: gcc.dg/dfp/int128-1.c (test for excess errors)
FAIL: gcc.dg/dfp/int128-2.c (test for excess errors)
FAIL: gcc.dg/dfp/int128-4.c (test for excess errors)
on s390x-linux (with the 3 not yet posted patches).

The patch does multiple things:
1) the routines were written for the DFP BID (binary integer decimal)
   format which is used on all arches but powerpc*/s390* (those use
   DPD - densely packed decimal format); as most of the code is actually
   the same for both BID and DPD formats, I haven't copied the sources
   + slightly modified them, but added the DPD support directly, + renaming
   of the exported symbols from __bid_* prefixed to __dpd_* prefixed that
   GCC expects on the DPD targets
2) while testing that I've found some big-endian issues in the existing
   support
3) testing also revealed that in some cases __builtin_clzll (~msb) was
   called with msb set to all ones, so invoking UB; apparently on aarch64
   and x86 we were lucky and got some value that happened to work well,
   but that wasn't the case on s390x

For 1), the patch uses two ~ 2KB tables to speed up the decoding/encoding.
I haven't found such tables in what is added into libgcc.a, though they
are in libdecnumber/bid/bid2dpd_dpd2bid.h, but there they are just huge
and next to other huge tables - there is d2b which is like __dpd_d2bbitint
in the patch but it uses 64-bit entries rather than 16-bit, then there is
d2b2 with 64-bit entries like in d2b all multiplied by 1000, then d2b3
similarly multiplied by 1000000, then d2b4 similarly multiplied by
1000000000, then d2b5 similarly multiplied by 1000000000000ULL and
d2b6 similarly multiplied by 1000000000000000ULL.  Arguably it can
save some of the multiplications, but on the other side accesses memory
which is unlikely in the caches, and the 2048 bytes in the patch vs.
24 times more for d2b is IMHO significant.
For b2d, libdecnumber/bid/bid2dpd_dpd2bid.h has again b2d table like
__dpd_b2dbitint in the patch, except that it has 64-bit entries rather
than 16-bit (this time 1000 entries), but then has b2d2 which has the
same entries shifted left by 10, then b2d3 shifted left by 20, b2d4 shifted
left by 30 and b2d5 shifted left by 40.  I can understand for d2b paying
memory cost to speed up multiplications, but don't understand paying
extra 4 * 8 * 1000 bytes (+ 6 * 1000 bytes for b2d not using ushort)
just to avoid shifts.

2025-05-27  Jakub Jelinek  <jakub@redhat.com>

	* config/t-softfp (softfp_bid_list): Don't guard with
	$(enable_decimal_float) == bid.
	* soft-fp/bitint.h (__bid_pow10bitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_pow10bitint.
	(__dpd_d2bbitint, __dpd_b2dbitint): Declare.
	* soft-fp/bitintpow10.c (__dpd_d2bbitint, __dpd_b2dbitint): New
	variables.
	* soft-fp/fixsdbitint.c (__bid_fixsdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
	Add DPD support.  Fix big-endian support.
	* soft-fp/fixddbitint.c (__bid_fixddbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
	Add DPD support.  Fix big-endian support.
	* soft-fp/fixtdbitint.c (__bid_fixtdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
	Add DPD support.  Fix big-endian support.
	* soft-fp/fixsdti.c (__bid_fixsdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
	(__bid_fixsdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_fixsdti.
	* soft-fp/fixddti.c (__bid_fixddbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
	(__bid_fixddti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_fixddti.
	* soft-fp/fixtdti.c (__bid_fixtdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
	(__bid_fixtdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_fixtdti.
	* soft-fp/fixunssdti.c (__bid_fixsdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
	(__bid_fixunssdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_fixunssdti.
	* soft-fp/fixunsddti.c (__bid_fixddbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
	(__bid_fixunsddti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_fixunsddti.
	* soft-fp/fixunstdti.c (__bid_fixtdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
	(__bid_fixunstdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_fixunstdti.
	* soft-fp/floatbitintsd.c (__bid_floatbitintsd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintsd.
	Add DPD support.  Avoid calling __builtin_clzll with 0 argument.  Fix
	big-endian support.
	* soft-fp/floatbitintdd.c (__bid_floatbitintdd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintdd.
	Add DPD support.  Avoid calling __builtin_clzll with 0 argument.  Fix
	big-endian support.
	* soft-fp/floatbitinttd.c (__bid_floatbitinttd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitinttd.
	Add DPD support.  Avoid calling __builtin_clzll with 0 argument.  Fix
	big-endian support.
	* soft-fp/floattisd.c (__bid_floatbitintsd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintsd.
	(__bid_floattisd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_floattisd.
	* soft-fp/floattidd.c (__bid_floatbitintdd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintdd.
	(__bid_floattidd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_floattidd.
	* soft-fp/floattitd.c (__bid_floatbitinttd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitinttd.
	(__bid_floattitd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_floattitd.
	* soft-fp/floatuntisd.c (__bid_floatbitintsd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintsd.
	(__bid_floatuntisd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_floatuntisd.
	* soft-fp/floatuntidd.c (__bid_floatbitintdd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintdd.
	(__bid_floatuntidd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_floatuntidd.
	* soft-fp/floatuntitd.c (__bid_floatbitinttd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitinttd.
	(__bid_floatuntitd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_floatuntitd.
2025-05-27 23:10:08 +02:00

304 lines
8.8 KiB
C

/* Software floating-point emulation.
Convert a _BitInt to _Decimal64.
Copyright (C) 2023-2025 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "soft-fp.h"
#include "bitint.h"
#ifdef __BITINT_MAXWIDTH__
/* When the BID decimal format is not enabled, build the routine under
the __dpd_ name instead; the rest of this file keeps spelling it
__bid_floatbitintdd. */
#if !defined (ENABLE_DECIMAL_BID_FORMAT)
# define __bid_floatbitintdd __dpd_floatbitintdd
#endif
extern _Decimal64 __bid_floatbitintdd (const UBILtype *i, SItype iprec);
/* Convert the _BitInt stored in the limb array I, whose precision is
described by IPREC, to _Decimal64 and return it. After the call to
bitint_reduce_prec a negative IPREC is treated below as a negative
value: it selects sign extension of the top limb, negation of the
magnitude and the sign bit of the result.
NOTE(review): bitint_reduce_prec, BITINT_END, bitint_negate,
__bid_pow10bitint and __divmodbitint4 are defined outside this file.
BITINT_END (be, le) presumably picks the limb index for big- or
little-endian limb order - confirm against bitint.h. */
_Decimal64
__bid_floatbitintdd (const UBILtype *i, SItype iprec)
{
iprec = bitint_reduce_prec (&i, iprec);
/* Absolute precision, the number of limbs it occupies, and the limb
that is read as the most significant one. */
USItype aiprec = iprec < 0 ? -iprec : iprec;
USItype in = (aiprec + BIL_TYPE_SIZE - 1) / BIL_TYPE_SIZE;
USItype idx = BITINT_END (0, in - 1);
UBILtype msb = i[idx];
UDItype mantissa;
SItype exponent = 0;
/* Becomes non-zero once non-zero digits have been discarded; consumed
by the rounding step at the end. */
UBILtype inexact = 0;
/* u holds the result and ui the rounding addend; the unions give
access to the raw encoding bits. */
union { _Decimal64 d; UDItype u; } u, ui;
/* If the top limb is only partially used, zero-extend (positive) or
sign-extend (otherwise) it to a full limb. */
if (aiprec % BIL_TYPE_SIZE)
{
if (iprec > 0)
msb &= ((UBILtype) 1 << (aiprec % BIL_TYPE_SIZE)) - 1;
else
msb |= (UBILtype) -1 << (aiprec % BIL_TYPE_SIZE);
}
/* Recompute aiprec as the number of bits actually needed: for
negative values up to the highest clear bit of the top limb plus one
sign bit, otherwise up to the highest set bit. An all-ones or zero
top limb is special-cased so that __builtin_clzll is never called
with a 0 argument. */
if (iprec < 0)
{
SItype n;
if (msb == ~(UBILtype) 0)
n = 1;
else
n = sizeof (0ULL) * __CHAR_BIT__ + 1 - __builtin_clzll (~msb);
aiprec = (in - 1) * BIL_TYPE_SIZE + n;
}
else if (msb == 0)
aiprec = 1;
else
{
SItype n = sizeof (0ULL) * __CHAR_BIT__ - __builtin_clzll (msb);
aiprec = (in - 1) * BIL_TYPE_SIZE + n;
}
/* Number of bits in (_BitInt(2048)) 9999999999999999e+369DD. */
if (aiprec > 1279 + (iprec < 0))
{
/* Overflow path, also entered by goto once the final decimal
exponent is known to exceed 369. Doubling 9000000000000000e+369DD
exceeds the largest finite value named above, so the result is
produced by the addition itself. The empty asms presumably keep the
compiler from folding that addition at compile time, so that it is
performed (and can raise exceptions) at run time. */
ovf:
if (iprec < 0)
u.d = -9000000000000000e+369DD;
else
u.d = 9000000000000000e+369DD;
__asm ("" : "+g" (u.d));
u.d += u.d;
__asm ("" : "+g" (u.d));
goto done;
}
/* Bit precision of 9999999999999999uwb. */
if (aiprec >= 54)
{
USItype pow10_limbs, q_limbs, q2_limbs, j;
USItype exp_bits = 0, e;
UDItype m;
UBILtype *buf;
/* First do a possibly large division, by a power of ten kept small
enough that we only need to check its remainder for 0 or non-0,
and then we'll do a further division. */
if (aiprec >= 54 + 4 + 10)
{
exp_bits = (aiprec - 54 - 4) / 10;
exponent = exp_bits * 3;
/* Upper estimate for pow10 (exponent) bits. */
exp_bits = exp_bits * 10 - exp_bits / 30;
}
pow10_limbs = (exp_bits + BIL_TYPE_SIZE - 1) / BIL_TYPE_SIZE;
/* 72 is the highest number of quotient bits needed on
aiprec range of [68, 1279]. E.g. if aiprec is 1277,
exponent will be 363 and exp_bits 1206. 1277 - 1206 + 1
is 72. Unfortunately that means the result doesn't fit into
UDItype... */
q_limbs = (72 + BIL_TYPE_SIZE - 1) / BIL_TYPE_SIZE;
q2_limbs = 64 / BIL_TYPE_SIZE;
/* Scratch layout, in limbs, in this order:
q_limbs      quotient of the first division (or a copy of the input)
pow10_limbs  10^exponent
pow10_limbs  remainder of the first division
1            10^e, the divisor of the second division
q2_limbs     quotient of the second division
1            remainder of the second division. */
buf = __builtin_alloca ((q_limbs + pow10_limbs * 2 + q2_limbs + 2)
* sizeof (UBILtype));
if (exponent)
{
/* Divide the input by 10^exponent; a negative quotient is negated to
get the magnitude, and the remainder limbs are OR-ed together only to
learn whether anything non-zero was discarded. */
__bid_pow10bitint (buf + q_limbs, exp_bits, exponent);
__divmodbitint4 (buf, q_limbs * BIL_TYPE_SIZE,
buf + q_limbs + pow10_limbs,
pow10_limbs * BIL_TYPE_SIZE,
i, iprec < 0 ? -aiprec : aiprec,
buf + q_limbs, exp_bits);
if (iprec < 0)
bitint_negate (buf + BITINT_END (q_limbs - 1, 0),
buf + BITINT_END (q_limbs - 1, 0), q_limbs);
inexact = buf[q_limbs + pow10_limbs];
for (j = 1; j < pow10_limbs; ++j)
inexact |= buf[q_limbs + pow10_limbs + j];
}
else
{
/* No first division: copy the input into the quotient slot, using the
extended top limb msb, negate it if negative and zero the remaining
quotient limbs. */
__builtin_memcpy (buf + BITINT_END (q_limbs - in + 1, 0),
i + BITINT_END (1, 0),
(in - 1) * sizeof (UBILtype));
buf[BITINT_END (q_limbs - in, in - 1)] = msb;
if (iprec < 0)
bitint_negate (buf + BITINT_END (q_limbs - 1, 0),
buf + BITINT_END (q_limbs - 1, 0), in);
if (q_limbs > in)
__builtin_memset (buf + BITINT_END (0, in), '\0',
(q_limbs - in) * sizeof (UBILtype));
}
/* Pick e, the number of further decimal digits to drop so that the
quotient fits into 16 digits. m is 64 bits of the quotient and the
limb at BITINT_END (0, q_limbs - 1) is the remaining one, compared
below against the upper parts of 10^21 and 10^20. */
e = 0;
#if BIL_TYPE_SIZE == 64
m = buf[BITINT_END (1, 0)];
#elif BIL_TYPE_SIZE == 32
m = (UDItype) buf[1] << 32 | buf[BITINT_END (2, 0)];
#else
# error Unsupported BIL_TYPE_SIZE
#endif
if (buf[BITINT_END (0, q_limbs - 1)])
{
if (buf[BITINT_END (0, q_limbs - 1)] > 0x5)
{
/* 1000000000000000000000wb */
if (buf[BITINT_END (0, q_limbs - 1)] > 0x36
|| (buf[BITINT_END (0, q_limbs - 1)] == 0x36
&& m >= (UDItype) 0x35c9adc5dea00000))
e = 6;
else
e = 5;
}
/* 100000000000000000000wb */
else if (buf[BITINT_END (0, q_limbs - 1)] == 0x5
&& m >= (UDItype) 0x6bc75e2d63100000)
e = 5;
else
e = 4;
}
else if (m >= (UDItype) 1000000000000000000)
{
if (m >= (UDItype) 10000000000000000000ULL)
e = 4;
else
e = 3;
}
else if (m >= (UDItype) 100000000000000000)
e = 2;
else if (m >= (UDItype) 10000000000000000)
e = 1;
exponent += e;
/* 369 is the exponent of the largest value named in the overflow
check above. */
if (exponent > 369)
goto ovf;
if (e)
{
UBILtype rem, half;
/* Second division: quotient / 10^e. half is 10^e / 2 and rem the
remainder of this division. */
__bid_pow10bitint (buf + q_limbs + pow10_limbs * 2,
BIL_TYPE_SIZE, e);
__divmodbitint4 (buf + q_limbs + pow10_limbs * 2 + 1,
q2_limbs * BIL_TYPE_SIZE,
buf + q_limbs + pow10_limbs * 2 + 1 + q2_limbs,
BIL_TYPE_SIZE,
buf, q_limbs * BIL_TYPE_SIZE,
buf + q_limbs + pow10_limbs * 2, BIL_TYPE_SIZE);
half = buf[q_limbs + pow10_limbs * 2] / 2;
rem = buf[q_limbs + pow10_limbs * 2 + 1 + q2_limbs];
if (inexact)
{
/* If first division discovered some non-0 digits
and this second division is by 10, e.g.
for XXXXXX5499999999999 or XXXXXX5000000000001
if first division is by 10^12 and second by 10^1,
doing rem |= 1 wouldn't change the 5. Similarly
for rem 4 doing rem |= 1 would change it to 5,
but we don't want to change it in that case. */
if (e == 1)
{
if (rem == 5)
rem = 6;
else if (rem != 4)
rem |= 1;
}
else
rem |= 1;
}
/* Set inexact to 0, 1, 2, 3 depending on if remainder
of the divisions is exact 0, smaller than 10^exponent / 2,
exactly 10^exponent / 2 or greater than that. */
if (rem >= half)
inexact = 2 + (rem > half);
else
inexact = (rem != 0);
/* The coefficient is the 64-bit quotient of the second division. */
#if BIL_TYPE_SIZE == 64
mantissa = buf[q_limbs + pow10_limbs * 2 + 1];
#else
mantissa
= ((UDItype)
buf[q_limbs + pow10_limbs * 2 + 1 + BITINT_END (0, 1)] << 32
| buf[q_limbs + pow10_limbs * 2 + 1 + BITINT_END (1, 0)]);
#endif
}
else
#if BIL_TYPE_SIZE == 64
mantissa = buf[BITINT_END (1, 0)];
#else
mantissa = (UDItype) buf[1] << 32 | buf[BITINT_END (2, 0)];
#endif
}
else
{
/* Fewer than 54 bits: no division is needed, the magnitude of the
value is used directly as the coefficient with a zero exponent. */
#if BIL_TYPE_SIZE == 64
mantissa = msb;
#else
if (in == 1)
mantissa = iprec < 0 ? (UDItype) (BILtype) msb : (UDItype) msb;
else
mantissa = (UDItype) msb << 32 | i[BITINT_END (1, 0)];
#endif
if (iprec < 0)
mantissa = -mantissa;
}
/* Bias the exponent (398 is the decimal64 exponent bias) and assemble
the sign, exponent and coefficient into the raw encoding. */
exponent += 398;
#ifdef ENABLE_DECIMAL_BID_FORMAT
/* A coefficient of 2^53 or more does not fit below the exponent of
the plain form (exponent at bit 53); it uses the form with bits 61
and 62 both set, the exponent at bit 51 and bit 53 of the coefficient
dropped. */
if (mantissa >= (UDItype) 0x20000000000000)
u.u = (((((iprec < 0) << 2) | (UDItype) 3) << 61)
| (((UDItype) exponent) << 51)
| (mantissa ^ (UDItype) 0x20000000000000));
else
u.u = ((((UDItype) (iprec < 0)) << 63)
| (((UDItype) exponent) << 53)
| mantissa);
#else
/* DPD: encode the low 15 decimal digits as five 10-bit groups, three
digits (value % 1000) per __dpd_b2dbitint lookup; what is left in u.u
afterwards is the leading digit. */
u.u = mantissa;
mantissa = __dpd_b2dbitint[u.u % 1000];
u.u /= 1000;
mantissa |= ((UDItype) __dpd_b2dbitint[u.u % 1000]) << 10;
u.u /= 1000;
mantissa |= ((UDItype) __dpd_b2dbitint[u.u % 1000]) << 20;
u.u /= 1000;
mantissa |= ((UDItype) __dpd_b2dbitint[u.u % 1000]) << 30;
u.u /= 1000;
mantissa |= ((UDItype) __dpd_b2dbitint[u.u % 1000]) << 40;
u.u /= 1000;
/* The low 8 exponent bits always go to bits 50-57. For a leading
digit of 8 or more, bits 61-62 are set, the top two exponent bits go
to bits 59-60 and only the low bit of the digit is stored at bit 58;
otherwise the top two exponent bits go to bits 61-62 and the digit to
bits 58-60. */
if (u.u >= 8)
u.u = (((((iprec < 0) << 2) | (UDItype) 3) << 61)
| (((UDItype) exponent & (3 << 8)) << 51)
| ((u.u & 1) << 58)
| (((UDItype) exponent & 255) << 50)
| mantissa);
else
u.u = ((((UDItype) (iprec < 0)) << 63)
| (((UDItype) exponent & (3 << 8)) << 53)
| (u.u << 58)
| (((UDItype) exponent & 255) << 50)
| mantissa);
#endif
/* If digits were discarded, build ui with the same sign, an exponent
one lower and coefficient inexact + 3, i.e. 4, 5 or 6 for a discarded
part below, exactly at or above half a unit of u's last digit, and add
it to u so that the decimal addition performs the rounding. The empty
asms presumably keep the compiler from evaluating the addition at
compile time. */
if (inexact)
{
ui.u = ((((UDItype) (iprec < 0)) << 63)
#ifdef ENABLE_DECIMAL_BID_FORMAT
| (((UDItype) (exponent - 1)) << 53)
#else
| (((UDItype) (exponent - 1) & (3 << 8)) << 53)
| (((UDItype) (exponent - 1) & 255) << 50)
#endif
| (inexact + 3));
__asm ("" : "+g" (u.d));
__asm ("" : "+g" (ui.d));
u.d += ui.d;
__asm ("" : "+g" (u.d));
}
done:
return u.d;
}
#endif