Files
gcc-reflection/libgcc/soft-fp/bitint.h
Jakub Jelinek a57ea0a189 libgcc: Add DPD support + fix big-endian support of _BitInt <-> dfp conversions
The following patch fixes
FAIL: gcc.dg/dfp/bitint-1.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-2.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-3.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-4.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-5.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-6.c (test for excess errors)
FAIL: gcc.dg/dfp/bitint-8.c (test for excess errors)
FAIL: gcc.dg/dfp/int128-1.c (test for excess errors)
FAIL: gcc.dg/dfp/int128-2.c (test for excess errors)
FAIL: gcc.dg/dfp/int128-4.c (test for excess errors)
on s390x-linux (with the 3 not yet posted patches).

The patch does multiple things:
1) the routines were written for the DFP BID (binary integer decimal)
   format which is used on all arches but powerpc*/s390* (those use
   DPD - densely packed decimal format); as most of the code is actually
   the same for both BID and DPD formats, I haven't copied the sources
   + slightly modified them, but added the DPD support directly, + renaming
   of the exported symbols from __bid_* prefixed to __dpd_* prefixed that
   GCC expects on the DPD targets
2) while testing that I've found some big-endian issues in the existing
   support
3) testing also revealed that in some cases __builtin_clzll (~msb) was
   called with msb set to all ones, so invoking UB; apparently on aarch64
   and x86 we were lucky and got some value that happened to work well,
   but that wasn't the case on s390x

For 1), the patch uses two ~ 2KB tables to speed up the decoding/encoding.
I haven't found such tables in what is added into libgcc.a, though they
are in libdecnumber/bid/bid2dpd_dpd2bid.h, but there they are just huge
and next to other huge tables - there is d2b which is like __dpd_d2bbitint
in the patch but it uses 64-bit entries rather than 16-bit, then there is
d2b2 with 64-bit entries like in d2b all multiplied by 1000, then d2b3
similarly multiplied by 1000000, then d2b4 similarly multiplied by
1000000000, then d2b5 similarly multiplied by 1000000000000ULL and
d2b6 similarly multiplied by 1000000000000000ULL.  Arguably it can
save some of the multiplications, but on the other hand it accesses memory
which is unlikely to be in the caches, and the 2048 bytes in the patch vs.
24 times more for d2b is IMHO significant.
For b2d, libdecnumber/bid/bid2dpd_dpd2bid.h has again b2d table like
__dpd_b2dbitint in the patch, except that it has 64-bit entries rather
than 16-bit (this time 1000 entries), but then has b2d2 which has the
same entries shifted left by 10, then b2d3 shifted left by 20, b2d4 shifted
left by 30 and b2d5 shifted left by 40.  I can understand for d2b paying
memory cost to speed up multiplications, but don't understand paying
extra 4 * 8 * 1000 bytes (+ 6 * 1000 bytes for b2d not using ushort)
just to avoid shifts.

2025-05-27  Jakub Jelinek  <jakub@redhat.com>

	* config/t-softfp (softfp_bid_list): Don't guard with
	$(enable_decimal_float) == bid.
	* soft-fp/bitint.h (__bid_pow10bitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_pow10bitint.
	(__dpd_d2bbitint, __dpd_b2dbitint): Declare.
	* soft-fp/bitintpow10.c (__dpd_d2bbitint, __dpd_b2dbitint): New
	variables.
	* soft-fp/fixsdbitint.c (__bid_fixsdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
	Add DPD support.  Fix big-endian support.
	* soft-fp/fixddbitint.c (__bid_fixddbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
	Add DPD support.  Fix big-endian support.
	* soft-fp/fixtdbitint.c (__bid_fixtdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
	Add DPD support.  Fix big-endian support.
	* soft-fp/fixsdti.c (__bid_fixsdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
	(__bid_fixsdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_fixsdti.
	* soft-fp/fixddti.c (__bid_fixddbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
	(__bid_fixddti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_fixddti.
	* soft-fp/fixtdti.c (__bid_fixtdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
	(__bid_fixtdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_fixtdti.
	* soft-fp/fixunssdti.c (__bid_fixsdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
	(__bid_fixunssdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_fixunssdti.
	* soft-fp/fixunsddti.c (__bid_fixddbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
	(__bid_fixunsddti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_fixunsddti.
	* soft-fp/fixunstdti.c (__bid_fixtdbitint): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
	(__bid_fixunstdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_fixunstdti.
	* soft-fp/floatbitintsd.c (__bid_floatbitintsd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintsd.
	Add DPD support.  Avoid calling __builtin_clzll with 0 argument.  Fix
	big-endian support.
	* soft-fp/floatbitintdd.c (__bid_floatbitintdd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintdd.
	Add DPD support.  Avoid calling __builtin_clzll with 0 argument.  Fix
	big-endian support.
	* soft-fp/floatbitinttd.c (__bid_floatbitinttd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitinttd.
	Add DPD support.  Avoid calling __builtin_clzll with 0 argument.  Fix
	big-endian support.
	* soft-fp/floattisd.c (__bid_floatbitintsd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintsd.
	(__bid_floattisd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_floattisd.
	* soft-fp/floattidd.c (__bid_floatbitintdd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintdd.
	(__bid_floattidd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_floattidd.
	* soft-fp/floattitd.c (__bid_floatbitinttd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitinttd.
	(__bid_floattitd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
	__dpd_floattitd.
	* soft-fp/floatuntisd.c (__bid_floatbitintsd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintsd.
	(__bid_floatuntisd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_floatuntisd.
	* soft-fp/floatuntidd.c (__bid_floatbitintdd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitintdd.
	(__bid_floatuntidd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_floatuntidd.
	* soft-fp/floatuntitd.c (__bid_floatbitinttd): For
	!defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_floatbitinttd.
	(__bid_floatuntitd): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
	to __dpd_floatuntitd.
2025-05-27 23:10:08 +02:00

364 lines
11 KiB
C

/* Software floating-point emulation.
Definitions for _BitInt implementation details.
Copyright (C) 2023-2025 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Include guard for this header. */
#ifndef GCC_SOFT_FP_BITINT_H
#define GCC_SOFT_FP_BITINT_H
/* Everything below is only provided when the compiler predefines
__BITINT_MAXWIDTH__. */
#ifdef __BITINT_MAXWIDTH__
/* Number of bytes in one _BitInt limb, derived from the limb width in
bits (__LIBGCC_BITINT_LIMB_WIDTH__). The chain below then selects,
for that limb size:
BIL_TYPE_SIZE - the limb width in bits,
BILtype - the signed integer type of that width,
UBILtype - the unsigned integer type of that width, declared with
the may_alias attribute. */
#define BIL_UNITS_PER_WORD (__LIBGCC_BITINT_LIMB_WIDTH__ / __CHAR_BIT__)
#if BIL_UNITS_PER_WORD == 8
/* 8-byte limbs. */
#define BIL_TYPE_SIZE (8 * __CHAR_BIT__)
#define BILtype DItype
typedef UDItype __attribute__ ((__may_alias__)) UBILtype;
#elif BIL_UNITS_PER_WORD == 4
/* 4-byte limbs. */
#define BIL_TYPE_SIZE (4 * __CHAR_BIT__)
#define BILtype SItype
typedef USItype __attribute__ ((__may_alias__)) UBILtype;
#elif BIL_UNITS_PER_WORD == 2
/* 2-byte limbs. */
#define BIL_TYPE_SIZE (2 * __CHAR_BIT__)
#define BILtype HItype
typedef UHItype __attribute__ ((__may_alias__)) UBILtype;
#else
/* Any other limb size falls back to single-byte limbs. */
#define BIL_TYPE_SIZE __CHAR_BIT__
#define BILtype QItype
typedef UQItype __attribute__ ((__may_alias__)) UBILtype;
#endif
/* If *P is zero or sign extended (the latter only for PREC < 0) from
some narrower _BitInt value, reduce precision.

PREC < 0 describes a signed value of -PREC bits, PREC > 0 an unsigned
value of PREC bits. *P points to the limb array; with big-endian limb
order the most significant limb is (*P)[0], otherwise it is the limb
with the highest index.

Returns the reduced precision in the same sign convention:
- a negative value if the input is signed and its sign bit is set;
-2 is returned when every examined bit is one (the value is -1);
- a positive value otherwise (a signed input whose sign bit is clear
is handled exactly like an unsigned one); 1 is returned when every
examined bit is zero.
Only whole redundant limbs (and a redundant partial most significant
limb) are dropped; the bits inside the remaining most significant limb
are not examined further.

With big-endian limb order *P is advanced past the dropped most
significant limbs; with little-endian limb order *P is left unchanged. */
static inline __attribute__((__always_inline__)) SItype
bitint_reduce_prec (const UBILtype **p, SItype prec)
{
UBILtype mslimb;
SItype i;
if (prec < 0)
{
/* Locate the most significant limb, i.e. the one holding the sign
bit at bit position -PREC - 1. */
#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
i = 0;
#else
i = ((USItype) -1 - prec) / BIL_TYPE_SIZE;
#endif
mslimb = (*p)[i];
/* Sign bit set: try to drop limbs that only replicate the sign. */
if (mslimb & ((UBILtype) 1 << (((USItype) -1 - prec) % BIL_TYPE_SIZE)))
{
/* Number of bits used in a partial most significant limb, or 0 if
the precision is a whole number of limbs. */
SItype n = ((USItype) -prec) % BIL_TYPE_SIZE;
if (n)
{
/* Sign extend the partial limb so it can be compared against
all ones. */
mslimb |= ((UBILtype) -1 << (((USItype) -1 - prec) % BIL_TYPE_SIZE));
if (mslimb == (UBILtype) -1)
{
/* The partial limb is redundant, drop its N bits. */
prec += n;
/* Nothing but one bits was seen and no limb is left. */
if (prec >= -1)
return -2;
#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
++*p;
#else
--i;
#endif
mslimb = (*p)[i];
/* From here on only whole limbs remain. */
n = 0;
}
}
/* Drop every further limb that consists only of one bits. */
while (mslimb == (UBILtype) -1)
{
prec += BIL_TYPE_SIZE;
if (prec >= -1)
return -2;
#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
++*p;
#else
--i;
#endif
mslimb = (*p)[i];
}
if (n == 0)
{
/* The current limb has its top bit clear, so one sign bit from the
limb above it is still needed: the precision grows by one bit
and, for big-endian, *P steps back to include that limb. */
if ((BILtype) mslimb >= 0)
{
#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
--*p;
#endif
return prec - 1;
}
}
return prec;
}
else
/* Sign bit clear: continue below as if the value were unsigned,
reusing MSLIMB and I computed above. */
prec = -prec;
}
else
{
/* Unsigned: locate the most significant limb. */
#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
i = 0;
#else
i = ((USItype) prec - 1) / BIL_TYPE_SIZE;
#endif
mslimb = (*p)[i];
}
/* Number of bits used in a partial most significant limb, or 0. */
SItype n = ((USItype) prec) % BIL_TYPE_SIZE;
if (n)
{
/* Ignore the bits above the precision in the partial limb. */
mslimb &= ((UBILtype) 1 << (((USItype) prec) % BIL_TYPE_SIZE)) - 1;
if (mslimb == 0)
{
/* The partial limb is zero, drop its N bits. */
prec -= n;
/* Every bit was zero and no limb is left. */
if (prec == 0)
return 1;
#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
++*p;
#else
--i;
#endif
mslimb = (*p)[i];
}
}
/* Drop every further limb that is entirely zero. */
while (mslimb == 0)
{
prec -= BIL_TYPE_SIZE;
if (prec == 0)
return 1;
#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
++*p;
#else
--i;
#endif
mslimb = (*p)[i];
}
return prec;
}
/* Helpers for walking a limb array independently of limb order.
   BITINT_INC is the index step from a limb to the next more
   significant one; BITINT_END (be, le) expands to its first argument
   for big-endian limb order and to its second one otherwise.  */
#if __LIBGCC_BITINT_ORDER__ != __ORDER_BIG_ENDIAN__
# define BITINT_INC 1
# define BITINT_END(be, le) (le)
#else
# define BITINT_INC -1
# define BITINT_END(be, le) (be)
#endif
/* Negate N limbs from S into D.  D and S should point to
   the least significant limb.

   The two's complement negation is computed limb by limb as ~S + 1,
   walking from the least to the most significant limb (step
   BITINT_INC) and propagating the carry.  N must be at least 1, as
   the loop body always runs once before N is tested.  Each source
   limb is read before the corresponding destination limb is
   written.  */
static inline __attribute__((__always_inline__)) void
bitint_negate (UBILtype *d, const UBILtype *s, SItype n)
{
  /* Carry into the current limb; the initial 1 is the "+ 1" of the
     two's complement.  */
  UBILtype c = 1;
  do
    {
      UBILtype sv = *s, lo;
      s += BITINT_INC;
      /* __builtin_add_overflow works on the mathematical values of its
	 operands.  If UBILtype is narrower than int, ~sv is computed
	 in (signed) int after integer promotion and would be negative,
	 yielding a wrong carry; the cast brings it back into the
	 range of a limb.  For limbs at least as wide as int the cast
	 is a no-op.  */
      c = __builtin_add_overflow ((UBILtype) ~sv, c, &lo);
      *d = lo;
      d += BITINT_INC;
    }
  while (--n);
}
/* Common final part of __fix?fbitint conversion functions.
The A floating point value should have been converted using
soft-fp macros into RV, U##DI##type DI##_BITS precise normal
integral type and SHIFT, how many bits should that value be
shifted to the left. R is pointer to limbs array passed to the
function, RN number of limbs in it, ARPREC absolute value of
RPREC argument passed to it, RSIZE number of significant bits in RV.
RSIGNED is non-zero if the result is signed bit-precise integer,
otherwise zero. If OVF is true, instead of storing RV shifted left
by SHIFT bits and zero or sign extended store minimum or maximum
of the signed or unsigned bit-precise integer type or zero depending on if
RV contains the minimum or maximum signed or unsigned value or zero. */
#define FP_TO_BITINT(r, rn, arprec, shift, rv, rsize, rsigned, ovf, DI) \
if (ovf) \
{ \
if ((rv & 1) != 0) \
__builtin_memset (r, -1, rn * sizeof (UBILtype)); \
else \
__builtin_memset (r, 0, rn * sizeof (UBILtype)); \
if (rv & (((U##DI##type) 1) << (rsize - 1))) \
r[BITINT_END (0, rn - 1)] \
|= (UBILtype) -1 << ((arprec - 1) % BIL_TYPE_SIZE); \
else \
r[BITINT_END (0, rn - 1)] \
&= ~((UBILtype) -1 << ((arprec - 1) % BIL_TYPE_SIZE)); \
} \
else \
{ \
USItype shiftl = shift / BIL_TYPE_SIZE; \
rsize = DI##_BITS; \
if (rsigned && (DI##type) rv >= 0) \
rsigned = 0; \
if (shift + DI##_BITS > arprec) \
rsize = arprec - shift; \
USItype shiftr = shift % BIL_TYPE_SIZE; \
if (shiftl) \
__builtin_memset (r + BITINT_END (rn - shiftl, 0), 0, \
shiftl * sizeof (UBILtype)); \
USItype idx = BITINT_END (rn - shiftl - 1, shiftl); \
DI##type rvs = rv; \
if (shiftr) \
{ \
r[idx] = (rsigned ? (UBILtype) rvs : (UBILtype) rv) << shiftr;\
idx += BITINT_INC; \
if (rsize > BIL_TYPE_SIZE - shiftr) \
{ \
rv >>= BIL_TYPE_SIZE - shiftr; \
rvs >>= BIL_TYPE_SIZE - shiftr; \
rsize -= BIL_TYPE_SIZE - shiftr; \
} \
else \
rsize = 0; \
} \
while (rsize) \
{ \
r[idx] = rsigned ? (UBILtype) rvs : (UBILtype) rv; \
idx += BITINT_INC; \
if (rsize <= BIL_TYPE_SIZE) \
break; \
rv >>= (DI##_BITS > BIL_TYPE_SIZE ? BIL_TYPE_SIZE : 0); \
rvs >>= (DI##_BITS > BIL_TYPE_SIZE ? BIL_TYPE_SIZE : 0); \
rsize -= BIL_TYPE_SIZE; \
} \
if (idx < rn) \
__builtin_memset (r + BITINT_END (0, idx), rsigned ? -1 : 0, \
BITINT_END (idx + 1, rn - idx) \
* sizeof (UBILtype)); \
}
/* Common initial part of __floatbitint?f conversion functions.
   I and IPREC are arguments passed to those functions, convert that
   into a pair of DI##type IV integer and SHIFT, such that converting
   IV to floating point and multiplying that by pow (2, SHIFT)
   gives the expected result.  IV size needs to be chosen such that
   it is larger than the number of bits in the floating-point mantissa
   and additionally holds at least two bits below the mantissa for
   rounding purposes.  If any of the SHIFT bits shifted out is non-zero,
   the least significant bit of IV is set.

   Usage notes:
   - I and IPREC are modified (they are passed through
     bitint_reduce_prec first).
   - SHIFT is only assigned on some paths before it is read, so the
     caller presumably has to initialize it to 0 - confirm against the
     callers.
   - The arguments are substituted without parentheses; pass plain
     variables.  */
#define FP_FROM_BITINT(i, iprec, iv, shift, DI) \
  do \
    { \
      iprec = bitint_reduce_prec (&i, iprec); \
      USItype aiprec = iprec < 0 ? -iprec : iprec; \
      /* Number of limbs and index of the most significant one.  */ \
      USItype in = (aiprec + BIL_TYPE_SIZE - 1) / BIL_TYPE_SIZE; \
      USItype idx = BITINT_END (0, in - 1); \
      UBILtype msb = i[idx]; \
      /* Number of further bits still to be shifted into IV.  */ \
      SItype n = 0; \
      /* Zero or sign extend a partial most significant limb.  */ \
      if (aiprec % BIL_TYPE_SIZE) \
	{ \
	  if (iprec > 0) \
	    msb &= ((UBILtype) 1 << (aiprec % BIL_TYPE_SIZE)) - 1; \
	  else \
	    msb |= (UBILtype) -1 << (aiprec % BIL_TYPE_SIZE); \
	} \
      if (iprec < 0) \
	{ \
	  /* All ones is handled separately because __builtin_clzll (0) \
	     is undefined.  The cast keeps ~msb from being sign \
	     extended after integer promotion when UBILtype is \
	     narrower than int; it is a no-op for wider limbs.  */ \
	  if (msb == (UBILtype) -1) \
	    n = 1; \
	  else \
	    n = (sizeof (0ULL) * __CHAR_BIT__ + 1 \
		 - __builtin_clzll ((UBILtype) ~msb)); \
	  if (BIL_TYPE_SIZE > DI##_BITS && n > DI##_BITS) \
	    { \
	      iv = msb >> (n - DI##_BITS); \
	      shift = n - DI##_BITS; \
	      n = 0; \
	    } \
	  else \
	    { \
	      iv = (BILtype) msb; \
	      n = DI##_BITS - n; \
	    } \
	} \
      /* bitint_reduce_prec guarantees that if msb is 0, then whole \
	 i must be zero, otherwise it would have reduced the \
	 precision.  */ \
      else if (msb == 0) \
	iv = 0; \
      else \
	{ \
	  n = sizeof (0ULL) * __CHAR_BIT__ - __builtin_clzll (msb); \
	  if (BIL_TYPE_SIZE >= DI##_BITS && n >= DI##_BITS) \
	    { \
	      iv = msb >> (n - DI##_BITS + 1); \
	      shift = n - DI##_BITS + 1; \
	      n = 0; \
	    } \
	  else \
	    { \
	      iv = msb; \
	      n = DI##_BITS - 1 - n; \
	    } \
	} \
      /* Pull in bits from less significant limbs while IV still has \
	 room and limbs remain.  */ \
      while (n && BITINT_END (idx < in - 1, idx)) \
	{ \
	  idx -= BITINT_INC; \
	  msb = i[idx]; \
	  if (BIL_TYPE_SIZE < DI##_BITS && n >= BIL_TYPE_SIZE) \
	    { \
	      /* The whole limb fits.  */ \
	      iv = (U##DI##type) iv << (BIL_TYPE_SIZE < DI##_BITS \
					? BIL_TYPE_SIZE : 0); \
	      iv |= msb; \
	      n -= BIL_TYPE_SIZE; \
	    } \
	  else \
	    { \
	      /* Only the top N bits of this limb fit.  */ \
	      iv = (U##DI##type) iv << n; \
	      iv |= msb >> (BIL_TYPE_SIZE - n); \
	      shift = BIL_TYPE_SIZE - n; \
	      break; \
	    } \
	} \
									\
      /* Collect the bits that did not make it into IV and fold \
	 whether any of them is set into the lowest bit of IV.  */ \
      UBILtype low_bits = 0; \
      if (shift) \
	low_bits = msb & (((UBILtype) 1 << shift) - 1); \
      shift += BITINT_END (in - 1 - idx, idx) * BIL_TYPE_SIZE; \
      while (!low_bits && BITINT_END (idx < in - 1, idx)) \
	{ \
	  idx -= BITINT_INC; \
	  low_bits |= i[idx]; \
	} \
      iv |= (low_bits != 0); \
    } \
  while (0)
/* Multi-limb _BitInt helpers; only declared here, the definitions
   live elsewhere.  */
extern void __mulbitint3 (UBILtype *, SItype,
			  const UBILtype *, SItype,
			  const UBILtype *, SItype);
extern void __divmodbitint4 (UBILtype *, SItype,
			     UBILtype *, SItype,
			     const UBILtype *, SItype,
			     const UBILtype *, SItype);

/* Unless the BID decimal format is enabled, the power-of-ten helper
   is exported under a __dpd_ prefixed name, and two tables of 16-bit
   entries (1024 and 1000 elements) are declared; going by their
   names they presumably map DPD groups to binary and back.  */
#if !defined (ENABLE_DECIMAL_BID_FORMAT)
# define __bid_pow10bitint __dpd_pow10bitint
extern const unsigned short __dpd_d2bbitint[1024];
extern const unsigned short __dpd_b2dbitint[1000];
#endif

extern USItype __bid_pow10bitint (UBILtype *, SItype, USItype);
#endif /* __BITINT_MAXWIDTH__ */
#endif /* GCC_SOFT_FP_BITINT_H */