bitintlower: Ensure extension of the most significant limb on info->extended targets

Shifts are the only special case I'm aware of where the most
significant limb (if it is has padding bits) is accessed inside of a loop or
with access outside of a loop but with variable idx.  Everything else should
access the most significant limb using INTEGER_CST idx and thus can (and
should) deal with the needed extension on that access directly.
And RSHIFT_EXPR shouldn't really violate the content of the padding bits.

For LSHIFT_EXPR we should IMHO do the following (which fixes the testcase
on s390x-linux).
The LSHIFT_EXPR is
      /* Lower
           dst = src << n;
         as
           unsigned n1 = n % limb_prec;
           size_t n2 = n / limb_prec;
           size_t n3 = n1 != 0;
           unsigned n4 = (limb_prec - n1) % limb_prec;
           size_t idx;
           size_t p = prec / limb_prec - (prec % limb_prec == 0);
           for (idx = p; (ssize_t) idx >= (ssize_t) (n2 + n3); --idx)
             dst[idx] = (src[idx - n2] << n1) | (src[idx - n2 - n3] >> n4);
           if (n1)
             {
               dst[idx] = src[idx - n2] << n1;
               --idx;
             }
           for (; (ssize_t) idx >= 0; --idx)
             dst[idx] = 0;  */
as described in the comment (note, the comments are for the little-endian
lowering only, didn't want to complicate it with endianity).
As can be seen, the most significant limb can be modifier either inside
of the loop or in the if (n1) body if the loop had 0 iterations.
In your patch you've modified I believe just the loop and not the if body,
and made it conditional on every iteration (furthermore through
gimplification of COND_EXPR which is not the way this is done elsewhere in
gimple-lower-bitint.cc, there is if_then helper and it builds
gimple_build_cond etc.).  I think that is way too expensive.  In theory we
could peel off the first iteration manually and do the info->extended
handling in there and do it again inside of the if (n1) case if idx ==
(bitint_big_endian ? size_zero_node : p) in that case, but I think just
doing the extension after the loops is easier.

Note, we don't need to worry about volatile here, the shift is done into
an addressable variable memory only if it is non-volatile, otherwise it
is computed into a temporary and then copied over into the volatile var.

2025-05-22  Jakub Jelinek  <jakub@redhat.com>

	* gimple-lower-bitint.cc (bitint_extended): New variable.
	(bitint_large_huge::lower_shift_stmt): For LSHIFT_EXPR with
	bitint_extended if lhs has most significant partial limb extend
	it afterwards.

	* gcc.dg/bitintext.h: New file.
	* gcc.dg/torture/bitint-82.c: New test.
This commit is contained in:
Jakub Jelinek
2025-05-22 11:01:13 +02:00
committed by Jakub Jelinek
parent 9c621efd2c
commit 63cd56d487
3 changed files with 142 additions and 1 deletions

View File

@@ -77,7 +77,7 @@ enum bitint_prec_kind {
static int small_max_prec, mid_min_prec, large_min_prec, huge_min_prec;
static int limb_prec;
static bool bitint_big_endian;
static bool bitint_big_endian, bitint_extended;
/* Categorize _BitInt(PREC) as small, middle, large or huge. */
@@ -103,6 +103,7 @@ bitint_precision_kind (int prec)
return bitint_prec_small;
}
bitint_big_endian = info.big_endian;
bitint_extended = info.extended;
if (!large_min_prec
&& GET_MODE_PRECISION (limb_mode) < MAX_FIXED_MODE_SIZE)
large_min_prec = MAX_FIXED_MODE_SIZE + 1;
@@ -3882,6 +3883,23 @@ bitint_large_huge::lower_shift_stmt (tree obj, gimple *stmt)
g = gimple_build_cond (GE_EXPR, add_cast (ssizetype, idx_next),
ssize_int (0), NULL_TREE, NULL_TREE);
insert_before (g);
if (bitint_extended && prec % limb_prec != 0)
{
/* The most significant limb has been updated either in the
loop or in the if after it. To simplify the code, just
read it back from memory and extend. */
m_gsi = gsi_after_labels (edge_false->dest);
idx = bitint_big_endian ? size_zero_node : p;
tree l = limb_access (TREE_TYPE (lhs), obj, idx, true);
tree type = limb_access_type (TREE_TYPE (lhs), idx);
tree v = make_ssa_name (m_limb_type);
g = gimple_build_assign (v, l);
insert_before (g);
v = add_cast (type, v);
l = limb_access (TREE_TYPE (lhs), obj, idx, true);
g = gimple_build_assign (l, add_cast (m_limb_type, v));
insert_before (g);
}
}
}

View File

@@ -0,0 +1,29 @@
[[gnu::noipa]] void
do_copy (void *p, const void *q, __SIZE_TYPE__ r)
{
__builtin_memcpy (p, q, r);
}
/* Macro to test whether (on targets where psABI requires it) _BitInt
with padding bits have those filled with sign or zero extension. */
#if defined(__s390x__) || defined(__arm__) || defined(__loongarch__)
#define BEXTC(x) \
do { \
if ((typeof (x)) -1 < 0) \
{ \
_BitInt(sizeof (x) * __CHAR_BIT__) __x; \
do_copy (&__x, &(x), sizeof (__x)); \
if (__x != (x)) \
__builtin_abort (); \
} \
else \
{ \
unsigned _BitInt(sizeof (x) * __CHAR_BIT__) __x; \
do_copy (&__x, &(x), sizeof (__x)); \
if (__x != (x)) \
__builtin_abort (); \
} \
} while (0)
#else
#define BEXTC(x) do { (void) (x); } while (0)
#endif

View File

@@ -0,0 +1,94 @@
/* { dg-do run { target bitint } } */
/* { dg-options "-std=c23 -pedantic-errors" } */
/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */
/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
#if __BITINT_MAXWIDTH__ >= 532
_BitInt(5) a = 2, b = -2;
_BitInt(38) c = 12345, d = -12345;
_BitInt(129) e = 147090211948845388976606115811401318743wb, f = -147090211948845388976606115811401318743wb;
_BitInt(532) g = 34476769918317100226195145251004381172591594205376273814wb, h = -102116935649428556311918486808926113041433456371844211259677321wb;
unsigned _BitInt(1) i = 1;
unsigned _BitInt(17) j = 49127uwb;
unsigned _BitInt(60) k = 588141367522129848uwb;
unsigned _BitInt(205) l = 33991671979236490040668305838261113909013362173682935296620088uwb;
unsigned _BitInt(475) z = 14834685124553929878903720794923785539321715423294864257448721201977655202426343038777008759878862591302200019811097993772912691139803983786083uwb;
#endif
#include "../bitintext.h"
#if __BITINT_MAXWIDTH__ >= 532
[[gnu::noipa]] _BitInt(217)
f1 (_BitInt(9) a, unsigned _BitInt(12) b, _BitInt(36) c, unsigned _BitInt(105) d,
_BitInt(135) e, unsigned _BitInt(168) f, _BitInt(207) g, _BitInt(207) h,
unsigned _BitInt(531) i, _BitInt(36) j)
{
BEXTC (a); BEXTC (b); BEXTC (c); BEXTC (d);
BEXTC (e); BEXTC (f); BEXTC (g); BEXTC (h);
BEXTC (i); BEXTC (j);
_BitInt(9) k = a + 1;
unsigned _BitInt(12) l = b - a;
_BitInt(36) m = c * j;
unsigned _BitInt(105) n = d >> (-2 * j);
_BitInt(135) o = e | -j;
unsigned _BitInt(168) p = f & 101010101010101010101010uwb;
_BitInt(207) q = g * j;
_BitInt(207) r = g + h;
unsigned _BitInt(531) s = i / j;
BEXTC (k); BEXTC (l); BEXTC (m); BEXTC (n);
BEXTC (o); BEXTC (p); BEXTC (q); BEXTC (r);
BEXTC (s);
unsigned _BitInt(105) t = d << (38 - j);
BEXTC (t);
_Atomic _BitInt(5) u = 15;
u += 8U;
BEXTC (u);
_BitInt(135) v = e << 28;
BEXTC (v);
unsigned _BitInt(475) w = z << (29 + j);
BEXTC (w);
return a + 4;
}
#endif
int
main ()
{
#if __BITINT_MAXWIDTH__ >= 532
BEXTC (a); BEXTC (b);
BEXTC (c); BEXTC (d);
BEXTC (e); BEXTC (f);
BEXTC (g); BEXTC (h);
BEXTC (i);
BEXTC (j);
BEXTC (k);
BEXTC (l);
BEXTC (z);
{
_BitInt(5) a = 2, b = -2;
_BitInt(38) c = 12345, d = -12345;
_BitInt(129) e = 147090211948845388976606115811401318743wb, f = -147090211948845388976606115811401318743wb;
_BitInt(532) g = 34476769918317100226195145251004381172591594205376273814wb, h = -102116935649428556311918486808926113041433456371844211259677321wb;
unsigned _BitInt(1) i = 1;
unsigned _BitInt(17) j = 49127uwb;
unsigned _BitInt(60) k = 588141367522129848uwb;
unsigned _BitInt(205) l = 33991671979236490040668305838261113909013362173682935296620088uwb;
BEXTC (a); BEXTC (b);
BEXTC (c); BEXTC (d);
BEXTC (e); BEXTC (f);
BEXTC (g); BEXTC (h);
BEXTC (i);
BEXTC (j);
BEXTC (k);
BEXTC (l);
}
_BitInt(217) m = f1 (57wb, 3927uwb, 10625699364wb, 23030359755638571619326514462579uwb,
20797625176303404170317957140841712396356wb,
111831871006433449872067089878311637796827405335256uwb,
64853652491049541618437564623858346454131583900201311683495230wb,
25108562626494976011700565632680191924545340440636663075662700wb,
6366583146545926097709747296452085257498446783797668089081516596003270602920229800152065594152964557479773813310423759077951305431130758723519892452009351743676uwb,
-1);
BEXTC (m);
#endif
}