mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
tree-optimization/120032 - CLZ matching, fallback for missing range-info
The following allows us to emit a conditional move when the value of the table-based CTZ/CLZ implementation at zero differs from what the target implementation guarantees, or when we cannot easily fix it up otherwise. In that case emit a val == 0 ? table-based-zero-result : ... PR tree-optimization/120032 * tree-ssa-forwprop.cc (simplify_count_zeroes): When we cannot use the IFN to determine the result at zero use a conditional move to reproduce the correct result from the table-based algorithm. * gcc.target/i386/pr120032-3.c: New testcase.
This commit is contained in:
20
gcc/testsuite/gcc.target/i386/pr120032-3.c
Normal file
20
gcc/testsuite/gcc.target/i386/pr120032-3.c
Normal file
@@ -0,0 +1,20 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mlzcnt" } */
|
||||
|
||||
/* Table-based count-leading-zeros for a 32-bit value.

   VAL is the value to inspect.  Returns the number of zero bits above
   the most significant set bit of VAL.  For VAL == 0 the lookup lands
   on DeBruijnClz[0] == 0, so the function returns 31 (not 32).

   The statement sequence is kept exactly in this form on purpose: the
   surrounding testcase scans the generated assembly for "lzcnt", i.e.
   it relies on the compiler recognizing this idiom.  */
unsigned int
ZSTD_countLeadingZeros32_fallback(unsigned int val)
{
  /* Maps the 5-bit hash computed below to the bit position (0..31) of
     the most significant set bit.  */
  static const unsigned int DeBruijnClz[32]
    = { 0, 9, 1, 10, 13, 21, 2, 29,
	11, 14, 16, 18, 22, 25, 3, 30,
	8, 12, 20, 28, 15, 17, 24, 7,
	19, 27, 23, 6, 26, 5, 4, 31};
  /* Copy the most significant set bit into every lower bit position.  */
  val |= val >> 1;
  val |= val >> 2;
  val |= val >> 4;
  val |= val >> 8;
  val |= val >> 16;
  /* Multiply and keep the top five bits as the table index; 31 minus
     the looked-up bit position is the leading-zero count.  */
  return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
}
|
||||
|
||||
/* { dg-final { scan-assembler "lzcnt" } } */
|
||||
@@ -2728,13 +2728,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
|
||||
nargs = 1;
|
||||
}
|
||||
|
||||
/* Skip if there is no value defined at zero, or if we can't easily
|
||||
return the correct value for zero. */
|
||||
if (!zero_ok)
|
||||
return false;
|
||||
if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == input_bits))
|
||||
return false;
|
||||
|
||||
gimple_seq seq = NULL;
|
||||
gimple *g;
|
||||
gcall *call = gimple_build_call_internal (fn, nargs, res_ops[0],
|
||||
@@ -2758,8 +2751,10 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
|
||||
prev_lhs = gimple_assign_lhs (g);
|
||||
}
|
||||
|
||||
if (zero_ok && zero_val == ctz_val)
|
||||
;
|
||||
/* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */
|
||||
if (zero_val == 0 && ctz_val == input_bits)
|
||||
else if (zero_ok && zero_val == 0 && ctz_val == input_bits)
|
||||
{
|
||||
g = gimple_build_assign (make_ssa_name (integer_type_node),
|
||||
BIT_AND_EXPR, prev_lhs,
|
||||
@@ -2769,6 +2764,22 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
|
||||
gimple_seq_add_stmt (&seq, g);
|
||||
prev_lhs = gimple_assign_lhs (g);
|
||||
}
|
||||
/* As fallback emit a conditional move. */
|
||||
else
|
||||
{
|
||||
g = gimple_build_assign (make_ssa_name (boolean_type_node), EQ_EXPR,
|
||||
res_ops[0], build_zero_cst (input_type));
|
||||
gimple_set_location (g, gimple_location (stmt));
|
||||
gimple_seq_add_stmt (&seq, g);
|
||||
tree cond = gimple_assign_lhs (g);
|
||||
g = gimple_build_assign (make_ssa_name (integer_type_node),
|
||||
COND_EXPR, cond,
|
||||
build_int_cst (integer_type_node, zero_val),
|
||||
prev_lhs);
|
||||
gimple_set_location (g, gimple_location (stmt));
|
||||
gimple_seq_add_stmt (&seq, g);
|
||||
prev_lhs = gimple_assign_lhs (g);
|
||||
}
|
||||
|
||||
g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
|
||||
gimple_seq_add_stmt (&seq, g);
|
||||
|
||||
Reference in New Issue
Block a user