i386: implement costs for float<->int conversions in ix86_vector_costs::add_stmt_cost

This patch adds pattern matching for float<->int conversions both as normal
statements and as promote_demote.  While updating promote_demote I noticed that
in an earlier cleanup I turned "stmt_cost =" into "int stmt_cost =", which
shadowed the outer variable and turned the existing FP costing into a no-op.
I also added a comment on how demotes are done when turning e.g. a 32bit value
into an 8bit value (which is the case of pr119919.c).

The patch disables vectorization in pr119919.c with generic tuning, but keeps
it with both zen and skylake+ tuning.  The underlying problem is the bad cost
of open-coded scatter, which is tracked by PR119902, so I simply added
-mtune=znver1 so that the testcase keeps testing vectorization.

gcc/ChangeLog:

	* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Add FLOAT_EXPR,
	FIX_TRUNC_EXPR and vec_promote_demote costs.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr119919.c: Add -mtune=znver1.
This commit is contained in:
Jan Hubicka
2025-05-07 15:33:44 +02:00
parent 210d06502f
commit 2c8d632d9e
2 changed files with 40 additions and 12 deletions

View File

@@ -25767,6 +25767,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(ix86_tune_cost, GET_MODE_BITSIZE (mode));
break;
case FLOAT_EXPR:
if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
stmt_cost = ix86_cost->cvtsi2ss;
else if (X87_FLOAT_MODE_P (mode))
/* TODO: We do not have cost tables for x87. */
stmt_cost = ix86_cost->fadd;
else
stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
break;
case FIX_TRUNC_EXPR:
if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
stmt_cost = ix86_cost->cvtss2si;
else if (X87_FLOAT_MODE_P (mode))
/* TODO: We do not have cost tables for x87. */
stmt_cost = ix86_cost->fadd;
else
stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
break;
case COND_EXPR:
{
/* SSE2 conditional move sequence is:
@@ -25930,8 +25950,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
if (kind == vec_promote_demote
&& fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
if (kind == vec_promote_demote)
{
int outer_size
= tree_to_uhwi
@@ -25941,16 +25960,25 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
= tree_to_uhwi
(TYPE_SIZE
(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
int stmt_cost = vec_fp_conversion_cost
(ix86_tune_cost, GET_MODE_BITSIZE (mode));
/* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
up doing two conversions and packing them. */
bool inner_fp = FLOAT_TYPE_P
(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
if (fp && inner_fp)
stmt_cost = vec_fp_conversion_cost
(ix86_tune_cost, GET_MODE_BITSIZE (mode));
else if (fp && !inner_fp)
stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
else if (!fp && inner_fp)
stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
else
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
/* VEC_PACK_TRUNC_EXPR and similar demote operations: If inner size is
greater than outer size we will end up doing two conversions and
packing them.  We always pack pairs; if the size difference is greater
it is split into multiple demote operations.  */
if (inner_size > outer_size)
{
int n = inner_size / outer_size;
stmt_cost = stmt_cost * n
+ (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
}
stmt_cost = stmt_cost * 2
+ ix86_vec_cost (mode, ix86_cost->sse_op);
}
/* If we do elementwise loads into a vector then we are bound by

View File

@@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */
/* { dg-options "-O2 -msse2 -fdump-tree-vect-details -mtune=znver1" } */
int a[9*9];
bool b[9];
void test()