mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 12:00:11 -05:00
tree-optimization/121395 - SLP of SIMD calls w/o LHS
The following records the alternate SLP instance entries coming from
stmts with stores that have no SSA def, like OMP SIMD calls without LHS.
There's a bit of fallout with having a SLP tree with a NULL vectype,
but nothing too gross.

	PR tree-optimization/121395
	* tree-vectorizer.h (_loop_vec_info::alternate_defs): New member.
	(LOOP_VINFO_ALTERNATE_DEFS): New.
	* tree-vect-stmts.cc (vect_stmt_relevant_p): Populate it.
	(vectorizable_simd_clone_call): Do not register a SLP def when
	there is none.
	* tree-vect-slp.cc (vect_build_slp_tree_1): Allow a NULL vectype
	when there's no LHS.  Allow all calls w/o LHS.
	(vect_analyze_slp): Process LOOP_VINFO_ALTERNATE_DEFS as SLP
	graph entries.
	(vect_make_slp_decision): Handle a NULL SLP_TREE_VECTYPE.
	(vect_slp_analyze_node_operations_1): Likewise.
	(vect_schedule_slp_node): Likewise.

	* gcc.dg/vect/pr59984.c: Adjust.
This commit is contained in:
committed by
Richard Biener
parent
9732b57443
commit
32b1be7eb4
@@ -64,3 +64,7 @@ main ()
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "31:17: optimized: loop vectorized" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump "37:7: optimized: loop vectorized" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump "44:17: optimized: loop vectorized" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump "50:7: optimized: loop vectorized" "vect" } } */
|
||||
|
||||
@@ -1140,7 +1140,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
||||
soft_fail_nunits_vectype = nunits_vectype;
|
||||
}
|
||||
|
||||
gcc_assert (vectype);
|
||||
gcc_assert (vectype || !gimple_get_lhs (first_stmt_info->stmt));
|
||||
*node_vectype = vectype;
|
||||
|
||||
/* For every stmt in NODE find its def stmt/s. */
|
||||
@@ -1187,10 +1187,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
||||
|
||||
gcall *call_stmt = dyn_cast <gcall *> (stmt);
|
||||
tree lhs = gimple_get_lhs (stmt);
|
||||
if (lhs == NULL_TREE
|
||||
&& (!call_stmt
|
||||
|| !gimple_call_internal_p (stmt)
|
||||
|| !internal_store_fn_p (gimple_call_internal_fn (stmt))))
|
||||
if (lhs == NULL_TREE && !call_stmt)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
@@ -4917,6 +4914,22 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
|
||||
return opt_result::failure_at (vect_location,
|
||||
"SLP build failed.\n");
|
||||
}
|
||||
|
||||
stmt_vec_info stmt_info;
|
||||
FOR_EACH_VEC_ELT (LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo), i, stmt_info)
|
||||
{
|
||||
vec<stmt_vec_info> stmts;
|
||||
vec<stmt_vec_info> roots = vNULL;
|
||||
vec<tree> remain = vNULL;
|
||||
stmts.create (1);
|
||||
stmts.quick_push (stmt_info);
|
||||
if (! vect_build_slp_instance (vinfo, slp_inst_kind_store,
|
||||
stmts, roots, remain, max_tree_size,
|
||||
&limit, bst_map, NULL,
|
||||
force_single_lane))
|
||||
return opt_result::failure_at (vect_location,
|
||||
"SLP build failed.\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
|
||||
@@ -7634,7 +7647,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
|
||||
/* If all instances ended up with vector(1) T roots make sure to
|
||||
not vectorize. RVV for example relies on loop vectorization
|
||||
when some instances are essentially kept scalar. See PR121048. */
|
||||
if (known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
|
||||
if (SLP_TREE_VECTYPE (root)
|
||||
&& known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
|
||||
decided_to_slp++;
|
||||
}
|
||||
|
||||
@@ -7961,7 +7975,10 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
|
||||
elements in a vector. For single-defuse-cycle, lane-reducing op, and
|
||||
PHI statement that starts reduction comprised of only lane-reducing ops,
|
||||
the number is more than effective vector statements actually required. */
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
|
||||
if (SLP_TREE_VECTYPE (node))
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
|
||||
else
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
|
||||
|
||||
/* Handle purely internal nodes. */
|
||||
if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
|
||||
@@ -11318,8 +11335,10 @@ vect_schedule_slp_node (vec_info *vinfo,
|
||||
|
||||
stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
|
||||
|
||||
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
|
||||
SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
|
||||
gcc_assert (!SLP_TREE_VECTYPE (node)
|
||||
|| SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
|
||||
if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0)
|
||||
SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
|
||||
|
||||
if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
|
||||
&& STMT_VINFO_DATA_REF (stmt_info))
|
||||
|
||||
@@ -386,6 +386,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"vec_stmt_relevant_p: stmt has vdefs.\n");
|
||||
*relevant = vect_used_in_scope;
|
||||
if (! STMT_VINFO_DATA_REF (stmt_info)
|
||||
&& zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF))
|
||||
LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info);
|
||||
}
|
||||
|
||||
/* uses outside the loop. */
|
||||
@@ -4752,7 +4755,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|
||||
}
|
||||
}
|
||||
|
||||
SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
|
||||
if (gimple_get_lhs (new_stmt))
|
||||
SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
|
||||
}
|
||||
|
||||
for (i = 0; i < nargs; ++i)
|
||||
|
||||
@@ -947,6 +947,10 @@ public:
|
||||
stmt in the chain. */
|
||||
auto_vec<stmt_vec_info> reduction_chains;
|
||||
|
||||
/* Defs that could not be analyzed such as OMP SIMD calls without
|
||||
a LHS. */
|
||||
auto_vec<stmt_vec_info> alternate_defs;
|
||||
|
||||
/* Cost vector for a single scalar iteration. */
|
||||
auto_vec<stmt_info_for_cost> scalar_cost_vec;
|
||||
|
||||
@@ -1186,6 +1190,7 @@ public:
|
||||
#define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
|
||||
#define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L) (L)->inv_pattern_def_seq
|
||||
#define LOOP_VINFO_DRS_ADVANCED_BY(L) (L)->drs_advanced_by
|
||||
#define LOOP_VINFO_ALTERNATE_DEFS(L) (L)->alternate_defs
|
||||
|
||||
#define LOOP_VINFO_FULLY_MASKED_P(L) \
|
||||
(LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
|
||||
|
||||
Reference in New Issue
Block a user