unswitching of outer loops

This allows loop unswitching to unswitch outer loops conditions are
invariant in.  We restrict ourselves to unswitch conditions in innermost
loops and will only unswitch loop nests that do not contain any sibling loops.
To simplify the implementation the loop nest unswitched is the deepest all
unswitching candidates are invariant in.

For 507.cactuBSSN_r it can be observed we unswitch the outer loops
of the compute kernels for the fdOrder parameter.  It seems to be within
the existing growth limitations to perform the unswitchings, a performance
benefit is not seen.

	* tree-ssa-loop-unswitch.cc (init_loop_unswitch_info): First collect
	candidates and determine the outermost loop to unswitch.
	(tree_ssa_unswitch_loops): First perform all guard hoisting,
	then perform unswitching on innermost loop predicates.
	(find_unswitching_predicates_for_bb): Keep track of the
	outermost loop to unswitch.
	(evaluate_bbs): Adjust exit test.
	(tree_unswitch_single_loop): Dump whether we unswitched an outer
	loop.
	(tree_unswitch_loop): Remove assert we unswitch only innermost
	loops.

	* gcc.dg/loop-unswitch-18.c: New testcase.
	* gcc.dg/tree-ssa/loopclosedphi.c: Disable unswitching,
	adjust expected counts.
	* gcc.dg/torture/pr71462.c: Add -w to ignore undefined
	behavior diagnostics after now unswitching outer loops.
This commit is contained in:
Richard Biener
2022-09-27 10:16:52 +02:00
parent 49de156a6a
commit 9e11ceef16
4 changed files with 143 additions and 84 deletions

View File

@@ -0,0 +1,13 @@
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-unswitch-optimized" } */
void bar();
void foo (int x, int n, int m)
{
for (int i = 0; i < n; ++i)
for (int j = 0; j < m; ++j)
if (x)
bar ();
}
/* { dg-final { scan-tree-dump "unswitching outer loop" "unswitch" } } */

View File

@@ -1,4 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-w" } */
short a;
long b;

View File

@@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O3 -fno-tree-ch -w -fdump-tree-loopdone-details" } */
/* { dg-options "-O3 -fno-tree-ch -fno-unswitch-loops -w -fdump-tree-loopdone-details" } */
void
t6 (int qz, int wh)
@@ -18,4 +18,4 @@ t6 (int qz, int wh)
qz = jl * wh;
}
/* { dg-final { scan-tree-dump-times "Replacing" 2 "loopdone"} } */
/* { dg-final { scan-tree-dump-times "Replacing" 3 "loopdone"} } */

View File

@@ -217,6 +217,7 @@ static bool tree_unswitch_single_loop (class loop *, dump_user_location_t,
basic_block = NULL);
static void
find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
class loop *&outer_loop,
vec<unswitch_predicate *> &candidates,
unswitch_predicate *&hottest,
basic_block &hottest_bb);
@@ -249,36 +250,31 @@ set_predicates_for_bb (basic_block bb, vec<unswitch_predicate *> predicates)
}
/* Initialize LOOP information reused during the unswitching pass.
Return total number of instructions in the loop. */
Return total number of instructions in the loop. Adjusts LOOP to
the outermost loop all candidates are invariant in. */
static unsigned
init_loop_unswitch_info (class loop *loop, unswitch_predicate *&hottest,
init_loop_unswitch_info (class loop *&loop, unswitch_predicate *&hottest,
basic_block &hottest_bb)
{
unsigned total_insns = 0;
/* Calculate instruction count. */
basic_block *bbs = get_loop_body (loop);
for (unsigned i = 0; i < loop->num_nodes; i++)
{
unsigned insns = 0;
for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
gsi_next (&gsi))
insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
bbs[i]->aux = (void *)(uintptr_t)insns;
total_insns += insns;
}
/* Unswitch only nests with no sibling loops. */
class loop *outer_loop = loop;
while (loop_outer (outer_loop)->num != 0
&& !loop_outer (outer_loop)->inner->next)
outer_loop = loop_outer (outer_loop);
hottest = NULL;
hottest_bb = NULL;
/* Find all unswitching candidates. */
/* Find all unswitching candidates in the innermost loop. */
for (unsigned i = 0; i != loop->num_nodes; i++)
{
/* Find a bb to unswitch on. */
vec<unswitch_predicate *> candidates;
candidates.create (1);
find_unswitching_predicates_for_bb (bbs[i], loop, candidates,
find_unswitching_predicates_for_bb (bbs[i], loop, outer_loop, candidates,
hottest, hottest_bb);
if (!candidates.is_empty ())
set_predicates_for_bb (bbs[i], candidates);
@@ -291,8 +287,34 @@ init_loop_unswitch_info (class loop *loop, unswitch_predicate *&hottest,
}
}
if (outer_loop != loop)
{
free (bbs);
bbs = get_loop_body (outer_loop);
}
/* Calculate instruction count. */
for (unsigned i = 0; i < outer_loop->num_nodes; i++)
{
unsigned insns = 0;
for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
gsi_next (&gsi))
insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
/* No predicates to unswitch on in the outer loops. */
if (!flow_bb_inside_loop_p (loop, bbs[i]))
{
gimple *last = last_stmt (bbs[i]);
if (last != NULL)
gimple_set_uid (last, 0);
}
bbs[i]->aux = (void *)(uintptr_t)insns;
total_insns += insns;
}
free (bbs);
loop = outer_loop;
return total_insns;
}
@@ -307,74 +329,76 @@ tree_ssa_unswitch_loops (function *fun)
ranger = enable_ranger (fun);
/* Go through all loops starting from innermost. */
/* Go through all loops starting from innermost, hoisting guards. */
for (auto loop : loops_list (fun, LI_FROM_INNERMOST))
{
if (!loop->inner)
{
/* Perform initial tests if unswitch is eligible. */
dump_user_location_t loc = find_loop_location (loop);
/* Do not unswitch in cold regions. */
if (optimize_loop_for_size_p (loop))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, loc,
"Not unswitching cold loops\n");
continue;
}
/* If the loop is not expected to iterate, there is no need
for unswitching. */
HOST_WIDE_INT iterations = estimated_loop_iterations_int (loop);
if (iterations < 0)
iterations = likely_max_loop_iterations_int (loop);
if (iterations >= 0 && iterations <= 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, loc,
"Not unswitching, loop is not expected"
" to iterate\n");
continue;
}
bb_predicates = new vec<vec<unswitch_predicate *>> ();
bb_predicates->safe_push (vec<unswitch_predicate *> ());
unswitch_predicate::predicates = new vec<unswitch_predicate *> ();
/* Unswitch innermost loop. */
unswitch_predicate *hottest;
basic_block hottest_bb;
unsigned int loop_size = init_loop_unswitch_info (loop, hottest,
hottest_bb);
unsigned int budget = loop_size + param_max_unswitch_insns;
predicate_vector predicate_path;
predicate_path.create (8);
auto_bitmap handled;
changed_unswitch
|= tree_unswitch_single_loop (loop, loc, predicate_path,
loop_size, budget,
ignored_edge_flag, handled,
hottest, hottest_bb);
predicate_path.release ();
for (auto predlist : bb_predicates)
predlist.release ();
bb_predicates->release ();
delete bb_predicates;
bb_predicates = NULL;
for (auto pred : unswitch_predicate::predicates)
delete pred;
unswitch_predicate::predicates->release ();
delete unswitch_predicate::predicates;
unswitch_predicate::predicates = NULL;
}
else
if (loop->inner)
changed_hoist |= tree_unswitch_outer_loop (loop);
}
/* Go through innermost loops, unswitching on invariant predicates
within those. */
for (auto loop : loops_list (fun, LI_ONLY_INNERMOST))
{
/* Perform initial tests if unswitch is eligible. */
dump_user_location_t loc = find_loop_location (loop);
/* Do not unswitch in cold regions. */
if (optimize_loop_for_size_p (loop))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, loc,
"Not unswitching cold loops\n");
continue;
}
/* If the loop is not expected to iterate, there is no need
for unswitching. */
HOST_WIDE_INT iterations = estimated_loop_iterations_int (loop);
if (iterations < 0)
iterations = likely_max_loop_iterations_int (loop);
if (iterations >= 0 && iterations <= 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, loc,
"Not unswitching, loop is not expected"
" to iterate\n");
continue;
}
bb_predicates = new vec<vec<unswitch_predicate *>> ();
bb_predicates->safe_push (vec<unswitch_predicate *> ());
unswitch_predicate::predicates = new vec<unswitch_predicate *> ();
/* Unswitch loop. */
unswitch_predicate *hottest;
basic_block hottest_bb;
unsigned int loop_size = init_loop_unswitch_info (loop, hottest,
hottest_bb);
unsigned int budget = loop_size + param_max_unswitch_insns;
predicate_vector predicate_path;
predicate_path.create (8);
auto_bitmap handled;
changed_unswitch |= tree_unswitch_single_loop (loop, loc, predicate_path,
loop_size, budget,
ignored_edge_flag, handled,
hottest, hottest_bb);
predicate_path.release ();
for (auto predlist : bb_predicates)
predlist.release ();
bb_predicates->release ();
delete bb_predicates;
bb_predicates = NULL;
for (auto pred : unswitch_predicate::predicates)
delete pred;
unswitch_predicate::predicates->release ();
delete unswitch_predicate::predicates;
unswitch_predicate::predicates = NULL;
}
disable_ranger (fun);
clear_aux_for_blocks ();
@@ -463,10 +487,13 @@ is_maybe_undefined (const tree name, gimple *stmt, class loop *loop)
/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
basic blocks (for what it means see comments below).
All candidates all filled to the provided vector CANDIDATES. */
All candidates all filled to the provided vector CANDIDATES.
OUTER_LOOP is updated to the innermost loop all found candidates are
invariant in. */
static void
find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
class loop *&outer_loop,
vec<unswitch_predicate *> &candidates,
unswitch_predicate *&hottest,
basic_block &hottest_bb)
@@ -506,6 +533,18 @@ find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
if (is_maybe_undefined (use, stmt, loop))
return;
}
/* Narrow OUTER_LOOP. */
if (outer_loop != loop)
FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
{
def = SSA_NAME_DEF_STMT (use);
def_bb = gimple_bb (def);
while (outer_loop != loop
&& ((def_bb && flow_bb_inside_loop_p (outer_loop, def_bb))
|| is_maybe_undefined (use, stmt, outer_loop)))
outer_loop = superloop_at_depth (loop,
loop_depth (outer_loop) + 1);
}
unswitch_predicate *predicate = new unswitch_predicate (stmt);
candidates.safe_push (predicate);
@@ -535,6 +574,12 @@ find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
behavior that the original program might never exercise. */
if (is_maybe_undefined (idx, stmt, loop))
return;
/* Narrow OUTER_LOOP. */
while (outer_loop != loop
&& ((def_bb && flow_bb_inside_loop_p (outer_loop, def_bb))
|| is_maybe_undefined (idx, stmt, outer_loop)))
outer_loop = superloop_at_depth (loop,
loop_depth (outer_loop) + 1);
/* Build compound expression for all outgoing edges of the switch. */
auto_vec<tree, 16> preds;
@@ -872,7 +917,7 @@ evaluate_bbs (class loop *loop, predicate_vector *predicate_path,
{
basic_block dest = e->dest;
if (dest->loop_father == loop
if (flow_bb_inside_loop_p (loop, dest)
&& !(dest->flags & reachable_flag)
&& !(e->flags & flags)
&& !ignored_edges.contains (e))
@@ -992,7 +1037,8 @@ tree_unswitch_single_loop (class loop *loop, dump_user_location_t loc,
if (dump_enabled_p ())
{
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
"unswitching loop %d on %qs with condition: %T\n",
"unswitching %sloop %d on %qs with condition: %T\n",
loop->inner ? "outer " : "",
loop->num, predicate->switch_p ? "switch" : "if",
predicate->condition);
dump_printf_loc (MSG_NOTE, loc,
@@ -1064,7 +1110,6 @@ tree_unswitch_loop (class loop *loop, edge edge_true, tree cond)
/* Some sanity checking. */
gcc_assert (flow_bb_inside_loop_p (loop, edge_true->src));
gcc_assert (EDGE_COUNT (edge_true->src->succs) >= 2);
gcc_assert (loop->inner == NULL);
profile_probability prob_true = edge_true->probability;
return loop_version (loop, unshare_expr (cond),