Fix profile updating in ipa-cp

Bootstrapping with autoprofiledbootstrap, LTO and checking enabled ICEs in WPA
because we end up mixing local and IPA count in
ipa-cp.cc:update_specialized_profile.  This is because of missing call to
profile_count::adjust_for_ipa_scaling.  While looking into that I however
noticed that the function forgets to update indirect call edges. This made me
commonize the same logic which currently exists in clone_inlined_nodes,
update_specialized_profile, update_profiling_info and
update_counts_for_self_gen_clones.

While testing it I noticed that we also ICE when linking with
-fdump-ipa-all-details-blocks since IPA and local counts are temporarily mixed
during the IPA transformation stage, so I also added a check to profile_count::dump
to not crash and added verifier to gimple_verify_flow_info.

Other problem I also noticed is that while profile updates done by inliner (via
cgraph_node::clone) are correctly using global0 profiles instead of erasing
profile completely when IPA counts drops to 0, the scaling in ipa-cp is not
doing that, so we lose info and possibly some code quality.  I will fix that
incrementally. Similarly, ipa-split, when offlining a region with a 0 entry count,
may re-do frequency propagation to get something useful.

gcc/ChangeLog:

	* cgraph.cc (cgraph_node::apply_scale): New member function.
	* cgraph.h (struct cgraph_node): Declare it.
	* ipa-cp.cc (update_counts_for_self_gen_clones):
	Use cgraph_node::apply_scale.
	(update_profiling_info): Do not overwrite local
	profile when dropping to 0 global profile.
	(update_specialized_profile): Likewise.
	* ipa-inline-transform.cc (update_noncloned_counts): Remove.
	(can_remove_node_now_p_1): Fix formatting.
	(clone_inlined_nodes): Use cgraph_node::apply_scale.
	* profile-count.cc (profile_count::dump): Do not ICE
	when count is not compatible with entry block count.
	* tree-cfg.cc (gimple_verify_flow_info): Check
	compatibility of count and entry block count.
This commit is contained in:
Jan Hubicka
2025-06-06 17:57:00 +02:00
parent 0adb415d12
commit b0d50cbb42
6 changed files with 70 additions and 76 deletions

View File

@@ -179,6 +179,26 @@ cgraph_node::function_version (void)
return cgraph_fnver_htab->find (&key);
}
/* Scale profile by NUM/DEN. Walk into inlined clones. */
void
cgraph_node::apply_scale (profile_count num, profile_count den)
{
struct cgraph_edge *e;
/* Normalize NUM/DEN before scaling; presumably this avoids mixing local
   and IPA counts when the IPA count drops to zero — see
   profile_count::adjust_for_ipa_scaling.  */
profile_count::adjust_for_ipa_scaling (&num, &den);
for (e = callees; e; e = e->next_callee)
{
/* Recurse into bodies of inlined clones so their counts stay in
   sync with this function's count.  */
if (!e->inline_failed)
e->callee->apply_scale (num, den);
e->count = e->count.apply_scale (num, den);
}
/* Indirect call edges carry counts as well and must not be skipped.  */
for (e = indirect_calls; e; e = e->next_callee)
e->count = e->count.apply_scale (num, den);
/* Finally scale the count of the node itself.  */
count = count.apply_scale (num, den);
}
/* Insert a new cgraph_function_version_info node into cgraph_fnver_htab
corresponding to cgraph_node NODE. */
cgraph_function_version_info *

View File

@@ -1256,6 +1256,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
it is not used in any other non-standard way. */
bool only_called_directly_p (void);
/* Scale profile by NUM/DEN. Walk into inlined clones. */
void apply_scale (profile_count num, profile_count den);
/* Return true when function is only called directly or it has alias.
i.e. it is not externally visible, address was not taken and
it is not used in any other non-standard way. */

View File

@@ -4666,19 +4666,12 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
unsigned i = 0;
for (cgraph_node *n : self_gen_clones)
{
profile_count orig_count = n->count;
profile_count new_count
= (redist_sum / self_gen_clones.length () + other_edges_count[i]);
new_count = lenient_count_portion_handling (new_count, orig_node);
n->count = new_count;
profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
n->apply_scale (new_count, n->count);
for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
{
cs->count = cs->count.apply_scale (new_count, orig_count);
processed_edges.add (cs);
}
for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
cs->count = cs->count.apply_scale (new_count, orig_count);
processed_edges.add (cs);
i++;
}
@@ -4811,20 +4804,24 @@ update_profiling_info (struct cgraph_node *orig_node,
/* The NEW_NODE count and counts of all its outgoing edges
are still unmodified copies of ORIG_NODE's. Just clear
the latter and bail out. */
profile_count zero;
if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
zero = profile_count::zero ().guessed_local ();
orig_node->count = orig_node->count.guessed_local ();
else
zero = profile_count::adjusted_zero ();
orig_node->count = zero;
orig_node->count = orig_node->count.global0adjusted ();
for (cgraph_edge *cs = orig_node->callees;
cs;
cs = cs->next_callee)
cs->count = zero;
if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
cs->count = orig_node->count.guessed_local ();
else
cs->count = orig_node->count.global0adjusted ();
for (cgraph_edge *cs = orig_node->indirect_calls;
cs;
cs = cs->next_callee)
cs->count = zero;
if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
cs->count = orig_node->count.guessed_local ();
else
cs->count = orig_node->count.global0adjusted ();
return;
}
}
@@ -4874,26 +4871,12 @@ update_profiling_info (struct cgraph_node *orig_node,
orig_node);
new_sum = orig_node_count.combine_with_ipa_count (new_sum);
new_node->count = new_sum;
orig_node->count = remainder;
profile_count orig_new_node_count = orig_node_count;
profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
new_node->apply_scale (new_sum, new_node->count);
if (!orig_edges_processed)
{
profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
cs->count = cs->count.apply_scale (remainder, orig_node_count);
for (cgraph_edge *cs = orig_node->indirect_calls;
cs;
cs = cs->next_callee)
cs->count = cs->count.apply_scale (remainder, orig_node_count);
}
orig_node->apply_scale (remainder, orig_node->count);
if (dump_file)
{
@@ -4911,35 +4894,25 @@ update_specialized_profile (struct cgraph_node *new_node,
struct cgraph_node *orig_node,
profile_count redirected_sum)
{
struct cgraph_edge *cs;
profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
if (dump_file)
{
fprintf (dump_file, " the sum of counts of redirected edges is ");
redirected_sum.dump (dump_file);
fprintf (dump_file, "\n old ipa count of the original node is ");
orig_node_count.dump (dump_file);
orig_node->count.dump (dump_file);
fprintf (dump_file, "\n");
}
if (!orig_node_count.nonzero_p ())
if (!orig_node->count.ipa ().nonzero_p ()
|| !redirected_sum.nonzero_p ())
return;
new_node_count = new_node->count;
new_node->count += redirected_sum;
orig_node->count
= lenient_count_portion_handling (orig_node->count - redirected_sum,
orig_node);
orig_node->apply_scale
(lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
orig_node),
orig_node->count);
for (cs = new_node->callees; cs; cs = cs->next_callee)
cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
for (cs = orig_node->callees; cs; cs = cs->next_callee)
{
profile_count dec = cs->count.apply_scale (redirected_sum,
orig_node_count);
cs->count -= dec;
}
new_node->apply_scale (new_node->count.ipa () + redirected_sum,
new_node->count);
if (dump_file)
{

View File

@@ -58,27 +58,6 @@ along with GCC; see the file COPYING3. If not see
int ncalls_inlined;
int nfunctions_inlined;
/* Scale counts of NODE edges by NUM/DEN.
   Walks into inlined clones recursively; scales direct edge counts,
   indirect edge counts and the node count itself.  */
static void
update_noncloned_counts (struct cgraph_node *node,
profile_count num, profile_count den)
{
struct cgraph_edge *e;
/* Normalize NUM/DEN for IPA scaling before applying it below.  */
profile_count::adjust_for_ipa_scaling (&num, &den);
for (e = node->callees; e; e = e->next_callee)
{
/* Inlined callees must be scaled along with their caller.  */
if (!e->inline_failed)
update_noncloned_counts (e->callee, num, den);
e->count = e->count.apply_scale (num, den);
}
/* Indirect call edges are scaled the same way as direct ones.  */
for (e = node->indirect_calls; e; e = e->next_callee)
e->count = e->count.apply_scale (num, den);
node->count = node->count.apply_scale (num, den);
}
/* We removed or are going to remove the last call to NODE.
Return true if we can and want proactively remove the NODE now.
This is important to do, since we want inliner to know when offline
@@ -93,7 +72,7 @@ can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
{
cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
if ((alias->callers && alias->callers != e)
|| !can_remove_node_now_p_1 (alias, e))
|| !can_remove_node_now_p_1 (alias, e))
return false;
}
/* FIXME: When address is taken of DECL_EXTERNAL function we still
@@ -212,7 +191,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
}
duplicate = false;
e->callee->externally_visible = false;
update_noncloned_counts (e->callee, e->count, e->callee->count);
e->callee->apply_scale (e->count, e->callee->count);
dump_callgraph_transformation (e->callee, inlining_into,
"inlining to");

View File

@@ -94,9 +94,16 @@ profile_count::dump (FILE *f, struct function *fun) const
else if (fun && initialized_p ()
&& fun->cfg
&& ENTRY_BLOCK_PTR_FOR_FN (fun)->count.initialized_p ())
fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
profile_quality_display_names[m_quality],
to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
{
if (compatible_p (ENTRY_BLOCK_PTR_FOR_FN (fun)->count))
fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
profile_quality_display_names[m_quality],
to_sreal_scale
(ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
else
fprintf (f, "%" PRId64 " (%s, incompatible with entry block count)",
m_val, profile_quality_display_names[m_quality]);
}
else
fprintf (f, "%" PRId64 " (%s)", m_val,
profile_quality_display_names[m_quality]);

View File

@@ -5757,6 +5757,12 @@ gimple_verify_flow_info (void)
error ("probability of edge from entry block not initialized");
err = true;
}
if (!EXIT_BLOCK_PTR_FOR_FN (cfun)
->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
{
error ("exit block count is not compoatible with entry block count");
err = true;
}
FOR_EACH_BB_FN (bb, cfun)
@@ -5780,6 +5786,12 @@ gimple_verify_flow_info (void)
err = true;
}
}
if (!bb->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
{
error ("count of bb %d is not compoatible with entry block count",
bb->index);
err = true;
}
/* Skip labels on the start of basic block. */
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))