mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 12:00:11 -05:00
Fix profile updating in ipa-cp
Bootstrapping with autoprofiledbootstrap, LTO and checking enables ICEs in WPA because we end up mixing local and IPA count in ipa-cp.cc:update_specialized_profile. This is because of missing call to profile_count::adjust_for_ipa_scaling. While looking into that I however noticed that the function forgets to update indirect call edges. This made me to commonize same logic which currently exists in clone_inlined_nodes, update_specialized_profile, update_profiling_info and update_counts_for_self_gen_clones. While testing it I noticed that we also ICE when linking with -fdump-ipa-all-details-blocks since IPA and local counts are temporarily mixed during IPA transformation stage, so I also added check to profile_count::dump to not crash and added verifier to gimple_verify_flow_info. Other problem I also noticed is that while profile updates done by inliner (via cgraph_node::clone) are correctly using global0 profiles instead of erasing profile completely when IPA counts drops to 0, the scaling in ipa-cp is not doing that, so we lose info and possibly some code quality. I will fix that incrementally. Similarly ipa-split, when offlining region with 0 entry count may re-do frequency propagation to get something useful. gcc/ChangeLog: * cgraph.cc (cgraph_node::apply_scale): New member function. * cgraph.h (struct cgraph_node): declare. * ipa-cp.cc (update_counts_for_self_gen_clones): Use cgraph_node::apply_scale. (update_profiling_info): Do not overwrite local profile when dropping to 0 global profile. (update_specialized_profile): Likewise. * ipa-inline-transform.cc (update_noncloned_counts): Remove. (can_remove_node_now_p_1): Fix formating. (clone_inlined_nodes): Use cgraph_node::apply_scale. * profile-count.cc (profile_count::dump): Do not ICE when count is not compatible with entry block count. * tree-cfg.cc (gimple_verify_flow_info): Check compatibility of count and entry block count.
This commit is contained in:
@@ -179,6 +179,26 @@ cgraph_node::function_version (void)
|
||||
return cgraph_fnver_htab->find (&key);
|
||||
}
|
||||
|
||||
/* Scale profile by NUM/DEN. Walk into inlined clones. */
|
||||
|
||||
void
|
||||
cgraph_node::apply_scale (profile_count num, profile_count den)
|
||||
{
|
||||
struct cgraph_edge *e;
|
||||
|
||||
profile_count::adjust_for_ipa_scaling (&num, &den);
|
||||
|
||||
for (e = callees; e; e = e->next_callee)
|
||||
{
|
||||
if (!e->inline_failed)
|
||||
e->callee->apply_scale (num, den);
|
||||
e->count = e->count.apply_scale (num, den);
|
||||
}
|
||||
for (e = indirect_calls; e; e = e->next_callee)
|
||||
e->count = e->count.apply_scale (num, den);
|
||||
count = count.apply_scale (num, den);
|
||||
}
|
||||
|
||||
/* Insert a new cgraph_function_version_info node into cgraph_fnver_htab
|
||||
corresponding to cgraph_node NODE. */
|
||||
cgraph_function_version_info *
|
||||
|
||||
@@ -1256,6 +1256,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
|
||||
it is not used in any other non-standard way. */
|
||||
bool only_called_directly_p (void);
|
||||
|
||||
/* Scale profile by NUM/DEN. Walk into inlined clones. */
|
||||
void apply_scale (profile_count num, profile_count den);
|
||||
|
||||
/* Return true when function is only called directly or it has alias.
|
||||
i.e. it is not externally visible, address was not taken and
|
||||
it is not used in any other non-standard way. */
|
||||
|
||||
@@ -4666,19 +4666,12 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
|
||||
unsigned i = 0;
|
||||
for (cgraph_node *n : self_gen_clones)
|
||||
{
|
||||
profile_count orig_count = n->count;
|
||||
profile_count new_count
|
||||
= (redist_sum / self_gen_clones.length () + other_edges_count[i]);
|
||||
new_count = lenient_count_portion_handling (new_count, orig_node);
|
||||
n->count = new_count;
|
||||
profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
|
||||
n->apply_scale (new_count, n->count);
|
||||
for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
|
||||
{
|
||||
cs->count = cs->count.apply_scale (new_count, orig_count);
|
||||
processed_edges.add (cs);
|
||||
}
|
||||
for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
|
||||
cs->count = cs->count.apply_scale (new_count, orig_count);
|
||||
processed_edges.add (cs);
|
||||
|
||||
i++;
|
||||
}
|
||||
@@ -4811,20 +4804,24 @@ update_profiling_info (struct cgraph_node *orig_node,
|
||||
/* The NEW_NODE count and counts of all its outgoing edges
|
||||
are still unmodified copies of ORIG_NODE's. Just clear
|
||||
the latter and bail out. */
|
||||
profile_count zero;
|
||||
if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
|
||||
zero = profile_count::zero ().guessed_local ();
|
||||
orig_node->count = orig_node->count.guessed_local ();
|
||||
else
|
||||
zero = profile_count::adjusted_zero ();
|
||||
orig_node->count = zero;
|
||||
orig_node->count = orig_node->count.global0adjusted ();
|
||||
for (cgraph_edge *cs = orig_node->callees;
|
||||
cs;
|
||||
cs = cs->next_callee)
|
||||
cs->count = zero;
|
||||
if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
|
||||
cs->count = orig_node->count.guessed_local ();
|
||||
else
|
||||
cs->count = orig_node->count.global0adjusted ();
|
||||
for (cgraph_edge *cs = orig_node->indirect_calls;
|
||||
cs;
|
||||
cs = cs->next_callee)
|
||||
cs->count = zero;
|
||||
if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
|
||||
cs->count = orig_node->count.guessed_local ();
|
||||
else
|
||||
cs->count = orig_node->count.global0adjusted ();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -4874,26 +4871,12 @@ update_profiling_info (struct cgraph_node *orig_node,
|
||||
orig_node);
|
||||
|
||||
new_sum = orig_node_count.combine_with_ipa_count (new_sum);
|
||||
new_node->count = new_sum;
|
||||
orig_node->count = remainder;
|
||||
|
||||
profile_count orig_new_node_count = orig_node_count;
|
||||
profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
|
||||
for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
|
||||
cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
|
||||
for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
|
||||
cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
|
||||
new_node->apply_scale (new_sum, new_node->count);
|
||||
|
||||
if (!orig_edges_processed)
|
||||
{
|
||||
profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
|
||||
for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
|
||||
cs->count = cs->count.apply_scale (remainder, orig_node_count);
|
||||
for (cgraph_edge *cs = orig_node->indirect_calls;
|
||||
cs;
|
||||
cs = cs->next_callee)
|
||||
cs->count = cs->count.apply_scale (remainder, orig_node_count);
|
||||
}
|
||||
orig_node->apply_scale (remainder, orig_node->count);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
@@ -4911,35 +4894,25 @@ update_specialized_profile (struct cgraph_node *new_node,
|
||||
struct cgraph_node *orig_node,
|
||||
profile_count redirected_sum)
|
||||
{
|
||||
struct cgraph_edge *cs;
|
||||
profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, " the sum of counts of redirected edges is ");
|
||||
redirected_sum.dump (dump_file);
|
||||
fprintf (dump_file, "\n old ipa count of the original node is ");
|
||||
orig_node_count.dump (dump_file);
|
||||
orig_node->count.dump (dump_file);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
if (!orig_node_count.nonzero_p ())
|
||||
if (!orig_node->count.ipa ().nonzero_p ()
|
||||
|| !redirected_sum.nonzero_p ())
|
||||
return;
|
||||
|
||||
new_node_count = new_node->count;
|
||||
new_node->count += redirected_sum;
|
||||
orig_node->count
|
||||
= lenient_count_portion_handling (orig_node->count - redirected_sum,
|
||||
orig_node);
|
||||
orig_node->apply_scale
|
||||
(lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
|
||||
orig_node),
|
||||
orig_node->count);
|
||||
|
||||
for (cs = new_node->callees; cs; cs = cs->next_callee)
|
||||
cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
|
||||
|
||||
for (cs = orig_node->callees; cs; cs = cs->next_callee)
|
||||
{
|
||||
profile_count dec = cs->count.apply_scale (redirected_sum,
|
||||
orig_node_count);
|
||||
cs->count -= dec;
|
||||
}
|
||||
new_node->apply_scale (new_node->count.ipa () + redirected_sum,
|
||||
new_node->count);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
|
||||
@@ -58,27 +58,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
int ncalls_inlined;
|
||||
int nfunctions_inlined;
|
||||
|
||||
/* Scale counts of NODE edges by NUM/DEN. */
|
||||
|
||||
static void
|
||||
update_noncloned_counts (struct cgraph_node *node,
|
||||
profile_count num, profile_count den)
|
||||
{
|
||||
struct cgraph_edge *e;
|
||||
|
||||
profile_count::adjust_for_ipa_scaling (&num, &den);
|
||||
|
||||
for (e = node->callees; e; e = e->next_callee)
|
||||
{
|
||||
if (!e->inline_failed)
|
||||
update_noncloned_counts (e->callee, num, den);
|
||||
e->count = e->count.apply_scale (num, den);
|
||||
}
|
||||
for (e = node->indirect_calls; e; e = e->next_callee)
|
||||
e->count = e->count.apply_scale (num, den);
|
||||
node->count = node->count.apply_scale (num, den);
|
||||
}
|
||||
|
||||
/* We removed or are going to remove the last call to NODE.
|
||||
Return true if we can and want proactively remove the NODE now.
|
||||
This is important to do, since we want inliner to know when offline
|
||||
@@ -93,7 +72,7 @@ can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
|
||||
{
|
||||
cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
|
||||
if ((alias->callers && alias->callers != e)
|
||||
|| !can_remove_node_now_p_1 (alias, e))
|
||||
|| !can_remove_node_now_p_1 (alias, e))
|
||||
return false;
|
||||
}
|
||||
/* FIXME: When address is taken of DECL_EXTERNAL function we still
|
||||
@@ -212,7 +191,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
|
||||
}
|
||||
duplicate = false;
|
||||
e->callee->externally_visible = false;
|
||||
update_noncloned_counts (e->callee, e->count, e->callee->count);
|
||||
e->callee->apply_scale (e->count, e->callee->count);
|
||||
|
||||
dump_callgraph_transformation (e->callee, inlining_into,
|
||||
"inlining to");
|
||||
|
||||
@@ -94,9 +94,16 @@ profile_count::dump (FILE *f, struct function *fun) const
|
||||
else if (fun && initialized_p ()
|
||||
&& fun->cfg
|
||||
&& ENTRY_BLOCK_PTR_FOR_FN (fun)->count.initialized_p ())
|
||||
fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
|
||||
profile_quality_display_names[m_quality],
|
||||
to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
|
||||
{
|
||||
if (compatible_p (ENTRY_BLOCK_PTR_FOR_FN (fun)->count))
|
||||
fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
|
||||
profile_quality_display_names[m_quality],
|
||||
to_sreal_scale
|
||||
(ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
|
||||
else
|
||||
fprintf (f, "%" PRId64 " (%s, incompatible with entry block count)",
|
||||
m_val, profile_quality_display_names[m_quality]);
|
||||
}
|
||||
else
|
||||
fprintf (f, "%" PRId64 " (%s)", m_val,
|
||||
profile_quality_display_names[m_quality]);
|
||||
|
||||
@@ -5757,6 +5757,12 @@ gimple_verify_flow_info (void)
|
||||
error ("probability of edge from entry block not initialized");
|
||||
err = true;
|
||||
}
|
||||
if (!EXIT_BLOCK_PTR_FOR_FN (cfun)
|
||||
->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
|
||||
{
|
||||
error ("exit block count is not compoatible with entry block count");
|
||||
err = true;
|
||||
}
|
||||
|
||||
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
@@ -5780,6 +5786,12 @@ gimple_verify_flow_info (void)
|
||||
err = true;
|
||||
}
|
||||
}
|
||||
if (!bb->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
|
||||
{
|
||||
error ("count of bb %d is not compoatible with entry block count",
|
||||
bb->index);
|
||||
err = true;
|
||||
}
|
||||
|
||||
/* Skip labels on the start of basic block. */
|
||||
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
|
||||
Reference in New Issue
Block a user