lto: Allow other partitionings for toplevel assembly

For balanced and max partitioning this adds proper partitioning of asm
and related symbols.

The special symbols are partitioned with 1to1 and joined together if
there is no name conflict. All other symbols are partitioned with the
requested partitioning.
In typical usage, with a small number of toplevel asm statements and no name
conflicts, all special symbols will be in a single first partition.
Balanced partitioning will then continue filling the last asm partition.

gcc/lto/ChangeLog:

	* lto-partition.cc (join_partitions): Declare.
	(lto_1_to_1_map): Split out to..
	(map_1_to_1): ..here.
	(create_asm_partition): Replaced by..
	(create_asm_partitions): ..this.
	(lto_max_map): Use new create_asm_partitions.
	(lto_balanced_map): Use new create_asm_partitions.

gcc/testsuite/ChangeLog:

	* gcc.dg/lto/toplevel-extended-asm-2_0.c: More partitionings.
	* gcc.dg/lto/toplevel-extended-asm-2_1.c: Likewise.
This commit is contained in:
Michal Jires
2025-12-03 02:16:54 +01:00
parent 666d330268
commit a988ab269e
3 changed files with 123 additions and 27 deletions

View File

@@ -45,6 +45,8 @@ vec<ltrans_partition> ltrans_partitions;
static void add_symbol_to_partition (ltrans_partition part,
toplevel_node *node);
static ltrans_partition join_partitions (ltrans_partition into,
ltrans_partition from);
/* Helper for qsort; compare partitions and return one with smaller order. */
@@ -364,11 +366,10 @@ node_into_file_partition (toplevel_node* node,
add_symbol_to_partition (partition, node);
}
/* Group cgrah nodes by input files. This is used mainly for testing
right now. */
void
lto_1_to_1_map (void)
/* Group cgraph nodes by input files. Used for symbols that must remain
together. */
static void
map_1_to_1 (bool forced_symbols_only)
{
symtab_node *node;
hash_map<lto_file_decl_data *, ltrans_partition> pmap;
@@ -379,6 +380,10 @@ lto_1_to_1_map (void)
|| symbol_partitioned_p (node))
continue;
if (forced_symbols_only && !node->must_remain_in_tu_name
&& !node->must_remain_in_tu_body && !node->no_reorder)
continue;
node_into_file_partition (node, pmap);
}
@@ -386,29 +391,90 @@ lto_1_to_1_map (void)
for (anode = symtab->first_asm_symbol (); anode; anode = safe_as_a<asm_node*>(anode->next))
node_into_file_partition (anode, pmap);
create_partition_if_empty ();
/* Order partitions by order of symbols because they are linked into binary
that way. */
ltrans_partitions.qsort (cmp_partitions_order);
}
/* Creates partition with all toplevel assembly.
/* Group cgraph nodes by input files. This is used mainly for testing
right now. */
Before toplevel asm could be partitioned, all toplevel asm was inserted
into first partition.
This function achieves similar behavior for partitionings that cannot
easily satisfy requirements of toplevel asm. */
static void
create_asm_partition (void)
void
lto_1_to_1_map (void)
{
struct asm_node *anode = symtab->first_asm_symbol ();
if (anode)
map_1_to_1 (false);
create_partition_if_empty ();
}
/* Toplevel assembly and symbols referenced by it can be required to remain
in the same partition and not be renamed.
noreorder symbols are also handled here to keep their order with respect to
these symbols.
This function partitions these symbols with 1to1 partitioning and joins
translation units up to target_size as long as there is no name conflict.
Remaining symbols can be partitioned with any strategy. */
static void
create_asm_partitions (int64_t target_size)
{
if (!symtab->first_asm_symbol ())
return;
map_1_to_1 (true);
size_t join_into = 0;
size_t join_from = 0;
hash_set<const char*> nonrenameable_symbols;
lto_symtab_encoder_iterator lsei;
for (; join_from < ltrans_partitions.length (); join_into++)
{
ltrans_partition partition = new_partition ("asm_nodes");
for (; anode; anode = safe_as_a<asm_node*>(anode->next))
add_symbol_to_partition (partition, anode);
ltrans_partitions[join_into] = ltrans_partitions[join_from];
ltrans_partition p_into = ltrans_partitions[join_into];
nonrenameable_symbols.empty ();
bool first_partition = true;
for (; join_from < ltrans_partitions.length (); join_from++)
{
ltrans_partition p_from = ltrans_partitions[join_from];
if (p_into->insns > target_size)
break;
lto_symtab_encoder_t encoder = p_from->encoder;
/* All symbols that cannot be renamed and might collide. */
for (lsei = lsei_start (encoder); !lsei_end_p (lsei);
lsei_next (&lsei))
{
toplevel_node* tnode = lsei_node (lsei);
if (symtab_node *snode = dyn_cast <symtab_node*> (tnode))
{
if (snode->must_remain_in_tu_name)
if (nonrenameable_symbols.add (snode->asm_name ()))
goto finish_partition;
}
else if (asm_node *anode = dyn_cast <asm_node*> (tnode))
{
symtab_node* snode;
unsigned i = 0;
/* This covers symbols defined by extended assembly. */
for (; anode->symbols_referenced.iterate (i, &snode); i++)
{
if (snode->must_remain_in_tu_name)
if (nonrenameable_symbols.add (snode->asm_name ()))
goto finish_partition;
}
}
}
first_partition = false;
if (p_into != p_from)
join_partitions (p_into, p_from);
}
finish_partition: {}
if (first_partition)
join_from++;
}
ltrans_partitions.truncate (join_into);
}
/* Maximal partitioning. Put every new symbol into new partition if possible. */
@@ -419,6 +485,9 @@ lto_max_map (void)
symtab_node *node;
ltrans_partition partition;
/* Needed for toplevel assembly. */
map_1_to_1 (true);
FOR_EACH_SYMBOL (node)
{
if (node->get_partitioning_class () != SYMBOL_PARTITION
@@ -428,7 +497,6 @@ lto_max_map (void)
add_symbol_to_partition (partition, node);
}
create_asm_partition ();
create_partition_if_empty ();
}
@@ -1169,11 +1237,34 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
if (partition_size < param_min_partition_size)
partition_size = param_min_partition_size;
npartitions = 1;
partition = new_partition ("");
if (dump_file)
fprintf (dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n",
total_size, partition_size);
create_asm_partitions (partition_size);
if (ltrans_partitions.length ())
{
int64_t insns = 0;
unsigned partitions = ltrans_partitions.length ();
for (unsigned i = 0; i + 1 < partitions ; i++)
insns += ltrans_partitions[i]->insns;
total_size -= insns;
if (partition_size)
n_lto_partitions -= insns / partition_size;
if (n_lto_partitions < 1)
n_lto_partitions = 1;
partition_size = total_size / n_lto_partitions;
if (partition_size < param_min_partition_size)
partition_size = param_min_partition_size;
partition = ltrans_partitions[partitions - 1];
}
else
partition = new_partition ("");
auto_vec<symtab_node *> next_nodes;
for (unsigned i = 0; i < order.length (); i++)
@@ -1434,11 +1525,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
next_nodes.safe_push (noreorder[noreorder_pos++]);
/* For one partition the cost of boundary should be 0 unless we added final
symbols here (these are not accounted) or we have accounting bug. */
gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost || best_cost == -1);
gcc_assert (next_nodes.length () || ltrans_partitions.length () != 1
|| !best_cost || best_cost == -1);
add_sorted_nodes (next_nodes, partition);
create_asm_partition ();
if (dump_file)
{
fprintf (dump_file, "\nPartition sizes:\n");

View File

@@ -1,10 +1,13 @@
/* { dg-lto-do link } */
/* { dg-lto-options {{-O2 -flto -flto-partition=1to1} } } */
/* { dg-lto-options {{-O2 -flto -flto-partition=1to1} {-O2 -flto -flto-partition=max} {-O2 -flto -flto-partition=cache}} } */
extern int use_statics ();
extern int asm_var;
static int a;
asm (".local %cc0\n %cc0:" :: ":"(&a));
int main() {
return use_statics ();
return a + use_statics ();
}

View File

@@ -6,7 +6,10 @@ asm("%cc0:" :: ":" (&static_asm_var));
extern int asm_var;
asm("%cc0:" :: ":" (&asm_var));
static int a;
asm (".local %cc0\n %cc0:" :: ":"(&a));
int use_statics () {
static_asm_fn ();
return static_asm_var;
return static_asm_var + a;
}