Files
gcc/config/bootstrap-lto-locality-cpp-template.mk
Prachi Godbole 348c623d7d ipa-reorder-for-locality - Introduce C++ template heuristics
This patch introduces a new heuristic for reordering functions, to be used
in the absence of profile information.  This approach uses C++ template
instantiation types to group functions together.  Entry functions are sorted
at the beginning, and callees are sorted as part of partition_callchain ().

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Prachi Godbole <pgodbole@nvidia.com>

config/ChangeLog:

	* bootstrap-lto-locality-cpp-template.mk: New file.

gcc/ChangeLog:

	* flag-types.h (enum lto_locality_heuristics): New enum.
	* ipa-locality-cloning.cc (struct templ_info): New struct.
	(struct locality_info): Add templ_info field.
	(templ_hash_map): New hash_map.
	(callee_templ_cmp): New function.
	(static_profile_templ_cmp): Ditto.
	(sort_templ_hashes_cmp): Ditto.
	(order_templ_hashes): Ditto.
	(locality_dc_template_p): Ditto.
	(populate_templ_info): Ditto.
	(create_locality_info): Call populate_templ_info.
	(partition_callchain): Call callee_templ_cmp.
	(locality_determine_static_order): Populate and sort templ_hash_map.
	(locality_partition_and_clone): Handle lto_locality_heuristics.
	(lc_execute): Initialize templ_hash_map.
	* params.opt: New param.
2026-01-20 13:54:30 +01:00

23 lines
1.1 KiB
Makefile

# Bootstrap configuration: build stage2 and stage3 with LTO (slim mode) and
# locality partitioning, selecting the cpp_template locality heuristic.
# Every stage below gets -flto=jobserver and a fixed -frandom-seed=1.
STAGE2_CFLAGS += -flto=jobserver -frandom-seed=1 \
  -fipa-reorder-for-locality \
  --param=lto-partition-locality-heuristics=cpp_template
STAGE3_CFLAGS += -flto=jobserver -frandom-seed=1 \
  -fipa-reorder-for-locality \
  --param=lto-partition-locality-heuristics=cpp_template

# The profile and train stages get plain LTO only, with no locality reordering.
STAGEprofile_CFLAGS += -flto=jobserver -frandom-seed=1
STAGEtrain_CFLAGS += -flto=jobserver -frandom-seed=1

# The feedback stage enables locality reordering but does not select the
# cpp_template heuristic.
# NOTE(review): presumably because profile data is available at this stage;
# confirm that this is intentional.
STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality
# The definitions below assume the host supports the linker plugin.
#
# Each tool is the gcc-ar / gcc-ranlib / gcc-nm wrapper found in prev-gcc,
# with -B pointing back at that same prev-gcc directory.  "$$r" reaches the
# shell as a literal "$r".
# NOTE(review): r is presumably set by the recipes of the including Makefile;
# confirm.
LTO_AR = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ar$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/
LTO_RANLIB = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ranlib$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/
LTO_NM = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-nm$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/

# Shell fragment that assigns and exports AR, RANLIB and NM.
LTO_EXPORTS = \
  AR="$(LTO_AR)"; export AR; \
  RANLIB="$(LTO_RANLIB)"; export RANLIB; \
  NM="$(LTO_NM)"; export NM;

# The same three settings, written as NAME="value" assignments.
LTO_FLAGS_TO_PASS = \
  AR="$(LTO_AR)" \
  RANLIB="$(LTO_RANLIB)" \
  NM="$(LTO_NM)"
# Comparison command: runs the contrib/compare-lto script from the source
# tree, via $(SHELL), on the shell variables $f1 and $f2 ("$$" passes a
# literal "$" through make).
# NOTE(review): presumably used by the including Makefile's stage comparison
# step; confirm there.
do-compare = $(SHELL) $(srcdir)/contrib/compare-lto $$f1 $$f2
# NOTE(review): looks like an extra file (the lto1 executable) to include in
# that comparison; confirm against the including Makefile.
extra-compare = gcc/lto1$(exeext)