mirror of
https://gcc.gnu.org/git/gcc.git
synced 2026-02-22 03:46:53 -05:00
This patch introduces a new heuristic for reordering functions, to be used in the absence of profile information. This approach uses C++ template instantiation types to group functions together. Entry functions are sorted in the beginning, and callees are sorted as part of partition_callchain (). Bootstrapped and tested on aarch64-none-linux-gnu. Signed-off-by: Prachi Godbole <pgodbole@nvidia.com> config/ChangeLog: * bootstrap-lto-locality-cpp-template.mk: New file. gcc/ChangeLog: * flag-types.h (enum lto_locality_heuristics): New enum. * ipa-locality-cloning.cc (struct templ_info): New struct. (struct locality_info): Add templ_info field. (templ_hash_map): New hash_map. (callee_templ_cmp): Ditto. (static_profile_templ_cmp): Ditto. (sort_templ_hashes_cmp): Ditto. (order_templ_hashes): Ditto. (locality_dc_template_p): Ditto. (populate_templ_info): Ditto. (create_locality_info): Call populate_templ_info. (partition_callchain): Call callee_templ_cmp. (locality_determine_static_order): Populate and sort templ_hash_map. (locality_partition_and_clone): Handle lto_locality_heuristics. (lc_execute): Initialize templ_hash_map. * params.opt: New param.
23 lines
1.1 KiB
Makefile
23 lines
1.1 KiB
Makefile
# This option enables LTO and locality partitioning for stage2 and stage3
# in slim mode.
#
# Flags shared by every stage below:
#   -flto=jobserver   enable LTO; parallel LTO jobs are scheduled through
#                     make's jobserver.
#   -frandom-seed=1   replace GCC's random seed with a fixed value
#                     (presumably so the stage comparison sees reproducible
#                     objects -- confirm against the compare step).
#
# Stage 2 and stage 3 additionally enable -fipa-reorder-for-locality and
# select the cpp_template partitioning heuristic.  Each --param continuation
# must stay attached to its assignment through the trailing backslash;
# otherwise the parameter is silently lost from the stage flags.
STAGE2_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality \
                 --param=lto-partition-locality-heuristics=cpp_template
STAGE3_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality \
                 --param=lto-partition-locality-heuristics=cpp_template

# Profile-generation and training stages use plain LTO, without locality
# reordering.
STAGEprofile_CFLAGS += -flto=jobserver -frandom-seed=1
STAGEtrain_CFLAGS += -flto=jobserver -frandom-seed=1

# The feedback stage enables locality reordering but does not select the
# cpp_template heuristic.
STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality

# assumes the host supports the linker plugin
#
# Archive/symbol tools used while building with LTO: the gcc-ar, gcc-ranlib
# and gcc-nm wrappers found in the prev-gcc directory, each passed -B so it
# looks in that same directory.
#
# $$r is written with a doubled dollar so that make hands a literal "$r" to
# the shell; the shell variable r is expected to be set by the recipe that
# uses these values (presumably the build root -- confirm against the
# top-level Makefile).  The definitions deliberately use recursive "=" so
# that $(HOST_SUBDIR) and $(exeext) are resolved at use time.
LTO_AR = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ar$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/
LTO_RANLIB = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ranlib$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/
LTO_NM = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-nm$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/

# Shell fragment that assigns and exports AR, RANLIB and NM with the LTO
# tool wrappers.  It is a single logical line: every backslash must be
# immediately followed by its continuation, otherwise the RANLIB= and NM=
# parts would be parsed as separate make assignments instead of being part
# of this value.
LTO_EXPORTS = AR="$(LTO_AR)"; export AR; \
              RANLIB="$(LTO_RANLIB)"; export RANLIB; \
              NM="$(LTO_NM)"; export NM;

# The same three tool overrides in VAR="value" form, suitable for passing
# as arguments on a make command line.
LTO_FLAGS_TO_PASS = AR="$(LTO_AR)" RANLIB="$(LTO_RANLIB)" NM="$(LTO_NM)"

# Command used to compare two files: runs the contrib/compare-lto script
# from the source tree on the shell variables $f1 and $f2.  The dollars are
# doubled so the shell, not make, expands f1/f2.
do-compare = $(SHELL) $(srcdir)/contrib/compare-lto $$f1 $$f2

# Additional file to include in the comparison: the lto1 executable under
# gcc/.
extra-compare = gcc/lto1$(exeext)